-
Notifications
You must be signed in to change notification settings - Fork 0
/
simraw.R
268 lines (224 loc) · 8.95 KB
/
simraw.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
## Load the libraries
library(ggplot2)
library(INLA)
library(spdep) # poly2nb
library(sp) # over
library("RColorBrewer") # pallete
library(matrixStats) # rowMeans, rowQuantiles, etc
library(data.table) # between CI
library(knitr) # table
library(rgeoboundaries) # gb_adm
library(sf)
library(raster) # to read the population object
library(dplyr) # left_join
library(maptools) # unionSpatialPolygon
library(fields) # image
#library(miceadds) # source.all
library(RandomFields) # GF
# schell script inputs, 16 scenarios, 3 models (iid, bym2, spde)
input = commandArgs(trailingOnly=TRUE)
# scenario.idx = as.integer(input[1])
# model = input[2]
scenario.idx = 13
model = "bym2"
print(scenario.idx)
print(model)
## Folder structure
## should contain folders Results, Code/auxiliary_functions and Data with pop.RData
## Load data
load("Data/pop.RData")
# source.all("Code/auxiliary_functions/") # miceadds library
source("Code/auxiliary_functions/areal_mesh_param.R")
source("Code/auxiliary_functions/create_matrix.R")
source("Code/auxiliary_functions/create_mesh.R")
source("Code/auxiliary_functions/create_stack.R")
source("Code/auxiliary_functions/fit_spde.R")
source("Code/auxiliary_functions/extractCoords.R")
source("Code/auxiliary_functions/MC_samples.R")
source("Code/auxiliary_functions/scenario_param.R")
## Scenarios
map <- st_geometry(gb_adm1("nigeria"))
n = length(map)
map <- as_Spatial(map, IDs =as.character(1:n))
# Transform the CRS to km
map.km <- spTransform(
map, CRS('+proj=utm +zone=23 +south +ellps=GRS80 +units=km'))
scenarios = scenario_param(sp = map.km)
weights = "pop.int"
n.rep = 1
range = scenarios[[scenario.idx]]$range
sigma2 = scenarios[[scenario.idx]]$sigma2u
beta0 = scenarios[[scenario.idx]]$beta0
adm.lev = scenarios[[scenario.idx]]$adm.lev
## Administrative boundaries
if(adm.lev == 1){
map <- st_geometry(gb_adm1("nigeria"))
} else {
map <- st_geometry(gb_adm2("nigeria"))
}
n = length(map)
map <- as_Spatial(map, IDs =as.character(1:n))
# Transform the CRS to km
map.km <- spTransform(
map, CRS('+proj=utm +zone=23 +south +ellps=GRS80 +units=km'))
## Data
pop.dat.ng <- crop(pop.dat.km, map.km)
# Create a SpatialPolygonsDataFrame
df = data.frame(lat = coordinates(map.km)[,1],
lon = coordinates(map.km)[,2],
ID = seq(1:length(map.km)),
row.names = 1:length(map.km))
# Polygons ids are ID i
mapdf.km = SpatialPolygonsDataFrame(map.km, df)
## Simulation of the CG model
# 1-2. GF at the population raster locations
coo = xyFromCell(pop.dat.km, 1:ncell(pop.dat.km)) # coordinates from all cells of raster r
colnames(coo) = c("coords.x1", "coords.x2")
m.model <- RMmatern(nu = 1, var = sigma2, scale = range/2)
u <- RFsimulate(m.model, x= coo[,1], y= coo[,2])
r.GF = pop.dat.km # copy the raster but change the values
values(r.GF) <- u@data$variable1 # assign the data values to the raster object
# 3. linear predictor
eta = beta0 + values(r.GF)
# 4. relative risk surface
p.true = exp(eta)/(1+exp(eta))
pop = values(pop.dat.km)
p.av.true = sum(pop*p.true)/sum(pop) # average probability
rr.true = p.true/p.av.true # relative risk
r.RRt = pop.dat.km
values(r.RRt) = rr.true
r.RRt <- mask(r.RRt, map.km)
# 5. disease counts based on true p and n
ysim = rpois(dim(coo)[1], lambda = pop*p.true)
r.y <- pop.dat.km
values(r.y) <- ysim
# 6. pop and counts aggregation
mapdf.km@data$pop <- extract(pop.dat.km, map.km, fun = sum, na.rm = TRUE)
mapdf.km@data$count <- extract(r.y, map.km, fun = sum, na.rm = TRUE)
# 7. expected counts
E = mapdf.km@data$pop*sum(mapdf.km@data$count)/sum(mapdf.km@data$pop)
mapdf.km@data$E <- E
## Define precision matrix once for all simulations
if(model == "bym2"){
# create rook (right) adjacency matrix, the simplest, condiering neighbours
adj.r <- poly2nb(map.km, queen = FALSE) # not a queen
# convert nb object to graph object compatible with INLA, save it and read it
nb2INLA(file.path("Data/graph", eval(scenario.idx), eval(model), fsep = ""), adj.r)
ng.graph = inla.read.graph(file.path("Data/graph", eval(scenario.idx), eval(model), fsep = ""))
}
## Define mesh, spde model, projector and prediction matrix once for all simulations
if(model == "spde"){
# mesh
param = areal_mesh_param(spdf = map.km)
mesh = create_mesh(spdf = map.km, max.edge = param$max.edge, cutoff = param$cutoff,
offset = param$offset, plot = F)
# spde model
spde <- inla.spde2.pcmatern(
mesh, alpha=2,
prior.sigma=c(1, 0.01), # P(sigma > 1) = 0.01,
prior.range=c(range, 0.5)) # P(r < range) = 0.5
# projector matrix
if(weights == "equal" ){
Aa <- create_matrix(sp = map.km, mesh = mesh, weights = "equal")
} else {
Aa <- create_matrix(sp = map.km, mesh = mesh, weights = "pop.int", pop.raster = pop.dat.ng)
}
# prediction lattice and matrix
# coo are the coordinates of the population raster
Ap = inla.spde.make.A(mesh = mesh, loc = coo)
}
## Simualtion study
# globally defined
# m.model, coo, r.GF, pop, r.RRt, r.y, mapdf.km@data$pop pop.dat.km, map.km, r.RRm
sim.coverage = function(id) {
## simulate the data
u <- RFsimulate(m.model, x= coo[,1], y= coo[,2]) # update the GF values
values(r.GF) <- u@data$variable1
# 3. linear predictor
eta = beta0 + values(r.GF)
# 4. relative risk surface
p.true = exp(eta)/(1+exp(eta))
p.av.true = sum(pop*p.true)/sum(pop) # average probability
rr.true = p.true/p.av.true # relative risk
values(r.RRt) = rr.true # update the simulated relative risk values
r.RRt <- mask(r.RRt, map.km) # contains NA
# 5. disease counts based on true p and n
ysim = rpois(dim(coo)[1], lambda = pop*p.true)
values(r.y) <- ysim
# 6. pop and counts aggregation
mapdf.km@data$count <- extract(r.y, map.km, fun = sum, na.rm = TRUE)
# 7. expected counts
E = mapdf.km@data$pop*sum(mapdf.km@data$count)/sum(mapdf.km@data$pop)
mapdf.km@data$E <- E
## INFERENCE and prediction
if(model == "spde"){
stack.a <- create_stack(tag = "area", matrix = Aa, spdf = mapdf.km, spde = spde)
res <- fit_spde(spde = spde, stack = stack.a)
# prediction and CI
rrMC = MC_samples(result = res, N = 2e3, Ap = Ap)
r.RRm <- r.RRt
values(r.RRm) <- rrMC$rr$mean
r.RRm = mask(r.RRm, map.km) # contains NA
r.RRql = r.RRqu = r.RRm
values(r.RRql) = rrMC$rr$q1
r.RRql = mask(r.RRql, map.km)
values(r.RRqu) = rrMC$rr$q9
r.RRqu = mask(r.RRqu, map.km)
CI <- cbind(values(r.RRql), values(r.RRqu))
} else {
if(model == "bym2"){
formula.model <- count ~ 1 + f(ID, model = "bym2", graph = ng.graph,
constr = T, scale.model = T,
hyper=list(prec=list(prior="pc.prec", param=c(1, 0.01)), # P(sd >1) = 0.01
phi =list(prior="pc", param=c(0.5, 0.5)))) # P(phi <0.5) = 0.5)
} else {
formula.model <- count ~ 1 + f(ID, model = "iid")
}
res = inla(formula = formula.model,
family = "poisson",
data = mapdf.km@data,
E = E,
control.compute = list(dic = T, waic = T, cpo = T, config = TRUE),
control.predictor = list(compute = TRUE),
num.threads="4:1",
quantiles = c(0.1, 0.9))
# prediction and CI
mapdf.km@data$RR.m <- res$summary.fitted.values[, "mean"]
mapdf.km@data$RR.qu <- res$summary.fitted.values[, "0.9quant"]
mapdf.km@data$RR.ql <- res$summary.fitted.values[, "0.1quant"]
r.RRm <- rasterize(mapdf.km, pop.dat.km, field = "RR.m", fun = "sum") # has NA
r.RRql <- rasterize(mapdf.km, pop.dat.km, field = "RR.ql", fun = "sum") # CI
r.RRqu <- rasterize(mapdf.km, pop.dat.km, field = "RR.qu", fun = "sum") # CI
CI <- cbind(values(r.RRql), values(r.RRqu))
}
## model performance measures
# absolute error
ae = abs(values(r.RRt) - values(r.RRm)) # true - posterior mean
# credible interval lenght
cov.len = CI[,2] - CI[,1]
# binary coverage vector
cov.bin <- c()
for(idx in 1:length(r.RRt)){
cov.bin[idx] = between(values(r.RRt)[idx], CI[idx, 1], CI[idx, 2])
}
# save the perf measures for cells inside spatial polygons
return(list(cov.bin = cov.bin[!is.na(cov.bin)],
cov.len = cov.len[!is.na(cov.len)],
ae = ae[!is.na(ae)]))
}
## Run the model for n.rep number of datasets
res.sim.raw = mclapply(as.list(1:n.rep), mc.cores=3, FUN = sim.coverage)
m = length(res.sim.raw[[1]]$cov.bin)
## Extract model performance matrices of dimension n.rep x m
cov.bin <- matrix(NA, nrow = n.rep, ncol = m)
cov.len <- matrix(NA, nrow = n.rep, ncol = m)
ae <- matrix(NA, nrow = n.rep, ncol = m)
for(id in 1:n.rep){
# index of the list corresponds to the sim index
cov.bin[id, ] = res.sim.raw[[id]]$cov.bin
cov.len[id, ] = res.sim.raw[[id]]$cov.len
ae[id, ] = res.sim.raw[[id]]$ae
}
## Save the results
save(cov.bin, cov.len, ae, file = file.path( "Results/mp", eval(scenario.idx),
eval(model), ".RData", fsep = ""))