Skip to content

Commit

Permalink
improved performance of write.pop.projection.summary
Browse files Browse the repository at this point in the history
  • Loading branch information
hanase committed Oct 19, 2024
1 parent 8699df4 commit e082b89
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 47 deletions.
4 changes: 3 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
10.0-1.900x (09/18/2024)
10.0-1.90xx (10/18/2024)
------
Added vwBaseYear2024 dataset.

Performance improvements in write.pop.projection.summary().

Incorporated use of 2024 migration dataset by age.

pop.predict.subnat can handle 1x1 subnational projections.
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: bayesPop
Type: Package
Title: Probabilistic Population Projection
Version: 10.0-1.9017
Date: 2024-09-19
Version: 10.0-1.9018
Date: 2024-10-18
Author: Hana Sevcikova, Adrian Raftery, Thomas Buettner
Maintainer: Hana Sevcikova <[email protected]>
Depends: R (>= 3.5.0), bayesTFR (>= 7.1-0), bayesLife (>= 5.0-0), MortCast (>= 2.6-1)
Expand Down
28 changes: 23 additions & 5 deletions R/get_outputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ is.saved.pi <- function(pop.pred, pi, warning=TRUE) {
}

get.pop.traj.quantiles.byage <- function(quantile.array, pop.pred, country.index, country.code, year.index,
trajectories=NULL, pi=80, q=NULL, ...) {
trajectories=NULL, pi=80, q=NULL, reload = FALSE, ...) {
# quantile.array should be 4d-array (country x age x quantiles x time)
al <- if(!is.null(q)) q else c((1-pi/100)/2, (1+pi/100)/2)
found <- FALSE
Expand All @@ -415,12 +415,30 @@ get.pop.traj.quantiles.byage <- function(quantile.array, pop.pred, country.index
}
}
if(!found) { # non-standard quantiles
if(is.null(trajectories)) {
if(is.null(trajectories) && !reload) {
warning('Quantiles not found')
return(NULL)
}
cqp <- apply(trajectories[,year.index,,drop=FALSE], 1,
quantile, al, na.rm = TRUE)
do.reload <- FALSE
if (is.null(trajectories)) {
if(pop.pred$nr.traj > 0) do.reload <- TRUE
} else {
if (dim(trajectories)[3] < 2000 && pop.pred$nr.traj > dim(trajectories)[3] && reload) do.reload <- TRUE
}
if(do.reload) {
#load 2000 trajectories maximum (quantiles are computed from all trajectories)
traj.reload <- get.pop.trajectories.multiple.age(pop.pred, country.code, nr.traj=2000, ...)
trajectories <- traj.reload$trajectories
}
if (!is.null(trajectories)) {
if(length(dim(trajectories)) < 3) trajectories <- abind(trajectories, along=3) # only one trajectory
if(length(year.index) == 1){
cqp <- apply(trajectories[,year.index,,drop=FALSE], 1, quantile, al, na.rm = TRUE)
} else cqp <- apply(trajectories[,year.index,,drop=FALSE], c(1,2), quantile, al, na.rm = TRUE)
} else {
warning('Quantiles not found')
return(NULL)
}
}
return(cqp)
}
Expand All @@ -429,7 +447,7 @@ get.pop.traj.quantiles.byage <- function(quantile.array, pop.pred, country.index
get.pop.traj.quantiles <- function(quantile.array, pop.pred, country.index=NULL, country.code=NULL,
trajectories=NULL, pi=80, q=NULL, reload=TRUE, ...) {
# quantile.array should be 3d-array (country x quantiles x time).
# If country.index is NULL or there is just one country in the prediciton object,
# If country.index is NULL or there is just one country in the prediction object,
# the country dimension can be omitted
al <- if(!is.null(q)) q else c((1-pi/100)/2, (1+pi/100)/2)
found <- FALSE
Expand Down
115 changes: 78 additions & 37 deletions R/predict.pop.R
Original file line number Diff line number Diff line change
Expand Up @@ -2512,44 +2512,79 @@ write.expression <- function(pop.pred, expression, output.dir, file.suffix='expr
if(byage) age.index <- !age.index
ages <- 1:length(pop.pred$ages)
if(adjust && is.null(pop.pred$adjust.env)) pop.pred$adjust.env <- new.env()
age.lables <- get.age.labels(pop.pred$ages, single.year = pop.pred$annual)
sex <- NULL # to make CRAN check happy
all.quantiles <- NULL
if(byage && bysex && is.null(vital.event)){
# preload the quantiles (saves tons of time)
all.quantiles[["male"]] <- .get.pop.quantiles(pop.pred, what='Mage', adjust=adjust, allow.negative.adj = allow.negative.adj)
all.quantiles[["female"]] <- .get.pop.quantiles(pop.pred, what='Fage', adjust=adjust, allow.negative.adj = allow.negative.adj)
}
subtract.from.age <- 0
observed.data <- NULL
for (country in 1:nrow(pop.pred$countries)) {
country.obj <- get.country.object(country, country.table=pop.pred$countries, index=TRUE)
for(sex in c('both', 'male', 'female')[sex.index]) {
for(sx in c('both', 'male', 'female')[sex.index]) {
quant.all.ages <- NULL
if(!is.null(vital.event)) {
sum.over.ages <- age.index[1]
if(include.observed)
observed <- get.popVE.trajectories.and.quantiles(pop.pred, country.obj$code, event=vital.event,
sex=sex, age='all', sum.over.ages=sum.over.ages, is.observed=TRUE)
sex=sx, age='all', sum.over.ages=sum.over.ages, is.observed=TRUE)
traj.and.quantiles <- get.popVE.trajectories.and.quantiles(pop.pred, country.obj$code, event=vital.event,
sex=sex, age='all', sum.over.ages=sum.over.ages)
sex=sx, age='all', sum.over.ages=sum.over.ages)
if(is.null(traj.and.quantiles$trajectories)) {
warning('Problem with loading ', vital.event, '. Possibly no vital events stored during prediction.')
return()
}
if(!sum.over.ages) { # This is because births have only subset of ages
if(!sum.over.ages) {
quant.all.ages[["50"]] <- traj.and.quantiles$quantiles[,"0.5",]
quant.all.ages[["80"]] <- abind(traj.and.quantiles$quantiles[,"0.1",],
traj.and.quantiles$quantiles[,"0.9",],
along = 0)
quant.all.ages[["95"]] <- abind(traj.and.quantiles$quantiles[,"0.025",],
traj.and.quantiles$quantiles[,"0.975",],
along = 0)
# This is because births have only subset of ages
ages <- traj.and.quantiles$age.idx.raw
age.index <- age.index[1:(length(ages)+1)]
subtract.from.age <- traj.and.quantiles$age.idx.raw[1]-traj.and.quantiles$age.idx[1]
}

}
} else {
if(sx == "both" && byage){ # if sx is not 'both', then we already have the quantiles pre-computed in all.quantiles
traj <- get.pop.trajectories.multiple.age(pop.pred, country.obj$code, nr.traj=2000, sex = sx, adjust = adjust)$trajectories
quant.all.ages[["50"]] <- get.pop.traj.quantiles.byage(NULL, pop.pred, country.obj$index, country.obj$code, q=0.5,
trajectories=traj, year.index = 1:nr.proj, sex=sx)
quant.all.ages[["80"]] <- get.pop.traj.quantiles.byage(NULL, pop.pred, country.obj$index, country.obj$code, pi = 80,
trajectories=traj, year.index = 1:nr.proj, sex=sx)
quant.all.ages[["95"]] <- get.pop.traj.quantiles.byage(NULL, pop.pred, country.obj$index, country.obj$code, pi = 95,
trajectories=traj, year.index = 1:nr.proj, sex=sx)
}
}
for(age in c('all', ages)[age.index]) {
this.result <- cbind(
country.name=rep(country.obj$name, nr.var),
country.code=rep(country.obj$code, nr.var),
variant=variant.names)
if(sex != 'both')
this.result <- cbind(this.result, sex=rep(sex, nr.var))
if(age != 'all') {
age <- as.integer(age)
this.result <- cbind(this.result, age=rep(get.age.labels(pop.pred$ages, single.year = pop.pred$annual)[age], nr.var))
this.result <- data.table::data.table(
country.name=country.obj$name,
country.code=country.obj$code,
variant=variant.names
)
if(sx != 'both')
this.result <- this.result[ , sex := sx]
this.age <- age
if(this.age != 'all') {
this.age <- as.integer(this.age)
this.result <- this.result[, age := age.lables[this.age]]
}
if(is.null(vital.event)) {
if(include.observed)
observed.data <- get.pop.observed(pop.pred, country.obj$code, sex=sex, age=age)
quant <- get.pop.trajectories(pop.pred, country.obj$code, nr.traj=0, sex=sex, age=age,
observed.data <- get.pop.observed(pop.pred, country.obj$code, sex=sx, age=this.age)
quant <- NULL
if(!is.null(all.quantiles))
quant <- all.quantiles[[sx]][,this.age,,]
else {
if(is.null(quant.all.ages))
quant <- get.pop.trajectories(pop.pred, country.obj$code, nr.traj=0, sex=sx, age=this.age,
adjust=adjust, allow.negative.adj = allow.negative.adj)$quantiles
}
traj <- NULL
reload <- TRUE
} else { # vital event
Expand All @@ -2558,44 +2593,50 @@ write.expression <- function(pop.pred, expression, output.dir, file.suffix='expr
if(include.observed)
observed.data <- observed$trajectories[,,1]
if(!sum.over.ages) {
quant <- quant[age-subtract.from.age,,]
traj <- traj[age-subtract.from.age,,]
#quant <- quant[this.age-subtract.from.age,,]
#traj <- traj[this.age-subtract.from.age,,]
if(include.observed) {
if (age-subtract.from.age > nrow(observed.data)) # because observed goes only up to 100+
if (this.age-subtract.from.age > nrow(observed.data)) # because observed goes only up to 100+
observed.data <- rep(0, ncol(observed.data))
else
observed.data <- observed.data[age-subtract.from.age,]
observed.data <- observed.data[this.age-subtract.from.age,]
}
}
reload <- FALSE
#stop('')
}
proj.result <- round(rbind(
get.pop.traj.quantiles(quant, pop.pred, country.obj$index, country.obj$code, q=0.5,
trajectories=traj, reload=reload, sex=sex, age=age),
get.pop.traj.quantiles(quant, pop.pred, country.obj$index, country.obj$code, pi=80,
trajectories=traj, reload=reload, sex=sex, age=age),
get.pop.traj.quantiles(quant, pop.pred, country.obj$index, country.obj$code, pi=95,
trajectories=traj, reload=reload, sex=sex, age=age)),
digits)
if(is.null(quant.all.ages)){
#browser()
proj.result <- rbind(
get.pop.traj.quantiles(quant, pop.pred, country.obj$index, country.obj$code, q=0.5,
trajectories=traj, reload=reload, sex=sx, age=this.age),
get.pop.traj.quantiles(quant, pop.pred, country.obj$index, country.obj$code, pi=80,
trajectories=traj, reload=reload, sex=sx, age=this.age),
get.pop.traj.quantiles(quant, pop.pred, country.obj$index, country.obj$code, pi=95,
trajectories=traj, reload=reload, sex=sx, age=this.age))
} else {
proj.result <- rbind(quant.all.ages[["50"]][this.age-subtract.from.age,],
quant.all.ages[["80"]][,this.age-subtract.from.age,],
quant.all.ages[["95"]][,this.age-subtract.from.age,]
)
}
proj.result <- round(proj.result, digits)

if(!is.null(observed.data)) {
# put it into the same shape as proj.result minus the last observed
observed.data <- round(rbind(observed.data, NULL), digits)
observed.data <- observed.data[rep(1, nrow(proj.result)), -ncol(observed.data)]
proj.result <- cbind(observed.data, proj.result)
}
colnames(proj.result) <- col.names
#stop('')
this.result <- cbind(this.result, proj.result)
this.result <- cbind(this.result, data.table::data.table(proj.result))
result <- rbind(result, this.result)
}
}
}
colnames(result)[colnames(result)==names(header)] <- header
suffix <- paste(file.suffix, paste(c('sex', 'age')[c(bysex, byage)], collapse=''), sep='')
file <- paste('projection_summary_', suffix, '.csv', sep='')
write.table(result, file=file.path(output.dir, file), sep=',', row.names=FALSE, col.names=TRUE,
quote=which(is.element(colnames(result), c('country_name', 'variant', 'sex', 'age'))))
colnames(result)[colnames(result)==names(header)] <- unlist(header)
suffix <- paste0(file.suffix, paste(c('sex', 'age')[c(bysex, byage)], collapse=''))
file <- paste0('projection_summary_', suffix, '.csv')
data.table::fwrite(result, file=file.path(output.dir, file), sep=',', quote=TRUE)
cat('Stored into: ', file.path(output.dir, file), '\n')
}

Expand Down
2 changes: 1 addition & 1 deletion tests/run_tests.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
library(bayesPop)
source('test_functions.R')

CRAN <- TRUE
CRAN <- FALSE

test.expressions()

Expand Down
6 changes: 5 additions & 1 deletion tests/test_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,15 @@ test.prediction <- function(parallel = FALSE) {
stopifnot(all(round(tfr[1,1,2:dim(tfr)[3],1],2) == tfr.should.be))

# check that writing summary is OK
write.pop.projection.summary(pred, what=c("popsexage"), output.dir=sim.dir)
write.pop.projection.summary(pred, what=c("popsexage", "popage"), output.dir=sim.dir)
t <- read.table(file.path(sim.dir, 'projection_summary_tpopsexage.csv'), sep=',', header=TRUE)
s <- summary(pred, country = 528)
stopifnot(round(s$projections[1,1]) == sum(t[t$variant == "median", "X2020"]))

t <- read.table(file.path(sim.dir, 'projection_summary_tpopage.csv'), sep=',', header=TRUE)
s <- pop.byage.table(pred, country = 528, year = 2040)
stopifnot(round(s[2,1]) == t[t$variant == "median" & t$age == "5-9", "X2040"])

pred <- pop.predict(countries=528, keep.vital.events=TRUE,
nr.traj = 3, verbose=FALSE, output.dir=sim.dir, replace.output=TRUE, end.year=2040,
inputs=list(tfr.file='median_', e0M.file='median_'), parallel = parallel)
Expand Down

0 comments on commit e082b89

Please sign in to comment.