From e76b1d76a7671b552f270594dcca6abed28c5628 Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 6 Apr 2021 15:45:39 -0400 Subject: [PATCH 01/48] fix bug --- R/mcmc_ini.R | 3 +-- R/wpp_data.R | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index 5452b21b..358cafbd 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -483,8 +483,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100, dontsave=dontsave.pars, uncertainty=uncertainty ), class='bayesTFR.mcmc') - - ################################################################## + ################################################################## # distortions ################################################################## # note: the eps will always be NA outside (tau_c, lambda-1)!! diff --git a/R/wpp_data.R b/R/wpp_data.R index a6ee8d5f..ad5e580c 100644 --- a/R/wpp_data.R +++ b/R/wpp_data.R @@ -208,12 +208,12 @@ get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950 ncol.tfr <- length(num.columns) cols.starty <- as.integer(substr(names.tfr.data[num.columns], 1,4)) cols.endy <- as.integer(substr(names.tfr.data[num.columns], 6,9)) - years.to.interp.to <- (cols.starty[1]+3):cols.endy[length(cols.endy)] + years.to.interp.to <- (cols.starty[1]):cols.endy[length(cols.endy)] years.to.interp.from <- seq(cols.starty[1]+3, cols.endy[length(cols.endy)]-2, by = 5) tfr_data_new <- matrix(NA, nrow = nrow(tfr_data), ncol = length(years.to.interp.to)) for(row in 1:nrow(tfr_data_new)) tfr_data_new[row, ] <- approx(years.to.interp.from, tfr_data[row, num.columns], - xout = years.to.interp.to)$y + xout = years.to.interp.to, rule=2)$y tfr_data_new <- cbind(tfr_data[, -num.columns], tfr_data_new) colnames(tfr_data_new) <- c(colnames(tfr_data)[-num.columns], years.to.interp.to) tfr_data <- tfr_data_new From ef6a2416f6c8fed7cda345b7cc4f697ae98c1096 Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 6 Apr 2021 17:31:35 -0400 Subject: [PATCH 02/48] fix bug for continuous variable --- R/run_mcmc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/run_mcmc.R b/R/run_mcmc.R index e0468c6e..8ff5c4bc 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -262,7 +262,7 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL gamma.ini=gamma.ini[chain.id], save.all.parameters=save.all.parameters, verbose=verbose, uncertainty=uncertainty, iso.unbiased=iso.unbiased, - covariates=covariates, cont_covariates=NULL) + covariates=covariates, cont_covariates=cont_covariates) if (uncertainty) { this.sv <- list() From f280f15cf870b52dad375698665f1c133cb40d5b Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 6 Apr 2021 20:29:18 -0400 Subject: [PATCH 03/48] allow using source.col.name to specify the source column for iso.unbiased --- R/mcmc_ini.R | 4 ++-- R/run_mcmc.R | 16 +++++++++------- R/updated_functions.R | 6 +++--- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index 358cafbd..d4ef9b27 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -402,7 +402,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100, d.ini=0.17, save.all.parameters=FALSE, verbose=FALSE, uncertainty=FALSE, iso.unbiased=NULL, - covariates=c('source', 'method'), cont_covariates=NULL) { + covariates=c('source', 'method'), cont_covariates=NULL, source.col.name = "source") { nr_countries <- mcmc.meta$nr_countries @@ -495,7 +495,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100, # mcmc <- get.obs.estimate.diff(mcmc) # mcmc <- estimate.bias.sd.raw(mcmc) mcmc <- get.obs.estimate.diff.original(mcmc) - mcmc <- estimate.bias.sd.original(mcmc, iso.unbiased, covariates, cont_covariates) + mcmc <- estimate.bias.sd.original(mcmc, iso.unbiased, covariates, cont_covariates, source.col.name) } # mcmc <- as.list(mcmc) diff --git a/R/run_mcmc.R b/R/run_mcmc.R index 8ff5c4bc..fb765260 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -26,6 +26,7 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(), phase3.starting.values=NULL, proposal_cov_gammas = NULL, # should be a list with elements 'values' and 'country_codes' iso.unbiased = NULL, covariates = c('source', 'method'), cont_covariates = NULL, + source.col.name="source", seed = NULL, parallel=FALSE, nr.nodes=nr.chains, save.all.parameters = FALSE, compression.type='None', auto.conf = list(max.loops=5, iter=62000, iter.incr=10000, nr.chains=3, thin=80, burnin=2000), @@ -196,10 +197,10 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(), chain.set <- bDem.performParallel(nr.nodes, 1:nr.chains, mcmc.run.chain, initfun=init.nodes, seed = seed, meta=bayesTFR.mcmc.meta, thin=thin, starting.values=starting.values, iter=iter, S.ini=S.ini, a.ini=a.ini, - b.ini=b.ini, sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini, - gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, - verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, - covariates=covariates, cont_covariates=cont_covariates, ...) + b.ini=b.ini, sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini, + gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, + verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, + covariates=covariates, cont_covariates=cont_covariates, source.col.name=source.col.name, ...) } else { # run chains sequentially chain.set <- list() for (chain in 1:nr.chains) { @@ -208,7 +209,7 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(), sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini, gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, - covariates=covariates, cont_covariates=cont_covariates) + covariates=covariates, cont_covariates=cont_covariates, source.col.name=source.col.name) } } names(chain.set) <- 1:nr.chains @@ -240,7 +241,7 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL S.ini, a.ini, b.ini, sigma0.ini, Triangle_c4.ini, const.ini, gamma.ini=1, save.all.parameters=FALSE, verbose=FALSE, verbose.iter=10, uncertainty=FALSE, iso.unbiased=NULL, - covariates=c('source', 'method'), cont_covariates=NULL) { + covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source") { cat('\n\nChain nr.', chain.id, '\n') if (verbose) { @@ -262,7 +263,8 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL gamma.ini=gamma.ini[chain.id], save.all.parameters=save.all.parameters, verbose=verbose, uncertainty=uncertainty, iso.unbiased=iso.unbiased, - covariates=covariates, cont_covariates=cont_covariates) + covariates=covariates, cont_covariates=cont_covariates, + source.col.name=source.col.name) if (uncertainty) { this.sv <- list() diff --git a/R/updated_functions.R b/R/updated_functions.R index d29ed303..a86ebd5d 100644 --- a/R/updated_functions.R +++ b/R/updated_functions.R @@ -128,7 +128,7 @@ estimate.bias.sd.raw <- function(mcmc) } estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('source', 'method'), - cont_covariates=NULL) + cont_covariates=NULL, source.col.name="source") { mcmc$meta$raw_data.original$bias <- NA mcmc$meta$raw_data.original$std <- NA @@ -180,10 +180,10 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou ## Optional if (ISO.code$code %in% iso.unbiased) { - if ("source" %in% covariates) + if (source.col.name %in% covariates) { index.by.country.vr.estimate <- which((mcmc$meta$raw_data.original$country_code == ISO.code$code) & - (mcmc$meta$raw_data.original[, "source"] %in% c("VR", 'Estimate'))) + (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR", 'Estimate'))) mcmc$meta$raw_data.original$bias[index.by.country.vr.estimate] <- 0 mcmc$meta$raw_data.original$std[index.by.country.vr.estimate] <- 0.016 } From 039738334487ebcfdfdcfdb8bb58938b7928033f Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 6 Apr 2021 21:02:55 -0400 Subject: [PATCH 04/48] update documents --- man/run-mcmc.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/run-mcmc.Rd b/man/run-mcmc.Rd index 971dd115..b7a89e34 100644 --- a/man/run-mcmc.Rd +++ b/man/run-mcmc.Rd @@ -36,7 +36,7 @@ run.tfr.mcmc(nr.chains = 3, iter = 62000, Triangle_c4.ini = NULL, const.ini = NULL, gamma.ini = 1, phase3.starting.values = NULL, proposal_cov_gammas = NULL, iso.unbiased = NULL, covariates = c("source", "method"), cont_covariates = NULL, - seed = NULL, parallel = FALSE, nr.nodes = nr.chains, + source.col.name="source", seed = NULL, parallel = FALSE, nr.nodes = nr.chains, save.all.parameters = FALSE, compression.type = 'None', auto.conf = list(max.loops = 5, iter = 62000, iter.incr = 10000, nr.chains = 3, thin = 80, burnin = 2000), @@ -103,6 +103,7 @@ continue.tfr.mcmc(iter, chain.ids = NULL, \item{proposal_cov_gammas}{Proposal for the gamma covariance matrices for each country. It should be a list with two values: \code{values} and \code{country_codes}. The structure corresponds to the object returned by the function \code{\link{get.cov.gammas}}. By default the covariance matrices are obtained using \code{data(proposal_cov_gammas_cii)}. This argument overwrite the defaults for countries contained the argument.} \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. Only used if \code{uncertainty = TRUE}. See details below.} \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty = TRUE}. See details below.} + \item{source.col.name}{Specify the name for source column used for iso.unbiased variable. } \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.} \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. If it is \code{TRUE}, the package \pkg{\link[snowFT]{snowFT}} is required. In such a case further arguments can be passed, see description of \dots.} \item{nr.nodes}{Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes for running the simulation in parallel. By default it equals to the number of chains.} From d25855befad93e0b8de863ef5c73ac1706b698ef Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 8 Apr 2021 12:22:12 -0700 Subject: [PATCH 05/48] version changed --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4cc625e8..a8f72f86 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.0-4 -Date: 2021-03-31 +Version: 7.0-4.9001 +Date: 2021-04-07 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova From e2bfd8ed133de9760d6704299a3c6ffc1b7c8377 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 9 Apr 2021 11:49:10 -0700 Subject: [PATCH 06/48] changes in plot function; ignore last.oberved if using uncertainty --- R/mcmc_ini.R | 4 ++-- R/plot_functions.R | 26 +++++++++++++++++++++----- R/wpp_data.R | 9 +++++---- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index d4ef9b27..fed1e57b 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -131,7 +131,6 @@ find.tau.lambda.and.DLcountries <- function(tfr_matrix, min.TFRlevel.for.start.a T_end_c <- lambda_c <-rep(T_end, nr_countries) T.suppl <- if(is.null(suppl.data$regions)) 0 else dim(suppl.data$tfr_matrix)[1] tau_c <- start_c <- rep(NA, nr_countries) - for (country in 1:nr_countries) { data <- get.observed.with.supplemental(country, tfr_matrix, suppl.data) has.suppl <- length(data) > T_end && !is.na(suppl.data$index.from.all.countries[country]) @@ -218,7 +217,8 @@ mcmc.meta.ini <- function(..., if(present.year-3 > wpp.year) warning("present.year is much larger then wpp.year. Make sure WPP data for present.year are available.") tfr.with.regions <- set_wpp_regions(start.year=start.year, present.year=present.year, wpp.year=wpp.year, my.tfr.file = my.tfr.file, my.locations.file=my.locations.file, - annual = mcmc.input$annual.simulation, verbose=verbose) + annual = mcmc.input$annual.simulation, ignore.last.observed = uncertainty, + verbose=verbose) meta <- do.meta.ini(mcmc.input, tfr.with.regions, proposal_cov_gammas=proposal_cov_gammas, verbose=verbose, uncertainty=uncertainty, my.tfr.raw.file=my.tfr.raw.file, ar.phase2=ar.phase2) diff --git a/R/plot_functions.R b/R/plot_functions.R index fd398f82..f4cff6c5 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -491,9 +491,13 @@ tfr.estimation.plot <- function(mcmc.list=NULL, country.code=NULL, ISO.code=NULL wpp.data <- get.observed.tfr(country.obj$index, mcmc.list$meta, "tfr_matrix_all") wpp.data <- data.frame(year=quantile_tbl$year, tfr = as.numeric(wpp.data)) - wpp.data <- wpp.data[wpp.data$year %% 5 == 3,] + #wpp.data <- wpp.data[wpp.data$year %% 5 == 3,] q <- q + ggplot2::geom_line(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"), size = 0.8) + ggplot2::theme_bw() + - ggplot2::geom_point(data = wpp.data, ggplot2::aes_string(x="year", y="tfr")) + ggplot2::geom_point(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"), size = 0.7) + + # re-draw the median + q <- q + ggplot2::geom_line(ggplot2::aes_string(x="year", y="Q50"), size = 0.8, color="red") + + ggplot2::geom_point(ggplot2::aes_string(x="year", y="Q50"), size = 1, color="red") if (save.image) { @@ -609,12 +613,20 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), } # plot median tfr.median <- get.median.from.prediction(tfr.pred, country$index, country$code) - lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3]) + if(uncertainty) { + unc.last.time <- which(tfr.object$tfr_quantile$year == x2[1]) + tfr.median[1] <- unlist(tfr.object$tfr_quantile[unc.last.time,])[length(pi)+1] + } + lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3]) # plot given CIs lty <- 2:(length(pi)+1) for (i in 1:length(pi)) { cqp <- get.traj.quantiles(tfr.pred, country$index, country$code, trajectories$trajectories, pi[i]) if (!is.null(cqp)) { + if(uncertainty) { + cqp[1,1] <- unlist(tfr.object$tfr_quantile[unc.last.time, ])[length(pi)+1-i] + cqp[2,1] <- unlist(tfr.object$tfr_quantile[unc.last.time, ])[length(pi)+1+i] + } lines(x2, cqp[1,], type='l', col=col[4], lty=lty[i], lwd=lwd[4]) lines(x2, cqp[2,], type='l', col=col[4], lty=lty[i], lwd=lwd[4]) } @@ -649,8 +661,12 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), lty <- c(lty, max(lty)+1) llty <- length(lty) up.low <- get.half.child.variant(median=tfr.median) - lines(x2, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) - lines(x2, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) + if(uncertainty) { + up.low <- up.low[,-1] + x2t <- x2[-1] + } else x2t <- x2 + lines(x2t, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) + lines(x2t, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) legend <- c(legend, '+/- 0.5 child') cols <- c(cols, col[5]) lwds <- c(lwds, lwd[5]) diff --git a/R/wpp_data.R b/R/wpp_data.R index ad5e580c..fdfa028e 100644 --- a/R/wpp_data.R +++ b/R/wpp_data.R @@ -1,7 +1,7 @@ # Read in the UN estimates set_wpp_regions <- function(start.year=1950, present.year=2010, wpp.year=2012, my.tfr.file=NULL, - my.locations.file=NULL, annual = FALSE, verbose=FALSE) { + my.locations.file=NULL, annual = FALSE, ignore.last.observed = FALSE, verbose=FALSE) { # outputs: # tfr_matrix_all, with each column one countries UN estimates # tfr_matrix with NAs after last observed data point @@ -32,7 +32,8 @@ set_wpp_regions <- function(start.year=1950, present.year=2010, wpp.year=2012, m start.year=start.year, present.year=present.year, annual = annual, verbose=verbose, - interpolate = annual && is.null(my.tfr.file)) + interpolate = annual && is.null(my.tfr.file), + ignore.last.observed = ignore.last.observed) if(!annual) { TFRmatrixsuppl.regions <- .get.suppl.matrix.and.regions(un.object, TFRmatrix.regions, loc_data, start.year, present.year) @@ -198,7 +199,7 @@ read.UNlocations <- function(data, wpp.year, package="bayesTFR", my.locations.fi get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950, present.year=2010, annual = FALSE, datacolnames=c(country.code='country_code', country.name='country', reg.name='reg_name', reg.code='reg_code', area.name='area_name', area.code='area_code'), - interpolate = FALSE) { + interpolate = FALSE, ignore.last.observed = FALSE) { tfr_data <- data nr_countries <- length(tfr_data[,1]) if (annual && interpolate) # interpolate 5-year data @@ -266,7 +267,7 @@ get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950 if(has.area.name) area_name <- c(area_name, paste(loc_data$area_name[loc_index])) if(has.area.code) area_code <- c(area_code, loc_data$area_code[loc_index]) # set NAs for entries that are not observed data (after last.observed) - tfr_matrix[(tfr_data[i,'last.observed'] < mid.years), i] <- NA + if(!ignore.last.observed) tfr_matrix[(tfr_data[i,'last.observed'] < mid.years), i] <- NA all.na[i] <- all(is.na(tfr_matrix[,i])) } country.codes <- tfr_data[[datacolnames['country.code']]] From bc202e86ac026dd5d4fdc5a7c8d8119fe9772d55 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 9 Apr 2021 13:14:29 -0700 Subject: [PATCH 07/48] avoid log(0) in mcmc sampling --- DESCRIPTION | 4 ++-- R/mcmc_sampling.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a8f72f86..e37bf371 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.0-4.9001 -Date: 2021-04-07 +Version: 7.0-4.9002 +Date: 2021-04-09 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova diff --git a/R/mcmc_sampling.R b/R/mcmc_sampling.R index 419f6565..b1ef2b1c 100755 --- a/R/mcmc_sampling.R +++ b/R/mcmc_sampling.R @@ -160,7 +160,7 @@ tfr.mcmc.sampling <- function(mcmc, thin=1, start.iter=2, verbose=FALSE, verbose delta4.squared <- mcenv$delta4^2 delta4.0.squared <- delta4_0^2 Triangle_c4_transformed <- - log((mcenv$Triangle_c4[id_DL] - Triangle_c4.low)/ + log(pmax(mcenv$Triangle_c4[id_DL] - Triangle_c4.low, 1e-10)/ (Triangle_c4.up-mcenv$Triangle_c4[id_DL])) mcenv$Triangle4 <- rnorm(1, mean = (sum(Triangle_c4_transformed)/(delta4.squared) + Triangle4_0/delta4.0.squared)/ From cbbd47340876b521d3a9d765f0ce30281ac405b6 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Mon, 12 Apr 2021 09:45:52 -0700 Subject: [PATCH 08/48] fix issue with suppl data in extra function --- R/wpp_data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/wpp_data.R b/R/wpp_data.R index fdfa028e..4d24aba0 100644 --- a/R/wpp_data.R +++ b/R/wpp_data.R @@ -387,12 +387,12 @@ set.wpp.extra <- function(meta, countries=NULL, my.tfr.file=NULL, my.locations.f extra.wpp <- .extra.matrix.regions(data=data, countries=countries, meta=meta, my.locations.file=my.locations.file, verbose=verbose, annual=annual, uncertainty=uncertainty, interpolate = is.null(my.tfr.file) && annual) - if(!is.null(extra.wpp)) { + if(!is.null(extra.wpp) && !annual) { locations <- read.UNlocations(data$data, wpp.year=meta$wpp.year, my.locations.file=my.locations.file, verbose=verbose) suppl.wpp <- .get.suppl.matrix.and.regions(un.object, extra.wpp, locations$loc_data, meta$start.year, meta$present.year) extra.wpp$suppl.data <- .get.suppl.data.list(suppl.wpp) - } + } return(extra.wpp) } From 655312037282dff338b7c90a6f992b24775ceae4 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Mon, 12 Apr 2021 14:53:22 -0700 Subject: [PATCH 09/48] fixed source.col.name in run mcmc extra --- R/run_mcmc.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/run_mcmc.R b/R/run_mcmc.R index fb765260..c1c3c0fc 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -367,7 +367,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), thin=1, thin.extra=1, burnin=2000, parallel=FALSE, nr.nodes=NULL, my.locations.file = NULL, uncertainty=FALSE, my.tfr.raw.file=NULL, iso.unbiased=NULL, - covariates=c('source', 'method'), cont_covariates=NULL, + covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source", verbose=FALSE, verbose.iter=100, ...) { mcmc.set <- get.tfr.mcmc(sim.dir) meta.old <- mcmc.set$meta @@ -430,7 +430,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), if(uncertainty) { mcmc.set$mcmc.list[[chain]] <- get.obs.estimate.diff.original(mcmc.set$mcmc.list[[chain]]) - mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates) + mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, source.col.name=source.col.name) mcmc.set$mcmc.list[[chain]]$eps_unc <- list() if (is.null(mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra)) mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra <- list() for (country in countries) From 85d9946cc105181d4385cb827fa2319fdabca29f Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Tue, 13 Apr 2021 00:29:24 -0700 Subject: [PATCH 10/48] fixes related to the size of the meta file --- R/get_outputs.R | 3 ++- R/predict_tfr.R | 1 + R/run_mcmc.R | 18 ++++++++++++------ R/updated_functions.R | 17 +++++++++++------ 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/R/get_outputs.R b/R/get_outputs.R index e9163970..2db57e92 100644 --- a/R/get_outputs.R +++ b/R/get_outputs.R @@ -143,7 +143,7 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, country.obj <- get.country.object(country, mcmc.set$meta, index=TRUE) if (country.obj$index %in% mcmc.set$meta$extra) { - nr.points.cs <- floor(mcmc.set$meta$extra_iter[country.obj$index] / mcmc.set$meta$extra_thin[country.obj$index]) + nr.points.cs <- floor((mcmc.set$meta$extra_iter[country.obj$index]-burnin) / mcmc.set$meta$extra_thin[country.obj$index]) nr.points.cs <- nr.points.cs * length(mcmc.set$mcmc.list) if (nr.points.cs >= nr.points) thin.index.cs <- round(seq(1, nr.points.cs, length.out = nr.points)) else @@ -167,6 +167,7 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, compression.type=thinned.mcmc$compression.type) } } + if (mcmc.set$meta$nr_countries > mcmc.set$meta$nr_countries_estimation) { .update.thinned.extras(mcmc.set, (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries, burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose) diff --git a/R/predict_tfr.R b/R/predict_tfr.R index 5dff9e84..e19ded9a 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -197,6 +197,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac floor((mcmc.set$meta$extra_iter[country] - burn) / mcmc.set$meta$extra_thin[country]) * length(mcmc.set$mcmc.list)) } } + stored.iter <- stored.iter.extra } mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin)) if(!is.null(nr.traj) && !is.null(thin)) { diff --git a/R/run_mcmc.R b/R/run_mcmc.R index c1c3c0fc..ba69fac9 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -430,7 +430,8 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), if(uncertainty) { mcmc.set$mcmc.list[[chain]] <- get.obs.estimate.diff.original(mcmc.set$mcmc.list[[chain]]) - mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, source.col.name=source.col.name) + mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, + source.col.name=source.col.name, countries = Eini$index) mcmc.set$mcmc.list[[chain]]$eps_unc <- list() if (is.null(mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra)) mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra <- list() for (country in countries) @@ -477,9 +478,9 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), } if(uncertainty) { - if (!dir.exists(file.path(meta$output.dir, 'extra.meta'))) dir.create(file.path(meta$output.dir, 'extra.meta')) - if (!dir.exists(file.path(meta$output.dir, 'extra.meta', countries[1]))) dir.create(file.path(meta$output.dir, 'extra.meta', countries[1])) - store.bayesTFR.meta.object(meta, file.path(meta$output.dir, 'extra.meta', countries[1])) + #if (!dir.exists(file.path(meta$output.dir, 'extra.meta'))) dir.create(file.path(meta$output.dir, 'extra.meta')) + #if (!dir.exists(file.path(meta$output.dir, 'extra.meta', countries[1]))) dir.create(file.path(meta$output.dir, 'extra.meta', countries[1])) + #store.bayesTFR.meta.object(meta, file.path(meta$output.dir, 'extra.meta', countries[1])) for (name in c("country.ind.by.year", "ind.by.year", "id_phase1_by_year", "id_phase2_by_year", "id_phase3_by_year", "id_phase3", "nr.countries")) { @@ -503,8 +504,13 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), } } meta[['extra']] <- sort(unique(meta[['extra']])) - } - store.bayesTFR.meta.object(meta, file.path(meta$output.dir)) + # unload parent for storing purposes + parent <- meta$parent + meta[["parent"]] <- NULL + } else parent <- NULL + + store.bayesTFR.meta.object(meta, meta$output.dir) + meta[["parent"]] <- parent mcmc.set$meta <- meta cat('\n') invisible(mcmc.set) diff --git a/R/updated_functions.R b/R/updated_functions.R index a86ebd5d..6394293f 100644 --- a/R/updated_functions.R +++ b/R/updated_functions.R @@ -128,7 +128,7 @@ estimate.bias.sd.raw <- function(mcmc) } estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('source', 'method'), - cont_covariates=NULL, source.col.name="source") + cont_covariates=NULL, source.col.name="source", countries = NULL) { mcmc$meta$raw_data.original$bias <- NA mcmc$meta$raw_data.original$std <- NA @@ -137,7 +137,8 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou mcmc$meta$bias_model <- list() mcmc$meta$std_model <- list() } - for(country in 1:mcmc$meta$nr_countries) + if(is.null(countries)) countries <- 1:mcmc$meta$nr_countries + for(country in countries) { ISO.code <- get.country.object(country, meta=mcmc$meta, index=TRUE) if (is.na(ISO.code$code)) next @@ -168,10 +169,10 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou } } - m1 <- lm(as.formula(paste0('diff ~ ', regressor))) + m1 <- lm(as.formula(paste0('diff ~ ', regressor)), model = FALSE) bias <- predict(m1) abs.residual <- abs(residuals(m1)) - m2 <- lm(as.formula(paste0('abs.residual ~ ', regressor))) + m2 <- lm(as.formula(paste0('abs.residual ~ ', regressor)), model = FALSE) std <- predict(m2) * sqrt(pi/2) std[std < 1e-6] <- 0.1 std[std < (abs(bias) / 2)] <- abs(bias[std < (abs(bias) / 2)]) @@ -192,11 +193,15 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou warning("Covariate source not found. iso.unbiased is not used.\n") } } + # to save space + attr(m1$terms, ".Environment") <- NULL + attr(m2$terms, ".Environment") <- NULL + m1$fitted.values <- NULL + m2$fitted.values <- NULL + mcmc$meta$bias_model[[country]] <- m1 mcmc$meta$std_model[[country]] <- m2 } - - return(mcmc) } From af06d4b4aaad1b3ff1b72c5caace5f9d604f7003 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 14 Apr 2021 16:31:47 -0700 Subject: [PATCH 11/48] option of storing extra country to different directory; removed burnin and thin from tfr.trajectories.plot --- R/get_outputs.R | 13 ++++++++----- R/plot_functions.R | 10 +++------- R/predict_tfr.R | 14 +++++++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/R/get_outputs.R b/R/get_outputs.R index 2db57e92..1a1c7606 100644 --- a/R/get_outputs.R +++ b/R/get_outputs.R @@ -86,7 +86,8 @@ get.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0) { return(NULL) } -create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, verbose=TRUE, uncertainty=FALSE) { +create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, verbose=TRUE, uncertainty=FALSE, + update.with.countries = NULL) { #Return a thinned mcmc.set object with burnin removed and all chanins collapsed into one mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin)) thin <- max(c(thin, mcthin)) @@ -139,7 +140,8 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, if(verbose) cat('done.\nStoring country-specific parameters ...') par.names.cs <- tfr.parameter.names.cs(trans=FALSE) if (uncertainty) par.names.cs <- c(par.names.cs, 'tfr') - for (country in mcmc.set$meta$id_DL){ + DLcntries <- if(!is.null(update.with.countries)) update.with.countries[update.with.countries %in% mcmc.set$meta$id_DL] else mcmc.set$meta$id_DL + for (country in DLcntries){ country.obj <- get.country.object(country, mcmc.set$meta, index=TRUE) if (country.obj$index %in% mcmc.set$meta$extra) { @@ -167,10 +169,11 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, compression.type=thinned.mcmc$compression.type) } } - if (mcmc.set$meta$nr_countries > mcmc.set$meta$nr_countries_estimation) { - .update.thinned.extras(mcmc.set, (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries, - burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose) + cindex <- if(!is.null(update.with.countries)) update.with.countries[update.with.countries > mcmc.set$meta$nr_countries_estimation] else + (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries + if(length(cindex) > 0) + .update.thinned.extras(mcmc.set, cindex, burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose) } invisible(structure(list(meta=meta, mcmc.list=list(thinned.mcmc)), class='bayesTFR.mcmc.set')) } diff --git a/R/plot_functions.R b/R/plot_functions.R index f4cff6c5..b40c24c7 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -516,7 +516,7 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), xlim=NULL, ylim=NULL, type='b', xlab='Year', ylab='TFR', main=NULL, lwd=c(2,2,2,2,2,1), col=c('black', 'green', 'red', 'red', 'blue', '#00000020'), - show.legend=TRUE, add=FALSE, uncertainty=FALSE, thin=NULL, burnin=NULL, col_unc="purple", ... + show.legend=TRUE, add=FALSE, uncertainty=FALSE, col_unc="purple", ... ) { # lwd/col is a vector of 6 line widths/colors for: # 1. observed data, 2. imputed missing data, 3. median, 4. quantiles, 5. +- 0.5 child, 6. trajectories @@ -529,12 +529,8 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), } if (uncertainty) { - sim.dir <- file.path(tfr.pred$output.directory, "..") - if (missing(thin) || is.null(thin)) thin <- tfr.pred$thin - if (missing(burnin) || is.null(burnin)) burnin <- tfr.pred$burnin - tfr.object <- get.tfr.estimation(mcmc.list=NULL, country.code=country, ISO.code=NULL, sim.dir=sim.dir, - burnin=burnin, thin=thin, probs=sort(c((1-pi/100)/2, 0.5, pi/100 + (1-pi/100)/2))) - + tfr.object <- get.tfr.estimation(mcmc.list=tfr.pred$mcmc.set, country.code=country, ISO.code=NULL, + probs=sort(c((1-pi/100)/2, 0.5, pi/100 + (1-pi/100)/2))) } col <- .match.colors.with.default(col, c('black', 'green', 'red', 'red', 'blue', '#00000020')) country.obj <- get.country.object(country, tfr.pred$mcmc.set$meta) diff --git a/R/predict_tfr.R b/R/predict_tfr.R index e19ded9a..d9a0cac9 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -88,7 +88,8 @@ get.burnin.nrtraj.from.diagnostics <- function(sim.dir, ...) { } tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), - prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE) { + prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE, + all.countries.required = TRUE) { # Run prediction for given countries/regions (as codes). If they are not given it will be set to countries # for which there are MCMC results but no prediction. # It is to be used after running run.tfr.mcmc.extra @@ -121,7 +122,8 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), use.correlation=pred$use.correlation, mu=pred$mu, rho=pred$rho, sigmaAR1=pred$sigmaAR1, min.tfr=pred$min.tfr, countries=countries.idx, save.as.ascii=0, output.dir=prediction.dir, force.creating.thinned.mcmc=TRUE, - write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, uncertainty=uncertainty) + write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, + uncertainty=uncertainty, all.countries.required = all.countries.required) # merge the two predictions code.other.countries <- setdiff(pred$mcmc.set$meta$regions$country_code, @@ -152,7 +154,8 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), if (uncertainty) countries.save <- countries do.convert.trajectories(pred=bayesTFR.prediction, n=save.as.ascii, output.dir=pred$output.dir, countries = countries.save, verbose=verbose) - tfr.write.projection.summary.and.parameters(pred=bayesTFR.prediction, output.dir=pred$output.dir) + if(all.countries.required) + tfr.write.projection.summary.and.parameters(pred=bayesTFR.prediction, output.dir=pred$output.dir) cat('\nPrediction stored into', pred$output.dir, '\n') invisible(bayesTFR.prediction) @@ -168,7 +171,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac save.as.ascii=0, output.dir = NULL, write.summary.files=TRUE, is.mcmc.set.thinned=FALSE, force.creating.thinned.mcmc=FALSE, write.trajectories=TRUE, - verbose=verbose, uncertainty=FALSE){ + verbose=verbose, uncertainty=FALSE, all.countries.required = TRUE){ # if 'countries' is given, it is an index # sigmaAR1 can be a vector. The last element will be repeated up to nr.projections meta <- mcmc.set$meta @@ -238,7 +241,8 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac unblock.gtk('bDem.TFRpred') load.mcmc.set <- if(has.thinned.mcmc && !force.creating.thinned.mcmc) thinned.mcmc else create.thinned.tfr.mcmc(mcmc.set, thin=thin, burnin=burnin, - output.dir=output.dir, verbose=verbose, uncertainty=uncertainty) + output.dir=output.dir, verbose=verbose, uncertainty=uncertainty, + update.with.countries = if(all.countries.required) NULL else countries) nr_simu <- load.mcmc.set$mcmc.list[[1]]$finished.iter if (verbose) cat('Load variance parameters.\n') From 28a7bab88b4f57406574a04b9e74faebb54dc0ef Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 15 Apr 2021 10:25:46 -0700 Subject: [PATCH 12/48] added option of averaging gamma cov for extra countries --- R/mcmc_ini.R | 5 +++-- R/run_mcmc.R | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index fed1e57b..af867c7e 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -504,7 +504,8 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100, mcmc.meta.ini.extra <- function(mcmc.set, countries=NULL, my.tfr.file = NULL, my.locations.file=NULL, burnin = 200, verbose=FALSE, uncertainty=FALSE, - my.tfr.raw.file=ifelse(uncertainty, file.path(find.package("bayesTFR"), "data", "rawTFR.csv"), NULL)) { + my.tfr.raw.file=ifelse(uncertainty, file.path(find.package("bayesTFR"), "data", "rawTFR.csv"), NULL), + average.gammas.cov = TRUE) { update.regions <- function(reg, ereg, id.replace, is.new, is.old) { nreg <- list() for (name in c('code', 'area_code', 'country_code')) { @@ -536,7 +537,7 @@ mcmc.meta.ini.extra <- function(mcmc.set, countries=NULL, my.tfr.file = NULL, has.mock.suppl <- TRUE } Emeta <- do.meta.ini(meta, tfr.with.regions=tfr.with.regions, - use.average.gammas.cov=TRUE, burnin=burnin, + use.average.gammas.cov=average.gammas.cov, burnin=burnin, verbose=verbose, uncertainty=uncertainty, my.tfr.raw.file = my.tfr.raw.file) # join the new meta with the existing one diff --git a/R/run_mcmc.R b/R/run_mcmc.R index ba69fac9..6db57a08 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -368,7 +368,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), my.locations.file = NULL, uncertainty=FALSE, my.tfr.raw.file=NULL, iso.unbiased=NULL, covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source", - verbose=FALSE, verbose.iter=100, ...) { + average.gammas.cov = TRUE, verbose=FALSE, verbose.iter=100, ...) { mcmc.set <- get.tfr.mcmc(sim.dir) meta.old <- mcmc.set$meta @@ -380,7 +380,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), Eini <- mcmc.meta.ini.extra(mcmc.set, countries=countries, my.tfr.file=my.tfr.file, my.locations.file=my.locations.file, burnin=burnin, verbose=verbose, uncertainty=uncertainty, - my.tfr.raw.file=my.tfr.raw.file) + my.tfr.raw.file=my.tfr.raw.file, average.gammas.cov = average.gammas.cov) if(length(Eini$index) <= 0) { cat('\nNothing to be done.\n') return(invisible(mcmc.set)) From 3a60f2d7790df086a3ce5ea3308fa04414d17be4 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 15 Apr 2021 11:43:40 -0700 Subject: [PATCH 13/48] help files updated --- DESCRIPTION | 2 +- man/get.thinned.tfr.mcmc.Rd | 10 ++++++---- man/plot-trajectories.Rd | 4 +--- man/run-mcmc.Rd | 6 ++++-- man/run.tfr.mcmc.extra.Rd | 3 +++ man/tfr.predict.extra.Rd | 6 ++++-- 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e37bf371..1f96e362 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: bayesTFR -Version: 7.0-4.9002 +Version: 7.0-4.9003 Date: 2021-04-09 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) diff --git a/man/get.thinned.tfr.mcmc.Rd b/man/get.thinned.tfr.mcmc.Rd index 6e517145..4bc91bc2 100644 --- a/man/get.thinned.tfr.mcmc.Rd +++ b/man/get.thinned.tfr.mcmc.Rd @@ -8,14 +8,15 @@ Creating and Accessing Thinned MCMCs } \description{ The function \code{get.thinned.tfr.mcmc} accesses -a thinned and burned version of the given Phase II MCMC set. \code{create.thinned.tfr.mcmc} creates such a set. +a thinned and burned version of the given Phase II MCMC set. \code{create.thinned.tfr.mcmc} creates or updates such a set. } \usage{ get.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0) create.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0, - output.dir = NULL, verbose = TRUE, uncertainty = FALSE) + output.dir = NULL, verbose = TRUE, uncertainty = FALSE, + update.with.countries = NULL) } \arguments{ \item{mcmc.set}{Object of class \code{\link{bayesTFR.mcmc.set}} of Phase II.} @@ -23,13 +24,14 @@ create.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0, \item{output.dir}{Output directory. It is only used if the output goes to a non-standard directory.} \item{verbose}{Logical switching log messages on and off.} \item{uncertainty}{If users want to save the thinned estimated TFR in the new mcmc object, this parameter should be set \code{TRUE}.} + \item{update.with.countries}{If an existing set is to be updated, this should be a vector of country indices for the update.} } \details{ -The function \code{create.thinned.tfr.mcmc} is called from \code{\link{tfr.predict}} and thus, the resulting object contains exactly the same MCMCs used for generating projections. In addition, it can be also called from \code{\link{tfr.diagnose}} if desired, so that the projection process can re-use such a set that lead to a convergence. +The function \code{create.thinned.tfr.mcmc} is called from \code{\link{tfr.predict}} and thus, the resulting object contains exactly the same MCMCs used for generating projections. In addition, it can be also called from \code{\link{tfr.diagnose}} if desired, so that the projection process can re-use such a set that leads to a convergence. The thinning is done as follows: The given \code{burnin} is removed from the beginning of each chain in the original MCMC set. Then each chain is thinned by \code{thin} using equal spacing and all chains are collapsed into one single chain per parameter. They are stored in the main simulation directory under the name \file{thinned_mcmc_\emph{t}_\emph{b}} where \emph{t} is the value of \code{thin} and \emph{b} the value of \code{burnin}. -If \code{uncertainty=TRUE}, the estimated TFR is thinned and saved either. +If \code{uncertainty=TRUE}, the estimated TFR is thinned and saved as well. } \value{ Both functions return an object of class \code{\link{bayesTFR.mcmc.set}}. \code{get.thinned.tfr.mcmc} returns \code{NULL} if such object does not exist. diff --git a/man/plot-trajectories.Rd b/man/plot-trajectories.Rd index 8f24acd0..349e5d4d 100644 --- a/man/plot-trajectories.Rd +++ b/man/plot-trajectories.Rd @@ -18,7 +18,7 @@ tfr.trajectories.plot(tfr.pred, country, pi = c(80, 95), main = NULL, lwd = c(2, 2, 2, 2, 2, 1), col=c('black', 'green', 'red', 'red', 'blue', '#00000020'), show.legend = TRUE, add = FALSE, uncertainty = FALSE, - thin = NULL, burnin = NULL, col_unc = "purple", \dots) + col_unc = "purple", \dots) tfr.trajectories.plot.all(tfr.pred, output.dir = file.path(getwd(), 'TFRtrajectories'), @@ -47,8 +47,6 @@ tfr.trajectories.table(tfr.pred, country, pi = c(80, 95), \item{main}{Main title for the plot(s). In \code{tfr.trajectories.plot.all} any occurence of the string \dQuote{XXX} is replaced by the country name.} \item{verbose}{Logical switching log messages on and off.} \item{uncertainty}{Logical: \code{TRUE} means uncertainty of past TFR should be plotted with the same level of uncertainty interval.} - \item{thin}{Thinning for past TFR estimation.} - \item{burnin}{Burn-in for past TFR estimation.} \item{col_unc}{Color of past TFR estimation uncertainty plot.} } \details{ diff --git a/man/run-mcmc.Rd b/man/run-mcmc.Rd index b7a89e34..224ea8d0 100644 --- a/man/run-mcmc.Rd +++ b/man/run-mcmc.Rd @@ -103,7 +103,7 @@ continue.tfr.mcmc(iter, chain.ids = NULL, \item{proposal_cov_gammas}{Proposal for the gamma covariance matrices for each country. It should be a list with two values: \code{values} and \code{country_codes}. The structure corresponds to the object returned by the function \code{\link{get.cov.gammas}}. By default the covariance matrices are obtained using \code{data(proposal_cov_gammas_cii)}. This argument overwrite the defaults for countries contained the argument.} \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. Only used if \code{uncertainty = TRUE}. See details below.} \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty = TRUE}. See details below.} - \item{source.col.name}{Specify the name for source column used for iso.unbiased variable. } + \item{source.col.name}{If \code{uncertainty} is \code{TRUE} this is a column name within the given covariates that determines the data source. It is used if \code{iso.unbiased} is given to identify the vital registration records.} \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.} \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. If it is \code{TRUE}, the package \pkg{\link[snowFT]{snowFT}} is required. In such a case further arguments can be passed, see description of \dots.} \item{nr.nodes}{Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes for running the simulation in parallel. By default it equals to the number of chains.} @@ -131,10 +131,12 @@ Optionally, \code{my.tfr.file} can contain a column called \code{last.observed} The package contains a dataset called \file{my_tfr_template} (in \file{extdata} directory) which is a template for user-specified \code{my.tfr.file}. -The parameter \code{uncertainty} is set to control whether past TFR is considered to be precise (\code{FALSE}), or need to be estimated from the raw data (\code{TRUE}). In the latter case, the raw TFR observations are taken either from the \code{\link{rawTFR}} dataset (default) or from a file given by the \code{my.tfr.raw.file} argument. The Bayesian hierarchical model considers the past TFR as unknown, estimates it and stores in \code{output.dir}. Details can be found in Liu and Raftery (2020). The \code{covariates}, \code{cont_covariates} arguments are for listing categorical and continuous features for estimating bias and standard deviation of past TFR observations. If a country is known to have unbiased vital registration (VR) records, one can include it in the \code{iso.unbiased} argument as those countries will estimate their past VR records to have 0 bias and 0.0161 standard deviation. +The parameter \code{uncertainty} is set to control whether past TFR is considered to be precise (\code{FALSE}), or need to be estimated from the raw data (\code{TRUE}). In the latter case, the raw TFR observations are taken either from the \code{\link{rawTFR}} dataset (default) or from a file given by the \code{my.tfr.raw.file} argument. The Bayesian hierarchical model considers the past TFR as unknown, estimates it and stores in \code{output.dir}. Details can be found in Liu and Raftery (2020). The \code{covariates}, \code{cont_covariates} arguments are for listing categorical and continuous features for estimating bias and standard deviation of past TFR observations. If a country is known to have unbiased vital registration (VR) records, one can include it in the \code{iso.unbiased} argument as those countries will estimate their past VR records to have 0 bias and 0.0161 standard deviation. The VR records are identified as having \dQuote{VR} in the column given by \code{source.col.name} (\dQuote{source} by default). If \code{annual=TRUE}, which implies using annual data for training the model, the parameter \code{ar.phase2} will be activated. If \code{ar.phase2} is set to \code{TRUE}, then the model of Phase II will change from \eqn{d_{c,t} = g_{c,t} + \epsilon_{c,t}} to \eqn{d_{c,t}-g_{c,t} = \phi(d_{c,t-1}-g_{c,t-1}) + \epsilon_{c,t}}. \eqn{\phi} is considered as country-independent and is called \code{rho_phase2}. +Furthermore, if \code{annual} is \code{TRUE} and \code{my.tfr.file} is given, the data in the file must be on annual basis and no matching with the WPP dataset takes place. + } \value{ diff --git a/man/run.tfr.mcmc.extra.Rd b/man/run.tfr.mcmc.extra.Rd index fb4e2f98..7974364a 100644 --- a/man/run.tfr.mcmc.extra.Rd +++ b/man/run.tfr.mcmc.extra.Rd @@ -14,6 +14,7 @@ run.tfr.mcmc.extra(sim.dir = file.path(getwd(), "bayesTFR.output"), parallel = FALSE, nr.nodes = NULL, my.locations.file = NULL, uncertainty = FALSE, my.tfr.raw.file = NULL, iso.unbiased = NULL, covariates = c('source', 'method'), cont_covariates = NULL, + source.col.name = "source", average.gammas.cov = TRUE, verbose = FALSE, verbose.iter = 100, \dots) } \arguments{ @@ -45,6 +46,8 @@ Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes fo \item{my.tfr.raw.file}{File name of the raw TFR used when uncertainty is \code{TRUE}. See details in \code{\link{run.tfr.mcmc}}.} \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. See details in \code{\link{run.tfr.mcmc}}.} \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty} is \code{TRUE}. See details in \code{\link{run.tfr.mcmc}}.} +\item{source.col.name}{If \code{uncertainty} is \code{TRUE} this is a column name within the given covariates that determines the data source. It is used if \code{iso.unbiased} is given to identify the vital registration records.} +\item{average.gammas.cov}{Set this to \code{FALSE} if the processed country has been included in the main simulation. In such a case the proposal gamma covariance matrix is taken from the \code{proposal_cov_gammas_cii} dataset. By default, the matrix is taken as an average from all countries.} \item{verbose}{Logical switching log messages on and off.} \item{verbose.iter}{Integer determining how often (in number of iterations) log messages are outputted during the estimation.} \item{\dots}{Additional parameters to be passed to the function \code{\link[snowFT]{performParallel}}, if \code{parallel} is \code{TRUE}.} diff --git a/man/tfr.predict.extra.Rd b/man/tfr.predict.extra.Rd index 2eb0a87f..3cf0fecc 100644 --- a/man/tfr.predict.extra.Rd +++ b/man/tfr.predict.extra.Rd @@ -11,7 +11,8 @@ Using the posterior parameter samples the function generates posterior trajecto \usage{ tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'), prediction.dir = sim.dir, countries = NULL, - save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE) + save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE, + all.countries.required = TRUE) } \arguments{ \item{sim.dir}{Directory with the MCMC simulation results.} @@ -19,7 +20,8 @@ tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'), \item{countries}{Vector of country codes for which the prediction should be made. If it is \code{NULL}, the prediction is run for all countries that are included in the MCMC object but for which no prediction was generated.} \item{save.as.ascii}{Either a number determining how many trajectories should be converted into an ascii file, or \dQuote{all} in which case all trajectories are converted. It should be set to 0, if no converions is desired. Note that the convertion is done on all countries.} \item{verbose}{Logical switching log messages on and off.} - \item{uncertainty}{Logical. If the MCMC steps has considered uncertainty of past TFR and \code{uncertainty=TRUE}, starting point of prediction trajectories will be the last estimated trajectories of TFR. Otherwise, it will use last observed TFR as starting point of prediction.} + \item{uncertainty}{Logical. If the MCMC steps considered uncertainty of past TFR and \code{uncertainty=TRUE}, starting point of prediction trajectories will be the last estimated trajectories of TFR. Otherwise, it will use the last observed TFR as starting point of prediction.} + \item{all.countries.required}{If \code{FALSE} it is not required that MCMCs of all countries are present.} } \details{ In order to use this function, a prediction object must exist, i.e. the function \code{\link{tfr.predict}} must have been processed prior to using this function. From 02015295f48038a379667c022006eb62bb28e236 Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Sun, 18 Apr 2021 17:28:42 -0400 Subject: [PATCH 14/48] update for iso.unbiased --- R/updated_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/updated_functions.R b/R/updated_functions.R index 6394293f..d02ab5fb 100644 --- a/R/updated_functions.R +++ b/R/updated_functions.R @@ -184,7 +184,7 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou if (source.col.name %in% covariates) { index.by.country.vr.estimate <- which((mcmc$meta$raw_data.original$country_code == ISO.code$code) & - (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR", 'Estimate'))) + (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR"))) mcmc$meta$raw_data.original$bias[index.by.country.vr.estimate] <- 0 mcmc$meta$raw_data.original$std[index.by.country.vr.estimate] <- 0.016 } From 082df8efcb924f7202eba2da202cfb03b44fe5cd Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Tue, 20 Apr 2021 13:20:07 -0700 Subject: [PATCH 15/48] fixed storing extra_iter --- R/run_mcmc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/run_mcmc.R b/R/run_mcmc.R index 6db57a08..9d271d5f 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -497,7 +497,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), if (!is.null(country.idx)) { meta[['extra']] <- c(meta[['extra']], country.idx) - meta[['extra_iter']][country.idx] <- mcmc.set$mcmc.list[[1]]$iter + meta[['extra_iter']][country.idx] <- if(is.null(iter)) mcmc.set$mcmc.list[[1]]$length else iter meta[['extra_thin']][country.idx] <- thin.extra meta[['extra_covariates']][[country.idx]] <- covariates meta[['extra_cont_covariates']][[country.idx]] <- cont_covariates From f85b45189370c918b002664e7a64be50550b1ed7 Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 6 Apr 2021 15:45:39 -0400 Subject: [PATCH 16/48] fix bug --- R/mcmc_ini.R | 3 +-- R/wpp_data.R | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index 5452b21b..358cafbd 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -483,8 +483,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100, dontsave=dontsave.pars, uncertainty=uncertainty ), class='bayesTFR.mcmc') - - ################################################################## + ################################################################## # distortions ################################################################## # note: the eps will always be NA outside (tau_c, lambda-1)!! diff --git a/R/wpp_data.R b/R/wpp_data.R index a6ee8d5f..ad5e580c 100644 --- a/R/wpp_data.R +++ b/R/wpp_data.R @@ -208,12 +208,12 @@ get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950 ncol.tfr <- length(num.columns) cols.starty <- as.integer(substr(names.tfr.data[num.columns], 1,4)) cols.endy <- as.integer(substr(names.tfr.data[num.columns], 6,9)) - years.to.interp.to <- (cols.starty[1]+3):cols.endy[length(cols.endy)] + years.to.interp.to <- (cols.starty[1]):cols.endy[length(cols.endy)] years.to.interp.from <- seq(cols.starty[1]+3, cols.endy[length(cols.endy)]-2, by = 5) tfr_data_new <- matrix(NA, nrow = nrow(tfr_data), ncol = length(years.to.interp.to)) for(row in 1:nrow(tfr_data_new)) tfr_data_new[row, ] <- approx(years.to.interp.from, tfr_data[row, num.columns], - xout = years.to.interp.to)$y + xout = years.to.interp.to, rule=2)$y tfr_data_new <- cbind(tfr_data[, -num.columns], tfr_data_new) colnames(tfr_data_new) <- c(colnames(tfr_data)[-num.columns], years.to.interp.to) tfr_data <- tfr_data_new From 1d28f9e00a72528efb29292941120958b676bbed Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 6 Apr 2021 17:31:35 -0400 Subject: [PATCH 17/48] fix bug for continuous variable --- R/run_mcmc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/run_mcmc.R b/R/run_mcmc.R index e0468c6e..8ff5c4bc 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -262,7 +262,7 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL gamma.ini=gamma.ini[chain.id], save.all.parameters=save.all.parameters, verbose=verbose, uncertainty=uncertainty, iso.unbiased=iso.unbiased, - covariates=covariates, cont_covariates=NULL) + covariates=covariates, cont_covariates=cont_covariates) if (uncertainty) { this.sv <- list() From 6842cdc8fdfeb3b564213a8e2adf718189ef9d7a Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 6 Apr 2021 20:29:18 -0400 Subject: [PATCH 18/48] allow using source.col.name to specify the source column for iso.unbiased --- R/mcmc_ini.R | 4 ++-- R/run_mcmc.R | 16 +++++++++------- R/updated_functions.R | 6 +++--- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index 358cafbd..d4ef9b27 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -402,7 +402,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100, d.ini=0.17, save.all.parameters=FALSE, verbose=FALSE, uncertainty=FALSE, iso.unbiased=NULL, - covariates=c('source', 'method'), cont_covariates=NULL) { + covariates=c('source', 'method'), cont_covariates=NULL, source.col.name = "source") { nr_countries <- mcmc.meta$nr_countries @@ -495,7 +495,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100, # mcmc <- get.obs.estimate.diff(mcmc) # mcmc <- estimate.bias.sd.raw(mcmc) mcmc <- get.obs.estimate.diff.original(mcmc) - mcmc <- estimate.bias.sd.original(mcmc, iso.unbiased, covariates, cont_covariates) + mcmc <- estimate.bias.sd.original(mcmc, iso.unbiased, covariates, cont_covariates, source.col.name) } # mcmc <- as.list(mcmc) diff --git a/R/run_mcmc.R b/R/run_mcmc.R index 8ff5c4bc..fb765260 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -26,6 +26,7 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(), phase3.starting.values=NULL, proposal_cov_gammas = NULL, # should be a list with elements 'values' and 'country_codes' iso.unbiased = NULL, covariates = c('source', 'method'), cont_covariates = NULL, + source.col.name="source", seed = NULL, parallel=FALSE, nr.nodes=nr.chains, save.all.parameters = FALSE, compression.type='None', auto.conf = list(max.loops=5, iter=62000, iter.incr=10000, nr.chains=3, thin=80, burnin=2000), @@ -196,10 +197,10 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(), chain.set <- bDem.performParallel(nr.nodes, 1:nr.chains, mcmc.run.chain, initfun=init.nodes, seed = seed, meta=bayesTFR.mcmc.meta, thin=thin, starting.values=starting.values, iter=iter, S.ini=S.ini, a.ini=a.ini, - b.ini=b.ini, sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini, - gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, - verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, - covariates=covariates, cont_covariates=cont_covariates, ...) + b.ini=b.ini, sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini, + gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, + verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, + covariates=covariates, cont_covariates=cont_covariates, source.col.name=source.col.name, ...) } else { # run chains sequentially chain.set <- list() for (chain in 1:nr.chains) { @@ -208,7 +209,7 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(), sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini, gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, - covariates=covariates, cont_covariates=cont_covariates) + covariates=covariates, cont_covariates=cont_covariates, source.col.name=source.col.name) } } names(chain.set) <- 1:nr.chains @@ -240,7 +241,7 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL S.ini, a.ini, b.ini, sigma0.ini, Triangle_c4.ini, const.ini, gamma.ini=1, save.all.parameters=FALSE, verbose=FALSE, verbose.iter=10, uncertainty=FALSE, iso.unbiased=NULL, - covariates=c('source', 'method'), cont_covariates=NULL) { + covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source") { cat('\n\nChain nr.', chain.id, '\n') if (verbose) { @@ -262,7 +263,8 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL gamma.ini=gamma.ini[chain.id], save.all.parameters=save.all.parameters, verbose=verbose, uncertainty=uncertainty, iso.unbiased=iso.unbiased, - covariates=covariates, cont_covariates=cont_covariates) + covariates=covariates, cont_covariates=cont_covariates, + source.col.name=source.col.name) if (uncertainty) { this.sv <- list() diff --git a/R/updated_functions.R b/R/updated_functions.R index d29ed303..a86ebd5d 100644 --- a/R/updated_functions.R +++ b/R/updated_functions.R @@ -128,7 +128,7 @@ estimate.bias.sd.raw <- function(mcmc) } estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('source', 'method'), - cont_covariates=NULL) + cont_covariates=NULL, source.col.name="source") { mcmc$meta$raw_data.original$bias <- NA mcmc$meta$raw_data.original$std <- NA @@ -180,10 +180,10 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou ## Optional if (ISO.code$code %in% iso.unbiased) { - if ("source" %in% covariates) + if (source.col.name %in% covariates) { index.by.country.vr.estimate <- which((mcmc$meta$raw_data.original$country_code == ISO.code$code) & - (mcmc$meta$raw_data.original[, "source"] %in% c("VR", 'Estimate'))) + (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR", 'Estimate'))) mcmc$meta$raw_data.original$bias[index.by.country.vr.estimate] <- 0 mcmc$meta$raw_data.original$std[index.by.country.vr.estimate] <- 0.016 } From 2803952a813915ecdeacb35e1872145999f1c73c Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 6 Apr 2021 21:02:55 -0400 Subject: [PATCH 19/48] update documents --- man/run-mcmc.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/run-mcmc.Rd b/man/run-mcmc.Rd index 971dd115..b7a89e34 100644 --- a/man/run-mcmc.Rd +++ b/man/run-mcmc.Rd @@ -36,7 +36,7 @@ run.tfr.mcmc(nr.chains = 3, iter = 62000, Triangle_c4.ini = NULL, const.ini = NULL, gamma.ini = 1, phase3.starting.values = NULL, proposal_cov_gammas = NULL, iso.unbiased = NULL, covariates = c("source", "method"), cont_covariates = NULL, - seed = NULL, parallel = FALSE, nr.nodes = nr.chains, + source.col.name="source", seed = NULL, parallel = FALSE, nr.nodes = nr.chains, save.all.parameters = FALSE, compression.type = 'None', auto.conf = list(max.loops = 5, iter = 62000, iter.incr = 10000, nr.chains = 3, thin = 80, burnin = 2000), @@ -103,6 +103,7 @@ continue.tfr.mcmc(iter, chain.ids = NULL, \item{proposal_cov_gammas}{Proposal for the gamma covariance matrices for each country. It should be a list with two values: \code{values} and \code{country_codes}. The structure corresponds to the object returned by the function \code{\link{get.cov.gammas}}. By default the covariance matrices are obtained using \code{data(proposal_cov_gammas_cii)}. This argument overwrite the defaults for countries contained the argument.} \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. Only used if \code{uncertainty = TRUE}. See details below.} \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty = TRUE}. See details below.} + \item{source.col.name}{Specify the name for source column used for iso.unbiased variable. } \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.} \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. If it is \code{TRUE}, the package \pkg{\link[snowFT]{snowFT}} is required. In such a case further arguments can be passed, see description of \dots.} \item{nr.nodes}{Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes for running the simulation in parallel. By default it equals to the number of chains.} From 030db386e181a3cb78ebfc69123589491e8b92a5 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 8 Apr 2021 12:22:12 -0700 Subject: [PATCH 20/48] version changed --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4cc625e8..a8f72f86 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.0-4 -Date: 2021-03-31 +Version: 7.0-4.9001 +Date: 2021-04-07 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova From c8fc43d3621295e8b966a68878347f09b6b5c3ab Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 9 Apr 2021 11:49:10 -0700 Subject: [PATCH 21/48] changes in plot function; ignore last.oberved if using uncertainty --- R/mcmc_ini.R | 4 ++-- R/plot_functions.R | 26 +++++++++++++++++++++----- R/wpp_data.R | 9 +++++---- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index d4ef9b27..fed1e57b 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -131,7 +131,6 @@ find.tau.lambda.and.DLcountries <- function(tfr_matrix, min.TFRlevel.for.start.a T_end_c <- lambda_c <-rep(T_end, nr_countries) T.suppl <- if(is.null(suppl.data$regions)) 0 else dim(suppl.data$tfr_matrix)[1] tau_c <- start_c <- rep(NA, nr_countries) - for (country in 1:nr_countries) { data <- get.observed.with.supplemental(country, tfr_matrix, suppl.data) has.suppl <- length(data) > T_end && !is.na(suppl.data$index.from.all.countries[country]) @@ -218,7 +217,8 @@ mcmc.meta.ini <- function(..., if(present.year-3 > wpp.year) warning("present.year is much larger then wpp.year. Make sure WPP data for present.year are available.") tfr.with.regions <- set_wpp_regions(start.year=start.year, present.year=present.year, wpp.year=wpp.year, my.tfr.file = my.tfr.file, my.locations.file=my.locations.file, - annual = mcmc.input$annual.simulation, verbose=verbose) + annual = mcmc.input$annual.simulation, ignore.last.observed = uncertainty, + verbose=verbose) meta <- do.meta.ini(mcmc.input, tfr.with.regions, proposal_cov_gammas=proposal_cov_gammas, verbose=verbose, uncertainty=uncertainty, my.tfr.raw.file=my.tfr.raw.file, ar.phase2=ar.phase2) diff --git a/R/plot_functions.R b/R/plot_functions.R index fd398f82..f4cff6c5 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -491,9 +491,13 @@ tfr.estimation.plot <- function(mcmc.list=NULL, country.code=NULL, ISO.code=NULL wpp.data <- get.observed.tfr(country.obj$index, mcmc.list$meta, "tfr_matrix_all") wpp.data <- data.frame(year=quantile_tbl$year, tfr = as.numeric(wpp.data)) - wpp.data <- wpp.data[wpp.data$year %% 5 == 3,] + #wpp.data <- wpp.data[wpp.data$year %% 5 == 3,] q <- q + ggplot2::geom_line(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"), size = 0.8) + ggplot2::theme_bw() + - ggplot2::geom_point(data = wpp.data, ggplot2::aes_string(x="year", y="tfr")) + ggplot2::geom_point(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"), size = 0.7) + + # re-draw the median + q <- q + ggplot2::geom_line(ggplot2::aes_string(x="year", y="Q50"), size = 0.8, color="red") + + ggplot2::geom_point(ggplot2::aes_string(x="year", y="Q50"), size = 1, color="red") if (save.image) { @@ -609,12 +613,20 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), } # plot median tfr.median <- get.median.from.prediction(tfr.pred, country$index, country$code) - lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3]) + if(uncertainty) { + unc.last.time <- which(tfr.object$tfr_quantile$year == x2[1]) + tfr.median[1] <- unlist(tfr.object$tfr_quantile[unc.last.time,])[length(pi)+1] + } + lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3]) # plot given CIs lty <- 2:(length(pi)+1) for (i in 1:length(pi)) { cqp <- get.traj.quantiles(tfr.pred, country$index, country$code, trajectories$trajectories, pi[i]) if (!is.null(cqp)) { + if(uncertainty) { + cqp[1,1] <- unlist(tfr.object$tfr_quantile[unc.last.time, ])[length(pi)+1-i] + cqp[2,1] <- unlist(tfr.object$tfr_quantile[unc.last.time, ])[length(pi)+1+i] + } lines(x2, cqp[1,], type='l', col=col[4], lty=lty[i], lwd=lwd[4]) lines(x2, cqp[2,], type='l', col=col[4], lty=lty[i], lwd=lwd[4]) } @@ -649,8 +661,12 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), lty <- c(lty, max(lty)+1) llty <- length(lty) up.low <- get.half.child.variant(median=tfr.median) - lines(x2, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) - lines(x2, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) + if(uncertainty) { + up.low <- up.low[,-1] + x2t <- x2[-1] + } else x2t <- x2 + lines(x2t, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) + lines(x2t, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) legend <- c(legend, '+/- 0.5 child') cols <- c(cols, col[5]) lwds <- c(lwds, lwd[5]) diff --git a/R/wpp_data.R b/R/wpp_data.R index ad5e580c..fdfa028e 100644 --- a/R/wpp_data.R +++ b/R/wpp_data.R @@ -1,7 +1,7 @@ # Read in the UN estimates set_wpp_regions <- function(start.year=1950, present.year=2010, wpp.year=2012, my.tfr.file=NULL, - my.locations.file=NULL, annual = FALSE, verbose=FALSE) { + my.locations.file=NULL, annual = FALSE, ignore.last.observed = FALSE, verbose=FALSE) { # outputs: # tfr_matrix_all, with each column one countries UN estimates # tfr_matrix with NAs after last observed data point @@ -32,7 +32,8 @@ set_wpp_regions <- function(start.year=1950, present.year=2010, wpp.year=2012, m start.year=start.year, present.year=present.year, annual = annual, verbose=verbose, - interpolate = annual && is.null(my.tfr.file)) + interpolate = annual && is.null(my.tfr.file), + ignore.last.observed = ignore.last.observed) if(!annual) { TFRmatrixsuppl.regions <- .get.suppl.matrix.and.regions(un.object, TFRmatrix.regions, loc_data, start.year, present.year) @@ -198,7 +199,7 @@ read.UNlocations <- function(data, wpp.year, package="bayesTFR", my.locations.fi get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950, present.year=2010, annual = FALSE, datacolnames=c(country.code='country_code', country.name='country', reg.name='reg_name', reg.code='reg_code', area.name='area_name', area.code='area_code'), - interpolate = FALSE) { + interpolate = FALSE, ignore.last.observed = FALSE) { tfr_data <- data nr_countries <- length(tfr_data[,1]) if (annual && interpolate) # interpolate 5-year data @@ -266,7 +267,7 @@ get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950 if(has.area.name) area_name <- c(area_name, paste(loc_data$area_name[loc_index])) if(has.area.code) area_code <- c(area_code, loc_data$area_code[loc_index]) # set NAs for entries that are not observed data (after last.observed) - tfr_matrix[(tfr_data[i,'last.observed'] < mid.years), i] <- NA + if(!ignore.last.observed) tfr_matrix[(tfr_data[i,'last.observed'] < mid.years), i] <- NA all.na[i] <- all(is.na(tfr_matrix[,i])) } country.codes <- tfr_data[[datacolnames['country.code']]] From d58ba6c3f45b984ce21fcefbb15585bb51bdfb7f Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 9 Apr 2021 13:14:29 -0700 Subject: [PATCH 22/48] avoid log(0) in mcmc sampling --- DESCRIPTION | 4 ++-- R/mcmc_sampling.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a8f72f86..e37bf371 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.0-4.9001 -Date: 2021-04-07 +Version: 7.0-4.9002 +Date: 2021-04-09 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova diff --git a/R/mcmc_sampling.R b/R/mcmc_sampling.R index 419f6565..b1ef2b1c 100755 --- a/R/mcmc_sampling.R +++ b/R/mcmc_sampling.R @@ -160,7 +160,7 @@ tfr.mcmc.sampling <- function(mcmc, thin=1, start.iter=2, verbose=FALSE, verbose delta4.squared <- mcenv$delta4^2 delta4.0.squared <- delta4_0^2 Triangle_c4_transformed <- - log((mcenv$Triangle_c4[id_DL] - Triangle_c4.low)/ + log(pmax(mcenv$Triangle_c4[id_DL] - Triangle_c4.low, 1e-10)/ (Triangle_c4.up-mcenv$Triangle_c4[id_DL])) mcenv$Triangle4 <- rnorm(1, mean = (sum(Triangle_c4_transformed)/(delta4.squared) + Triangle4_0/delta4.0.squared)/ From 0f0924cfc5ec4b4528da0ed3130de1e8d78f25f1 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Mon, 12 Apr 2021 09:45:52 -0700 Subject: [PATCH 23/48] fix issue with suppl data in extra function --- R/wpp_data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/wpp_data.R b/R/wpp_data.R index fdfa028e..4d24aba0 100644 --- a/R/wpp_data.R +++ b/R/wpp_data.R @@ -387,12 +387,12 @@ set.wpp.extra <- function(meta, countries=NULL, my.tfr.file=NULL, my.locations.f extra.wpp <- .extra.matrix.regions(data=data, countries=countries, meta=meta, my.locations.file=my.locations.file, verbose=verbose, annual=annual, uncertainty=uncertainty, interpolate = is.null(my.tfr.file) && annual) - if(!is.null(extra.wpp)) { + if(!is.null(extra.wpp) && !annual) { locations <- read.UNlocations(data$data, wpp.year=meta$wpp.year, my.locations.file=my.locations.file, verbose=verbose) suppl.wpp <- .get.suppl.matrix.and.regions(un.object, extra.wpp, locations$loc_data, meta$start.year, meta$present.year) extra.wpp$suppl.data <- .get.suppl.data.list(suppl.wpp) - } + } return(extra.wpp) } From ed13bee94335fe2cd0c42460f44f0d432470177d Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Mon, 12 Apr 2021 14:53:22 -0700 Subject: [PATCH 24/48] fixed source.col.name in run mcmc extra --- R/run_mcmc.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/run_mcmc.R b/R/run_mcmc.R index fb765260..c1c3c0fc 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -367,7 +367,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), thin=1, thin.extra=1, burnin=2000, parallel=FALSE, nr.nodes=NULL, my.locations.file = NULL, uncertainty=FALSE, my.tfr.raw.file=NULL, iso.unbiased=NULL, - covariates=c('source', 'method'), cont_covariates=NULL, + covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source", verbose=FALSE, verbose.iter=100, ...) { mcmc.set <- get.tfr.mcmc(sim.dir) meta.old <- mcmc.set$meta @@ -430,7 +430,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), if(uncertainty) { mcmc.set$mcmc.list[[chain]] <- get.obs.estimate.diff.original(mcmc.set$mcmc.list[[chain]]) - mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates) + mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, source.col.name=source.col.name) mcmc.set$mcmc.list[[chain]]$eps_unc <- list() if (is.null(mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra)) mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra <- list() for (country in countries) From 65df9c7468c01a068b62ee67ffe0a77532cbeb52 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Tue, 13 Apr 2021 00:29:24 -0700 Subject: [PATCH 25/48] fixes related to the size of the meta file --- R/get_outputs.R | 3 ++- R/predict_tfr.R | 1 + R/run_mcmc.R | 18 ++++++++++++------ R/updated_functions.R | 17 +++++++++++------ 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/R/get_outputs.R b/R/get_outputs.R index e9163970..2db57e92 100644 --- a/R/get_outputs.R +++ b/R/get_outputs.R @@ -143,7 +143,7 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, country.obj <- get.country.object(country, mcmc.set$meta, index=TRUE) if (country.obj$index %in% mcmc.set$meta$extra) { - nr.points.cs <- floor(mcmc.set$meta$extra_iter[country.obj$index] / mcmc.set$meta$extra_thin[country.obj$index]) + nr.points.cs <- floor((mcmc.set$meta$extra_iter[country.obj$index]-burnin) / mcmc.set$meta$extra_thin[country.obj$index]) nr.points.cs <- nr.points.cs * length(mcmc.set$mcmc.list) if (nr.points.cs >= nr.points) thin.index.cs <- round(seq(1, nr.points.cs, length.out = nr.points)) else @@ -167,6 +167,7 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, compression.type=thinned.mcmc$compression.type) } } + if (mcmc.set$meta$nr_countries > mcmc.set$meta$nr_countries_estimation) { .update.thinned.extras(mcmc.set, (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries, burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose) diff --git a/R/predict_tfr.R b/R/predict_tfr.R index 5dff9e84..e19ded9a 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -197,6 +197,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac floor((mcmc.set$meta$extra_iter[country] - burn) / mcmc.set$meta$extra_thin[country]) * length(mcmc.set$mcmc.list)) } } + stored.iter <- stored.iter.extra } mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin)) if(!is.null(nr.traj) && !is.null(thin)) { diff --git a/R/run_mcmc.R b/R/run_mcmc.R index c1c3c0fc..ba69fac9 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -430,7 +430,8 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), if(uncertainty) { mcmc.set$mcmc.list[[chain]] <- get.obs.estimate.diff.original(mcmc.set$mcmc.list[[chain]]) - mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, source.col.name=source.col.name) + mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, + source.col.name=source.col.name, countries = Eini$index) mcmc.set$mcmc.list[[chain]]$eps_unc <- list() if (is.null(mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra)) mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra <- list() for (country in countries) @@ -477,9 +478,9 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), } if(uncertainty) { - if (!dir.exists(file.path(meta$output.dir, 'extra.meta'))) dir.create(file.path(meta$output.dir, 'extra.meta')) - if (!dir.exists(file.path(meta$output.dir, 'extra.meta', countries[1]))) dir.create(file.path(meta$output.dir, 'extra.meta', countries[1])) - store.bayesTFR.meta.object(meta, file.path(meta$output.dir, 'extra.meta', countries[1])) + #if (!dir.exists(file.path(meta$output.dir, 'extra.meta'))) dir.create(file.path(meta$output.dir, 'extra.meta')) + #if (!dir.exists(file.path(meta$output.dir, 'extra.meta', countries[1]))) dir.create(file.path(meta$output.dir, 'extra.meta', countries[1])) + #store.bayesTFR.meta.object(meta, file.path(meta$output.dir, 'extra.meta', countries[1])) for (name in c("country.ind.by.year", "ind.by.year", "id_phase1_by_year", "id_phase2_by_year", "id_phase3_by_year", "id_phase3", "nr.countries")) { @@ -503,8 +504,13 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), } } meta[['extra']] <- sort(unique(meta[['extra']])) - } - store.bayesTFR.meta.object(meta, file.path(meta$output.dir)) + # unload parent for storing purposes + parent <- meta$parent + meta[["parent"]] <- NULL + } else parent <- NULL + + store.bayesTFR.meta.object(meta, meta$output.dir) + meta[["parent"]] <- parent mcmc.set$meta <- meta cat('\n') invisible(mcmc.set) diff --git a/R/updated_functions.R b/R/updated_functions.R index a86ebd5d..6394293f 100644 --- a/R/updated_functions.R +++ b/R/updated_functions.R @@ -128,7 +128,7 @@ estimate.bias.sd.raw <- function(mcmc) } estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('source', 'method'), - cont_covariates=NULL, source.col.name="source") + cont_covariates=NULL, source.col.name="source", countries = NULL) { mcmc$meta$raw_data.original$bias <- NA mcmc$meta$raw_data.original$std <- NA @@ -137,7 +137,8 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou mcmc$meta$bias_model <- list() mcmc$meta$std_model <- list() } - for(country in 1:mcmc$meta$nr_countries) + if(is.null(countries)) countries <- 1:mcmc$meta$nr_countries + for(country in countries) { ISO.code <- get.country.object(country, meta=mcmc$meta, index=TRUE) if (is.na(ISO.code$code)) next @@ -168,10 +169,10 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou } } - m1 <- lm(as.formula(paste0('diff ~ ', regressor))) + m1 <- lm(as.formula(paste0('diff ~ ', regressor)), model = FALSE) bias <- predict(m1) abs.residual <- abs(residuals(m1)) - m2 <- lm(as.formula(paste0('abs.residual ~ ', regressor))) + m2 <- lm(as.formula(paste0('abs.residual ~ ', regressor)), model = FALSE) std <- predict(m2) * sqrt(pi/2) std[std < 1e-6] <- 0.1 std[std < (abs(bias) / 2)] <- abs(bias[std < (abs(bias) / 2)]) @@ -192,11 +193,15 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou warning("Covariate source not found. iso.unbiased is not used.\n") } } + # to save space + attr(m1$terms, ".Environment") <- NULL + attr(m2$terms, ".Environment") <- NULL + m1$fitted.values <- NULL + m2$fitted.values <- NULL + mcmc$meta$bias_model[[country]] <- m1 mcmc$meta$std_model[[country]] <- m2 } - - return(mcmc) } From 950691d107458f29257e9e7bbe3dbc4135a9a56f Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 14 Apr 2021 16:31:47 -0700 Subject: [PATCH 26/48] option of storing extra country to different directory; removed burnin and thin from tfr.trajectories.plot --- R/get_outputs.R | 13 ++++++++----- R/plot_functions.R | 10 +++------- R/predict_tfr.R | 14 +++++++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/R/get_outputs.R b/R/get_outputs.R index 2db57e92..1a1c7606 100644 --- a/R/get_outputs.R +++ b/R/get_outputs.R @@ -86,7 +86,8 @@ get.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0) { return(NULL) } -create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, verbose=TRUE, uncertainty=FALSE) { +create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, verbose=TRUE, uncertainty=FALSE, + update.with.countries = NULL) { #Return a thinned mcmc.set object with burnin removed and all chanins collapsed into one mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin)) thin <- max(c(thin, mcthin)) @@ -139,7 +140,8 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, if(verbose) cat('done.\nStoring country-specific parameters ...') par.names.cs <- tfr.parameter.names.cs(trans=FALSE) if (uncertainty) par.names.cs <- c(par.names.cs, 'tfr') - for (country in mcmc.set$meta$id_DL){ + DLcntries <- if(!is.null(update.with.countries)) update.with.countries[update.with.countries %in% mcmc.set$meta$id_DL] else mcmc.set$meta$id_DL + for (country in DLcntries){ country.obj <- get.country.object(country, mcmc.set$meta, index=TRUE) if (country.obj$index %in% mcmc.set$meta$extra) { @@ -167,10 +169,11 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, compression.type=thinned.mcmc$compression.type) } } - if (mcmc.set$meta$nr_countries > mcmc.set$meta$nr_countries_estimation) { - .update.thinned.extras(mcmc.set, (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries, - burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose) + cindex <- if(!is.null(update.with.countries)) update.with.countries[update.with.countries > mcmc.set$meta$nr_countries_estimation] else + (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries + if(length(cindex) > 0) + .update.thinned.extras(mcmc.set, cindex, burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose) } invisible(structure(list(meta=meta, mcmc.list=list(thinned.mcmc)), class='bayesTFR.mcmc.set')) } diff --git a/R/plot_functions.R b/R/plot_functions.R index f4cff6c5..b40c24c7 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -516,7 +516,7 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), xlim=NULL, ylim=NULL, type='b', xlab='Year', ylab='TFR', main=NULL, lwd=c(2,2,2,2,2,1), col=c('black', 'green', 'red', 'red', 'blue', '#00000020'), - show.legend=TRUE, add=FALSE, uncertainty=FALSE, thin=NULL, burnin=NULL, col_unc="purple", ... + show.legend=TRUE, add=FALSE, uncertainty=FALSE, col_unc="purple", ... ) { # lwd/col is a vector of 6 line widths/colors for: # 1. observed data, 2. imputed missing data, 3. median, 4. quantiles, 5. +- 0.5 child, 6. trajectories @@ -529,12 +529,8 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), } if (uncertainty) { - sim.dir <- file.path(tfr.pred$output.directory, "..") - if (missing(thin) || is.null(thin)) thin <- tfr.pred$thin - if (missing(burnin) || is.null(burnin)) burnin <- tfr.pred$burnin - tfr.object <- get.tfr.estimation(mcmc.list=NULL, country.code=country, ISO.code=NULL, sim.dir=sim.dir, - burnin=burnin, thin=thin, probs=sort(c((1-pi/100)/2, 0.5, pi/100 + (1-pi/100)/2))) - + tfr.object <- get.tfr.estimation(mcmc.list=tfr.pred$mcmc.set, country.code=country, ISO.code=NULL, + probs=sort(c((1-pi/100)/2, 0.5, pi/100 + (1-pi/100)/2))) } col <- .match.colors.with.default(col, c('black', 'green', 'red', 'red', 'blue', '#00000020')) country.obj <- get.country.object(country, tfr.pred$mcmc.set$meta) diff --git a/R/predict_tfr.R b/R/predict_tfr.R index e19ded9a..d9a0cac9 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -88,7 +88,8 @@ get.burnin.nrtraj.from.diagnostics <- function(sim.dir, ...) { } tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), - prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE) { + prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE, + all.countries.required = TRUE) { # Run prediction for given countries/regions (as codes). If they are not given it will be set to countries # for which there are MCMC results but no prediction. # It is to be used after running run.tfr.mcmc.extra @@ -121,7 +122,8 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), use.correlation=pred$use.correlation, mu=pred$mu, rho=pred$rho, sigmaAR1=pred$sigmaAR1, min.tfr=pred$min.tfr, countries=countries.idx, save.as.ascii=0, output.dir=prediction.dir, force.creating.thinned.mcmc=TRUE, - write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, uncertainty=uncertainty) + write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, + uncertainty=uncertainty, all.countries.required = all.countries.required) # merge the two predictions code.other.countries <- setdiff(pred$mcmc.set$meta$regions$country_code, @@ -152,7 +154,8 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), if (uncertainty) countries.save <- countries do.convert.trajectories(pred=bayesTFR.prediction, n=save.as.ascii, output.dir=pred$output.dir, countries = countries.save, verbose=verbose) - tfr.write.projection.summary.and.parameters(pred=bayesTFR.prediction, output.dir=pred$output.dir) + if(all.countries.required) + tfr.write.projection.summary.and.parameters(pred=bayesTFR.prediction, output.dir=pred$output.dir) cat('\nPrediction stored into', pred$output.dir, '\n') invisible(bayesTFR.prediction) @@ -168,7 +171,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac save.as.ascii=0, output.dir = NULL, write.summary.files=TRUE, is.mcmc.set.thinned=FALSE, force.creating.thinned.mcmc=FALSE, write.trajectories=TRUE, - verbose=verbose, uncertainty=FALSE){ + verbose=verbose, uncertainty=FALSE, all.countries.required = TRUE){ # if 'countries' is given, it is an index # sigmaAR1 can be a vector. The last element will be repeated up to nr.projections meta <- mcmc.set$meta @@ -238,7 +241,8 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac unblock.gtk('bDem.TFRpred') load.mcmc.set <- if(has.thinned.mcmc && !force.creating.thinned.mcmc) thinned.mcmc else create.thinned.tfr.mcmc(mcmc.set, thin=thin, burnin=burnin, - output.dir=output.dir, verbose=verbose, uncertainty=uncertainty) + output.dir=output.dir, verbose=verbose, uncertainty=uncertainty, + update.with.countries = if(all.countries.required) NULL else countries) nr_simu <- load.mcmc.set$mcmc.list[[1]]$finished.iter if (verbose) cat('Load variance parameters.\n') From 5e1c97ead196682a06e1bd3e1874d699d9f2ba7c Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 15 Apr 2021 10:25:46 -0700 Subject: [PATCH 27/48] added option of averaging gamma cov for extra countries --- R/mcmc_ini.R | 5 +++-- R/run_mcmc.R | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index fed1e57b..af867c7e 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -504,7 +504,8 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100, mcmc.meta.ini.extra <- function(mcmc.set, countries=NULL, my.tfr.file = NULL, my.locations.file=NULL, burnin = 200, verbose=FALSE, uncertainty=FALSE, - my.tfr.raw.file=ifelse(uncertainty, file.path(find.package("bayesTFR"), "data", "rawTFR.csv"), NULL)) { + my.tfr.raw.file=ifelse(uncertainty, file.path(find.package("bayesTFR"), "data", "rawTFR.csv"), NULL), + average.gammas.cov = TRUE) { update.regions <- function(reg, ereg, id.replace, is.new, is.old) { nreg <- list() for (name in c('code', 'area_code', 'country_code')) { @@ -536,7 +537,7 @@ mcmc.meta.ini.extra <- function(mcmc.set, countries=NULL, my.tfr.file = NULL, has.mock.suppl <- TRUE } Emeta <- do.meta.ini(meta, tfr.with.regions=tfr.with.regions, - use.average.gammas.cov=TRUE, burnin=burnin, + use.average.gammas.cov=average.gammas.cov, burnin=burnin, verbose=verbose, uncertainty=uncertainty, my.tfr.raw.file = my.tfr.raw.file) # join the new meta with the existing one diff --git a/R/run_mcmc.R b/R/run_mcmc.R index ba69fac9..6db57a08 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -368,7 +368,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), my.locations.file = NULL, uncertainty=FALSE, my.tfr.raw.file=NULL, iso.unbiased=NULL, covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source", - verbose=FALSE, verbose.iter=100, ...) { + average.gammas.cov = TRUE, verbose=FALSE, verbose.iter=100, ...) { mcmc.set <- get.tfr.mcmc(sim.dir) meta.old <- mcmc.set$meta @@ -380,7 +380,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), Eini <- mcmc.meta.ini.extra(mcmc.set, countries=countries, my.tfr.file=my.tfr.file, my.locations.file=my.locations.file, burnin=burnin, verbose=verbose, uncertainty=uncertainty, - my.tfr.raw.file=my.tfr.raw.file) + my.tfr.raw.file=my.tfr.raw.file, average.gammas.cov = average.gammas.cov) if(length(Eini$index) <= 0) { cat('\nNothing to be done.\n') return(invisible(mcmc.set)) From 04b83d1fc6c97eb621c726a1f4151e7373046dc1 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 15 Apr 2021 11:43:40 -0700 Subject: [PATCH 28/48] help files updated --- DESCRIPTION | 2 +- man/get.thinned.tfr.mcmc.Rd | 10 ++++++---- man/plot-trajectories.Rd | 4 +--- man/run-mcmc.Rd | 6 ++++-- man/run.tfr.mcmc.extra.Rd | 3 +++ man/tfr.predict.extra.Rd | 6 ++++-- 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e37bf371..1f96e362 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: bayesTFR -Version: 7.0-4.9002 +Version: 7.0-4.9003 Date: 2021-04-09 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) diff --git a/man/get.thinned.tfr.mcmc.Rd b/man/get.thinned.tfr.mcmc.Rd index 6e517145..4bc91bc2 100644 --- a/man/get.thinned.tfr.mcmc.Rd +++ b/man/get.thinned.tfr.mcmc.Rd @@ -8,14 +8,15 @@ Creating and Accessing Thinned MCMCs } \description{ The function \code{get.thinned.tfr.mcmc} accesses -a thinned and burned version of the given Phase II MCMC set. \code{create.thinned.tfr.mcmc} creates such a set. +a thinned and burned version of the given Phase II MCMC set. \code{create.thinned.tfr.mcmc} creates or updates such a set. } \usage{ get.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0) create.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0, - output.dir = NULL, verbose = TRUE, uncertainty = FALSE) + output.dir = NULL, verbose = TRUE, uncertainty = FALSE, + update.with.countries = NULL) } \arguments{ \item{mcmc.set}{Object of class \code{\link{bayesTFR.mcmc.set}} of Phase II.} @@ -23,13 +24,14 @@ create.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0, \item{output.dir}{Output directory. It is only used if the output goes to a non-standard directory.} \item{verbose}{Logical switching log messages on and off.} \item{uncertainty}{If users want to save the thinned estimated TFR in the new mcmc object, this parameter should be set \code{TRUE}.} + \item{update.with.countries}{If an existing set is to be updated, this should be a vector of country indices for the update.} } \details{ -The function \code{create.thinned.tfr.mcmc} is called from \code{\link{tfr.predict}} and thus, the resulting object contains exactly the same MCMCs used for generating projections. In addition, it can be also called from \code{\link{tfr.diagnose}} if desired, so that the projection process can re-use such a set that lead to a convergence. +The function \code{create.thinned.tfr.mcmc} is called from \code{\link{tfr.predict}} and thus, the resulting object contains exactly the same MCMCs used for generating projections. In addition, it can be also called from \code{\link{tfr.diagnose}} if desired, so that the projection process can re-use such a set that leads to a convergence. The thinning is done as follows: The given \code{burnin} is removed from the beginning of each chain in the original MCMC set. Then each chain is thinned by \code{thin} using equal spacing and all chains are collapsed into one single chain per parameter. They are stored in the main simulation directory under the name \file{thinned_mcmc_\emph{t}_\emph{b}} where \emph{t} is the value of \code{thin} and \emph{b} the value of \code{burnin}. -If \code{uncertainty=TRUE}, the estimated TFR is thinned and saved either. +If \code{uncertainty=TRUE}, the estimated TFR is thinned and saved as well. } \value{ Both functions return an object of class \code{\link{bayesTFR.mcmc.set}}. \code{get.thinned.tfr.mcmc} returns \code{NULL} if such object does not exist. diff --git a/man/plot-trajectories.Rd b/man/plot-trajectories.Rd index 8f24acd0..349e5d4d 100644 --- a/man/plot-trajectories.Rd +++ b/man/plot-trajectories.Rd @@ -18,7 +18,7 @@ tfr.trajectories.plot(tfr.pred, country, pi = c(80, 95), main = NULL, lwd = c(2, 2, 2, 2, 2, 1), col=c('black', 'green', 'red', 'red', 'blue', '#00000020'), show.legend = TRUE, add = FALSE, uncertainty = FALSE, - thin = NULL, burnin = NULL, col_unc = "purple", \dots) + col_unc = "purple", \dots) tfr.trajectories.plot.all(tfr.pred, output.dir = file.path(getwd(), 'TFRtrajectories'), @@ -47,8 +47,6 @@ tfr.trajectories.table(tfr.pred, country, pi = c(80, 95), \item{main}{Main title for the plot(s). In \code{tfr.trajectories.plot.all} any occurence of the string \dQuote{XXX} is replaced by the country name.} \item{verbose}{Logical switching log messages on and off.} \item{uncertainty}{Logical: \code{TRUE} means uncertainty of past TFR should be plotted with the same level of uncertainty interval.} - \item{thin}{Thinning for past TFR estimation.} - \item{burnin}{Burn-in for past TFR estimation.} \item{col_unc}{Color of past TFR estimation uncertainty plot.} } \details{ diff --git a/man/run-mcmc.Rd b/man/run-mcmc.Rd index b7a89e34..224ea8d0 100644 --- a/man/run-mcmc.Rd +++ b/man/run-mcmc.Rd @@ -103,7 +103,7 @@ continue.tfr.mcmc(iter, chain.ids = NULL, \item{proposal_cov_gammas}{Proposal for the gamma covariance matrices for each country. It should be a list with two values: \code{values} and \code{country_codes}. The structure corresponds to the object returned by the function \code{\link{get.cov.gammas}}. By default the covariance matrices are obtained using \code{data(proposal_cov_gammas_cii)}. This argument overwrite the defaults for countries contained the argument.} \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. Only used if \code{uncertainty = TRUE}. See details below.} \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty = TRUE}. See details below.} - \item{source.col.name}{Specify the name for source column used for iso.unbiased variable. } + \item{source.col.name}{If \code{uncertainty} is \code{TRUE} this is a column name within the given covariates that determines the data source. It is used if \code{iso.unbiased} is given to identify the vital registration records.} \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.} \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. If it is \code{TRUE}, the package \pkg{\link[snowFT]{snowFT}} is required. In such a case further arguments can be passed, see description of \dots.} \item{nr.nodes}{Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes for running the simulation in parallel. By default it equals to the number of chains.} @@ -131,10 +131,12 @@ Optionally, \code{my.tfr.file} can contain a column called \code{last.observed} The package contains a dataset called \file{my_tfr_template} (in \file{extdata} directory) which is a template for user-specified \code{my.tfr.file}. -The parameter \code{uncertainty} is set to control whether past TFR is considered to be precise (\code{FALSE}), or need to be estimated from the raw data (\code{TRUE}). In the latter case, the raw TFR observations are taken either from the \code{\link{rawTFR}} dataset (default) or from a file given by the \code{my.tfr.raw.file} argument. The Bayesian hierarchical model considers the past TFR as unknown, estimates it and stores in \code{output.dir}. Details can be found in Liu and Raftery (2020). The \code{covariates}, \code{cont_covariates} arguments are for listing categorical and continuous features for estimating bias and standard deviation of past TFR observations. If a country is known to have unbiased vital registration (VR) records, one can include it in the \code{iso.unbiased} argument as those countries will estimate their past VR records to have 0 bias and 0.0161 standard deviation. +The parameter \code{uncertainty} is set to control whether past TFR is considered to be precise (\code{FALSE}), or need to be estimated from the raw data (\code{TRUE}). In the latter case, the raw TFR observations are taken either from the \code{\link{rawTFR}} dataset (default) or from a file given by the \code{my.tfr.raw.file} argument. The Bayesian hierarchical model considers the past TFR as unknown, estimates it and stores in \code{output.dir}. Details can be found in Liu and Raftery (2020). The \code{covariates}, \code{cont_covariates} arguments are for listing categorical and continuous features for estimating bias and standard deviation of past TFR observations. If a country is known to have unbiased vital registration (VR) records, one can include it in the \code{iso.unbiased} argument as those countries will estimate their past VR records to have 0 bias and 0.0161 standard deviation. The VR records are identified as having \dQuote{VR} in the column given by \code{source.col.name} (\dQuote{source} by default). If \code{annual=TRUE}, which implies using annual data for training the model, the parameter \code{ar.phase2} will be activated. If \code{ar.phase2} is set to \code{TRUE}, then the model of Phase II will change from \eqn{d_{c,t} = g_{c,t} + \epsilon_{c,t}} to \eqn{d_{c,t}-g_{c,t} = \phi(d_{c,t-1}-g_{c,t-1}) + \epsilon_{c,t}}. \eqn{\phi} is considered as country-independent and is called \code{rho_phase2}. +Furthermore, if \code{annual} is \code{TRUE} and \code{my.tfr.file} is given, the data in the file must be on annual basis and no matching with the WPP dataset takes place. + } \value{ diff --git a/man/run.tfr.mcmc.extra.Rd b/man/run.tfr.mcmc.extra.Rd index fb4e2f98..7974364a 100644 --- a/man/run.tfr.mcmc.extra.Rd +++ b/man/run.tfr.mcmc.extra.Rd @@ -14,6 +14,7 @@ run.tfr.mcmc.extra(sim.dir = file.path(getwd(), "bayesTFR.output"), parallel = FALSE, nr.nodes = NULL, my.locations.file = NULL, uncertainty = FALSE, my.tfr.raw.file = NULL, iso.unbiased = NULL, covariates = c('source', 'method'), cont_covariates = NULL, + source.col.name = "source", average.gammas.cov = TRUE, verbose = FALSE, verbose.iter = 100, \dots) } \arguments{ @@ -45,6 +46,8 @@ Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes fo \item{my.tfr.raw.file}{File name of the raw TFR used when uncertainty is \code{TRUE}. See details in \code{\link{run.tfr.mcmc}}.} \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. See details in \code{\link{run.tfr.mcmc}}.} \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty} is \code{TRUE}. See details in \code{\link{run.tfr.mcmc}}.} +\item{source.col.name}{If \code{uncertainty} is \code{TRUE} this is a column name within the given covariates that determines the data source. It is used if \code{iso.unbiased} is given to identify the vital registration records.} +\item{average.gammas.cov}{Set this to \code{FALSE} if the processed country has been included in the main simulation. In such a case the proposal gamma covariance matrix is taken from the \code{proposal_cov_gammas_cii} dataset. By default, the matrix is taken as an average from all countries.} \item{verbose}{Logical switching log messages on and off.} \item{verbose.iter}{Integer determining how often (in number of iterations) log messages are outputted during the estimation.} \item{\dots}{Additional parameters to be passed to the function \code{\link[snowFT]{performParallel}}, if \code{parallel} is \code{TRUE}.} diff --git a/man/tfr.predict.extra.Rd b/man/tfr.predict.extra.Rd index 2eb0a87f..3cf0fecc 100644 --- a/man/tfr.predict.extra.Rd +++ b/man/tfr.predict.extra.Rd @@ -11,7 +11,8 @@ Using the posterior parameter samples the function generates posterior trajecto \usage{ tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'), prediction.dir = sim.dir, countries = NULL, - save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE) + save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE, + all.countries.required = TRUE) } \arguments{ \item{sim.dir}{Directory with the MCMC simulation results.} @@ -19,7 +20,8 @@ tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'), \item{countries}{Vector of country codes for which the prediction should be made. If it is \code{NULL}, the prediction is run for all countries that are included in the MCMC object but for which no prediction was generated.} \item{save.as.ascii}{Either a number determining how many trajectories should be converted into an ascii file, or \dQuote{all} in which case all trajectories are converted. It should be set to 0, if no converions is desired. Note that the convertion is done on all countries.} \item{verbose}{Logical switching log messages on and off.} - \item{uncertainty}{Logical. If the MCMC steps has considered uncertainty of past TFR and \code{uncertainty=TRUE}, starting point of prediction trajectories will be the last estimated trajectories of TFR. Otherwise, it will use last observed TFR as starting point of prediction.} + \item{uncertainty}{Logical. If the MCMC steps considered uncertainty of past TFR and \code{uncertainty=TRUE}, starting point of prediction trajectories will be the last estimated trajectories of TFR. Otherwise, it will use the last observed TFR as starting point of prediction.} + \item{all.countries.required}{If \code{FALSE} it is not required that MCMCs of all countries are present.} } \details{ In order to use this function, a prediction object must exist, i.e. the function \code{\link{tfr.predict}} must have been processed prior to using this function. From 83e81d1b753d6e67432823e4716e53b26ed4481a Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Sun, 18 Apr 2021 17:28:42 -0400 Subject: [PATCH 29/48] update for iso.unbiased --- R/updated_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/updated_functions.R b/R/updated_functions.R index 6394293f..d02ab5fb 100644 --- a/R/updated_functions.R +++ b/R/updated_functions.R @@ -184,7 +184,7 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou if (source.col.name %in% covariates) { index.by.country.vr.estimate <- which((mcmc$meta$raw_data.original$country_code == ISO.code$code) & - (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR", 'Estimate'))) + (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR"))) mcmc$meta$raw_data.original$bias[index.by.country.vr.estimate] <- 0 mcmc$meta$raw_data.original$std[index.by.country.vr.estimate] <- 0.016 } From a7f8eb550000206a7e89c39954a291e7c99d8ce5 Mon Sep 17 00:00:00 2001 From: Peiran Liu Date: Tue, 22 Jun 2021 21:10:02 -0400 Subject: [PATCH 30/48] fix bug for San Marino run --- R/run_mcmc.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/run_mcmc.R b/R/run_mcmc.R index 6db57a08..db34993e 100755 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -385,10 +385,13 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), cat('\nNothing to be done.\n') return(invisible(mcmc.set)) } + #TODO: we should consider in the future that in the original run, no MCMC3 steps are conducted, + #but when running for specific countries we have that. This could possibly break the simulation. if (uncertainty && has.tfr3.mcmc(sim.dir)) { mcmc3.set <- get.tfr3.mcmc(sim.dir) - Eini$meta[['id_phase3']] <- intersect(mcmc3.set$meta$id_phase3, which(mcmc.set$meta$regions$country_code %in% countries)) + Eini$meta[['id_phase3']] <- intersect(which(Eini$meta$lambda_c < Eini$meta$T_end_c), + which(Eini$meta$regions$country_code %in% countries)) for (par.name in tfr3.parameter.names()) { for (suffix in c('prior.range', 'ini', 'ini.range')) @@ -402,7 +405,6 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), meta <- Eini$meta chain.ids <- names(mcmc.set$mcmc.list) mcthin <- 1 - if(verbose) cat('\n') for (chain in chain.ids) { # update meta in each chain if(verbose) cat('Updating meta in chain', chain, '\n') @@ -437,7 +439,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), for (country in countries) { df_country <- mcmc.set$mcmc.list[[chain]]$meta$raw_data.original - country.obj <- get.country.object(country, meta.old) + country.obj <- get.country.object(country, Eini$meta) if (!is.null(country.obj$index)) { df_country <- df_country[df_country$country_code == country,] From e4314f681c06b14a63823d86a78863faefed88ae Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 29 Oct 2021 14:53:17 -0700 Subject: [PATCH 31/48] resolved conflict --- R/predict_tfr.R | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/R/predict_tfr.R b/R/predict_tfr.R index c62b0781..fe4e7f35 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -189,24 +189,8 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac burn <- if(is.mcmc.set.thinned) 0 else burnin total.iter <- get.total.iterations(mcmc.set$mcmc.list, burn) stored.iter <- get.stored.mcmc.length(mcmc.set$mcmc.list, burn) -<<<<<<< HEAD - if (!is.null(mcmc.set$meta$extra) && !is.null(countries)) - { - stored.iter.extra <- stored.iter - for (country in mcmc.set$meta$extra) - { - if (country %in% countries) - { - stored.iter.extra <- min(stored.iter.extra, - floor((mcmc.set$meta$extra_iter[country] - burn) / mcmc.set$meta$extra_thin[country]) * length(mcmc.set$mcmc.list)) - } - } - stored.iter <- stored.iter.extra - } -======= stored.iter <- min(stored.iter, get.stored.mcmc.length.extra(mcmc.set$meta, countries, nr.chains =length(mcmc.set$mcmc.list), burnin = burn), na.rm = TRUE) ->>>>>>> master mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin)) if(!is.null(nr.traj) && !is.null(thin)) { warning('Both nr.traj and thin are given. Argument thin will be ignored.') From 7988e361316ef4e1bdc6b8df2eba9978ab8cfa79 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Mon, 19 Feb 2024 18:12:34 -0800 Subject: [PATCH 32/48] rgdal changes; bug fix in ar.phase2 --- ChangeLog | 8 +++++++ DESCRIPTION | 6 ++--- R/plot_functions.R | 11 +++++---- R/predict_tfr.R | 7 ++++-- man/convert.trajectories.Rd | 13 ++++++----- man/write.projection.summary.Rd | 40 ++++++++++++++++----------------- 6 files changed, 48 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index 800fc593..e7f8e12f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +7.4-2.9001 (2024-02-19) +----- +Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng). + +7.4-1/2 (2023-10-17) +----- +Removed rgdal from suggested packages. + 7.4-0 (2023-09-14) ----- Annual subnational projections are now possible, controlled by the argument "annual" diff --git a/DESCRIPTION b/DESCRIPTION index b0bb03fb..1d9becfe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,12 @@ Package: bayesTFR -Version: 7.4-0 -Date: 2023-09-14 +Version: 7.4-2.9001 +Date: 2024-02-19 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova Depends: R (>= 3.5.0) Imports: mvtnorm, MASS, coda, graphics, grDevices, stats, utils, wpp2019, data.table, lifecycle -Suggests: rworldmap, snowFT, googleVis, rgdal, sp, wpp2017, wpp2015, wpp2012, wpp2010, ggplot2, sf, spData, scales +Suggests: rworldmap, snowFT, googleVis, sp, wpp2017, wpp2015, wpp2012, wpp2010, ggplot2, sf, spData, scales Description: Making probabilistic projections of total fertility rate for all countries of the world, using a Bayesian hierarchical model . Subnational probabilistic projections are also supported . License: GPL-3 | file LICENSE URL: https://bayespop.csss.washington.edu diff --git a/R/plot_functions.R b/R/plot_functions.R index ddf2d1e3..f6e74785 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -1210,11 +1210,10 @@ tfr.map <- function(pred, quantile=0.5, year=NULL, par.name=NULL, adjusted=FALSE tfr <- data.frame(cbind(un=data.period$country.codes, tfr=data)) map <- rworldmap::getMap(resolution=resolution) #first get countries excluding Antarctica which crashes spTransform (says the help page for joinCountryData2Map) - sPDF <- map[-which(map$ADMIN=='Antarctica'), ] - if(requireNamespace("rgdal", quietly=TRUE)) { - #transform map to the Robinson projection - sPDF <- sp::spTransform(sPDF, CRSobj=sp::CRS("+proj=robin +ellps=WGS84")) - } + sPDF <- map[-which(map$ADMIN=='Antarctica'), ] + #transform map to the Robinson projection + sPDF <- sp::spTransform(sPDF, CRSobj = sp::CRS("+proj=robin +ellps=WGS84")) + ## recode missing UN codes and UN member states sPDF$UN <- sPDF$ISO_N3 ## N. Cyprus -> assign to Cyprus @@ -1223,7 +1222,7 @@ tfr.map <- function(pred, quantile=0.5, year=NULL, par.name=NULL, adjusted=FALSE sPDF$UN[sPDF$ISO3=="KOS"] <- 688 ## W. Sahara -> no UN numerical code assigned in Natural Earth map (its ISO3 changed in rworlmap 1.3.6) sPDF$UN[sPDF$ISO3=="ESH"] <- 732 - ## Somaliland -> assign to Somalia -> fixed in rworlmap version 1.3.6 + ## Somaliland -> assign to Somalia (SOM) -> fixed in rworlmap version 1.3.6 #sPDF$UN[sPDF$ISO3=="SOL"] <- 706 #mtfr <- joinCountryData2Map(tfr, joinCode='UN', nameJoinColumn='un') diff --git a/R/predict_tfr.R b/R/predict_tfr.R index 80260f38..36881cac 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -392,10 +392,13 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac } else { - for(year in 1:fps.end.obs.index) + last.not.na <- 1 + for(year in 1:fps.end.obs.index) { all.f_ps[icountry,year,] <- all.tfr.list[[prediction.countries[icountry]]][all.T_end.min+year-1] + if(!is.na(all.f_ps[icountry,year,1])) last.not.na <- year + } if (!is.null(mcmc.set$meta$ar.phase2) && (mcmc.set$meta$ar.phase2)) - f_ps_previous[, icountry] <- all.tfr.list[[prediction.countries[icountry]]][all.T_end.min+year-2] + f_ps_previous[, icountry] <- all.tfr.list[[prediction.countries[icountry]]][all.T_end.min+last.not.na-2] } first.two.na <- which(is.na(all.f_ps[icountry,,1]))[1:2] which.Wsecond[icountry] <- first.two.na[2] diff --git a/man/convert.trajectories.Rd b/man/convert.trajectories.Rd index d8a188db..5795d384 100644 --- a/man/convert.trajectories.Rd +++ b/man/convert.trajectories.Rd @@ -19,12 +19,13 @@ convert.tfr.trajectories(dir = file.path(getwd(), 'bayesTFR.output'), } \details{ The function creates two files. One is called \dQuote{ascii_trajectories.csv}, it is a comma-separated table with the following columns: -\itemize{\item{\dQuote{LocID}}{country code} - \item{\dQuote{Period}}{prediction interval, e.g. 2015-2020} - \item{\dQuote{Year}}{middle year of the prediction interval} - \item{\dQuote{Trajectory}}{identifier of the trajectory} - \item{\dQuote{TF}}{total fertility rate} -} +\describe{ + \item{LocID}{country code} + \item{Period}{prediction interval, e.g. 2015-2020} + \item{Year}{middle year of the prediction interval} + \item{Trajectory}{identifier of the trajectory} + \item{TF}{total fertility rate} + } The second file is called \dQuote{ascii_trajectories_wide.csv}, it is also a comma-separated table and it contains the same information as above but in a \sQuote{transposed} format. I.e. the data for one country are ordered in columns, thus, there is one column per country. The country columns are ordered alphabetically. diff --git a/man/write.projection.summary.Rd b/man/write.projection.summary.Rd index fd8d96d9..8580981e 100644 --- a/man/write.projection.summary.Rd +++ b/man/write.projection.summary.Rd @@ -22,32 +22,32 @@ write.projection.summary(dir = file.path(getwd(), "bayesTFR.output"), } \details{ The first file that the function creates is called \file{projection_summary_user_friendly.csv} (or \file{projection_summary_user_friendly_adjusted.csv} if \code{adjusted=TRUE}), it is a comma-separated table with the following columns: -\itemize{\item{\dQuote{country_name}: }{country name} - \item{\dQuote{country_code}: }{country code} - \item{\dQuote{variant}: }{name of the variant, such as \dQuote{median}, \dQuote{lower 80}, \dQuote{upper 80}, \dQuote{lower 95}, \dQuote{upper 95}, \dQuote{-0.5child}, \dQuote{+0.5child}, \dQuote{constant}} - \item{period1: }{e.g. \dQuote{2005-2010}: TFR for the first time period. If \code{est.uncertainty} is \code{TRUE}, the first time period is the first observed time period. Otherwise it is the last observed one.} - \item{period2: }{e.g. \dQuote{2010-2015}: TFR for the second time period} - \item{\dots }{further columns with TFR projections} +\describe{\item{country_name}{country name} + \item{country_code}{country code} + \item{variant}{name of the variant, such as \dQuote{median}, \dQuote{lower 80}, \dQuote{upper 80}, \dQuote{lower 95}, \dQuote{upper 95}, \dQuote{-0.5child}, \dQuote{+0.5child}, \dQuote{constant}} + \item{\emph{period1:}}{e.g. \dQuote{2005-2010}: TFR for the first time period. If \code{est.uncertainty} is \code{TRUE}, the first time period is the first observed time period. Otherwise it is the last observed one.} + \item{\emph{period2:}}{e.g. \dQuote{2010-2015}: TFR for the second time period} + \item{\dots}{further columns with TFR projections} } The second file, called \file{projection_summary.csv} (or \file{projection_summary_adjusted.csv} if \code{adjusted=TRUE}), also comma-separated table, contains the same information as above in a UN-specific format: -\itemize{\item{\dQuote{RevID}: }{revision number, passed to the function as an argument} - \item{\dQuote{VarID}: }{variant identifier, extracted from the \code{\link{UN_variants}} dataset} - \item{\dQuote{LocID}: }{country code} - \item{\dQuote{TimeID}: }{time identifier, extracted from the \code{\link{UN_time}} dataset} - \item{\dQuote{TFR}: }{the total fertility rate for this variant, location and time period} +\describe{\item{RevID}{revision number, passed to the function as an argument} + \item{VarID}{variant identifier, extracted from the \code{\link{UN_variants}} dataset} + \item{LocID}{country code} + \item{TimeID}{time identifier, extracted from the \code{\link{UN_time}} dataset} + \item{TFR}{the total fertility rate for this variant, location and time period} } The third comma-separated file, called \file{projection_summary_parameters.csv} contains the following columns: -\itemize{\item{\dQuote{country_name}: }{country name} - \item{\dQuote{country_code}: }{country code} - \item{\dQuote{TF_time_start_decline}: }{start period of TFR decline} - \item{\dQuote{TF_max}: }{TFR at the onset of the fertitility transition (median of the \eqn{U_c} parameter)} - \item{\dQuote{TF_max_decrement}: }{maximum decrement of TFR decline (median of the \eqn{d_c} parameter)} - \item{\dQuote{TF_end_level}: }{median of the end level of the fertility transition (\eqn{\Delta_{c4}}{Triangle_c4} parameter)} - \item{\dQuote{TF_end_level_low}: }{2.5 percentile of the \eqn{\Delta_{c4}}{Triangle_c4} distribution} - \item{\dQuote{TF_end_level_high}: }{97.5 percentile of the \eqn{\Delta_{c4}}{Triangle_c4} distribution} - \item{\dQuote{TF_time_end_decline}: }{period of the end decline, measured using the prediction median} +\describe{\item{country_name}{country name} + \item{country_code}{country code} + \item{TF_time_start_decline}{start period of TFR decline} + \item{TF_max}{TFR at the onset of the fertitility transition (median of the \eqn{U_c} parameter)} + \item{TF_max_decrement}{maximum decrement of TFR decline (median of the \eqn{d_c} parameter)} + \item{TF_end_level}{median of the end level of the fertility transition (\eqn{\Delta_{c4}}{Triangle_c4} parameter)} + \item{TF_end_level_low}{2.5 percentile of the \eqn{\Delta_{c4}}{Triangle_c4} distribution} + \item{TF_end_level_high}{97.5 percentile of the \eqn{\Delta_{c4}}{Triangle_c4} distribution} + \item{TF_time_end_decline}{period of the end decline, measured using the prediction median} } Note that this file is not created if \code{adjusted=TRUE}. } From ea2c4e9e36874468bc5af1fa488a2e1f7fb6ae32 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 5 Jun 2024 17:56:58 -0700 Subject: [PATCH 33/48] added mean variant to summaries --- ChangeLog | 4 +++- DESCRIPTION | 4 ++-- R/get_outputs.R | 4 +++- R/plot_functions.R | 13 +++++++++++++ R/predict_tfr.R | 17 +++++++++++------ data-raw/UN_variants.txt | 1 + data/UN_variants.rda | Bin 605 -> 617 bytes man/get.tfr.estimation.Rd | 6 +++--- man/write.projection.summary.Rd | 6 +++--- 9 files changed, 39 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index e7f8e12f..683d78e4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,9 @@ -7.4-2.9001 (2024-02-19) +7.4-2.90xx (2024-06-05) ----- Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng). +Added the mean variant into summary outputs. + 7.4-1/2 (2023-10-17) ----- Removed rgdal from suggested packages. diff --git a/DESCRIPTION b/DESCRIPTION index 1d9becfe..ea93a2cc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.4-2.9001 -Date: 2024-02-19 +Version: 7.4-2.9002 +Date: 2024-06-05 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova diff --git a/R/get_outputs.R b/R/get_outputs.R index 10fd7ac6..1c998703 100644 --- a/R/get_outputs.R +++ b/R/get_outputs.R @@ -365,7 +365,9 @@ get.tfr.estimation <- function(mcmc.list=NULL, country=NULL, sim.dir=NULL, output <- list(tfr_table=tfr_table, country.obj = country.obj) if (!is.null(probs)) { - tfr_quantile <- apply(tfr_table, 2, quantile, probs = probs) + if(length(probs) == 1 && probs == "mean"){ + tfr_quantile <- apply(tfr_table, 2, mean) + } else tfr_quantile <- apply(tfr_table, 2, quantile, probs = probs) if (is.null(dim(tfr_quantile))) tfr_quantile <- matrix(tfr_quantile, nrow = 1) tfr_quantile <- data.table::data.table(t(tfr_quantile)) start.year <- mcmc.list$meta$start.year diff --git a/R/plot_functions.R b/R/plot_functions.R index f6e74785..5ca80e6a 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -369,6 +369,19 @@ get.median.from.prediction <- function(tfr.pred, country.index, country.code=NUL return(get.quantile.from.prediction(tfr.pred, quantile=0.5, country.index=country.index, country.code=country.code, adjusted=adjusted, ...)) } +get.mean.from.prediction <- function(tfr.pred, country.index, country.code=NULL, adjusted=TRUE, + est.uncertainty = FALSE) { + mean.values <- tfr.pred$traj.mean.sd[country.index, 1,] + if(est.uncertainty && has.est.uncertainty(tfr.pred$mcmc.set)){ # get the right value for present year + tfr.est <- get.tfr.estimation(mcmc.list=tfr.pred$mcmc.set, country = country.code, probs="mean", adjust = adjusted) + unc.last.time <- which(tfr.est$tfr_quantile$year == dimnames(tfr.pred$quantiles)[[3]][1]) + mean.values[1] <- unlist(tfr.est$tfr_quantile[unc.last.time, 1]) + } + if (!adjusted) return(mean.values) + shift <- get.tfr.shift(country.code, tfr.pred) + if(!is.null(shift)) mean.values <- mean.values + shift + return(mean.values) +} get.traj.quantiles <- function(tfr.pred, country.index, country.code, trajectories=NULL, pi=80, adjusted=TRUE, est.uncertainty = FALSE) { diff --git a/R/predict_tfr.R b/R/predict_tfr.R index 36881cac..51ab44c7 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -1118,12 +1118,12 @@ get.projection.summary.header.bayesTFR.prediction <- function(pred, ...) "get.UN.variant.names" <- function(pred, ...) UseMethod("get.UN.variant.names") get.UN.variant.names.bayesTFR.prediction <- function(pred, ...) - return(c('BHM median', 'BHM80 lower', 'BHM80 upper', 'BHM95 lower', 'BHM95 upper', 'Low', + return(c('BHM median', 'BHM80 lower', 'BHM80 upper', 'BHM95 lower', 'BHM95 upper', 'BHM mean', 'Low', 'High', 'Constant fertility')) "get.friendly.variant.names" <- function(pred, ...) UseMethod("get.friendly.variant.names") get.friendly.variant.names.bayesTFR.prediction <- function(pred, ...) - return(c('median', 'lower 80', 'upper 80', 'lower 95', 'upper 95', '-0.5child', '+0.5child', 'constant')) + return(c('median', 'lower 80', 'upper 80', 'lower 95', 'upper 95', 'mean', '-0.5child', '+0.5child', 'constant')) get.wpp.revision.number <- function(pred) { wpps <- c(2008, 2010, 2012, seq(2015, by = 2, length = 3), seq(2022, by = 2, length = 10)) @@ -1173,7 +1173,7 @@ do.write.projection.summary <- function(pred, output.dir, revision=NULL, indicat UN.variant.names <- get.UN.variant.names(pred) friendly.variant.names <- get.friendly.variant.names(pred) # the code is dependent on the following order of the variants (it's presumed): - # median, lower 80, upper 80, lower 95, upper 95, -1/2child, +1/2 child, constant + # median, lower 80, upper 80, lower 95, upper 95, mean, -1/2child, +1/2 child, constant nr.var <- length(UN.variant.names) result1 <- result2 <- NULL @@ -1184,7 +1184,10 @@ do.write.projection.summary <- function(pred, output.dir, revision=NULL, indicat if(est.uncertainty){ # add uncertainty est.object <- get.tfr.estimation(mcmc.list = pred$mcmc.set, country = country.obj$code, probs = c(0.5, 0.1, 0.9, 0.025, 0.975), adjust = adjusted) + est.object.mean <- get.tfr.estimation(mcmc.list = pred$mcmc.set, country = country.obj$code, + probs = "mean", adjust = adjusted) this.tfr.unc <- as.matrix(est.object$tfr_quantile)[1:ltfr, -ncol(est.object$tfr_quantile)] + this.tfr.unc <- cbind(this.tfr.unc, mean = unlist(est.object.mean$tfr_quantile[1:ltfr, 1])) #this.tfr <- unlist(est.object$tfr_quantile[,1])[1:ltfr] } this.result1 <- cbind( @@ -1194,15 +1197,18 @@ do.write.projection.summary <- function(pred, output.dir, revision=NULL, indicat # prediction median <- get.median.from.prediction(pred, country.obj$index, country.obj$code, adjusted=adjusted, est.uncertainty = est.uncertainty) + mean <- get.mean.from.prediction(pred, country.obj$index, country.obj$code, adjusted=adjusted, + est.uncertainty = est.uncertainty) proj.result <- rbind(median, get.traj.quantiles(pred, country.obj$index, country.obj$code, pi=80, adjusted=adjusted, est.uncertainty = est.uncertainty), get.traj.quantiles(pred, country.obj$index, country.obj$code, pi=95, adjusted=adjusted, - est.uncertainty = est.uncertainty)) + est.uncertainty = est.uncertainty), + mean) if(any(friendly.variant.names == '-0.5child')) proj.result <- rbind(proj.result, get.half.child.variant(median)) - #browser() + proj.result <- round(rbind(proj.result, rep(median[1], nr.proj)), precision) # constant variant if(est.uncertainty){ # user-friendly output contains observed years as well @@ -1219,7 +1225,6 @@ do.write.projection.summary <- function(pred, output.dir, revision=NULL, indicat result1 <- rbind(result1, this.result1) for(ivar in 1:nr.var) { this.obs.tfr <- if(est.uncertainty) obs.tfr[ivar,] else this.tfr - #browser() result2 <- rbind(result2, cbind(revision=rep(revision, nr.proj.all), variant=rep(e$UN_variants[e$UN_variants$Vshort==UN.variant.names[ivar],'VarID'], nr.proj.all), country=rep(country.obj$code, nr.proj.all), diff --git a/data-raw/UN_variants.txt b/data-raw/UN_variants.txt index 26602326..7bc15975 100644 --- a/data-raw/UN_variants.txt +++ b/data-raw/UN_variants.txt @@ -22,3 +22,4 @@ 13 204 "BHM80 lower" "BHM80 lower" "Standard" 13 206 "BHM95 upper" "BHM95 upper" "Standard" 13 207 "BHM95 lower" "BHM95 lower" "Standard" +13 208 "BHM mean" "BHM mean" "Standard" diff --git a/data/UN_variants.rda b/data/UN_variants.rda index af9da415454005d2a34074ad9e4a96351b3a3d51..3399da64d2c587104ed9f2b52d80b5ac19f50648 100644 GIT binary patch literal 617 zcmV-v0+#(kT4*^jL0KkKS#k$K2LJ+I|HuFT+U!DSQeZu28&I$B-=IJM03E;p06+i% zzyeh`)_{hRMvVY8XaE2J01XCeXaH&I1d^xfnI5CjGMWLP000^q0004@pyNhNj3W>X zCW8Q)4Fe`k88R3^VI)Y?Q%0Eto@pl3Xgxq}K*;oD)Bwo!4KgtuQqhg4mFtX%TdwG` z!4?Ua6zl+a&oW4$$|jMCS3#D{)*!tBkb@(F@id*;DQinvR5gs z+^M;Lq6K2Z%0RwF6m!&9M+vZ6QcYq~qzH@?yp=1G%FluRB|89BRaCydgIK`GVzJcW0t;!h`-JLh7{^OuyCUHUvESH? zq^C0q^9@xMD^Jzmh-CQ&AUb!$mzSFU!?4QXK)3+xun*7?Rh31c3MxP#@n ziR4V9h_qoT z)|jV9akIc8i%%4)L0cjtC2(n`Gzgg|2qJ7i5DP4@K_Vi%o-gJYZ}E2|Q-ui!AaoFL Dbxj0o literal 605 zcmV-j0;2swT4*^jL0KkKS(EU8tN;Q_f5rd*+U!DSQeZu28&I$B-=IJM03CoJ00F=P zRXWW;VI?4^=|gE5sisDThJXMuG-7Nai>|Pm5VrC>!y710Z@)MSV-El4f!Wb!0!g&a@2n-awaZE`gJ_q}ahb%y9tUefQG62^SLoy1I64e$v z_O%yML{2)K6ePz%qeT{Cugt7qWHD@sMh-f@Up$D_iCDX3u3DI!ceSeGMWP4IkyK#F zTMF1dbmI#$1VB%0FyZm5idf`nrC%d(Fd^cfI#&!*MDq zTTwL4OSYPk;!PRSGhoLTMH`8c8V!Z{G-Xy(L!BP%Y)s277wz1vajntG3)i8JH=OGx z`mrxkFOJErDZ6xUM>ADZue-Rs!H~|(ZMMxO(NN2mt<)BVWhBQpua=IjbuKTA&gL30 r)2O{tXDb#a>@i7V=*6rV$3mw*bc6-No{#1jZ}E2|Q-ui$KL`52Pkj!` diff --git a/man/get.tfr.estimation.Rd b/man/get.tfr.estimation.Rd index cad29870..760c5aa9 100644 --- a/man/get.tfr.estimation.Rd +++ b/man/get.tfr.estimation.Rd @@ -17,17 +17,17 @@ get.tfr.estimation(mcmc.list = NULL, country = NULL, \item{sim.dir}{Directory with the MCMC simulation results. Only used if \code{mcmc.list} is \code{NULL}.} \item{burnin}{Burn-in for getting trajectories and quantiles. A positive burn-in \eqn{x} will remove first \eqn{x} iterations from each chain.} \item{thin}{Thin for getting trajectories and quantiles. Thinning level \eqn{x} greater than 1 will store one iteration per \eqn{x} samples.} - \item{probs}{A vector of numbers between \eqn{[0,1]} specifying which estimation quantiles should be outputted. If it is set to \code{NULL} no quantiles are returned.} + \item{probs}{A vector of numbers between \eqn{[0,1]} specifying which estimation quantiles should be outputted. If it is set to \code{NULL} no quantiles are returned. It can be set to the word \dQuote{mean}, in which case the estimation mean is outputted.} \item{adjust}{Logical indicating whether the adjusted median and trajectories should be returned.} \item{country.code, ISO.code}{Deprecated arguments. Use argument \code{country} instead.} } \details{ -This function is used to obtain the TFR estimation trajectories as well as corresponding quantiles if the \code{mcmc.list} has been obtained while taking account for uncertainty about the past, i.e. \code{uncertainty=TRUE} in \code{\link{run.tfr.mcmc}}. Quantiles are included in the results if \code{probs} is not \code{NULL}. +This function is used to obtain the TFR estimation trajectories as well as corresponding quantiles if the \code{mcmc.list} has been obtained while taking account for uncertainty about the past, i.e. \code{uncertainty=TRUE} in \code{\link{run.tfr.mcmc}}. Quantiles or the mean are included in the results if \code{probs} is not \code{NULL}. } \value{ \item{tfr_table}{Table storing the trajectories. It is a matrix with rows equal to number of trajectories, and column equal to number of time periods.} \item{country.obj}{A list storing information about the country which the trajectories and quantiles correspond to. It corresponds to the output of \code{\link{get.country.object}}.} - \item{tfr_quantile}{Optional. A data.table object, storing the quantiles of estimates for each time period as specified by the \code{probs} argument. The time periods are contained in the \code{year} column.} + \item{tfr_quantile}{Optional. A data.table object, storing the quantiles or the mean of estimates for each time period as specified by the \code{probs} argument. The time periods are contained in the \code{year} column.} } \author{ Peiran Liu, Hana Sevcikova diff --git a/man/write.projection.summary.Rd b/man/write.projection.summary.Rd index 8580981e..4900bb17 100644 --- a/man/write.projection.summary.Rd +++ b/man/write.projection.summary.Rd @@ -1,12 +1,12 @@ \name{write.projection.summary} -\Rdversion{1.1} +%\Rdversion{1.1} \alias{write.projection.summary} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Writing Projection Summary Files } \description{ -The function creates two files containing projection summaries, such as the median, the lower and upper bound of the 80 and 90\% probability intervals, respectively, the +/- 0.5 child variant and the constant variant. One file is in a user-friendly format, whereas the other is in a UN-specific format with internal coding of the time and the variants. In addition, a file containing some of the model parameters is created. +The function creates two files containing projection summaries, such as the median, mean, the lower and upper bound of the 80 and 90\% probability intervals, respectively, the +/- 0.5 child variant and the constant variant. One file is in a user-friendly format, whereas the other is in a UN-specific format with internal coding of the time and the variants. In addition, a file containing some of the model parameters is created. } \usage{ write.projection.summary(dir = file.path(getwd(), "bayesTFR.output"), @@ -24,7 +24,7 @@ write.projection.summary(dir = file.path(getwd(), "bayesTFR.output"), The first file that the function creates is called \file{projection_summary_user_friendly.csv} (or \file{projection_summary_user_friendly_adjusted.csv} if \code{adjusted=TRUE}), it is a comma-separated table with the following columns: \describe{\item{country_name}{country name} \item{country_code}{country code} - \item{variant}{name of the variant, such as \dQuote{median}, \dQuote{lower 80}, \dQuote{upper 80}, \dQuote{lower 95}, \dQuote{upper 95}, \dQuote{-0.5child}, \dQuote{+0.5child}, \dQuote{constant}} + \item{variant}{name of the variant, such as \dQuote{median}, \dQuote{lower 80}, \dQuote{upper 80}, \dQuote{lower 95}, \dQuote{upper 95}, \dQuote{mean}, \dQuote{-0.5child}, \dQuote{+0.5child}, \dQuote{constant}} \item{\emph{period1:}}{e.g. \dQuote{2005-2010}: TFR for the first time period. If \code{est.uncertainty} is \code{TRUE}, the first time period is the first observed time period. Otherwise it is the last observed one.} \item{\emph{period2:}}{e.g. \dQuote{2010-2015}: TFR for the second time period} \item{\dots}{further columns with TFR projections} From a93ad81e71944ecc829b9abe5faa4f62d16cd3e4 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 5 Jun 2024 20:30:01 -0700 Subject: [PATCH 34/48] arguments traj.index and show.median added to tfr.trajectories.plot --- ChangeLog | 2 ++ DESCRIPTION | 2 +- R/plot_functions.R | 17 ++++++++++++++++- man/plot-trajectories.Rd | 3 +++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 683d78e4..c1df6ce1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,8 @@ Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shun Added the mean variant into summary outputs. +tfr.trajectories.plot() got an argument to plot the means, as well as selected trajectories. + 7.4-1/2 (2023-10-17) ----- Removed rgdal from suggested packages. diff --git a/DESCRIPTION b/DESCRIPTION index ea93a2cc..4a426e70 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: bayesTFR -Version: 7.4-2.9002 +Version: 7.4-2.9003 Date: 2024-06-05 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) diff --git a/R/plot_functions.R b/R/plot_functions.R index 5ca80e6a..43fe2ee5 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -564,7 +564,8 @@ tfr.estimation.plot <- function(mcmc.list = NULL, country = NULL, sim.dir = NULL tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), half.child.variant=TRUE, nr.traj=NULL, adjusted.only = TRUE, typical.trajectory=FALSE, - mark.estimation.points=FALSE, + mark.estimation.points=FALSE, + traj.index = NULL, show.mean = FALSE, xlim=NULL, ylim=NULL, type='b', xlab='Year', ylab='TFR', main=NULL, lwd=c(2,2,2,2,2,1), col=c('black', 'green', 'red', 'red', 'blue', '#00000020'), @@ -608,7 +609,11 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), } x1 <- as.integer(c(names(y1.part1), names(y1.part2))) x2 <- as.numeric(dimnames(tfr.pred$quantiles)[[3]]) + + if(!is.null(traj.index)) nr.traj <- length(traj.index) trajectories <- get.trajectories(tfr.pred, country$code, nr.traj, typical.trajectory=typical.trajectory) + if(!is.null(traj.index) && !is.null(trajectories$trajectories)) trajectories$index <- traj.index + # plot historical data: observed if (!add) { if(is.null(xlim)) xlim <- c(min(x1,x2), max(x1,x2)) @@ -681,6 +686,7 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), } } } + if (uncertainty) { col_median <- length(pi)+1 @@ -713,6 +719,15 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), legend <- c(legend, median.legend, if(length(pi) > 0) paste0(pi, '% PI') else c()) cols <- c(cols, col[3], rep(col[4], length(pi))) lwds <- c(lwds, lwd[3], rep(lwd[4], length(pi))) + if(show.mean){ + # plot mean + tfr.mean <- get.mean.from.prediction(tfr.pred, country$index, country$code) + lines(x2, tfr.mean, type='l', col=col[3], lwd=1, lty=max(lty)+1) + legend <- c(legend, 'mean') + cols <- c(cols, col[3]) + lwds <- c(lwds, 1) + lty <- c(max(lty)+1, lty) + } if (half.child.variant) { lty <- c(lty, max(lty)+1) llty <- length(lty) diff --git a/man/plot-trajectories.Rd b/man/plot-trajectories.Rd index 6ce45c3b..e3732ade 100644 --- a/man/plot-trajectories.Rd +++ b/man/plot-trajectories.Rd @@ -14,6 +14,7 @@ tfr.trajectories.plot(tfr.pred, country, pi = c(80, 95), half.child.variant = TRUE, nr.traj = NULL, adjusted.only = TRUE, typical.trajectory = FALSE, mark.estimation.points = FALSE, + traj.index = NULL, show.mean = FALSE, xlim = NULL, ylim = NULL, type = 'b', xlab = 'Year', ylab = 'TFR', main = NULL, lwd = c(2, 2, 2, 2, 2, 1), col=c('black', 'green', 'red', 'red', 'blue', '#00000020'), @@ -37,6 +38,8 @@ tfr.trajectories.table(tfr.pred, country, pi = c(80, 95), \item{adjusted.only}{Logical. By default, if the projection or estimation median is adjusted using e.g. \code{\link{tfr.median.set}} or \code{\link{tfr.median.set.all}}, the function plots the adjusted median. If \code{adjusted.only=FALSE} the original (non-adjusted) median is plotted as well.} \item{typical.trajectory}{Logical. If \code{TRUE} one trajectory is shown that has the smallest distance to the median.} \item{mark.estimation.points}{Logical. If \code{TRUE}, points that were not used in the estimation (phase I) are shown in lighter color than points in phase II and III.} + \item{traj.index}{Vector of trajectory indices to show. If not given, the trajectories are selected using equidistant spacing.} + \item{show.mean}{Logical indicating if the mean of the distribution should be shown.} \item{xlim, ylim, type, xlab, ylab}{Graphical parameters passed to the \code{plot} function.} \item{lwd, col}{Vector of six elements giving the line width and color for: 1. observed data, 2. imputed missing data, 3. median, 4. quantiles, 5. half-child variant, 6. trajectories.} \item{show.legend}{Logical controlling whether the legend should be drawn.} From bfc2b75d8a82f4cc1e40bc77363c5230d2dfed07 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 5 Jun 2024 20:59:49 -0700 Subject: [PATCH 35/48] export mean function --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index c1717b9a..4977dc3c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -102,6 +102,7 @@ export( tfr.country.dlcurves, tfr.dl.coverage, get.median.from.prediction, + get.mean.from.prediction, bdem.parameter.traces, get.traj.ascii.header, get.nr.countries, From ebc1bc8d60fd7cef2e185881aef26834dff58c6c Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 5 Jun 2024 21:04:26 -0700 Subject: [PATCH 36/48] minor edits --- ChangeLog | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c1df6ce1..ed16bae6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,8 @@ Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shun Added the mean variant into summary outputs. -tfr.trajectories.plot() got an argument to plot the means, as well as selected trajectories. +tfr.trajectories.plot() got arguments to plot the means (show.mean), +as well as selected trajectories (traj.index). 7.4-1/2 (2023-10-17) ----- From 2cf3d4bdeeff36f049dbdfd1b895d4024a17a967 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 5 Jun 2024 21:07:43 -0700 Subject: [PATCH 37/48] internal function --- man/bayesTFR-internal.Rd | 1 + 1 file changed, 1 insertion(+) diff --git a/man/bayesTFR-internal.Rd b/man/bayesTFR-internal.Rd index 92fe91ad..362cbd42 100644 --- a/man/bayesTFR-internal.Rd +++ b/man/bayesTFR-internal.Rd @@ -43,6 +43,7 @@ \alias{get.mcmc.list.list} \alias{get.mcmc.meta} \alias{get.median.from.prediction} +\alias{get.mean.from.prediction} \alias{get.meta.only} \alias{get.newTFRcolnames} \alias{get.nr.countries} From 67a883009ef30e753e9586a53254c10fac10f1d4 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 20 Jun 2024 17:49:49 -0700 Subject: [PATCH 38/48] argument show.median added --- ChangeLog | 4 +- R/plot_functions.R | 102 +++++++++++++++++++++++++++------------ man/plot-trajectories.Rd | 4 +- 3 files changed, 74 insertions(+), 36 deletions(-) diff --git a/ChangeLog b/ChangeLog index ed16bae6..7ccef417 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,10 @@ -7.4-2.90xx (2024-06-05) +7.4-2.90xx (2024-06-20) ----- Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng). Added the mean variant into summary outputs. -tfr.trajectories.plot() got arguments to plot the means (show.mean), +tfr.trajectories.plot() got arguments to plot the means (show.mean), medians (show.median), as well as selected trajectories (traj.index). 7.4-1/2 (2023-10-17) diff --git a/R/plot_functions.R b/R/plot_functions.R index 43fe2ee5..a155f7a1 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -565,7 +565,7 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), half.child.variant=TRUE, nr.traj=NULL, adjusted.only = TRUE, typical.trajectory=FALSE, mark.estimation.points=FALSE, - traj.index = NULL, show.mean = FALSE, + traj.index = NULL, show.mean = FALSE, show.median = TRUE, xlim=NULL, ylim=NULL, type='b', xlab='Year', ylab='TFR', main=NULL, lwd=c(2,2,2,2,2,1), col=c('black', 'green', 'red', 'red', 'blue', '#00000020'), @@ -666,79 +666,117 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), lines(x2, trajectories$trajectories[,trajectories$index[i]], type='l', col=col[6], lwd=lwd[6]) } } - # plot median - tfr.median <- get.median.from.prediction(tfr.pred, country$index, country$code) - if(uncertainty) { + if(uncertainty) unc.last.time <- which(tfr.object$tfr_quantile$year == x2[1]) - tfr.median[1] <- unlist(tfr.object$tfr_quantile[unc.last.time,])[length(pi)+1] + + # extract median & mean + tfr.median <- tfr.mean <- tfr.main.proj <- NULL + if(show.median) + tfr.median <- get.median.from.prediction(tfr.pred, country$index, country$code) + if(show.mean) + tfr.mean <- get.mean.from.prediction(tfr.pred, country$index, country$code) + + # set the main projection (solid line) + main.proj.name <- "" + if(!is.null(tfr.median)){ + tfr.main.proj <- tfr.median + main.proj.name <- "median" + } else { + if(!is.null(tfr.mean)){ + tfr.main.proj <- tfr.mean + main.proj.name <- "mean" + } } - lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3]) - lty <- 1 + lty <- c() + if(!is.null(tfr.main.proj)){ + if(uncertainty) # replace last observed with estimated median + tfr.main.proj[1] <- unlist(tfr.object$tfr_quantile[unc.last.time,])[length(pi)+1] + # draw main projection + lines(x2, tfr.main.proj, type='l', col=col[3], lwd=lwd[3]) + lty <- 1 + } + # plot given CIs if(length(pi) > 0){ - lty <- c(lty, 2:(length(pi)+1)) + pi.lty <- 2:(length(pi)+1) + lty <- c(lty, pi.lty) for (i in 1:length(pi)) { cqp <- get.traj.quantiles(tfr.pred, country$index, country$code, trajectories$trajectories, pi[i], est.uncertainty = uncertainty) if (!is.null(cqp)) { - lines(x2, cqp[1,], type='l', col=col[4], lty=lty[i+1], lwd=lwd[4]) - lines(x2, cqp[2,], type='l', col=col[4], lty=lty[i+1], lwd=lwd[4]) + lines(x2, cqp[1,], type='l', col=col[4], lty=pi.lty[i], lwd=lwd[4]) + lines(x2, cqp[2,], type='l', col=col[4], lty=pi.lty[i], lwd=lwd[4]) } } } - + max.lty <- if(length(lty) == 0) 1 else max(lty) + if (uncertainty) { col_median <- length(pi)+1 lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, col_median], type='l', col=col_unc, lwd=lwd[3]) if(!adjusted.only) { # plot unadjusted estimation median - unadj.lty <- max(lty)+1 + unadj.lty <- max.lty+1 tfr.object.unadj <- get.tfr.estimation(mcmc.list=tfr.pred$mcmc.set, country = country.obj$code, probs=0.5, adjust = FALSE) - lines(tfr.object.unadj$tfr_quantile$year, as.data.frame(tfr.object.unadj$tfr_quantile)$V1, type='l', col=col_unc, lwd=lwd[3], lty = max(lty)+1) + lines(tfr.object.unadj$tfr_quantile$year, as.data.frame(tfr.object.unadj$tfr_quantile)$V1, type='l', col=col_unc, lwd=lwd[3], lty = unadj.lty) } if(length(pi) > 0) { for (i in 1:length(pi)) { - lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, length(pi)+1-i], type='l', col=col_unc, lty=lty[i+1], lwd=lwd[4]) - lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, length(pi)+1+i], type='l', col=col_unc, lty=lty[i+1], lwd=lwd[4]) + lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, length(pi)+1-i], type='l', col=col_unc, lty=pi.lty[i], lwd=lwd[4]) + lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, length(pi)+1+i], type='l', col=col_unc, lty=pi.lty[i], lwd=lwd[4]) } } } legend <- c() cols <- c() lwds <- c() - if(!adjusted.only) { # plot unadjusted median - bhm.median <- get.median.from.prediction(tfr.pred, country$index, country$code, adjusted=FALSE) - lines(x2, bhm.median, type='l', col=col[3], lwd=lwd[3], lty=max(lty)+1) - legend <- c(legend, 'BHM median') + if(!adjusted.only) { # plot unadjusted median & mean + if(main.proj.name == "mean"){ + bhm.main <- get.mean.from.prediction(tfr.pred, country$index, country$code, adjusted=FALSE) + bhm.main.name <- 'BHM mean' + } else { + bhm.main <- get.median.from.prediction(tfr.pred, country$index, country$code, adjusted=FALSE) + bhm.main.name <- 'BHM median' + } + lines(x2, bhm.main, type='l', col=col[3], lwd=lwd[3], lty=max.lty+1) + legend <- c(legend, bhm.main.name) + cols <- c(cols, col[3]) + lwds <- c(lwds, lwd[3]) + lty <- c(max.lty+1, lty) + max.lty <- max(lty) + } + main.legend <- "" + if(main.proj.name != ""){ + main.legend <- if(adjusted.only) main.proj.name else paste('adj.', main.proj.name) + legend <- c(legend, main.legend) cols <- c(cols, col[3]) lwds <- c(lwds, lwd[3]) - lty <- c(max(lty)+1, lty) } - median.legend <- if(adjusted.only) 'median' else 'adj. median' - legend <- c(legend, median.legend, if(length(pi) > 0) paste0(pi, '% PI') else c()) - cols <- c(cols, col[3], rep(col[4], length(pi))) - lwds <- c(lwds, lwd[3], rep(lwd[4], length(pi))) - if(show.mean){ - # plot mean - tfr.mean <- get.mean.from.prediction(tfr.pred, country$index, country$code) + legend <- c(legend, if(length(pi) > 0) paste0(pi, '% PI') else c()) + cols <- c(cols, rep(col[4], length(pi))) + lwds <- c(lwds, rep(lwd[4], length(pi))) + if(show.median && show.mean){ + # plot mean in addition to median lines(x2, tfr.mean, type='l', col=col[3], lwd=1, lty=max(lty)+1) legend <- c(legend, 'mean') cols <- c(cols, col[3]) lwds <- c(lwds, 1) - lty <- c(max(lty)+1, lty) + lty <- c(lty, max.lty+1) + max.lty <- max(lty) } - if (half.child.variant) { - lty <- c(lty, max(lty)+1) + if (half.child.variant && !is.null(tfr.main.proj)) { + lty <- c(lty, max.lty+1) + max.lty <- max(lty) llty <- length(lty) - up.low <- get.half.child.variant(median=tfr.median) + up.low <- get.half.child.variant(median=tfr.main.proj) if(uncertainty) { up.low <- up.low[,-1] x2t <- x2[-1] } else x2t <- x2 lines(x2t, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) lines(x2t, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5]) - legend <- c(legend, '+/- 0.5 child') + legend <- c(legend, paste(main.proj.name, '+/- 0.5 child')) cols <- c(cols, col[5]) lwds <- c(lwds, lwd[5]) } diff --git a/man/plot-trajectories.Rd b/man/plot-trajectories.Rd index e3732ade..ff14bf1e 100644 --- a/man/plot-trajectories.Rd +++ b/man/plot-trajectories.Rd @@ -14,7 +14,7 @@ tfr.trajectories.plot(tfr.pred, country, pi = c(80, 95), half.child.variant = TRUE, nr.traj = NULL, adjusted.only = TRUE, typical.trajectory = FALSE, mark.estimation.points = FALSE, - traj.index = NULL, show.mean = FALSE, + traj.index = NULL, show.mean = FALSE, show.median = TRUE, xlim = NULL, ylim = NULL, type = 'b', xlab = 'Year', ylab = 'TFR', main = NULL, lwd = c(2, 2, 2, 2, 2, 1), col=c('black', 'green', 'red', 'red', 'blue', '#00000020'), @@ -39,7 +39,7 @@ tfr.trajectories.table(tfr.pred, country, pi = c(80, 95), \item{typical.trajectory}{Logical. If \code{TRUE} one trajectory is shown that has the smallest distance to the median.} \item{mark.estimation.points}{Logical. If \code{TRUE}, points that were not used in the estimation (phase I) are shown in lighter color than points in phase II and III.} \item{traj.index}{Vector of trajectory indices to show. If not given, the trajectories are selected using equidistant spacing.} - \item{show.mean}{Logical indicating if the mean of the distribution should be shown.} + \item{show.mean, show.median}{Logical indicating if the mean or/and the median of the distribution should be shown.} \item{xlim, ylim, type, xlab, ylab}{Graphical parameters passed to the \code{plot} function.} \item{lwd, col}{Vector of six elements giving the line width and color for: 1. observed data, 2. imputed missing data, 3. median, 4. quantiles, 5. half-child variant, 6. trajectories.} \item{show.legend}{Logical controlling whether the legend should be drawn.} From 6898f9993662e424eda7b9f6c3caaca75f843070 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 11 Sep 2024 14:31:52 -0700 Subject: [PATCH 39/48] added include_2024 --- ChangeLog | 4 +++- DESCRIPTION | 4 ++-- R/plot_functions.R | 1 - data/include_2024.R | 6 ++++++ 4 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 data/include_2024.R diff --git a/ChangeLog b/ChangeLog index 7ccef417..2e06dfd9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ -7.4-2.90xx (2024-06-20) +7.4-2.90xx (2024-09-11) ----- +Added dataset include_2024. + Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng). Added the mean variant into summary outputs. diff --git a/DESCRIPTION b/DESCRIPTION index 4a426e70..d77a2111 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.4-2.9003 -Date: 2024-06-05 +Version: 7.4-2.9004 +Date: 2024-09-11 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova diff --git a/R/plot_functions.R b/R/plot_functions.R index a155f7a1..3cc32148 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -746,7 +746,6 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), lty <- c(max.lty+1, lty) max.lty <- max(lty) } - main.legend <- "" if(main.proj.name != ""){ main.legend <- if(adjusted.only) main.proj.name else paste('adj.', main.proj.name) legend <- c(legend, main.legend) diff --git a/data/include_2024.R b/data/include_2024.R new file mode 100644 index 00000000..a6af5cc8 --- /dev/null +++ b/data/include_2024.R @@ -0,0 +1,6 @@ +include_2024 <- local({ + e <- new.env() + load("include_2022.rda", envir= e) + e$include_2022 +}) + From f1ab941aa518b1e42479841bc1d1df6265b64da9 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Wed, 25 Sep 2024 16:24:08 -0700 Subject: [PATCH 40/48] fix setting lambda if last observed < T --- DESCRIPTION | 4 ++-- R/mcmc_ini.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d77a2111..49d5353d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.4-2.9004 -Date: 2024-09-11 +Version: 7.4-2.9005 +Date: 2024-09-25 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index f06719bf..d1ae8d34 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -95,8 +95,8 @@ find.lambda.for.one.country <- function(tfr, T_end, annual = FALSE) { if(annual) # convert lambda from 5-year scale to annual scale { lambda <- min(which(year.bin == lambda) + 2, Tendorig) - if (length(year.bin) - lambda < 5) { # if in the last time period, set it to the end of the period - lambda <- length(year.bin) + if (Tendorig - lambda < 5) { # if in the last (observed) time period, set it to the end of the period + lambda <- Tendorig while(is.na(tfrorig[lambda])) lambda <- lambda - 1 # move it before the last NA if any } } From ff61dd613357185937f676f43200bb641bfdb8dc Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 26 Sep 2024 10:35:45 -0700 Subject: [PATCH 41/48] removed unnecessary while loop --- DESCRIPTION | 4 ++-- R/mcmc_ini.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 49d5353d..b94dacd0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.4-2.9005 -Date: 2024-09-25 +Version: 7.4-2.9006 +Date: 2024-09-26 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index d1ae8d34..e4ec7a00 100755 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -97,7 +97,7 @@ find.lambda.for.one.country <- function(tfr, T_end, annual = FALSE) { lambda <- min(which(year.bin == lambda) + 2, Tendorig) if (Tendorig - lambda < 5) { # if in the last (observed) time period, set it to the end of the period lambda <- Tendorig - while(is.na(tfrorig[lambda])) lambda <- lambda - 1 # move it before the last NA if any + #while(is.na(tfrorig[lambda])) lambda <- lambda - 1 # move it before the last NA if any - not needed anymore } } From 867d2e6843d5c2591302a5d27eb0df4551b54737 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 26 Sep 2024 14:59:10 -0700 Subject: [PATCH 42/48] adjust mean --- R/tfr_median_set_new.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/tfr_median_set_new.R b/R/tfr_median_set_new.R index 755922ed..f10f8e43 100644 --- a/R/tfr_median_set_new.R +++ b/R/tfr_median_set_new.R @@ -167,7 +167,7 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL) invisible(output) } -.do.shift.prediction.to.wpp <- function(pred, wpp.dataset, wpp.year = NULL, verbose = TRUE){ +.do.shift.prediction.to.wpp <- function(pred, wpp.dataset, stat = "median", wpp.year = NULL, verbose = TRUE){ country_code <- NULL # for CRAN check not to complain meta <- pred$mcmc.set$meta wpp.year <- if(!is.null(wpp.year)) wpp.year else meta$wpp.year @@ -192,8 +192,10 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL) } } cntry <- countries[icntry] + if(!stat %in% c("median", "mean")) stop("Argument 'stat' must be 'median' or 'mean', but is ", stat, ".") to.match <- merge(data.table::data.table(year = pred.years, median = pred$quantiles[icntry, "0.5", ]), wppdatal[country_code == cntry], by = "year", all.x = TRUE) + if(stat == "mean") to.match[["median"]] <- to.match[["median"]] + pred$traj.mean.sd[icntry, 1, ] - to.match[["median"]] # difference between the mean and median to.match$wpp[is.na(to.match$wpp)] <- to.match$median[is.na(to.match$wpp)] # no shift for years that don't match to.match$shift <- to.match$wpp - to.match$median if(sum(to.match$shift) != 0) From 882c7549300909b86e639cd304b761eb07d2a0c2 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 26 Sep 2024 16:26:27 -0700 Subject: [PATCH 43/48] version updated --- ChangeLog | 4 +++- DESCRIPTION | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2e06dfd9..b409f744 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ -7.4-2.90xx (2024-09-11) +7.4-2.90xx (2024-09-26) ----- +Added option of shifting projection to wpp by matching means. + Added dataset include_2024. Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng). diff --git a/DESCRIPTION b/DESCRIPTION index b94dacd0..7ee45681 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: bayesTFR -Version: 7.4-2.9006 +Version: 7.4-2.9007 Date: 2024-09-26 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) From 63c877b5cd7040819b57f1b0674fea084888a248 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 27 Sep 2024 12:07:31 -0700 Subject: [PATCH 44/48] allow to have multiple predictions in one sim.dir --- NAMESPACE | 1 + R/get_outputs.R | 23 ++++++++++++++---- R/predict_tfr.R | 53 ++++++++++++++++++++++-------------------- R/predict_tfr_subnat.R | 4 ++-- R/tfr_median_set_new.R | 51 ++++++++++++++++++++++------------------ 5 files changed, 77 insertions(+), 55 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4977dc3c..ce920a48 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,6 +49,7 @@ export( get.thinned.tfr.mcmc, get.tfr.prediction, has.tfr.prediction, + available.tfr.predictions, get.tfr.convergence.all, get.tfr.convergence, get.tfr3.convergence.all, diff --git a/R/get_outputs.R b/R/get_outputs.R index 1c998703..34306f13 100644 --- a/R/get_outputs.R +++ b/R/get_outputs.R @@ -222,14 +222,23 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, return(thin.mcmc.set) } -has.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL) { +has.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL, subdir = "predictions") { if (!is.null(mcmc)) sim.dir <- if(is.character(mcmc)) mcmc else mcmc$meta$output.dir if (is.null(sim.dir)) stop('Either mcmc or directory must be given.') - if(file.exists(file.path(sim.dir, 'predictions', 'prediction.rda'))) return(TRUE) + if(file.exists(file.path(sim.dir, subdir, 'prediction.rda'))) return(TRUE) return(FALSE) } -get.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL, mcmc.dir=NULL) { +available.tfr.predictions <- function(mcmc=NULL, sim.dir=NULL, full.names = FALSE){ + if (!is.null(mcmc)) sim.dir <- if(is.character(mcmc)) mcmc else mcmc$meta$output.dir + if (is.null(sim.dir)) stop('Either mcmc or directory must be given.') + all.dirs <- list.dirs(sim.dir, recursive = FALSE, full.names = TRUE) + pred.dirs <- names(unlist(sapply(all.dirs, function(x) list.files(x, pattern = "^prediction.rda$")))) + if(length(pred.dirs) == 0 || full.names == TRUE) return(pred.dirs) + return(sapply(pred.dirs, basename, USE.NAMES = FALSE)) +} + +get.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL, mcmc.dir=NULL, subdir = "predictions") { ############ # Returns an object of class bayesTFR.prediction # Set mcmc.dir to NA, if the prediction object should not have a pointer @@ -238,11 +247,15 @@ get.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL, mcmc.dir=NULL) { if (!is.null(mcmc)) sim.dir <- if(is.character(mcmc)) mcmc else mcmc$meta$output.dir if (is.null(sim.dir)) stop('Either mcmc or directory must be given.') - output.dir <- file.path(sim.dir, 'predictions') + output.dir <- file.path(sim.dir, subdir) pred.file <- file.path(output.dir, 'prediction.rda') if(!file.exists(pred.file)) { warning('File ', pred.file, ' does not exist.') - return(NULL) + if(length((alt.preds <- available.tfr.predictions(sim.dir = sim.dir))) > 0){ + output.dir <- file.path(sim.dir, alt.preds[1]) + pred.file <- file.path(output.dir, 'prediction.rda') + warning('Extracting predictions from ', alt.preds[1]) + } else return(NULL) } load(file=pred.file) bayesTFR.prediction$output.directory <- normalizePath(output.dir) diff --git a/R/predict_tfr.R b/R/predict_tfr.R index 51ab44c7..b0095d8f 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -6,7 +6,7 @@ tfr.predict <- function(mcmc.set=NULL, end.year=2100, use.tfr3=TRUE, burnin3=2000, mu=2.1, rho=0.8859, sigmaAR1=0.1016, min.tfr=0.5, use.correlation=FALSE, - save.as.ascii=0, output.dir = NULL, + save.as.ascii=0, output.dir = NULL, subdir = "predictions", low.memory=TRUE, seed=NULL, verbose=TRUE, uncertainty=FALSE, ...) { if(!is.null(mcmc.set)) { @@ -55,7 +55,7 @@ tfr.predict <- function(mcmc.set=NULL, end.year=2100, start.year=start.year, nr.traj=nr.traj, burnin=burnin, thin=thin, use.tfr3=has.phase3, burnin3=burnin3, mu=mu, rho=rho, sigmaAR1 = sigmaAR1, min.tfr=min.tfr, use.correlation=use.correlation, save.as.ascii=save.as.ascii, - output.dir=output.dir, verbose=verbose, uncertainty=uncertainty, ...)) + output.dir=output.dir, subdir = subdir, verbose=verbose, uncertainty=uncertainty, ...)) } .find.burnin.nr.traj.from.diag <- function(diag.list, verbose = FALSE) { @@ -88,7 +88,8 @@ get.burnin.nrtraj.from.diagnostics <- function(sim.dir, ...) { } tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), - prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE, + prediction.dir=sim.dir, subdir = "predictions", countries = NULL, save.as.ascii=0, + verbose=TRUE, uncertainty=FALSE, all.countries.required = TRUE, use.correlation = NULL) { # Run prediction for given countries/regions (as codes). If they are not given it will be set to countries # for which there are MCMC results but no prediction. @@ -96,9 +97,9 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), mcmc.set <- get.tfr.mcmc(sim.dir) if(is.null(mcmc.set)) stop('Error in "sim.dir" argument.') - pred <- get.tfr.prediction(sim.dir=prediction.dir) + pred <- get.tfr.prediction(sim.dir=prediction.dir, subdir = subdir) if(is.null(pred)) - stop('Error in "prediction.dir" argument.') + stop('Error in "sim.dir", "prediction.dir" or/and "subdir" argument. Use available.tfr.predictions() to check on valid predictions directories.') if(length(setdiff(pred$mcmc.set$meta$regions$country_code, mcmc.set$meta$regions$country_code)) > 0) stop('Prediction is inconsistent with the mcmc results. Use tfr.predict.') if(is.null(countries)) { @@ -125,7 +126,7 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), use.correlation=use.correlation, mu=pred$mu, rho=pred$rho, sigmaAR1=pred$sigmaAR1, min.tfr=pred$min.tfr, countries=countries.idx, save.as.ascii=0, output.dir=prediction.dir, - force.creating.thinned.mcmc=TRUE, + subdir = subdir, force.creating.thinned.mcmc=TRUE, write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, uncertainty=uncertainty, all.countries.required = all.countries.required) @@ -173,7 +174,8 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac use.correlation=FALSE, countries = NULL, adj.factor1=NA, adj.factor2=0, forceAR1=FALSE, boost.first.period.in.phase2=TRUE, - save.as.ascii=0, output.dir = NULL, write.summary.files=TRUE, + save.as.ascii=0, output.dir = NULL, subdir = "predictions", + write.summary.files=TRUE, is.mcmc.set.thinned=FALSE, force.creating.thinned.mcmc=FALSE, write.trajectories=TRUE, verbose=verbose, uncertainty=FALSE, all.countries.required = TRUE){ @@ -213,10 +215,10 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac #setup output directory if (is.null(output.dir)) output.dir <- meta$output.dir - outdir <- file.path(output.dir, 'predictions') + outdir <- file.path(output.dir, subdir) if(is.null(countries)) { - if(!replace.output && has.tfr.prediction(sim.dir=output.dir)) + if(!replace.output && has.tfr.prediction(sim.dir=output.dir, subdir = subdir)) stop('Prediction in ', outdir, ' already exists.\nSet replace.output=TRUE if you want to overwrite existing projections.') unlink(outdir, recursive=TRUE) @@ -1005,11 +1007,11 @@ do.convert.trajectories <- function(pred, n, output.dir, countries=NULL, verbose convert.tfr.trajectories <- function(dir=file.path(getwd(), 'bayesTFR.output'), - n=1000, output.dir=NULL, + n=1000, subdir = "predictions", output.dir=NULL, verbose=FALSE) { # Converts all trajectory rda files into UN ascii, selecting n trajectories by equal spacing. if(n <= 0) return() - pred <- get.tfr.prediction(sim.dir=dir) + pred <- get.tfr.prediction(sim.dir=dir, subdir = subdir) if (is.null(output.dir)) output.dir <- pred$output.directory if(!file.exists(output.dir)) dir.create(output.dir, recursive=TRUE) cat('Converting trajectories from', dir, '\n') @@ -1028,11 +1030,11 @@ convert.tfr.trajectories <- function(dir=file.path(getwd(), 'bayesTFR.output'), } write.projection.summary <- function(dir=file.path(getwd(), 'bayesTFR.output'), - output.dir=NULL, revision=NULL, adjusted=FALSE, + subdir = "predictions", output.dir=NULL, revision=NULL, adjusted=FALSE, est.uncertainty = FALSE, ...) { # Writes three prediction summary files, one in a user-friendly format, one in a UN-format, # and one parameter file. - pred <- get.tfr.prediction(sim.dir=dir) + pred <- get.tfr.prediction(sim.dir=dir, subdir = subdir) if (is.null(output.dir)) output.dir <- pred$output.directory if(!file.exists(output.dir)) dir.create(output.dir, recursive=TRUE) tfr.write.projection.summary.and.parameters(pred, output.dir, revision=revision, adjusted=adjusted, @@ -1302,19 +1304,19 @@ get.tfr.shift <- function(country.code, pred) { return(pred) } -tfr.median.reset <- function(sim.dir, countries = NULL) { +tfr.median.reset <- function(sim.dir, countries = NULL, ...) { if(is.null(countries)) { - pred <- get.tfr.prediction(sim.dir) + pred <- get.tfr.prediction(sim.dir, ...) pred$median.shift <- NULL store.bayesTFR.prediction(pred) cat('\nMedians for all countries reset.\n') } else - for(country in countries) pred <- tfr.median.shift(sim.dir, country, reset=TRUE) + for(country in countries) pred <- tfr.median.shift(sim.dir, country, reset=TRUE, ...) invisible(pred) } -tfr.median.shift <- function(sim.dir, country, reset=FALSE, shift=0, from=NULL, to=NULL) { - pred <- get.tfr.prediction(sim.dir) +tfr.median.shift <- function(sim.dir, country, reset=FALSE, shift=0, from=NULL, to=NULL, ...) { + pred <- get.tfr.prediction(sim.dir, ...) pred <- .bdem.median.shift(pred, type='tfr', country=country, reset=reset, shift=shift, from=from, to=to) store.bayesTFR.prediction(pred) @@ -1354,16 +1356,17 @@ tfr.median.shift <- function(sim.dir, country, reset=FALSE, shift=0, from=NULL, return(pred) } -tfr.median.set <- function(sim.dir, country, values, years=NULL) { - pred <- get.tfr.prediction(sim.dir) +tfr.median.set <- function(sim.dir, country, values, years=NULL, ...) { + pred <- get.tfr.prediction(sim.dir, ...) pred <- .bdem.median.set(pred, 'tfr', country=country, values=values, years=years) store.bayesTFR.prediction(pred) invisible(pred) } -tfr.median.adjust <- function(sim.dir, countries, factor1=2/3, factor2=1/3, forceAR1=FALSE) { - pred <- get.tfr.prediction(sim.dir) - if (is.null(pred)) stop('No valid prediction in ', sim.dir) +tfr.median.adjust <- function(sim.dir, countries, factor1=2/3, factor2=1/3, forceAR1=FALSE, subdir = "predictions") { + pred <- get.tfr.prediction(sim.dir, subdir = subdir) + if (is.null(pred)) stop('Prediction not found in ', file.path(sim.dir, subdir), + '. Check available.tfr.predictions() and use the subdir argument to set non-standard prediction subdirectory.') mcmc.set <- pred$mcmc.set if(is.null(countries)) { cat('\nNo countries given. Nothing to be done.\n') @@ -1382,7 +1385,7 @@ tfr.median.adjust <- function(sim.dir, countries, factor1=2/3, factor2=1/3, forc use.tfr3=pred$use.tfr3, mcmc3.set=m3.set, burnin3=pred$burnin3, mu=pred$mu, rho=pred$rho, sigmaAR1=pred$sigmaAR1, min.tfr=pred$min.tfr, countries=countries.idx, adj.factor1=factor1, adj.factor2=factor2, - forceAR1=forceAR1, save.as.ascii=0, output.dir=sim.dir, + forceAR1=forceAR1, save.as.ascii=0, output.dir=sim.dir, subdir = subdir, write.summary.files=FALSE, is.mcmc.set.thinned=TRUE, write.trajectories=FALSE, verbose=FALSE) new.means <- new.pred$traj.mean.sd[,1,2:dim(new.pred$traj.mean.sd)[3]] @@ -1390,7 +1393,7 @@ tfr.median.adjust <- function(sim.dir, countries, factor1=2/3, factor2=1/3, forc tfr.median.set(sim.dir, countries[icountry], new.means[get.country.object(countries[icountry], mcmc.set$meta)$index,]) } # reload adjusted prediction - invisible(get.tfr.prediction(sim.dir)) + invisible(get.tfr.prediction(sim.dir, subdir = subdir)) } tfr.correlation <- function(meta, cor.pred=NULL, low.coeffs=c(0.11, 0.26, 0.05, 0.09), diff --git a/R/predict_tfr_subnat.R b/R/predict_tfr_subnat.R index 3d441db8..e83108fc 100644 --- a/R/predict_tfr_subnat.R +++ b/R/predict_tfr_subnat.R @@ -1,11 +1,11 @@ tfr.predict.subnat <- function(countries, my.tfr.file, sim.dir=file.path(getwd(), 'bayesTFR.output'), - end.year=2100, start.year=NULL, output.dir = NULL, annual = NULL, + end.year=2100, start.year=NULL, subdir = "predictions", output.dir = NULL, annual = NULL, nr.traj=NULL, seed = NULL, min.tfr = 0.5, ar.pars = NULL, save.as.ascii = 0, verbose = TRUE) { # Run subnational projections, using the Scale AR(1) model applied to a national bayesTFR simulation # sim.dir is the world-national simulation. Set output.dir to store results somewhere else. - wpred <- get.tfr.prediction(sim.dir) # contains national projections + wpred <- get.tfr.prediction(sim.dir, subdir = subdir) # contains national projections wdata <- wpred$tfr_matrix_reconstructed wmeta <- wpred$mcmc.set$meta if(!is.null(seed)) set.seed(seed) diff --git a/R/tfr_median_set_new.R b/R/tfr_median_set_new.R index f10f8e43..c9ca7318 100644 --- a/R/tfr_median_set_new.R +++ b/R/tfr_median_set_new.R @@ -30,9 +30,9 @@ tfr.shift.estimation.to.wpp <- function(sim.dir, ..., verbose = TRUE){ meta$median.shift.estimation[[as.character(cntry)]] <- merged$shift } store.bayesTFR.meta.object(meta, meta$output.dir) - has.predictions <- has.tfr.prediction(sim.dir = sim.dir) - if(has.predictions) { # need to save this also in the collapsed chain of the prediction - pred <- get.tfr.prediction(sim.dir = sim.dir) + predictions <- available.tfr.predictions(sim.dir = sim.dir) + for(pred.subdir in predictions) { # need to save this also in the collapsed chain of the prediction + pred <- get.tfr.prediction(sim.dir = sim.dir, subdir = pred.subdir) pred$mcmc.set$meta$median.shift.estimation <- meta$median.shift.estimation store.bayesTFR.prediction(pred) store.bayesTFR.meta.object(pred$mcmc.set$meta, pred$mcmc.set$meta$output.dir) @@ -47,12 +47,13 @@ get.tfr.shift.estimation <- function(country.code, meta) return(meta$median.shift.estimation[[as.character(country.code)]]) } -tfr.median.set.all <- function(sim.dir, country, values, years=NULL, burnin = 0, thin = 1) +tfr.median.set.all <- function(sim.dir, country, values, years=NULL, burnin = 0, thin = 1, + subdir = "predictions") { mcmc.set <- get.tfr.mcmc(sim.dir) meta <- mcmc.set$meta country.obj <- get.country.object(country, meta=meta) - has.predictions <- has.tfr.prediction(sim.dir = sim.dir) + has.predictions <- has.tfr.prediction(sim.dir = sim.dir, subdir = subdir) # browser() bdem.shift.estimation <- do.call('get.tfr.shift.estimation', list(country.obj$code, meta)) estimation.years <- as.numeric(dimnames(meta$tfr_matrix)[[1]]) @@ -63,7 +64,7 @@ tfr.median.set.all <- function(sim.dir, country, values, years=NULL, burnin = 0, years.missing <- is.null(years) if (has.predictions) { - pred <- get.tfr.prediction(sim.dir = sim.dir) + pred <- get.tfr.prediction(sim.dir = sim.dir, subdir = subdir) pred.years <- as.numeric(dimnames(pred$quantiles)[[3]]) bdem.shift <- do.call('get.tfr.shift', list(country.obj$code, pred)) nr.proj <- pred$nr.projections + 1 @@ -130,8 +131,10 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL) { mcmc.set <- get.tfr.mcmc(sim.dir) meta <- mcmc.set$meta - has.prediction <- has.tfr.prediction(sim.dir = sim.dir) - if (has.prediction) pred <- get.tfr.prediction(sim.dir = sim.dir) + has.prediction <- has.tfr.prediction(sim.dir = sim.dir, subdir = subdir) + #if (has.prediction) pred <- get.tfr.prediction(sim.dir = sim.dir) + predictions <- available.tfr.predictions(sim.dir = sim.dir) + has.estimation <- (!is.null(mcmc.set$mcmc.list[[1]]$uncertainty) && mcmc.set$mcmc.list[[1]]$uncertainty) output <- list() if (has.estimation && !is.null(meta$median.shift.estimation)) @@ -146,23 +149,25 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL) } output[['meta']] <- meta store.bayesTFR.meta.object(meta, meta$output.dir) - if (has.prediction){ + for(pred.subdir in predictions) { # need to save this also in the prediction mcmc object + pred <- get.tfr.prediction(sim.dir = sim.dir, subdir = pred.subdir) pred$mcmc.set$meta$median.shift.estimation <- meta$median.shift.estimation store.bayesTFR.prediction(pred) } } - if (has.prediction && !is.null(pred$median.shift)) - { - if(is.null(countries)) pred$median.shift <- NULL # reset all countries - else { - for (country in countries) - { - country.obj <- get.country.object(country, meta=meta) - pred$median.shift[[as.character(country.obj$code)]] <- NULL - } - } - output[['pred']] <- pred - store.bayesTFR.prediction(pred) + for(pred.subdir in predictions) { + pred <- get.tfr.prediction(sim.dir = sim.dir, subdir = pred.subdir) + if(is.null(pred$median.shift)) next + if(is.null(countries)) pred$median.shift <- NULL # reset all countries + else { + for (country in countries) + { + country.obj <- get.country.object(country, meta=meta) + pred$median.shift[[as.character(country.obj$code)]] <- NULL + } + } + output[['pred']] <- pred # this just returns the last prediction object which is not correct if there are more than one prediction directories + store.bayesTFR.prediction(pred) } invisible(output) } @@ -206,8 +211,8 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL) } -tfr.shift.prediction.to.wpp <- function(sim.dir, ...){ - pred <- get.tfr.prediction(sim.dir) +tfr.shift.prediction.to.wpp <- function(sim.dir, subdir = "predictions", ...){ + pred <- get.tfr.prediction(sim.dir, subdir = subdir) new.pred <- .do.shift.prediction.to.wpp(pred, wpp.dataset = "tfrprojMed", ...) store.bayesTFR.prediction(new.pred) invisible(new.pred) From 8c8ee15a3e81a5ef476205b550a55d97d0500937 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 27 Sep 2024 15:08:39 -0700 Subject: [PATCH 45/48] fixed documentation --- ChangeLog | 3 +++ DESCRIPTION | 4 ++-- R/predict_tfr.R | 2 +- R/tfr_median_set_new.R | 3 --- man/convert.trajectories.Rd | 3 ++- man/get.tfr.prediction.Rd | 21 +++++++++++++++++---- man/include.Rd | 2 ++ man/predict-tfr.Rd | 6 ++++-- man/tfr.median.set.Rd | 16 +++++++++------- man/tfr.median.set.all.Rd | 3 ++- man/tfr.predict.extra.Rd | 3 ++- man/tfr.predict.subnat.Rd | 7 ++++--- man/write.projection.summary.Rd | 4 +++- 13 files changed, 51 insertions(+), 26 deletions(-) diff --git a/ChangeLog b/ChangeLog index b409f744..4d6ad835 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 7.4-2.90xx (2024-09-26) ----- +Added option of keeping multiple predictions directories in one simulation directory +(argument "subdir" added to various functions). + Added option of shifting projection to wpp by matching means. Added dataset include_2024. diff --git a/DESCRIPTION b/DESCRIPTION index 7ee45681..4d8da610 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.4-2.9007 -Date: 2024-09-26 +Version: 7.4-2.9008 +Date: 2024-09-27 Title: Bayesian Fertility Projection Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) Maintainer: Hana Sevcikova diff --git a/R/predict_tfr.R b/R/predict_tfr.R index b0095d8f..54d1f9d2 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -215,7 +215,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac #setup output directory if (is.null(output.dir)) output.dir <- meta$output.dir - outdir <- file.path(output.dir, subdir) + outdir <- file.path(output.dir, basename(subdir)) if(is.null(countries)) { if(!replace.output && has.tfr.prediction(sim.dir=output.dir, subdir = subdir)) diff --git a/R/tfr_median_set_new.R b/R/tfr_median_set_new.R index c9ca7318..1dd17d5a 100644 --- a/R/tfr_median_set_new.R +++ b/R/tfr_median_set_new.R @@ -131,10 +131,7 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL) { mcmc.set <- get.tfr.mcmc(sim.dir) meta <- mcmc.set$meta - has.prediction <- has.tfr.prediction(sim.dir = sim.dir, subdir = subdir) - #if (has.prediction) pred <- get.tfr.prediction(sim.dir = sim.dir) predictions <- available.tfr.predictions(sim.dir = sim.dir) - has.estimation <- (!is.null(mcmc.set$mcmc.list[[1]]$uncertainty) && mcmc.set$mcmc.list[[1]]$uncertainty) output <- list() if (has.estimation && !is.null(meta$median.shift.estimation)) diff --git a/man/convert.trajectories.Rd b/man/convert.trajectories.Rd index 5795d384..52d0d765 100644 --- a/man/convert.trajectories.Rd +++ b/man/convert.trajectories.Rd @@ -9,11 +9,12 @@ Converts TFR trajectories stored in a binary format into two CSV files of a UN-s } \usage{ convert.tfr.trajectories(dir = file.path(getwd(), 'bayesTFR.output'), - n = 1000, output.dir = NULL, verbose = FALSE) + n = 1000, subdir = "predictions", output.dir = NULL, verbose = FALSE) } \arguments{ \item{dir}{Directory containing the prediction object. It should correspond to the \code{output.dir} argument of the \code{\link{tfr.predict}} function.} \item{n}{Number of trajectories to be stored. It can be either a single number or the word \dQuote{all} in which case all trajectories are stored.} + \item{subdir}{Name of subdirectory of \code{dir} containing the prediction.} \item{output.dir}{Directory in which the resulting files will be stored. If \code{NULL} the same directory is used as for the prediction.} \item{verbose}{Logical switching log messages on and off.} } diff --git a/man/get.tfr.prediction.Rd b/man/get.tfr.prediction.Rd index 5bd42d96..da24f52f 100644 --- a/man/get.tfr.prediction.Rd +++ b/man/get.tfr.prediction.Rd @@ -2,30 +2,43 @@ \Rdversion{1.1} \alias{get.tfr.prediction} \alias{has.tfr.prediction} +\alias{available.tfr.predictions} \title{ Accessing a Prediction Object } \description{ -Function \code{get.tfr.prediction} retrieves results of a prediction and creates an object of class \code{\link{bayesTFR.prediction}}. Function \code{has.tfr.prediction} checks an existence of such results. +Function \code{get.tfr.prediction} retrieves results of a prediction and creates an object of class \code{\link{bayesTFR.prediction}}. Function \code{has.tfr.prediction} checks an existence of such results. Function \code{available.tfr.predictions} lists predictions available in the given simulation directory. } \usage{ -get.tfr.prediction(mcmc = NULL, sim.dir = NULL, mcmc.dir = NULL) +get.tfr.prediction(mcmc = NULL, sim.dir = NULL, mcmc.dir = NULL, + subdir = "predictions") -has.tfr.prediction(mcmc = NULL, sim.dir = NULL) +has.tfr.prediction(mcmc = NULL, sim.dir = NULL, subdir = "predictions") + +available.tfr.predictions(mcmc = NULL, sim.dir = NULL, full.names = FALSE) } \arguments{ \item{mcmc}{Object of class \code{\link{bayesTFR.mcmc.set}} used to make the prediction. It must correspond to a Phase II MCMC. If it is \code{NULL}, the prediction is loaded from directory given by \code{sim.dir}.} \item{sim.dir}{Directory where the prediction is stored. It should correspond to the value of the \code{output.dir} argument used in the \code{\link{tfr.predict}} function. Only relevant if \code{mcmc} is \code{NULL}.} \item{mcmc.dir}{Optional argument to be used only in a special case when the mcmc object contained in the prediction object was estimated in different directory than in the one to which it points to (for example due to moving or renaming the original directory). The argument causes that the mcmc is redirected to the given directory. It can be set to \code{NA} if no loading of the mcmc object is desired.} + \item{subdir}{Subdirectory of \code{sim.dir} for this particular prediction.} + \item{full.names}{Logical. If \code{TRUE}, the directory names are given as full paths, otherwise (default) only the base names.} } \details{ -If \code{mcmc} is not \code{NULL}, the search directory is set to \code{mcmc$meta$output.dir}. This approach assumes that the prediction was stored in the same directory as the MCMC simulation, i.e. the \code{output.dir} argument of the \code{\link{tfr.predict}} function was set to \code{NULL}. If it is not the case, the argument \code{mcmc.dir} should be used.} +If \code{mcmc} is not \code{NULL}, the search directory is set to \code{mcmc$meta$output.dir}. This approach assumes that the prediction was stored in the same directory as the MCMC simulation, i.e. the \code{output.dir} argument of the \code{\link{tfr.predict}} function was set to \code{NULL}. If it is not the case, the argument \code{mcmc.dir} should be used. + +Usually, all predictions are stored in the subdirectory \dQuote{predictions} of the simulation directory. If the subdirectory has a different name, the argument \code{subdir} should be used. This allows to keep multiple predictions in one (MCMC) simulation directory. + +The function \code{available.tfr.predictions} can be used to view all available predictions in the simulation directory. +} \value{ Function \code{has.tfr.prediction} returns a logical indicating if a prediction exists for the given \code{mcmc}. + Function \code{available.tfr.predictions} returns a vector of directory names containing TFR predictions. + Function \code{get.tfr.prediction} returns an object of class \code{\link{bayesTFR.prediction}}. } diff --git a/man/include.Rd b/man/include.Rd index 991f181b..6735cbab 100644 --- a/man/include.Rd +++ b/man/include.Rd @@ -6,6 +6,7 @@ \alias{include_2017} \alias{include_2019} \alias{include_2022} +\alias{include_2024} \docType{data} \title{ @@ -15,6 +16,7 @@ Inclusion Codes Data sets containing codes that determine which countries are to be included into a simulation or/and projections. } \usage{ +data(include_2024) data(include_2022) data(include_2019) data(include_2017) diff --git a/man/predict-tfr.Rd b/man/predict-tfr.Rd index 5030f1c7..d850dc45 100644 --- a/man/predict-tfr.Rd +++ b/man/predict-tfr.Rd @@ -17,7 +17,8 @@ tfr.predict(mcmc.set = NULL, end.year = 2100, use.diagnostics = FALSE, use.tfr3 = TRUE, burnin3 = 2000, mu = 2.1, rho = 0.8859, sigmaAR1 = 0.1016, min.tfr = 0.5, use.correlation = FALSE, save.as.ascii = 0, output.dir = NULL, - low.memory = TRUE, seed = NULL, verbose = TRUE, uncertainty = FALSE, \dots) + subdir = "predictions", low.memory = TRUE, seed = NULL, + verbose = TRUE, uncertainty = FALSE, \dots) } \arguments{ @@ -34,6 +35,7 @@ tfr.predict(mcmc.set = NULL, end.year = 2100, \item{burnin3}{Burnin used for phase III MCMCs. Only relevant if \code{use.tfr3} is \code{TRUE}.} \item{save.as.ascii}{Either a number determining how many trajectories should be converted into an ASCII file, or \dQuote{all} in which case all trajectories are converted. It should be set to 0, if no conversion is desired.} \item{output.dir}{Directory into which the resulting prediction object and the trajectories are stored. If it is \code{NULL}, it is set to either \code{sim.dir}, or to \code{output.dir} of \code{mcmc.set$meta} if \code{mcmc.set} is given.} + \item{subdir}{Subdirectory of \code{output.dir} to store the predictions. It is defined relative to \code{output.dir} and can only have one level.} \item{low.memory}{Logical indicating if the prediction should run in a low-memory mode. If it is \code{FALSE}, the whole traces of all parameters, including the burnin, are loaded into memory. Otherwise, burnins are discarded and parameters are loaded as they are needed and are not kept in the memory.} \item{mu}{Long-term mean \eqn{\mu}{mu} in the AR(1) projection model. Only used if \code{use.tfr3} is \code{FALSE}.} \item{rho}{Autoregressive parameter \eqn{\rho}{rho} in AR(1) projection model. If it is \code{NULL} it is estimated from the data. Only used if \code{use.tfr3} is \code{FALSE}.} @@ -52,7 +54,7 @@ If Phase III is projected using a BHM (i.e. if \code{use.tfr3} is \code{TRUE}), The projection is run for all missing values before the present year, if any. Medians over the trajectories are used as imputed values and the trajectories are discarded. The process then continues by projecting the future values where all generated trajectories are kept. -The resulting prediction object is saved into \file{\{output.dir\}/predictions}. Trajectories for all countries are saved into the same directory in a binary format, one file per country. At the end of the projection, if \code{save.as.ascii} is larger than 0, the function converts the given number of trajectories into a CSV file of a UN-specific format. They are selected by equal spacing (see function \code{\link{convert.tfr.trajectories}} for more details on the conversion). In addition, two summary files are created: one in a user-friendly format, the other using a UN-specific coding of the variants and time (see \code{\link{write.projection.summary}} for more details). +The resulting prediction object is saved into \file{\{output.dir\}/\{subdir\}}. Trajectories for all countries are saved into the same directory in a binary format, one file per country. At the end of the projection, if \code{save.as.ascii} is larger than 0, the function converts the given number of trajectories into a CSV file of a UN-specific format. They are selected by equal spacing (see function \code{\link{convert.tfr.trajectories}} for more details on the conversion). In addition, two summary files are created: one in a user-friendly format, the other using a UN-specific coding of the variants and time (see \code{\link{write.projection.summary}} for more details). } \value{ diff --git a/man/tfr.median.set.Rd b/man/tfr.median.set.Rd index f0bbdf13..71df3be0 100644 --- a/man/tfr.median.set.Rd +++ b/man/tfr.median.set.Rd @@ -12,16 +12,17 @@ Editing Medians of the Projection These functions are to be used by expert analysts. They allow to change the projection medians either to specific values, including the WPP values, or shift the medians by a given constant, or by a specific adjusting procedure. } \usage{ -tfr.median.set(sim.dir, country, values, years = NULL) +tfr.median.set(sim.dir, country, values, years = NULL, \dots) tfr.median.shift(sim.dir, country, reset = FALSE, shift = 0, - from = NULL, to = NULL) + from = NULL, to = NULL, \dots) -tfr.median.adjust(sim.dir, countries, factor1 = 2/3, factor2 = 1/3, forceAR1 = FALSE) +tfr.median.adjust(sim.dir, countries, factor1 = 2/3, factor2 = 1/3, forceAR1 = FALSE, + subdir = "predictions") -tfr.median.reset(sim.dir, countries = NULL) +tfr.median.reset(sim.dir, countries = NULL, \dots) -tfr.shift.prediction.to.wpp(sim.dir, ...) +tfr.shift.prediction.to.wpp(sim.dir, subdir = "predictions", \dots) } \arguments{ \item{sim.dir}{Directory containing the prediction object.} @@ -36,13 +37,14 @@ tfr.shift.prediction.to.wpp(sim.dir, ...) \item{to}{Year until which the offset/reset should be done. By default, it is set to the last prediction period.} \item{factor1, factor2}{Adjusting factors for the first and second projection period, respectively (see below).} \item{forceAR1}{Logical. If \code{TRUE}, the given countries are forced to enter Phase III (i.e. the AR(1) process) in the first projection period.} - \item{\dots}{Additional arguments passed to the underlying adjustment function. It can be \code{verbose} to show/hide the progress of the adjustment and \code{wpp.year} to adjust it to if it differs from the wpp year of the simulation.} + \item{subdir}{Subdirectory of \code{sim.dir} containing the predictions.} + \item{\dots}{Additional arguments passed to the underlying adjustment function. For \code{tfr.shift.prediction.to.wpp} it can be \code{stat} with values \dQuote{median} (default) or \dQuote{mean} to specify which statistics should be adjusted; \code{verbose} to show/hide the progress of the adjustment and \code{wpp.year} to adjust it to if it differs from the wpp year of the simulation. For the other functions it can be \code{subdir} to specify the location of the prediction.} } \details{ The function \code{tfr.median.set} can be used to set the medians of the given country to specific values. Function \code{tfr.median.shift} can be used to offset the medians by a specific constant, or to reset the medians to their original BHM values. Function \code{tfr.median.adjust} runs the prediction procedure for the given countries with an additional decrement in the model in the first two projection periods. In the first projection period it is computed as \code{factor1*S} where \code{S} is a difference between observed decrement and the expected decrement (by the double logistic function) in the last observed period. In the second projection period, in the formula \code{factor1} is replaced by \code{factor2}. If \code{forceAR1} is set to \code{TRUE}, we recommend to set \code{factor1} and \code{factor2} to 0. The function then calls \code{tfr.median.set} in order to store the new median for each country. -Function\code{tfr.shift.prediction.to.wpp} shifts the projected medians so that they correspond to the values found in the \code{tfrprojMed} datasets of the \pkg{wpp} package that either corresponds to the package used for the simulation itself or is given by the \code{wpp.year} argument. If using \pkg{wpp2022}, the dataset name is automatically adjusted depending if it is an annual or a 5-year simulation. +Function\code{tfr.shift.prediction.to.wpp} shifts the projected medians or means (if \code{stat} is \dQuote{mean}), so that they correspond to the values found in the \code{tfrprojMed} datasets of the \pkg{wpp} package that either corresponds to the package used for the simulation itself or is given by the \code{wpp.year} argument. If using \pkg{wpp2022} or higher, the dataset name is automatically adjusted depending if it is an annual or a 5-year simulation. Note that regardless if it is an adjustment of the median or mean, the corresponding offset is always converted to a shift of the median. Function \code{tfr.median.reset} resets medians of the given countries to the original values. By default it deletes adjustments for all countries. diff --git a/man/tfr.median.set.all.Rd b/man/tfr.median.set.all.Rd index 325cbaca..d85a4782 100644 --- a/man/tfr.median.set.all.Rd +++ b/man/tfr.median.set.all.Rd @@ -12,7 +12,7 @@ These functions are to be used by expert analysts. They allow to change the esti } \usage{ tfr.median.set.all(sim.dir, country, values, years = NULL, - burnin = 0, thin = 1) + burnin = 0, thin = 1, subdir = "predictions") tfr.median.reset.estimation(sim.dir, countries = NULL) @@ -27,6 +27,7 @@ tfr.shift.estimation.to.wpp(sim.dir, \dots, verbose = TRUE) \item{years}{Numeric vector giving years which \code{values} correspond to. Ideally it should be of the same length as \code{values}.} \item{burnin}{Burnin to use when computing the estimation median.} \item{thin}{Thinning interval to use when computing the estimation median.} + \item{subdir}{Subdirectory of \code{sim.dir} containing the predictions.} \item{\dots}{Can be used to pass \code{burnin} \code{thin} to the underlying funcions.} \item{verbose}{Logical. If \code{TRUE} a progress of the adjustment is shown.} } diff --git a/man/tfr.predict.extra.Rd b/man/tfr.predict.extra.Rd index c41c84c0..3fc0f880 100644 --- a/man/tfr.predict.extra.Rd +++ b/man/tfr.predict.extra.Rd @@ -10,13 +10,14 @@ Using the posterior parameter samples the function generates posterior trajecto } \usage{ tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'), - prediction.dir = sim.dir, countries = NULL, + prediction.dir = sim.dir, subdir = "predictions", countries = NULL, save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE, all.countries.required = TRUE, use.correlation = NULL) } \arguments{ \item{sim.dir}{Directory with the MCMC simulation results.} \item{prediction.dir}{Directory where the prediction object and the trajectories are stored.} + \item{subdir}{Subdirectory of \code{prediction.dir} containing the predictions.} \item{countries}{Vector of country codes for which the prediction should be made. If it is \code{NULL}, the prediction is run for all countries that are included in the MCMC object but for which no prediction was generated.} \item{save.as.ascii}{Either a number determining how many trajectories should be converted into an ascii file, or \dQuote{all} in which case all trajectories are converted. It should be set to 0, if no conversion is desired. Note that the conversion is done on all countries.} \item{verbose}{Logical switching log messages on and off.} diff --git a/man/tfr.predict.subnat.Rd b/man/tfr.predict.subnat.Rd index 9af7f788..5c5e40b4 100644 --- a/man/tfr.predict.subnat.Rd +++ b/man/tfr.predict.subnat.Rd @@ -10,9 +10,9 @@ Generates posterior trajectories of the total fertility rate for subregions of g \usage{ tfr.predict.subnat(countries, my.tfr.file, sim.dir = file.path(getwd(), "bayesTFR.output"), - end.year = 2100, start.year = NULL, output.dir = NULL, - annual = NULL, nr.traj = NULL, seed = NULL, min.tfr = 0.5, - ar.pars = NULL, save.as.ascii = 0, verbose = TRUE) + end.year = 2100, start.year = NULL, subdir = "predictions", + output.dir = NULL, annual = NULL, nr.traj = NULL, seed = NULL, + min.tfr = 0.5, ar.pars = NULL, save.as.ascii = 0, verbose = TRUE) } \arguments{ @@ -21,6 +21,7 @@ tfr.predict.subnat(countries, my.tfr.file, \item{sim.dir}{Simulation directory with the national projections generated using \code{\link{tfr.predict}}.} \item{end.year}{End year of the projections.} \item{start.year}{Start year of the projections. By default, projections start at the same time point as the national projections.} + \item{subdir}{Subdirectory of \code{sim.dir} containing the national predictions.} \item{output.dir}{Directory into which the resulting prediction objects and the trajectories are stored. See below for details.} \item{annual}{Logical indicating if the subnational projection should be on an annual scale or a 5-year scale. By default, the scale is matched to the national simulation. If the subnational and national scales are not the same, the national trajectories are either interpolated (if \code{annual = TRUE} and the national simulation is not annual) or averaged to 5-year values (if \code{annual = FALSE} and the national simulation is annual).} diff --git a/man/write.projection.summary.Rd b/man/write.projection.summary.Rd index 4900bb17..c90e3ac1 100644 --- a/man/write.projection.summary.Rd +++ b/man/write.projection.summary.Rd @@ -10,10 +10,12 @@ The function creates two files containing projection summaries, such as the medi } \usage{ write.projection.summary(dir = file.path(getwd(), "bayesTFR.output"), - output.dir = NULL, revision = NULL, adjusted = FALSE, est.uncertainty = FALSE, \dots) + subdir = "predictions", output.dir = NULL, revision = NULL, adjusted = FALSE, + est.uncertainty = FALSE, \dots) } \arguments{ \item{dir}{Directory containing the prediction object. It should correspond to the \code{output.dir} argument of the \code{\link{tfr.predict}} function.} + \item{subdir}{Subdirectory of \code{dir} containing the predictions.} \item{output.dir}{Directory in which the resulting file will be stored. If \code{NULL} the same directory is used as for the prediction.} \item{revision}{UN WPP revision number. If \code{NULL} it is determined from the corresponding WPP year: WPP 2008 corresponds to revision 13, every subsequent WPP increases the revision number by one. Used as a constant in the second file only.} \item{adjusted}{Logical. By default the function writes summary using the original BHM projections. If the projection medians are adjusted (using e.g. \code{\link{tfr.median.set}}), setting this argument to \code{TRUE} causes writing the adjusted projections.} From f01d309e20899b6d660539039cce8adf67a7fa00 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 25 Oct 2024 17:39:34 -0700 Subject: [PATCH 46/48] version 7.4-3 --- DESCRIPTION | 29 +++++++++++++++++++++++++---- tests/run_tests.R | 4 ++-- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4d8da610..8ae96cd8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,30 @@ Package: bayesTFR -Version: 7.4-2.9008 -Date: 2024-09-27 +Version: 7.4-3 +Date: 2024-10-25 Title: Bayesian Fertility Projection -Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org) -Maintainer: Hana Sevcikova +Authors@R: c(person(given = "Hana", + family = "Sevcikova", + role = c("cre", "aut"), + email = "hanas@uw.edu"), + person(given = "Leontine", + family = "Alkema", + role = "aut"), + person(given = "Peiran", + family = "Liu", + role = "aut"), + person(given = "Adrian", + family = "Raftery", + role = "aut", + email = "raftery@uw.edu"), + person(given = "Bailey", + family = "Fosdick", + role = "aut"), + person(given = "Patrick", + family = "Gerland", + role = "aut", + email = "gerland@un.org" + ) + ) Depends: R (>= 3.5.0) Imports: mvtnorm, MASS, coda, graphics, grDevices, stats, utils, wpp2019, data.table, lifecycle Suggests: rworldmap, snowFT, googleVis, sp, wpp2017, wpp2015, wpp2012, wpp2010, ggplot2, sf, spData, scales diff --git a/tests/run_tests.R b/tests/run_tests.R index 6250a03a..7daa33cc 100644 --- a/tests/run_tests.R +++ b/tests/run_tests.R @@ -28,13 +28,13 @@ if(!cran) { test.plot.all() test.reproduce.simulation() test.subnational.predictions() - for (wpp in c(2019, 2022)){ + for (wpp in c(2019, 2024)){ test.run.mcmc.simulation(wpp.year = wpp) test.run.mcmc.simulation.with.uncertainty(wpp.year = wpp) test.thinned.simulation(wpp.year = wpp) test.run.annual.simulation(wpp.year = wpp) } - for(wpp in rev(c(2010, 2012, 2015, 2017, 2022))) { # these are either suggested packages or not on CRAN + for(wpp in rev(c(2010, 2012, 2015, 2017, 2024))) { # these are either suggested packages or not on CRAN test.load.UNtfr(wpp) test.load.UNlocations(wpp) test.create.tfr.matrix(wpp) From d8144452e196cfed494e85a79c5032718ba1a220 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 31 Oct 2024 17:50:43 -0700 Subject: [PATCH 47/48] minor print statement change --- DESCRIPTION | 4 ++-- R/predict_tfr.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8ae96cd8..d11b493b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.4-3 -Date: 2024-10-25 +Version: 7.4-3.9001 +Date: 2024-10-31 Title: Bayesian Fertility Projection Authors@R: c(person(given = "Hana", family = "Sevcikova", diff --git a/R/predict_tfr.R b/R/predict_tfr.R index 54d1f9d2..2c0f755a 100644 --- a/R/predict_tfr.R +++ b/R/predict_tfr.R @@ -1014,7 +1014,7 @@ convert.tfr.trajectories <- function(dir=file.path(getwd(), 'bayesTFR.output'), pred <- get.tfr.prediction(sim.dir=dir, subdir = subdir) if (is.null(output.dir)) output.dir <- pred$output.directory if(!file.exists(output.dir)) dir.create(output.dir, recursive=TRUE) - cat('Converting trajectories from', dir, '\n') + cat('Converting trajectories from', file.path(dir, subdir), '\n') if (is.null(pred$na.index)) { if(verbose) cat('Finding NA values in each country ...\n') for (country in 1:pred$mcmc.set$meta$nr_countries) { From 9c7116a00e8f6cb59956c859bee6659b65a574a4 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Fri, 1 Nov 2024 10:58:12 -0700 Subject: [PATCH 48/48] two countries re-classified as small --- ChangeLog | 6 +- DESCRIPTION | 4 +- data-raw/create_includes.R | 3 + data-raw/include_2024.txt | 287 +++++++++++++++++++++++++++++++++++++ data/include_2024.R | 6 - data/include_2024.rda | Bin 0 -> 818 bytes 6 files changed, 297 insertions(+), 9 deletions(-) create mode 100644 data-raw/include_2024.txt delete mode 100644 data/include_2024.R create mode 100644 data/include_2024.rda diff --git a/ChangeLog b/ChangeLog index 4d6ad835..2edf0427 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,8 @@ -7.4-2.90xx (2024-09-26) +7.4-4 (2024-11-01) +----- +Two countries added to small countries in include_2024. + +7.4-3 (2024-10-31) ----- Added option of keeping multiple predictions directories in one simulation directory (argument "subdir" added to various functions). diff --git a/DESCRIPTION b/DESCRIPTION index d11b493b..2160ce19 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: bayesTFR -Version: 7.4-3.9001 -Date: 2024-10-31 +Version: 7.4-4 +Date: 2024-11-01 Title: Bayesian Fertility Projection Authors@R: c(person(given = "Hana", family = "Sevcikova", diff --git a/data-raw/create_includes.R b/data-raw/create_includes.R index 72362bdf..7d51eb96 100644 --- a/data-raw/create_includes.R +++ b/data-raw/create_includes.R @@ -3,6 +3,9 @@ library(usethis) # Run this from the data-raw directory +include_2024 <- as.data.frame(fread("include_2024.txt", sep = "\t")[, .(country_code, include_code)]) +use_data(include_2024, overwrite = TRUE) + include_2022 <- as.data.frame(fread("include_2022.txt", sep = "\t")[, .(country_code, include_code)]) use_data(include_2022, overwrite = TRUE) diff --git a/data-raw/include_2024.txt b/data-raw/include_2024.txt new file mode 100644 index 00000000..5bdb28ac --- /dev/null +++ b/data-raw/include_2024.txt @@ -0,0 +1,287 @@ +country_code name include_code +900 World 0 +1834 Sub-Saharan Africa 0 +1833 Northern Africa and Western Asia 0 +1831 Central and Southern Asia 0 +1832 Eastern and South-Eastern Asia 0 +1830 Latin America and the Caribbean 0 +1835 Oceania (excluding Australia and New Zealand) 0 +1836 Australia/New Zealand 0 +1829 Europe and Northern America 0 +901 More developed regions 0 +902 Less developed regions 0 +941 Least developed countries 0 +934 Less developed regions, excluding least developed countries 0 +948 Less developed regions, excluding China 0 +1636 Land-locked Developing Countries (LLDC) 0 +1637 Small Island Developing States (SIDS) 0 +1503 High-income countries 0 +1517 Middle-income countries 0 +1502 Upper-middle-income countries 0 +1501 Lower-middle-income countries 0 +1500 Low-income countries 0 +1518 No income group available 0 +903 Africa 0 +910 Eastern Africa 0 +108 Burundi 2 +174 Comoros 2 +262 Djibouti 2 +232 Eritrea 2 +231 Ethiopia 2 +404 Kenya 2 +450 Madagascar 2 +454 Malawi 2 +480 Mauritius 2 +175 Mayotte 2 +508 Mozambique 2 +638 Reunion 2 +646 Rwanda 2 +690 Seychelles 2 +706 Somalia 2 +728 South Sudan 2 +800 Uganda 2 +834 United Republic of Tanzania 2 +894 Zambia 2 +716 Zimbabwe 2 +911 Middle Africa 0 +24 Angola 2 +120 Cameroon 2 +140 Central African Republic 2 +148 Chad 2 +178 Congo 2 +180 Democratic Republic of the Congo 2 +226 Equatorial Guinea 2 +266 Gabon 2 +678 Sao Tome and Principe 2 +912 Northern Africa 0 +12 Algeria 2 +818 Egypt 2 +434 Libya 2 +504 Morocco 2 +729 Sudan 2 +788 Tunisia 2 +732 Western Sahara 2 +913 Southern Africa 0 +72 Botswana 2 +748 Eswatini 2 +426 Lesotho 2 +516 Namibia 2 +710 South Africa 2 +914 Western Africa 0 +204 Benin 2 +854 Burkina Faso 2 +132 Cabo Verde 2 +384 Cote d'Ivoire 2 +270 Gambia 2 +288 Ghana 2 +324 Guinea 2 +624 Guinea-Bissau 2 +430 Liberia 2 +466 Mali 2 +478 Mauritania 2 +562 Niger 2 +566 Nigeria 2 +654 Saint Helena 1 +686 Senegal 2 +694 Sierra Leone 2 +768 Togo 2 +935 Asia 0 +5500 Central Asia 0 +398 Kazakhstan 2 +417 Kyrgyzstan 2 +762 Tajikistan 2 +795 Turkmenistan 2 +860 Uzbekistan 2 +906 Eastern Asia 0 +156 China 2 +344 China, Hong Kong SAR 2 +446 China, Macao SAR 2 +158 China, Taiwan Province of China 2 +408 Dem. People's Republic of Korea 2 +392 Japan 2 +496 Mongolia 2 +410 Republic of Korea 2 +5501 Southern Asia 0 +4 Afghanistan 2 +50 Bangladesh 2 +64 Bhutan 2 +356 India 2 +364 Iran (Islamic Republic of) 2 +462 Maldives 2 +524 Nepal 2 +586 Pakistan 2 +144 Sri Lanka 2 +920 South-Eastern Asia 0 +96 Brunei Darussalam 2 +116 Cambodia 2 +360 Indonesia 2 +418 Lao People's Democratic Republic 2 +458 Malaysia 2 +104 Myanmar 2 +608 Philippines 2 +702 Singapore 2 +764 Thailand 2 +626 Timor-Leste 2 +704 Viet Nam 2 +922 Western Asia 0 +51 Armenia 2 +31 Azerbaijan 2 +48 Bahrain 2 +196 Cyprus 2 +268 Georgia 2 +368 Iraq 2 +376 Israel 2 +400 Jordan 2 +414 Kuwait 2 +422 Lebanon 2 +512 Oman 2 +634 Qatar 2 +682 Saudi Arabia 2 +275 State of Palestine 2 +760 Syrian Arab Republic 2 +792 Turkiye 2 +784 United Arab Emirates 2 +887 Yemen 2 +908 Europe 0 +923 Eastern Europe 0 +112 Belarus 2 +100 Bulgaria 2 +203 Czechia 2 +348 Hungary 2 +616 Poland 2 +498 Republic of Moldova 2 +642 Romania 2 +643 Russian Federation 2 +703 Slovakia 2 +804 Ukraine 2 +924 Northern Europe 0 +208 Denmark 2 +233 Estonia 2 +234 Faroe Islands 1 +246 Finland 2 +831 Guernsey 1 +352 Iceland 2 +372 Ireland 2 +833 Isle of Man 1 +832 Jersey 2 +428 Latvia 2 +440 Lithuania 2 +578 Norway 2 +752 Sweden 2 +826 United Kingdom 2 +925 Southern Europe 0 +8 Albania 2 +20 Andorra 1 +70 Bosnia and Herzegovina 2 +191 Croatia 2 +292 Gibraltar 1 +300 Greece 2 +336 Holy See 1 +380 Italy 2 +412 Kosovo (under UNSC res. 1244) 2 +470 Malta 2 +499 Montenegro 2 +807 North Macedonia 2 +620 Portugal 2 +674 San Marino 1 +688 Serbia 2 +705 Slovenia 2 +724 Spain 2 +926 Western Europe 0 +40 Austria 2 +56 Belgium 2 +250 France 2 +276 Germany 2 +438 Liechtenstein 1 +442 Luxembourg 2 +492 Monaco 1 +528 Netherlands 2 +756 Switzerland 2 +904 Latin America and the Caribbean 0 +915 Caribbean 0 +660 Anguilla 1 +28 Antigua and Barbuda 2 +533 Aruba 2 +44 Bahamas 2 +52 Barbados 2 +535 Bonaire, Sint Eustatius and Saba 1 +92 British Virgin Islands 1 +136 Cayman Islands 1 +192 Cuba 2 +531 Curacao 2 +212 Dominica 1 +214 Dominican Republic 2 +308 Grenada 2 +312 Guadeloupe 2 +332 Haiti 2 +388 Jamaica 2 +474 Martinique 2 +500 Montserrat 1 +630 Puerto Rico 2 +652 Saint-Barthelemy 1 +659 Saint Kitts and Nevis 1 +662 Saint Lucia 2 +663 Saint Martin (French part) 1 +670 Saint Vincent and the Grenadines 2 +534 Sint Maarten (Dutch part) 1 +780 Trinidad and Tobago 2 +796 Turks and Caicos Islands 1 +850 United States Virgin Islands 1 +916 Central America 0 +84 Belize 2 +188 Costa Rica 2 +222 El Salvador 2 +320 Guatemala 2 +340 Honduras 2 +484 Mexico 2 +558 Nicaragua 2 +591 Panama 2 +931 South America 0 +32 Argentina 2 +68 Bolivia (Plurinational State of) 2 +76 Brazil 2 +152 Chile 2 +170 Colombia 2 +218 Ecuador 2 +238 Falkland Islands (Malvinas) 1 +254 French Guiana 2 +328 Guyana 2 +600 Paraguay 2 +604 Peru 2 +740 Suriname 2 +858 Uruguay 2 +862 Venezuela (Bolivarian Republic of) 2 +905 Northern America 0 +60 Bermuda 1 +124 Canada 2 +304 Greenland 1 +666 Saint Pierre and Miquelon 1 +840 United States of America 2 +909 Oceania 0 +927 Australia/New Zealand 0 +36 Australia 2 +554 New Zealand 2 +928 Melanesia 0 +242 Fiji 2 +540 New Caledonia 2 +598 Papua New Guinea 2 +90 Solomon Islands 2 +548 Vanuatu 2 +954 Micronesia 0 +316 Guam 2 +296 Kiribati 2 +584 Marshall Islands 1 +583 Micronesia (Fed. States of) 2 +520 Nauru 1 +580 Northern Mariana Islands 1 +585 Palau 1 +957 Polynesia 0 +16 American Samoa 1 +184 Cook Islands 1 +258 French Polynesia 2 +570 Niue 1 +882 Samoa 2 +772 Tokelau 1 +776 Tonga 2 +798 Tuvalu 1 +876 Wallis and Futuna Islands 1 diff --git a/data/include_2024.R b/data/include_2024.R deleted file mode 100644 index a6af5cc8..00000000 --- a/data/include_2024.R +++ /dev/null @@ -1,6 +0,0 @@ -include_2024 <- local({ - e <- new.env() - load("include_2022.rda", envir= e) - e$include_2022 -}) - diff --git a/data/include_2024.rda b/data/include_2024.rda new file mode 100644 index 0000000000000000000000000000000000000000..e416e64168ce41f977635c604288f5ebbbeef32e GIT binary patch literal 818 zcmV-21I_$GT4*^jL0KkKS(e4~000CZ|NsB@PtC{uT~zh-)m86j-}hhl{onbQ)mQZ$ zRd>|)T~)vWoIr;;1u8a>18NMK10kW14Lw69hJzuH0gwiP>H|P%(?HPCrhou33?`Wx zVrcaQQKMA#KT2eJjT&eG0009+KmY&$0004?000000002>KU5J<+Ek550000D003k( z8UPHM00000007aZjQ{`|X_Fu{0FzY`Kn(=ICYS&K08JQ}00LkD000dD0GLbw0GI#* zV?+fZX`)dDQX)kn%C-SQB7!l9g2@&NBL#r4f`k?VDbqzUgb=P|qnEa%W8lsb?hB;Pxt5nC)ye(DsV0m2(s}+BtYErg90E7Vu8bAd0>zPLi%+kJK zeeF4|1WH$rj(dDkXC)LZgd_4!1^}<0n-&AN2evbTw^|`Q-1Aq`-ru!@gV*vAVLfB?7{#Wn!jeI&%5?}YlNTv5{U>%S(ris8z#v}QppUI zjD&}+g2_G1Dl$k)St~7-Du&1Q#?GBFH2Mop;&jSs<0foD4Fklt z43)sd>ZLgI#47udjM0qXIUp#8IVu#w&Bx^6W(lNG;Y;)i1!5Vd2ry7ch=QRWdS*75 zf<*-*F;NIWM5(J-nXH5$npOg}3Iu`*Mt8yHx#_5saMU(PGDZ+GM+j~kV+dgY+z(oi4~03;xQE-={$y^^?uO3>LL0H7rQLjQ}oBAh5lOJeze0O|-(p#T5? literal 0 HcmV?d00001