From e76b1d76a7671b552f270594dcca6abed28c5628 Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 6 Apr 2021 15:45:39 -0400
Subject: [PATCH 01/48] fix bug

---
 R/mcmc_ini.R | 3 +--
 R/wpp_data.R | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index 5452b21b..358cafbd 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -483,8 +483,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100,
                         dontsave=dontsave.pars, uncertainty=uncertainty
                         ),
                    class='bayesTFR.mcmc')
-                   
-	##################################################################
+  ##################################################################
 	# distortions
 	##################################################################
 	# note: the eps will always be NA outside (tau_c, lambda-1)!!
diff --git a/R/wpp_data.R b/R/wpp_data.R
index a6ee8d5f..ad5e580c 100644
--- a/R/wpp_data.R
+++ b/R/wpp_data.R
@@ -208,12 +208,12 @@ get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950
 	    ncol.tfr <- length(num.columns)
 	    cols.starty <- as.integer(substr(names.tfr.data[num.columns], 1,4))
 	    cols.endy <- as.integer(substr(names.tfr.data[num.columns], 6,9))
-	    years.to.interp.to <- (cols.starty[1]+3):cols.endy[length(cols.endy)]
+	    years.to.interp.to <- (cols.starty[1]):cols.endy[length(cols.endy)]
 	    years.to.interp.from <- seq(cols.starty[1]+3, cols.endy[length(cols.endy)]-2, by = 5)
 	    tfr_data_new <- matrix(NA, nrow = nrow(tfr_data), ncol = length(years.to.interp.to))
 	    for(row in 1:nrow(tfr_data_new)) 
 	        tfr_data_new[row, ] <- approx(years.to.interp.from, tfr_data[row, num.columns], 
-	                                      xout = years.to.interp.to)$y
+	                                      xout = years.to.interp.to, rule=2)$y
 	    tfr_data_new <- cbind(tfr_data[, -num.columns], tfr_data_new) 
 	    colnames(tfr_data_new) <- c(colnames(tfr_data)[-num.columns], years.to.interp.to)
 	  tfr_data <- tfr_data_new

From ef6a2416f6c8fed7cda345b7cc4f697ae98c1096 Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 6 Apr 2021 17:31:35 -0400
Subject: [PATCH 02/48] fix bug for continuous variable

---
 R/run_mcmc.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index e0468c6e..8ff5c4bc 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -262,7 +262,7 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL
 	                 gamma.ini=gamma.ini[chain.id],
 	                 save.all.parameters=save.all.parameters,
 	                 verbose=verbose, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
-	                 covariates=covariates, cont_covariates=NULL)
+	                 covariates=covariates, cont_covariates=cont_covariates)
 	if (uncertainty)
 	{
 	  this.sv <- list()

From f280f15cf870b52dad375698665f1c133cb40d5b Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 6 Apr 2021 20:29:18 -0400
Subject: [PATCH 03/48] allow using source.col.name to specify the source
 column for iso.unbiased

---
 R/mcmc_ini.R          |  4 ++--
 R/run_mcmc.R          | 16 +++++++++-------
 R/updated_functions.R |  6 +++---
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index 358cafbd..d4ef9b27 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -402,7 +402,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100,
 					 d.ini=0.17,
 					 save.all.parameters=FALSE,
 					 verbose=FALSE, uncertainty=FALSE, iso.unbiased=NULL,
-					 covariates=c('source', 'method'), cont_covariates=NULL) {
+					 covariates=c('source', 'method'), cont_covariates=NULL, source.col.name = "source") {
 				 		 	
 	nr_countries <- mcmc.meta$nr_countries
 
@@ -495,7 +495,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100,
 	  # mcmc <- get.obs.estimate.diff(mcmc)
 	  # mcmc <- estimate.bias.sd.raw(mcmc)
 	  mcmc <- get.obs.estimate.diff.original(mcmc)
-	  mcmc <- estimate.bias.sd.original(mcmc, iso.unbiased, covariates, cont_covariates)
+	  mcmc <- estimate.bias.sd.original(mcmc, iso.unbiased, covariates, cont_covariates, source.col.name)
 	}
 	
 	# mcmc <- as.list(mcmc)
diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index 8ff5c4bc..fb765260 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -26,6 +26,7 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(),
 						phase3.starting.values=NULL,
 					 	proposal_cov_gammas = NULL, # should be a list with elements 'values' and 'country_codes'
 						iso.unbiased = NULL, covariates = c('source', 'method'), cont_covariates = NULL, 
+						source.col.name="source",
 					 	seed = NULL, parallel=FALSE, nr.nodes=nr.chains, 
 					 	save.all.parameters = FALSE, compression.type='None',
 					 	auto.conf = list(max.loops=5, iter=62000, iter.incr=10000, nr.chains=3, thin=80, burnin=2000),
@@ -196,10 +197,10 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(),
 		chain.set <- bDem.performParallel(nr.nodes, 1:nr.chains, mcmc.run.chain, 
 						initfun=init.nodes, seed = seed, meta=bayesTFR.mcmc.meta, 
 						thin=thin, starting.values=starting.values, iter=iter, S.ini=S.ini, a.ini=a.ini,
-                        b.ini=b.ini, sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini,
-                        gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, 
-                        verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
-						            covariates=covariates, cont_covariates=cont_covariates, ...)
+						b.ini=b.ini, sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini,
+						gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, 
+						verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
+						covariates=covariates, cont_covariates=cont_covariates, source.col.name=source.col.name, ...)
 	} else { # run chains sequentially
 		chain.set <- list()
 		for (chain in 1:nr.chains) {
@@ -208,7 +209,7 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(),
 					 	sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini, 
 					 	gamma.ini=gamma.ini, save.all.parameters=save.all.parameters,
 					 	verbose=verbose, verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
-					 	covariates=covariates, cont_covariates=cont_covariates)
+					 	covariates=covariates, cont_covariates=cont_covariates, source.col.name=source.col.name)
 		}
 	}
 	names(chain.set) <- 1:nr.chains
@@ -240,7 +241,7 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL
 							S.ini, a.ini, b.ini, sigma0.ini, Triangle_c4.ini, const.ini, gamma.ini=1,
 							save.all.parameters=FALSE,
 							verbose=FALSE, verbose.iter=10, uncertainty=FALSE, iso.unbiased=NULL, 
-							covariates=c('source', 'method'), cont_covariates=NULL) {
+							covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source") {
 								
 	cat('\n\nChain nr.', chain.id, '\n')
     if (verbose) {
@@ -262,7 +263,8 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL
 	                 gamma.ini=gamma.ini[chain.id],
 	                 save.all.parameters=save.all.parameters,
 	                 verbose=verbose, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
-	                 covariates=covariates, cont_covariates=cont_covariates)
+	                 covariates=covariates, cont_covariates=cont_covariates,
+                   source.col.name=source.col.name)
 	if (uncertainty)
 	{
 	  this.sv <- list()
diff --git a/R/updated_functions.R b/R/updated_functions.R
index d29ed303..a86ebd5d 100644
--- a/R/updated_functions.R
+++ b/R/updated_functions.R
@@ -128,7 +128,7 @@ estimate.bias.sd.raw <- function(mcmc)
 }
 
 estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('source', 'method'), 
-                                      cont_covariates=NULL)
+                                      cont_covariates=NULL, source.col.name="source")
 {
   mcmc$meta$raw_data.original$bias <- NA
   mcmc$meta$raw_data.original$std <- NA
@@ -180,10 +180,10 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
     ## Optional
     if (ISO.code$code %in% iso.unbiased)
     {
-      if ("source" %in% covariates)
+      if (source.col.name %in% covariates)
       {
         index.by.country.vr.estimate <- which((mcmc$meta$raw_data.original$country_code == ISO.code$code) & 
-                                                (mcmc$meta$raw_data.original[, "source"] %in% c("VR", 'Estimate')))
+                                                (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR", 'Estimate')))
         mcmc$meta$raw_data.original$bias[index.by.country.vr.estimate] <- 0
         mcmc$meta$raw_data.original$std[index.by.country.vr.estimate] <- 0.016
       }

From 039738334487ebcfdfdcfdb8bb58938b7928033f Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 6 Apr 2021 21:02:55 -0400
Subject: [PATCH 04/48] update documents

---
 man/run-mcmc.Rd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/man/run-mcmc.Rd b/man/run-mcmc.Rd
index 971dd115..b7a89e34 100644
--- a/man/run-mcmc.Rd
+++ b/man/run-mcmc.Rd
@@ -36,7 +36,7 @@ run.tfr.mcmc(nr.chains = 3, iter = 62000,
     Triangle_c4.ini = NULL, const.ini = NULL, gamma.ini = 1, 
     phase3.starting.values = NULL, proposal_cov_gammas = NULL, 
     iso.unbiased = NULL, covariates = c("source", "method"), cont_covariates = NULL, 
-    seed = NULL, parallel = FALSE, nr.nodes = nr.chains, 
+    source.col.name="source", seed = NULL, parallel = FALSE, nr.nodes = nr.chains, 
     save.all.parameters = FALSE, compression.type = 'None',
     auto.conf = list(max.loops = 5, iter = 62000, iter.incr = 10000, 
         nr.chains = 3, thin = 80, burnin = 2000),
@@ -103,6 +103,7 @@ continue.tfr.mcmc(iter, chain.ids = NULL,
   \item{proposal_cov_gammas}{Proposal for the gamma covariance matrices for each country. It should be a list with two values: \code{values} and \code{country_codes}. The structure corresponds to the object returned by the function \code{\link{get.cov.gammas}}. By default the covariance matrices are obtained using \code{data(proposal_cov_gammas_cii)}. This argument overwrite the defaults for countries contained the argument.}
   \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. Only used if \code{uncertainty = TRUE}. See details below.}
   \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty = TRUE}. See details below.}
+  \item{source.col.name}{Specify the name for source column used for iso.unbiased variable. }
   \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.}
   \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. If it is \code{TRUE}, the package \pkg{\link[snowFT]{snowFT}} is required. In such a case further arguments can be passed, see description of \dots.}
   \item{nr.nodes}{Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes for running the simulation in parallel. By default it equals to the number of chains.}

From d25855befad93e0b8de863ef5c73ac1706b698ef Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 8 Apr 2021 12:22:12 -0700
Subject: [PATCH 05/48] version changed

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4cc625e8..a8f72f86 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.0-4
-Date: 2021-03-31
+Version: 7.0-4.9001
+Date: 2021-04-07
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>

From e2bfd8ed133de9760d6704299a3c6ffc1b7c8377 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 9 Apr 2021 11:49:10 -0700
Subject: [PATCH 06/48] changes in plot function; ignore last.oberved if using
 uncertainty

---
 R/mcmc_ini.R       |  4 ++--
 R/plot_functions.R | 26 +++++++++++++++++++++-----
 R/wpp_data.R       |  9 +++++----
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index d4ef9b27..fed1e57b 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -131,7 +131,6 @@ find.tau.lambda.and.DLcountries <- function(tfr_matrix, min.TFRlevel.for.start.a
     T_end_c <- lambda_c <-rep(T_end, nr_countries)
     T.suppl <- if(is.null(suppl.data$regions)) 0 else dim(suppl.data$tfr_matrix)[1]
     tau_c <- start_c <- rep(NA, nr_countries)
-
     for (country in 1:nr_countries) {
         data <- get.observed.with.supplemental(country, tfr_matrix, suppl.data)
     	has.suppl <- length(data) > T_end && !is.na(suppl.data$index.from.all.countries[country])
@@ -218,7 +217,8 @@ mcmc.meta.ini <- function(...,
 	if(present.year-3 > wpp.year) warning("present.year is much larger then wpp.year. Make sure WPP data for present.year are available.")
 	tfr.with.regions <- set_wpp_regions(start.year=start.year, present.year=present.year, wpp.year=wpp.year, 
 										my.tfr.file = my.tfr.file, my.locations.file=my.locations.file, 
-										annual = mcmc.input$annual.simulation, verbose=verbose)
+										annual = mcmc.input$annual.simulation, ignore.last.observed = uncertainty,
+										verbose=verbose)
 	meta <- do.meta.ini(mcmc.input, tfr.with.regions,  
 						proposal_cov_gammas=proposal_cov_gammas, verbose=verbose, 
 						uncertainty=uncertainty, my.tfr.raw.file=my.tfr.raw.file, ar.phase2=ar.phase2)
diff --git a/R/plot_functions.R b/R/plot_functions.R
index fd398f82..f4cff6c5 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -491,9 +491,13 @@ tfr.estimation.plot <- function(mcmc.list=NULL, country.code=NULL, ISO.code=NULL
   
   wpp.data <- get.observed.tfr(country.obj$index, mcmc.list$meta, "tfr_matrix_all")
   wpp.data <- data.frame(year=quantile_tbl$year, tfr = as.numeric(wpp.data))
-  wpp.data <- wpp.data[wpp.data$year %% 5 == 3,]
+  #wpp.data <- wpp.data[wpp.data$year %% 5 == 3,]
   q <- q + ggplot2::geom_line(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"), size = 0.8) + ggplot2::theme_bw() + 
-    ggplot2::geom_point(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"))
+      ggplot2::geom_point(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"), size = 0.7)
+
+  # re-draw the median
+  q <- q + ggplot2::geom_line(ggplot2::aes_string(x="year", y="Q50"), size = 0.8, color="red") +
+      ggplot2::geom_point(ggplot2::aes_string(x="year", y="Q50"), size = 1, color="red")
   
   if (save.image)
   {
@@ -609,12 +613,20 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
   }
   # plot median
   tfr.median <- get.median.from.prediction(tfr.pred, country$index, country$code)
-  lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3]) 
+  if(uncertainty) {
+      unc.last.time <- which(tfr.object$tfr_quantile$year == x2[1])
+      tfr.median[1] <- unlist(tfr.object$tfr_quantile[unc.last.time,])[length(pi)+1]
+  }
+  lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3])
   # plot given CIs
   lty <- 2:(length(pi)+1)
   for (i in 1:length(pi)) {
     cqp <- get.traj.quantiles(tfr.pred, country$index, country$code, trajectories$trajectories, pi[i])
     if (!is.null(cqp)) {
+        if(uncertainty) {
+            cqp[1,1] <- unlist(tfr.object$tfr_quantile[unc.last.time, ])[length(pi)+1-i]
+            cqp[2,1] <- unlist(tfr.object$tfr_quantile[unc.last.time, ])[length(pi)+1+i]
+        }
       lines(x2, cqp[1,], type='l', col=col[4], lty=lty[i], lwd=lwd[4])
       lines(x2, cqp[2,], type='l', col=col[4], lty=lty[i], lwd=lwd[4])
     }
@@ -649,8 +661,12 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
     lty <- c(lty, max(lty)+1)
     llty <- length(lty)
     up.low <- get.half.child.variant(median=tfr.median)
-    lines(x2, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
-    lines(x2, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
+    if(uncertainty) {
+        up.low <- up.low[,-1]
+        x2t <- x2[-1]
+    } else x2t <- x2
+    lines(x2t, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
+    lines(x2t, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
     legend <- c(legend, '+/- 0.5 child')
     cols <- c(cols, col[5])
     lwds <- c(lwds, lwd[5])
diff --git a/R/wpp_data.R b/R/wpp_data.R
index ad5e580c..fdfa028e 100644
--- a/R/wpp_data.R
+++ b/R/wpp_data.R
@@ -1,7 +1,7 @@
 # Read in the UN estimates
 
 set_wpp_regions <- function(start.year=1950, present.year=2010, wpp.year=2012, my.tfr.file=NULL, 
-							my.locations.file=NULL, annual = FALSE, verbose=FALSE) {
+							my.locations.file=NULL, annual = FALSE, ignore.last.observed = FALSE, verbose=FALSE) {
 # outputs:
 # tfr_matrix_all, with each column one countries UN estimates
 # tfr_matrix with NAs after last observed data point
@@ -32,7 +32,8 @@ set_wpp_regions <- function(start.year=1950, present.year=2010, wpp.year=2012, m
 												start.year=start.year, 
 												present.year=present.year,
 												annual = annual, verbose=verbose, 
-												interpolate = annual && is.null(my.tfr.file))
+												interpolate = annual && is.null(my.tfr.file),
+												ignore.last.observed = ignore.last.observed)
 	if(!annual) {
 	    TFRmatrixsuppl.regions <- .get.suppl.matrix.and.regions(un.object, TFRmatrix.regions, loc_data, 
 									start.year, present.year)
@@ -198,7 +199,7 @@ read.UNlocations <- function(data, wpp.year, package="bayesTFR", my.locations.fi
 get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950, present.year=2010, annual = FALSE,
 												 datacolnames=c(country.code='country_code', country.name='country', reg.name='reg_name',
 															reg.code='reg_code', area.name='area_name', area.code='area_code'), 
-												 interpolate = FALSE) {
+												 interpolate = FALSE, ignore.last.observed = FALSE) {
 	tfr_data <- data
 	nr_countries <- length(tfr_data[,1])
 	if (annual && interpolate) # interpolate 5-year data
@@ -266,7 +267,7 @@ get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950
  		if(has.area.name) area_name <- c(area_name, paste(loc_data$area_name[loc_index]))
  		if(has.area.code) area_code <- c(area_code, loc_data$area_code[loc_index])
 		# set NAs for entries that are not observed data (after last.observed) 
-		tfr_matrix[(tfr_data[i,'last.observed'] < mid.years), i] <- NA
+		if(!ignore.last.observed) tfr_matrix[(tfr_data[i,'last.observed'] < mid.years), i] <- NA
 		all.na[i] <- all(is.na(tfr_matrix[,i]))
 	}
 	country.codes <- tfr_data[[datacolnames['country.code']]]

From bc202e86ac026dd5d4fdc5a7c8d8119fe9772d55 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 9 Apr 2021 13:14:29 -0700
Subject: [PATCH 07/48] avoid log(0) in mcmc sampling

---
 DESCRIPTION       | 4 ++--
 R/mcmc_sampling.R | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a8f72f86..e37bf371 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.0-4.9001
-Date: 2021-04-07
+Version: 7.0-4.9002
+Date: 2021-04-09
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>
diff --git a/R/mcmc_sampling.R b/R/mcmc_sampling.R
index 419f6565..b1ef2b1c 100755
--- a/R/mcmc_sampling.R
+++ b/R/mcmc_sampling.R
@@ -160,7 +160,7 @@ tfr.mcmc.sampling <- function(mcmc, thin=1, start.iter=2, verbose=FALSE, verbose
         delta4.squared <- mcenv$delta4^2
         delta4.0.squared <- delta4_0^2
         Triangle_c4_transformed <- 
-                        log((mcenv$Triangle_c4[id_DL] - Triangle_c4.low)/
+                        log(pmax(mcenv$Triangle_c4[id_DL] - Triangle_c4.low, 1e-10)/
                                         (Triangle_c4.up-mcenv$Triangle_c4[id_DL])) 
         mcenv$Triangle4 <- rnorm(1, mean = (sum(Triangle_c4_transformed)/(delta4.squared) + 
         										Triangle4_0/delta4.0.squared)/

From cbbd47340876b521d3a9d765f0ce30281ac405b6 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Mon, 12 Apr 2021 09:45:52 -0700
Subject: [PATCH 08/48] fix issue with suppl data in extra function

---
 R/wpp_data.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/wpp_data.R b/R/wpp_data.R
index fdfa028e..4d24aba0 100644
--- a/R/wpp_data.R
+++ b/R/wpp_data.R
@@ -387,12 +387,12 @@ set.wpp.extra <- function(meta, countries=NULL, my.tfr.file=NULL, my.locations.f
 	extra.wpp <- .extra.matrix.regions(data=data, countries=countries, meta=meta, my.locations.file=my.locations.file, 
 	                                   verbose=verbose, annual=annual, uncertainty=uncertainty, 
 	                                   interpolate = is.null(my.tfr.file) && annual)
-	if(!is.null(extra.wpp)) {
+	if(!is.null(extra.wpp) && !annual) {
 		locations <- read.UNlocations(data$data, wpp.year=meta$wpp.year, my.locations.file=my.locations.file, verbose=verbose)
 		suppl.wpp <- .get.suppl.matrix.and.regions(un.object, extra.wpp, locations$loc_data, 
 									meta$start.year, meta$present.year)
 		extra.wpp$suppl.data <- .get.suppl.data.list(suppl.wpp)
-	}							
+	} 
 	return(extra.wpp)
 }
 

From 655312037282dff338b7c90a6f992b24775ceae4 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Mon, 12 Apr 2021 14:53:22 -0700
Subject: [PATCH 09/48] fixed source.col.name in run mcmc extra

---
 R/run_mcmc.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index fb765260..c1c3c0fc 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -367,7 +367,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 								thin=1, thin.extra=1, burnin=2000, parallel=FALSE, nr.nodes=NULL, 
 								my.locations.file = NULL,
 								uncertainty=FALSE, my.tfr.raw.file=NULL, iso.unbiased=NULL, 
-								covariates=c('source', 'method'), cont_covariates=NULL, 
+								covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source",
 								verbose=FALSE, verbose.iter=100, ...) {
   mcmc.set <- get.tfr.mcmc(sim.dir)
   meta.old <- mcmc.set$meta
@@ -430,7 +430,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 		if(uncertainty)
 		{
 		  mcmc.set$mcmc.list[[chain]] <- get.obs.estimate.diff.original(mcmc.set$mcmc.list[[chain]])
-		  mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates)
+		  mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, source.col.name=source.col.name)
 		  mcmc.set$mcmc.list[[chain]]$eps_unc <- list()
 		  if (is.null(mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra)) mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra <- list()
 		  for (country in countries)

From 85d9946cc105181d4385cb827fa2319fdabca29f Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Tue, 13 Apr 2021 00:29:24 -0700
Subject: [PATCH 10/48] fixes related to the size of the meta file

---
 R/get_outputs.R       |  3 ++-
 R/predict_tfr.R       |  1 +
 R/run_mcmc.R          | 18 ++++++++++++------
 R/updated_functions.R | 17 +++++++++++------
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/R/get_outputs.R b/R/get_outputs.R
index e9163970..2db57e92 100644
--- a/R/get_outputs.R
+++ b/R/get_outputs.R
@@ -143,7 +143,7 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 		country.obj <- get.country.object(country, mcmc.set$meta, index=TRUE)
 		if (country.obj$index %in% mcmc.set$meta$extra)
 		{
-		  nr.points.cs <- floor(mcmc.set$meta$extra_iter[country.obj$index] / mcmc.set$meta$extra_thin[country.obj$index])
+		  nr.points.cs <- floor((mcmc.set$meta$extra_iter[country.obj$index]-burnin) / mcmc.set$meta$extra_thin[country.obj$index])
 		  nr.points.cs <- nr.points.cs * length(mcmc.set$mcmc.list)
 		  if (nr.points.cs >= nr.points) thin.index.cs <- round(seq(1, nr.points.cs, length.out = nr.points))
 		  else 
@@ -167,6 +167,7 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 										compression.type=thinned.mcmc$compression.type)
 		}
 	}
+	
 	if (mcmc.set$meta$nr_countries > mcmc.set$meta$nr_countries_estimation) {
 		.update.thinned.extras(mcmc.set, (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries,
 								burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose)
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index 5dff9e84..e19ded9a 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -197,6 +197,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 	                               floor((mcmc.set$meta$extra_iter[country] - burn) / mcmc.set$meta$extra_thin[country]) * length(mcmc.set$mcmc.list))
 	    }
 	  }
+	  stored.iter <- stored.iter.extra
 	}
 	mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin))
 	if(!is.null(nr.traj) && !is.null(thin)) {
diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index c1c3c0fc..ba69fac9 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -430,7 +430,8 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 		if(uncertainty)
 		{
 		  mcmc.set$mcmc.list[[chain]] <- get.obs.estimate.diff.original(mcmc.set$mcmc.list[[chain]])
-		  mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, source.col.name=source.col.name)
+		  mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, 
+		                                                           source.col.name=source.col.name, countries = Eini$index)
 		  mcmc.set$mcmc.list[[chain]]$eps_unc <- list()
 		  if (is.null(mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra)) mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra <- list()
 		  for (country in countries)
@@ -477,9 +478,9 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 	}
 	if(uncertainty)
 	{
-	  if (!dir.exists(file.path(meta$output.dir, 'extra.meta'))) dir.create(file.path(meta$output.dir, 'extra.meta'))
-	  if (!dir.exists(file.path(meta$output.dir, 'extra.meta', countries[1]))) dir.create(file.path(meta$output.dir, 'extra.meta', countries[1]))
-	  store.bayesTFR.meta.object(meta, file.path(meta$output.dir, 'extra.meta', countries[1]))
+	  #if (!dir.exists(file.path(meta$output.dir, 'extra.meta'))) dir.create(file.path(meta$output.dir, 'extra.meta'))
+	  #if (!dir.exists(file.path(meta$output.dir, 'extra.meta', countries[1]))) dir.create(file.path(meta$output.dir, 'extra.meta', countries[1]))
+	  #store.bayesTFR.meta.object(meta, file.path(meta$output.dir, 'extra.meta', countries[1]))
 	  for (name in c("country.ind.by.year", "ind.by.year", "id_phase1_by_year", "id_phase2_by_year", "id_phase3_by_year", 
 	                 "id_phase3", "nr.countries"))
 	  {
@@ -503,8 +504,13 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 	    }
 	  }
 	  meta[['extra']] <- sort(unique(meta[['extra']]))
-	}
-	store.bayesTFR.meta.object(meta, file.path(meta$output.dir))
+	  # unload parent for storing purposes
+	  parent <- meta$parent
+	  meta[["parent"]] <- NULL
+	} else parent <- NULL
+	
+	store.bayesTFR.meta.object(meta, meta$output.dir)
+	meta[["parent"]] <- parent
 	mcmc.set$meta <- meta
 	cat('\n')
 	invisible(mcmc.set)
diff --git a/R/updated_functions.R b/R/updated_functions.R
index a86ebd5d..6394293f 100644
--- a/R/updated_functions.R
+++ b/R/updated_functions.R
@@ -128,7 +128,7 @@ estimate.bias.sd.raw <- function(mcmc)
 }
 
 estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('source', 'method'), 
-                                      cont_covariates=NULL, source.col.name="source")
+                                      cont_covariates=NULL, source.col.name="source", countries = NULL)
 {
   mcmc$meta$raw_data.original$bias <- NA
   mcmc$meta$raw_data.original$std <- NA
@@ -137,7 +137,8 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
     mcmc$meta$bias_model <- list()
     mcmc$meta$std_model <- list()
   }
-  for(country in 1:mcmc$meta$nr_countries)
+  if(is.null(countries)) countries <- 1:mcmc$meta$nr_countries
+  for(country in countries)
   {
     ISO.code <- get.country.object(country, meta=mcmc$meta, index=TRUE)
     if (is.na(ISO.code$code)) next
@@ -168,10 +169,10 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
       }
     }
     
-    m1 <- lm(as.formula(paste0('diff ~ ', regressor)))
+    m1 <- lm(as.formula(paste0('diff ~ ', regressor)), model = FALSE)
     bias <- predict(m1)
     abs.residual <- abs(residuals(m1))
-    m2 <- lm(as.formula(paste0('abs.residual ~ ', regressor)))
+    m2 <- lm(as.formula(paste0('abs.residual ~ ', regressor)), model = FALSE)
     std <- predict(m2) * sqrt(pi/2)
     std[std < 1e-6] <- 0.1
     std[std < (abs(bias) / 2)] <- abs(bias[std < (abs(bias) / 2)]) 
@@ -192,11 +193,15 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
         warning("Covariate source not found. iso.unbiased is not used.\n")
       }
     }
+    # to save space
+    attr(m1$terms, ".Environment") <- NULL
+    attr(m2$terms, ".Environment") <- NULL
+    m1$fitted.values <- NULL
+    m2$fitted.values <- NULL
+    
     mcmc$meta$bias_model[[country]] <- m1
     mcmc$meta$std_model[[country]] <- m2
   }
-  
-  
   return(mcmc)
 }
 

From af06d4b4aaad1b3ff1b72c5caace5f9d604f7003 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 14 Apr 2021 16:31:47 -0700
Subject: [PATCH 11/48] option of storing extra country to different directory;
 removed burnin and thin from tfr.trajectories.plot

---
 R/get_outputs.R    | 13 ++++++++-----
 R/plot_functions.R | 10 +++-------
 R/predict_tfr.R    | 14 +++++++++-----
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/R/get_outputs.R b/R/get_outputs.R
index 2db57e92..1a1c7606 100644
--- a/R/get_outputs.R
+++ b/R/get_outputs.R
@@ -86,7 +86,8 @@ get.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0) {
 	return(NULL)
 }
 	
-create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, verbose=TRUE, uncertainty=FALSE) {
+create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, verbose=TRUE, uncertainty=FALSE,
+                                    update.with.countries = NULL) {
 	#Return a thinned mcmc.set object with burnin removed and all chanins collapsed into one
   mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin))
 	thin <- max(c(thin, mcthin))
@@ -139,7 +140,8 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 	if(verbose) cat('done.\nStoring country-specific parameters ...')
 	par.names.cs <- tfr.parameter.names.cs(trans=FALSE)
 	if (uncertainty) par.names.cs <- c(par.names.cs, 'tfr')
-	for (country in mcmc.set$meta$id_DL){
+	DLcntries <- if(!is.null(update.with.countries)) update.with.countries[update.with.countries %in% mcmc.set$meta$id_DL] else mcmc.set$meta$id_DL
+	for (country in DLcntries){
 		country.obj <- get.country.object(country, mcmc.set$meta, index=TRUE)
 		if (country.obj$index %in% mcmc.set$meta$extra)
 		{
@@ -167,10 +169,11 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 										compression.type=thinned.mcmc$compression.type)
 		}
 	}
-	
 	if (mcmc.set$meta$nr_countries > mcmc.set$meta$nr_countries_estimation) {
-		.update.thinned.extras(mcmc.set, (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries,
-								burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose)
+	    cindex <- if(!is.null(update.with.countries)) update.with.countries[update.with.countries > mcmc.set$meta$nr_countries_estimation] else 
+	        (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries
+	    if(length(cindex) > 0)
+		    .update.thinned.extras(mcmc.set, cindex, burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose)
 	}
 	invisible(structure(list(meta=meta, mcmc.list=list(thinned.mcmc)), class='bayesTFR.mcmc.set'))
 }
diff --git a/R/plot_functions.R b/R/plot_functions.R
index f4cff6c5..b40c24c7 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -516,7 +516,7 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
                                   xlim=NULL, ylim=NULL, type='b', 
                                   xlab='Year', ylab='TFR', main=NULL, lwd=c(2,2,2,2,2,1), 
                                   col=c('black', 'green', 'red', 'red', 'blue', '#00000020'),
-                                  show.legend=TRUE, add=FALSE, uncertainty=FALSE, thin=NULL, burnin=NULL, col_unc="purple", ...
+                                  show.legend=TRUE, add=FALSE, uncertainty=FALSE, col_unc="purple", ...
 ) {
   # lwd/col is a vector of 6 line widths/colors for: 
   #	1. observed data, 2. imputed missing data, 3. median, 4. quantiles, 5. +- 0.5 child, 6. trajectories
@@ -529,12 +529,8 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
   }
   if (uncertainty)
   {
-    sim.dir <- file.path(tfr.pred$output.directory, "..")
-    if (missing(thin) || is.null(thin)) thin <- tfr.pred$thin
-    if (missing(burnin) || is.null(burnin)) burnin <- tfr.pred$burnin
-    tfr.object <- get.tfr.estimation(mcmc.list=NULL, country.code=country, ISO.code=NULL, sim.dir=sim.dir, 
-                                     burnin=burnin, thin=thin, probs=sort(c((1-pi/100)/2, 0.5, pi/100 + (1-pi/100)/2)))
-    
+    tfr.object <- get.tfr.estimation(mcmc.list=tfr.pred$mcmc.set, country.code=country, ISO.code=NULL, 
+                                     probs=sort(c((1-pi/100)/2, 0.5, pi/100 + (1-pi/100)/2)))
   }
   col <- .match.colors.with.default(col, c('black', 'green', 'red', 'red', 'blue', '#00000020'))
   country.obj <- get.country.object(country, tfr.pred$mcmc.set$meta)
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index e19ded9a..d9a0cac9 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -88,7 +88,8 @@ get.burnin.nrtraj.from.diagnostics <- function(sim.dir, ...) {
 }
 
 tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), 
-					prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE) {
+					prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE,
+					all.countries.required = TRUE) {
 	# Run prediction for given countries/regions (as codes). If they are not given it will be set to countries 
 	# for which there are MCMC results but no prediction.
 	# It is to be used after running run.tfr.mcmc.extra
@@ -121,7 +122,8 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 									use.correlation=pred$use.correlation, mu=pred$mu, rho=pred$rho, sigmaAR1=pred$sigmaAR1, 
 									min.tfr=pred$min.tfr, countries=countries.idx, save.as.ascii=0, output.dir=prediction.dir,
 									force.creating.thinned.mcmc=TRUE,
-									write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, uncertainty=uncertainty)
+									write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, 
+									uncertainty=uncertainty, all.countries.required = all.countries.required)
 									
 	# merge the two predictions
 	code.other.countries <- setdiff(pred$mcmc.set$meta$regions$country_code, 
@@ -152,7 +154,8 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 	if (uncertainty) countries.save <- countries
 	do.convert.trajectories(pred=bayesTFR.prediction, n=save.as.ascii, output.dir=pred$output.dir, countries = countries.save,
 							verbose=verbose)
-	tfr.write.projection.summary.and.parameters(pred=bayesTFR.prediction, output.dir=pred$output.dir)
+	if(all.countries.required)
+	    tfr.write.projection.summary.and.parameters(pred=bayesTFR.prediction, output.dir=pred$output.dir)
 	
 	cat('\nPrediction stored into', pred$output.dir, '\n')
 	invisible(bayesTFR.prediction)
@@ -168,7 +171,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 							    save.as.ascii=0, output.dir = NULL, write.summary.files=TRUE, 
 							    is.mcmc.set.thinned=FALSE, force.creating.thinned.mcmc=FALSE,
 							    write.trajectories=TRUE, 
-							    verbose=verbose, uncertainty=FALSE){
+							    verbose=verbose, uncertainty=FALSE, all.countries.required = TRUE){
 	# if 'countries' is given, it is an index
 	# sigmaAR1 can be a vector. The last element will be repeated up to nr.projections
   meta <- mcmc.set$meta
@@ -238,7 +241,8 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 	unblock.gtk('bDem.TFRpred')
 	load.mcmc.set <- if(has.thinned.mcmc && !force.creating.thinned.mcmc) thinned.mcmc
 					 else create.thinned.tfr.mcmc(mcmc.set, thin=thin, burnin=burnin, 
-					 							output.dir=output.dir, verbose=verbose, uncertainty=uncertainty)
+					 							output.dir=output.dir, verbose=verbose, uncertainty=uncertainty,
+					 							update.with.countries = if(all.countries.required) NULL else countries)
 	nr_simu <- load.mcmc.set$mcmc.list[[1]]$finished.iter
 
 	if (verbose) cat('Load variance parameters.\n')

From 28a7bab88b4f57406574a04b9e74faebb54dc0ef Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 15 Apr 2021 10:25:46 -0700
Subject: [PATCH 12/48] added option of averaging gamma cov for extra countries

---
 R/mcmc_ini.R | 5 +++--
 R/run_mcmc.R | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index fed1e57b..af867c7e 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -504,7 +504,8 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100,
 
 mcmc.meta.ini.extra <- function(mcmc.set, countries=NULL, my.tfr.file = NULL, 
 									my.locations.file=NULL, burnin = 200, verbose=FALSE, uncertainty=FALSE, 
-									my.tfr.raw.file=ifelse(uncertainty, file.path(find.package("bayesTFR"), "data", "rawTFR.csv"), NULL)) {
+									my.tfr.raw.file=ifelse(uncertainty, file.path(find.package("bayesTFR"), "data", "rawTFR.csv"), NULL),
+									average.gammas.cov = TRUE) {
 	update.regions <- function(reg, ereg, id.replace, is.new, is.old) {
 		nreg <- list()
 		for (name in c('code', 'area_code', 'country_code')) {
@@ -536,7 +537,7 @@ mcmc.meta.ini.extra <- function(mcmc.set, countries=NULL, my.tfr.file = NULL,
 		has.mock.suppl <- TRUE
 	}
 	Emeta <- do.meta.ini(meta, tfr.with.regions=tfr.with.regions, 
-								use.average.gammas.cov=TRUE, burnin=burnin,
+								use.average.gammas.cov=average.gammas.cov, burnin=burnin,
 						 		verbose=verbose, uncertainty=uncertainty, my.tfr.raw.file = my.tfr.raw.file)
 			 		
 	# join the new meta with the existing one
diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index ba69fac9..6db57a08 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -368,7 +368,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 								my.locations.file = NULL,
 								uncertainty=FALSE, my.tfr.raw.file=NULL, iso.unbiased=NULL, 
 								covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source",
-								verbose=FALSE, verbose.iter=100, ...) {
+								average.gammas.cov = TRUE, verbose=FALSE, verbose.iter=100, ...) {
   mcmc.set <- get.tfr.mcmc(sim.dir)
   meta.old <- mcmc.set$meta
   
@@ -380,7 +380,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
   
   Eini <- mcmc.meta.ini.extra(mcmc.set, countries=countries, my.tfr.file=my.tfr.file, my.locations.file=my.locations.file, 
 												burnin=burnin, verbose=verbose, uncertainty=uncertainty, 
-												my.tfr.raw.file=my.tfr.raw.file)
+												my.tfr.raw.file=my.tfr.raw.file, average.gammas.cov = average.gammas.cov)
 	if(length(Eini$index) <= 0) {
 		cat('\nNothing to be done.\n')
 		return(invisible(mcmc.set))

From 3a60f2d7790df086a3ce5ea3308fa04414d17be4 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 15 Apr 2021 11:43:40 -0700
Subject: [PATCH 13/48] help files updated

---
 DESCRIPTION                 |  2 +-
 man/get.thinned.tfr.mcmc.Rd | 10 ++++++----
 man/plot-trajectories.Rd    |  4 +---
 man/run-mcmc.Rd             |  6 ++++--
 man/run.tfr.mcmc.extra.Rd   |  3 +++
 man/tfr.predict.extra.Rd    |  6 ++++--
 6 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e37bf371..1f96e362 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: bayesTFR
-Version: 7.0-4.9002
+Version: 7.0-4.9003
 Date: 2021-04-09
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
diff --git a/man/get.thinned.tfr.mcmc.Rd b/man/get.thinned.tfr.mcmc.Rd
index 6e517145..4bc91bc2 100644
--- a/man/get.thinned.tfr.mcmc.Rd
+++ b/man/get.thinned.tfr.mcmc.Rd
@@ -8,14 +8,15 @@ Creating and Accessing Thinned MCMCs
 }
 \description{
 The function \code{get.thinned.tfr.mcmc} accesses 
-a thinned and burned version of the given Phase II MCMC set. \code{create.thinned.tfr.mcmc} creates such a set.
+a thinned and burned version of the given Phase II MCMC set. \code{create.thinned.tfr.mcmc} creates or updates such a set.
 }
 
 \usage{
 get.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0)
 
 create.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0, 
-    output.dir = NULL, verbose = TRUE, uncertainty = FALSE)
+    output.dir = NULL, verbose = TRUE, uncertainty = FALSE,
+    update.with.countries = NULL)
 }
 \arguments{
   \item{mcmc.set}{Object of class \code{\link{bayesTFR.mcmc.set}} of Phase II.}
@@ -23,13 +24,14 @@ create.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0,
   \item{output.dir}{Output directory. It is only used if the output goes to a non-standard directory.}
   \item{verbose}{Logical switching log messages on and off.}
   \item{uncertainty}{If users want to save the thinned estimated TFR in the new mcmc object, this parameter should be set \code{TRUE}.}
+  \item{update.with.countries}{If an existing set is to be updated, this should be a vector of country indices for the update.}
 }
 \details{
-The function \code{create.thinned.tfr.mcmc} is called from \code{\link{tfr.predict}} and thus, the resulting object contains exactly the same MCMCs used for generating projections. In addition, it can be also called from \code{\link{tfr.diagnose}} if desired, so that the projection process can re-use such a set that lead to a convergence. 
+The function \code{create.thinned.tfr.mcmc} is called from \code{\link{tfr.predict}} and thus, the resulting object contains exactly the same MCMCs used for generating projections. In addition, it can be also called from \code{\link{tfr.diagnose}} if desired, so that the projection process can re-use such a set that leads to a convergence. 
 
 The thinning is done as follows: The given \code{burnin} is removed from the beginning of each chain in the original MCMC set. Then each chain is thinned by \code{thin} using equal spacing and all chains are collapsed into one single chain per parameter. They are stored in the main simulation directory under the name \file{thinned_mcmc_\emph{t}_\emph{b}} where \emph{t} is the value  of \code{thin} and \emph{b} the value of \code{burnin}.   
 
-If \code{uncertainty=TRUE}, the estimated TFR is thinned and saved either.
+If \code{uncertainty=TRUE}, the estimated TFR is thinned and saved as well.
 }
 \value{
 Both functions return an object of class \code{\link{bayesTFR.mcmc.set}}. \code{get.thinned.tfr.mcmc} returns \code{NULL} if such object does not exist.
diff --git a/man/plot-trajectories.Rd b/man/plot-trajectories.Rd
index 8f24acd0..349e5d4d 100644
--- a/man/plot-trajectories.Rd
+++ b/man/plot-trajectories.Rd
@@ -18,7 +18,7 @@ tfr.trajectories.plot(tfr.pred, country, pi = c(80, 95),
     main = NULL, lwd = c(2, 2, 2, 2, 2, 1), 
     col=c('black', 'green', 'red', 'red', 'blue', '#00000020'),
     show.legend = TRUE, add = FALSE, uncertainty = FALSE, 
-    thin = NULL, burnin = NULL, col_unc = "purple", \dots)
+    col_unc = "purple", \dots)
 	
 tfr.trajectories.plot.all(tfr.pred, 
     output.dir = file.path(getwd(), 'TFRtrajectories'),
@@ -47,8 +47,6 @@ tfr.trajectories.table(tfr.pred, country, pi = c(80, 95),
   \item{main}{Main title for the plot(s). In \code{tfr.trajectories.plot.all} any occurence of the string \dQuote{XXX} is replaced by the country name.}
   \item{verbose}{Logical switching log messages on and off.}
   \item{uncertainty}{Logical: \code{TRUE} means uncertainty of past TFR should be plotted with the same level of uncertainty interval.}
-  \item{thin}{Thinning for past TFR estimation.}
-  \item{burnin}{Burn-in for past TFR estimation.}
   \item{col_unc}{Color of past TFR estimation uncertainty plot.}
 }
 \details{
diff --git a/man/run-mcmc.Rd b/man/run-mcmc.Rd
index b7a89e34..224ea8d0 100644
--- a/man/run-mcmc.Rd
+++ b/man/run-mcmc.Rd
@@ -103,7 +103,7 @@ continue.tfr.mcmc(iter, chain.ids = NULL,
   \item{proposal_cov_gammas}{Proposal for the gamma covariance matrices for each country. It should be a list with two values: \code{values} and \code{country_codes}. The structure corresponds to the object returned by the function \code{\link{get.cov.gammas}}. By default the covariance matrices are obtained using \code{data(proposal_cov_gammas_cii)}. This argument overwrite the defaults for countries contained the argument.}
   \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. Only used if \code{uncertainty = TRUE}. See details below.}
   \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty = TRUE}. See details below.}
-  \item{source.col.name}{Specify the name for source column used for iso.unbiased variable. }
+  \item{source.col.name}{If \code{uncertainty} is \code{TRUE} this is a column name within the given covariates that determines the data source. It is used if \code{iso.unbiased} is given to identify the vital registration records.}
   \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.}
   \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. If it is \code{TRUE}, the package \pkg{\link[snowFT]{snowFT}} is required. In such a case further arguments can be passed, see description of \dots.}
   \item{nr.nodes}{Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes for running the simulation in parallel. By default it equals to the number of chains.}
@@ -131,10 +131,12 @@ Optionally, \code{my.tfr.file} can contain a column called \code{last.observed}
 
 The package contains a dataset called \file{my_tfr_template} (in \file{extdata} directory) which is a template for user-specified \code{my.tfr.file}.
 
-The parameter \code{uncertainty} is set to control whether past TFR is considered to be precise (\code{FALSE}), or need to be estimated from the raw data (\code{TRUE}). In the latter case, the raw TFR observations are taken either from the \code{\link{rawTFR}} dataset (default) or from a file given by the \code{my.tfr.raw.file} argument.  The Bayesian hierarchical model considers the past TFR as unknown, estimates it and stores in \code{output.dir}. Details can be found in Liu and Raftery (2020). The \code{covariates}, \code{cont_covariates} arguments are for listing categorical and continuous features for estimating bias and standard deviation of past TFR observations. If a country is known to have unbiased vital registration (VR) records, one can include it in the  \code{iso.unbiased} argument as those countries will estimate their past VR records to have 0 bias and 0.0161 standard deviation.
+The parameter \code{uncertainty} is set to control whether past TFR is considered to be precise (\code{FALSE}), or need to be estimated from the raw data (\code{TRUE}). In the latter case, the raw TFR observations are taken either from the \code{\link{rawTFR}} dataset (default) or from a file given by the \code{my.tfr.raw.file} argument.  The Bayesian hierarchical model considers the past TFR as unknown, estimates it and stores in \code{output.dir}. Details can be found in Liu and Raftery (2020). The \code{covariates}, \code{cont_covariates} arguments are for listing categorical and continuous features for estimating bias and standard deviation of past TFR observations. If a country is known to have unbiased vital registration (VR) records, one can include it in the  \code{iso.unbiased} argument as those countries will estimate their past VR records to have 0 bias and 0.0161 standard deviation. The VR records are identified as having \dQuote{VR} in the column given by \code{source.col.name} (\dQuote{source} by default).
 
 If \code{annual=TRUE}, which implies using annual data for training the model, the parameter \code{ar.phase2} will be activated. If \code{ar.phase2} is set to \code{TRUE}, then the model of Phase II will change from \eqn{d_{c,t} = g_{c,t} + \epsilon_{c,t}} to \eqn{d_{c,t}-g_{c,t} = \phi(d_{c,t-1}-g_{c,t-1}) + \epsilon_{c,t}}. \eqn{\phi} is considered as country-independent and is called \code{rho_phase2}.
 
+Furthermore, if \code{annual} is \code{TRUE} and \code{my.tfr.file} is given, the data in the file must be on annual basis and no matching with the WPP dataset takes place. 
+
 
 }
 \value{
diff --git a/man/run.tfr.mcmc.extra.Rd b/man/run.tfr.mcmc.extra.Rd
index fb4e2f98..7974364a 100644
--- a/man/run.tfr.mcmc.extra.Rd
+++ b/man/run.tfr.mcmc.extra.Rd
@@ -14,6 +14,7 @@ run.tfr.mcmc.extra(sim.dir = file.path(getwd(), "bayesTFR.output"),
     parallel = FALSE, nr.nodes = NULL,  my.locations.file = NULL,
     uncertainty = FALSE, my.tfr.raw.file = NULL, iso.unbiased = NULL, 
     covariates = c('source', 'method'), cont_covariates = NULL, 
+    source.col.name = "source", average.gammas.cov = TRUE, 
     verbose = FALSE, verbose.iter = 100, \dots)
 }
 \arguments{
@@ -45,6 +46,8 @@ Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes fo
 \item{my.tfr.raw.file}{File name of the raw TFR used when uncertainty is \code{TRUE}. See details in \code{\link{run.tfr.mcmc}}.}
 \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. See details in \code{\link{run.tfr.mcmc}}.}
 \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty} is \code{TRUE}. See details in \code{\link{run.tfr.mcmc}}.}
+\item{source.col.name}{If \code{uncertainty} is \code{TRUE} this is a column name within the given covariates that determines the data source. It is used if \code{iso.unbiased} is given to identify the vital registration records.}
+\item{average.gammas.cov}{Set this to \code{FALSE} if the processed country has been included in the main simulation. In such a case the proposal gamma covariance matrix is taken from the \code{proposal_cov_gammas_cii} dataset. By default, the matrix is taken as an average from all countries.}
 \item{verbose}{Logical switching log messages on and off.}
 \item{verbose.iter}{Integer determining how often (in number of iterations) log messages are outputted during the estimation.}
 \item{\dots}{Additional parameters to be passed to the function \code{\link[snowFT]{performParallel}}, if \code{parallel} is \code{TRUE}.}
diff --git a/man/tfr.predict.extra.Rd b/man/tfr.predict.extra.Rd
index 2eb0a87f..3cf0fecc 100644
--- a/man/tfr.predict.extra.Rd
+++ b/man/tfr.predict.extra.Rd
@@ -11,7 +11,8 @@ Using the posterior parameter samples the function  generates posterior trajecto
 \usage{
 tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'), 
     prediction.dir = sim.dir, countries = NULL, 
-    save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE)
+    save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE,
+    all.countries.required = TRUE)
 	}
 \arguments{
   \item{sim.dir}{Directory with the MCMC simulation results.}
@@ -19,7 +20,8 @@ tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'),
   \item{countries}{Vector of country codes for which the prediction should be made. If it is \code{NULL}, the prediction is run for all countries that are included in the MCMC object but for which no prediction was generated.}
   \item{save.as.ascii}{Either a number determining how many trajectories should be converted into an ascii file, or \dQuote{all} in which case all trajectories are converted. It should be set to 0, if no converions is desired. Note that the convertion is done on all countries.}
   \item{verbose}{Logical switching log messages on and off.}
-  \item{uncertainty}{Logical. If the MCMC steps has considered uncertainty of past TFR and \code{uncertainty=TRUE}, starting point of prediction trajectories will be the last estimated trajectories of TFR. Otherwise, it will use last observed TFR as starting point of prediction.}
+  \item{uncertainty}{Logical. If the MCMC steps considered uncertainty of past TFR and \code{uncertainty=TRUE}, starting point of prediction trajectories will be the last estimated trajectories of TFR. Otherwise, it will use the last observed TFR as starting point of prediction.}
+  \item{all.countries.required}{If \code{FALSE} it is not required that MCMCs of all countries are present.}
   }
 \details{
 In order to use this function, a prediction object must exist, i.e. the function \code{\link{tfr.predict}} must have been processed prior to using this function.

From 02015295f48038a379667c022006eb62bb28e236 Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Sun, 18 Apr 2021 17:28:42 -0400
Subject: [PATCH 14/48] update for iso.unbiased

---
 R/updated_functions.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/updated_functions.R b/R/updated_functions.R
index 6394293f..d02ab5fb 100644
--- a/R/updated_functions.R
+++ b/R/updated_functions.R
@@ -184,7 +184,7 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
       if (source.col.name %in% covariates)
       {
         index.by.country.vr.estimate <- which((mcmc$meta$raw_data.original$country_code == ISO.code$code) & 
-                                                (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR", 'Estimate')))
+                                                (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR")))
         mcmc$meta$raw_data.original$bias[index.by.country.vr.estimate] <- 0
         mcmc$meta$raw_data.original$std[index.by.country.vr.estimate] <- 0.016
       }

From 082df8efcb924f7202eba2da202cfb03b44fe5cd Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Tue, 20 Apr 2021 13:20:07 -0700
Subject: [PATCH 15/48] fixed storing extra_iter

---
 R/run_mcmc.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index 6db57a08..9d271d5f 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -497,7 +497,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 	    if (!is.null(country.idx)) 
 	    {
 	      meta[['extra']] <- c(meta[['extra']], country.idx)
-	      meta[['extra_iter']][country.idx] <- mcmc.set$mcmc.list[[1]]$iter
+	      meta[['extra_iter']][country.idx] <- if(is.null(iter)) mcmc.set$mcmc.list[[1]]$length else iter
 	      meta[['extra_thin']][country.idx] <- thin.extra
 	      meta[['extra_covariates']][[country.idx]] <- covariates
 	      meta[['extra_cont_covariates']][[country.idx]] <- cont_covariates

From f85b45189370c918b002664e7a64be50550b1ed7 Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 6 Apr 2021 15:45:39 -0400
Subject: [PATCH 16/48] fix bug

---
 R/mcmc_ini.R | 3 +--
 R/wpp_data.R | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index 5452b21b..358cafbd 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -483,8 +483,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100,
                         dontsave=dontsave.pars, uncertainty=uncertainty
                         ),
                    class='bayesTFR.mcmc')
-                   
-	##################################################################
+  ##################################################################
 	# distortions
 	##################################################################
 	# note: the eps will always be NA outside (tau_c, lambda-1)!!
diff --git a/R/wpp_data.R b/R/wpp_data.R
index a6ee8d5f..ad5e580c 100644
--- a/R/wpp_data.R
+++ b/R/wpp_data.R
@@ -208,12 +208,12 @@ get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950
 	    ncol.tfr <- length(num.columns)
 	    cols.starty <- as.integer(substr(names.tfr.data[num.columns], 1,4))
 	    cols.endy <- as.integer(substr(names.tfr.data[num.columns], 6,9))
-	    years.to.interp.to <- (cols.starty[1]+3):cols.endy[length(cols.endy)]
+	    years.to.interp.to <- (cols.starty[1]):cols.endy[length(cols.endy)]
 	    years.to.interp.from <- seq(cols.starty[1]+3, cols.endy[length(cols.endy)]-2, by = 5)
 	    tfr_data_new <- matrix(NA, nrow = nrow(tfr_data), ncol = length(years.to.interp.to))
 	    for(row in 1:nrow(tfr_data_new)) 
 	        tfr_data_new[row, ] <- approx(years.to.interp.from, tfr_data[row, num.columns], 
-	                                      xout = years.to.interp.to)$y
+	                                      xout = years.to.interp.to, rule=2)$y
 	    tfr_data_new <- cbind(tfr_data[, -num.columns], tfr_data_new) 
 	    colnames(tfr_data_new) <- c(colnames(tfr_data)[-num.columns], years.to.interp.to)
 	  tfr_data <- tfr_data_new

From 1d28f9e00a72528efb29292941120958b676bbed Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 6 Apr 2021 17:31:35 -0400
Subject: [PATCH 17/48] fix bug for continuous variable

---
 R/run_mcmc.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index e0468c6e..8ff5c4bc 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -262,7 +262,7 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL
 	                 gamma.ini=gamma.ini[chain.id],
 	                 save.all.parameters=save.all.parameters,
 	                 verbose=verbose, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
-	                 covariates=covariates, cont_covariates=NULL)
+	                 covariates=covariates, cont_covariates=cont_covariates)
 	if (uncertainty)
 	{
 	  this.sv <- list()

From 6842cdc8fdfeb3b564213a8e2adf718189ef9d7a Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 6 Apr 2021 20:29:18 -0400
Subject: [PATCH 18/48] allow using source.col.name to specify the source
 column for iso.unbiased

---
 R/mcmc_ini.R          |  4 ++--
 R/run_mcmc.R          | 16 +++++++++-------
 R/updated_functions.R |  6 +++---
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index 358cafbd..d4ef9b27 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -402,7 +402,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100,
 					 d.ini=0.17,
 					 save.all.parameters=FALSE,
 					 verbose=FALSE, uncertainty=FALSE, iso.unbiased=NULL,
-					 covariates=c('source', 'method'), cont_covariates=NULL) {
+					 covariates=c('source', 'method'), cont_covariates=NULL, source.col.name = "source") {
 				 		 	
 	nr_countries <- mcmc.meta$nr_countries
 
@@ -495,7 +495,7 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100,
 	  # mcmc <- get.obs.estimate.diff(mcmc)
 	  # mcmc <- estimate.bias.sd.raw(mcmc)
 	  mcmc <- get.obs.estimate.diff.original(mcmc)
-	  mcmc <- estimate.bias.sd.original(mcmc, iso.unbiased, covariates, cont_covariates)
+	  mcmc <- estimate.bias.sd.original(mcmc, iso.unbiased, covariates, cont_covariates, source.col.name)
 	}
 	
 	# mcmc <- as.list(mcmc)
diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index 8ff5c4bc..fb765260 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -26,6 +26,7 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(),
 						phase3.starting.values=NULL,
 					 	proposal_cov_gammas = NULL, # should be a list with elements 'values' and 'country_codes'
 						iso.unbiased = NULL, covariates = c('source', 'method'), cont_covariates = NULL, 
+						source.col.name="source",
 					 	seed = NULL, parallel=FALSE, nr.nodes=nr.chains, 
 					 	save.all.parameters = FALSE, compression.type='None',
 					 	auto.conf = list(max.loops=5, iter=62000, iter.incr=10000, nr.chains=3, thin=80, burnin=2000),
@@ -196,10 +197,10 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(),
 		chain.set <- bDem.performParallel(nr.nodes, 1:nr.chains, mcmc.run.chain, 
 						initfun=init.nodes, seed = seed, meta=bayesTFR.mcmc.meta, 
 						thin=thin, starting.values=starting.values, iter=iter, S.ini=S.ini, a.ini=a.ini,
-                        b.ini=b.ini, sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini,
-                        gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, 
-                        verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
-						            covariates=covariates, cont_covariates=cont_covariates, ...)
+						b.ini=b.ini, sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini,
+						gamma.ini=gamma.ini, save.all.parameters=save.all.parameters, verbose=verbose, 
+						verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
+						covariates=covariates, cont_covariates=cont_covariates, source.col.name=source.col.name, ...)
 	} else { # run chains sequentially
 		chain.set <- list()
 		for (chain in 1:nr.chains) {
@@ -208,7 +209,7 @@ run.tfr.mcmc <- function(nr.chains=3, iter=62000, output.dir=file.path(getwd(),
 					 	sigma0.ini=sigma0.ini, Triangle_c4.ini=Triangle_c4.ini, const.ini=const.ini, 
 					 	gamma.ini=gamma.ini, save.all.parameters=save.all.parameters,
 					 	verbose=verbose, verbose.iter=verbose.iter, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
-					 	covariates=covariates, cont_covariates=cont_covariates)
+					 	covariates=covariates, cont_covariates=cont_covariates, source.col.name=source.col.name)
 		}
 	}
 	names(chain.set) <- 1:nr.chains
@@ -240,7 +241,7 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL
 							S.ini, a.ini, b.ini, sigma0.ini, Triangle_c4.ini, const.ini, gamma.ini=1,
 							save.all.parameters=FALSE,
 							verbose=FALSE, verbose.iter=10, uncertainty=FALSE, iso.unbiased=NULL, 
-							covariates=c('source', 'method'), cont_covariates=NULL) {
+							covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source") {
 								
 	cat('\n\nChain nr.', chain.id, '\n')
     if (verbose) {
@@ -262,7 +263,8 @@ mcmc.run.chain <- function(chain.id, meta, thin=1, iter=100, starting.values=NUL
 	                 gamma.ini=gamma.ini[chain.id],
 	                 save.all.parameters=save.all.parameters,
 	                 verbose=verbose, uncertainty=uncertainty, iso.unbiased=iso.unbiased, 
-	                 covariates=covariates, cont_covariates=cont_covariates)
+	                 covariates=covariates, cont_covariates=cont_covariates,
+                   source.col.name=source.col.name)
 	if (uncertainty)
 	{
 	  this.sv <- list()
diff --git a/R/updated_functions.R b/R/updated_functions.R
index d29ed303..a86ebd5d 100644
--- a/R/updated_functions.R
+++ b/R/updated_functions.R
@@ -128,7 +128,7 @@ estimate.bias.sd.raw <- function(mcmc)
 }
 
 estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('source', 'method'), 
-                                      cont_covariates=NULL)
+                                      cont_covariates=NULL, source.col.name="source")
 {
   mcmc$meta$raw_data.original$bias <- NA
   mcmc$meta$raw_data.original$std <- NA
@@ -180,10 +180,10 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
     ## Optional
     if (ISO.code$code %in% iso.unbiased)
     {
-      if ("source" %in% covariates)
+      if (source.col.name %in% covariates)
       {
         index.by.country.vr.estimate <- which((mcmc$meta$raw_data.original$country_code == ISO.code$code) & 
-                                                (mcmc$meta$raw_data.original[, "source"] %in% c("VR", 'Estimate')))
+                                                (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR", 'Estimate')))
         mcmc$meta$raw_data.original$bias[index.by.country.vr.estimate] <- 0
         mcmc$meta$raw_data.original$std[index.by.country.vr.estimate] <- 0.016
       }

From 2803952a813915ecdeacb35e1872145999f1c73c Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 6 Apr 2021 21:02:55 -0400
Subject: [PATCH 19/48] update documents

---
 man/run-mcmc.Rd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/man/run-mcmc.Rd b/man/run-mcmc.Rd
index 971dd115..b7a89e34 100644
--- a/man/run-mcmc.Rd
+++ b/man/run-mcmc.Rd
@@ -36,7 +36,7 @@ run.tfr.mcmc(nr.chains = 3, iter = 62000,
     Triangle_c4.ini = NULL, const.ini = NULL, gamma.ini = 1, 
     phase3.starting.values = NULL, proposal_cov_gammas = NULL, 
     iso.unbiased = NULL, covariates = c("source", "method"), cont_covariates = NULL, 
-    seed = NULL, parallel = FALSE, nr.nodes = nr.chains, 
+    source.col.name="source", seed = NULL, parallel = FALSE, nr.nodes = nr.chains, 
     save.all.parameters = FALSE, compression.type = 'None',
     auto.conf = list(max.loops = 5, iter = 62000, iter.incr = 10000, 
         nr.chains = 3, thin = 80, burnin = 2000),
@@ -103,6 +103,7 @@ continue.tfr.mcmc(iter, chain.ids = NULL,
   \item{proposal_cov_gammas}{Proposal for the gamma covariance matrices for each country. It should be a list with two values: \code{values} and \code{country_codes}. The structure corresponds to the object returned by the function \code{\link{get.cov.gammas}}. By default the covariance matrices are obtained using \code{data(proposal_cov_gammas_cii)}. This argument overwrite the defaults for countries contained the argument.}
   \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. Only used if \code{uncertainty = TRUE}. See details below.}
   \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty = TRUE}. See details below.}
+  \item{source.col.name}{Specify the name for source column used for iso.unbiased variable. }
   \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.}
   \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. If it is \code{TRUE}, the package \pkg{\link[snowFT]{snowFT}} is required. In such a case further arguments can be passed, see description of \dots.}
   \item{nr.nodes}{Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes for running the simulation in parallel. By default it equals to the number of chains.}

From 030db386e181a3cb78ebfc69123589491e8b92a5 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 8 Apr 2021 12:22:12 -0700
Subject: [PATCH 20/48] version changed

---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4cc625e8..a8f72f86 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.0-4
-Date: 2021-03-31
+Version: 7.0-4.9001
+Date: 2021-04-07
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>

From c8fc43d3621295e8b966a68878347f09b6b5c3ab Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 9 Apr 2021 11:49:10 -0700
Subject: [PATCH 21/48] changes in plot function; ignore last.oberved if using
 uncertainty

---
 R/mcmc_ini.R       |  4 ++--
 R/plot_functions.R | 26 +++++++++++++++++++++-----
 R/wpp_data.R       |  9 +++++----
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index d4ef9b27..fed1e57b 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -131,7 +131,6 @@ find.tau.lambda.and.DLcountries <- function(tfr_matrix, min.TFRlevel.for.start.a
     T_end_c <- lambda_c <-rep(T_end, nr_countries)
     T.suppl <- if(is.null(suppl.data$regions)) 0 else dim(suppl.data$tfr_matrix)[1]
     tau_c <- start_c <- rep(NA, nr_countries)
-
     for (country in 1:nr_countries) {
         data <- get.observed.with.supplemental(country, tfr_matrix, suppl.data)
     	has.suppl <- length(data) > T_end && !is.na(suppl.data$index.from.all.countries[country])
@@ -218,7 +217,8 @@ mcmc.meta.ini <- function(...,
 	if(present.year-3 > wpp.year) warning("present.year is much larger then wpp.year. Make sure WPP data for present.year are available.")
 	tfr.with.regions <- set_wpp_regions(start.year=start.year, present.year=present.year, wpp.year=wpp.year, 
 										my.tfr.file = my.tfr.file, my.locations.file=my.locations.file, 
-										annual = mcmc.input$annual.simulation, verbose=verbose)
+										annual = mcmc.input$annual.simulation, ignore.last.observed = uncertainty,
+										verbose=verbose)
 	meta <- do.meta.ini(mcmc.input, tfr.with.regions,  
 						proposal_cov_gammas=proposal_cov_gammas, verbose=verbose, 
 						uncertainty=uncertainty, my.tfr.raw.file=my.tfr.raw.file, ar.phase2=ar.phase2)
diff --git a/R/plot_functions.R b/R/plot_functions.R
index fd398f82..f4cff6c5 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -491,9 +491,13 @@ tfr.estimation.plot <- function(mcmc.list=NULL, country.code=NULL, ISO.code=NULL
   
   wpp.data <- get.observed.tfr(country.obj$index, mcmc.list$meta, "tfr_matrix_all")
   wpp.data <- data.frame(year=quantile_tbl$year, tfr = as.numeric(wpp.data))
-  wpp.data <- wpp.data[wpp.data$year %% 5 == 3,]
+  #wpp.data <- wpp.data[wpp.data$year %% 5 == 3,]
   q <- q + ggplot2::geom_line(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"), size = 0.8) + ggplot2::theme_bw() + 
-    ggplot2::geom_point(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"))
+      ggplot2::geom_point(data = wpp.data, ggplot2::aes_string(x="year", y="tfr"), size = 0.7)
+
+  # re-draw the median
+  q <- q + ggplot2::geom_line(ggplot2::aes_string(x="year", y="Q50"), size = 0.8, color="red") +
+      ggplot2::geom_point(ggplot2::aes_string(x="year", y="Q50"), size = 1, color="red")
   
   if (save.image)
   {
@@ -609,12 +613,20 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
   }
   # plot median
   tfr.median <- get.median.from.prediction(tfr.pred, country$index, country$code)
-  lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3]) 
+  if(uncertainty) {
+      unc.last.time <- which(tfr.object$tfr_quantile$year == x2[1])
+      tfr.median[1] <- unlist(tfr.object$tfr_quantile[unc.last.time,])[length(pi)+1]
+  }
+  lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3])
   # plot given CIs
   lty <- 2:(length(pi)+1)
   for (i in 1:length(pi)) {
     cqp <- get.traj.quantiles(tfr.pred, country$index, country$code, trajectories$trajectories, pi[i])
     if (!is.null(cqp)) {
+        if(uncertainty) {
+            cqp[1,1] <- unlist(tfr.object$tfr_quantile[unc.last.time, ])[length(pi)+1-i]
+            cqp[2,1] <- unlist(tfr.object$tfr_quantile[unc.last.time, ])[length(pi)+1+i]
+        }
       lines(x2, cqp[1,], type='l', col=col[4], lty=lty[i], lwd=lwd[4])
       lines(x2, cqp[2,], type='l', col=col[4], lty=lty[i], lwd=lwd[4])
     }
@@ -649,8 +661,12 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
     lty <- c(lty, max(lty)+1)
     llty <- length(lty)
     up.low <- get.half.child.variant(median=tfr.median)
-    lines(x2, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
-    lines(x2, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
+    if(uncertainty) {
+        up.low <- up.low[,-1]
+        x2t <- x2[-1]
+    } else x2t <- x2
+    lines(x2t, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
+    lines(x2t, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
     legend <- c(legend, '+/- 0.5 child')
     cols <- c(cols, col[5])
     lwds <- c(lwds, lwd[5])
diff --git a/R/wpp_data.R b/R/wpp_data.R
index ad5e580c..fdfa028e 100644
--- a/R/wpp_data.R
+++ b/R/wpp_data.R
@@ -1,7 +1,7 @@
 # Read in the UN estimates
 
 set_wpp_regions <- function(start.year=1950, present.year=2010, wpp.year=2012, my.tfr.file=NULL, 
-							my.locations.file=NULL, annual = FALSE, verbose=FALSE) {
+							my.locations.file=NULL, annual = FALSE, ignore.last.observed = FALSE, verbose=FALSE) {
 # outputs:
 # tfr_matrix_all, with each column one countries UN estimates
 # tfr_matrix with NAs after last observed data point
@@ -32,7 +32,8 @@ set_wpp_regions <- function(start.year=1950, present.year=2010, wpp.year=2012, m
 												start.year=start.year, 
 												present.year=present.year,
 												annual = annual, verbose=verbose, 
-												interpolate = annual && is.null(my.tfr.file))
+												interpolate = annual && is.null(my.tfr.file),
+												ignore.last.observed = ignore.last.observed)
 	if(!annual) {
 	    TFRmatrixsuppl.regions <- .get.suppl.matrix.and.regions(un.object, TFRmatrix.regions, loc_data, 
 									start.year, present.year)
@@ -198,7 +199,7 @@ read.UNlocations <- function(data, wpp.year, package="bayesTFR", my.locations.fi
 get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950, present.year=2010, annual = FALSE,
 												 datacolnames=c(country.code='country_code', country.name='country', reg.name='reg_name',
 															reg.code='reg_code', area.name='area_name', area.code='area_code'), 
-												 interpolate = FALSE) {
+												 interpolate = FALSE, ignore.last.observed = FALSE) {
 	tfr_data <- data
 	nr_countries <- length(tfr_data[,1])
 	if (annual && interpolate) # interpolate 5-year data
@@ -266,7 +267,7 @@ get.observed.time.matrix.and.regions <- function(data, loc_data, start.year=1950
  		if(has.area.name) area_name <- c(area_name, paste(loc_data$area_name[loc_index]))
  		if(has.area.code) area_code <- c(area_code, loc_data$area_code[loc_index])
 		# set NAs for entries that are not observed data (after last.observed) 
-		tfr_matrix[(tfr_data[i,'last.observed'] < mid.years), i] <- NA
+		if(!ignore.last.observed) tfr_matrix[(tfr_data[i,'last.observed'] < mid.years), i] <- NA
 		all.na[i] <- all(is.na(tfr_matrix[,i]))
 	}
 	country.codes <- tfr_data[[datacolnames['country.code']]]

From d58ba6c3f45b984ce21fcefbb15585bb51bdfb7f Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 9 Apr 2021 13:14:29 -0700
Subject: [PATCH 22/48] avoid log(0) in mcmc sampling

---
 DESCRIPTION       | 4 ++--
 R/mcmc_sampling.R | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a8f72f86..e37bf371 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.0-4.9001
-Date: 2021-04-07
+Version: 7.0-4.9002
+Date: 2021-04-09
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>
diff --git a/R/mcmc_sampling.R b/R/mcmc_sampling.R
index 419f6565..b1ef2b1c 100755
--- a/R/mcmc_sampling.R
+++ b/R/mcmc_sampling.R
@@ -160,7 +160,7 @@ tfr.mcmc.sampling <- function(mcmc, thin=1, start.iter=2, verbose=FALSE, verbose
         delta4.squared <- mcenv$delta4^2
         delta4.0.squared <- delta4_0^2
         Triangle_c4_transformed <- 
-                        log((mcenv$Triangle_c4[id_DL] - Triangle_c4.low)/
+                        log(pmax(mcenv$Triangle_c4[id_DL] - Triangle_c4.low, 1e-10)/
                                         (Triangle_c4.up-mcenv$Triangle_c4[id_DL])) 
         mcenv$Triangle4 <- rnorm(1, mean = (sum(Triangle_c4_transformed)/(delta4.squared) + 
         										Triangle4_0/delta4.0.squared)/

From 0f0924cfc5ec4b4528da0ed3130de1e8d78f25f1 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Mon, 12 Apr 2021 09:45:52 -0700
Subject: [PATCH 23/48] fix issue with suppl data in extra function

---
 R/wpp_data.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/wpp_data.R b/R/wpp_data.R
index fdfa028e..4d24aba0 100644
--- a/R/wpp_data.R
+++ b/R/wpp_data.R
@@ -387,12 +387,12 @@ set.wpp.extra <- function(meta, countries=NULL, my.tfr.file=NULL, my.locations.f
 	extra.wpp <- .extra.matrix.regions(data=data, countries=countries, meta=meta, my.locations.file=my.locations.file, 
 	                                   verbose=verbose, annual=annual, uncertainty=uncertainty, 
 	                                   interpolate = is.null(my.tfr.file) && annual)
-	if(!is.null(extra.wpp)) {
+	if(!is.null(extra.wpp) && !annual) {
 		locations <- read.UNlocations(data$data, wpp.year=meta$wpp.year, my.locations.file=my.locations.file, verbose=verbose)
 		suppl.wpp <- .get.suppl.matrix.and.regions(un.object, extra.wpp, locations$loc_data, 
 									meta$start.year, meta$present.year)
 		extra.wpp$suppl.data <- .get.suppl.data.list(suppl.wpp)
-	}							
+	} 
 	return(extra.wpp)
 }
 

From ed13bee94335fe2cd0c42460f44f0d432470177d Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Mon, 12 Apr 2021 14:53:22 -0700
Subject: [PATCH 24/48] fixed source.col.name in run mcmc extra

---
 R/run_mcmc.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index fb765260..c1c3c0fc 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -367,7 +367,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 								thin=1, thin.extra=1, burnin=2000, parallel=FALSE, nr.nodes=NULL, 
 								my.locations.file = NULL,
 								uncertainty=FALSE, my.tfr.raw.file=NULL, iso.unbiased=NULL, 
-								covariates=c('source', 'method'), cont_covariates=NULL, 
+								covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source",
 								verbose=FALSE, verbose.iter=100, ...) {
   mcmc.set <- get.tfr.mcmc(sim.dir)
   meta.old <- mcmc.set$meta
@@ -430,7 +430,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 		if(uncertainty)
 		{
 		  mcmc.set$mcmc.list[[chain]] <- get.obs.estimate.diff.original(mcmc.set$mcmc.list[[chain]])
-		  mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates)
+		  mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, source.col.name=source.col.name)
 		  mcmc.set$mcmc.list[[chain]]$eps_unc <- list()
 		  if (is.null(mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra)) mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra <- list()
 		  for (country in countries)

From 65df9c7468c01a068b62ee67ffe0a77532cbeb52 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Tue, 13 Apr 2021 00:29:24 -0700
Subject: [PATCH 25/48] fixes related to the size of the meta file

---
 R/get_outputs.R       |  3 ++-
 R/predict_tfr.R       |  1 +
 R/run_mcmc.R          | 18 ++++++++++++------
 R/updated_functions.R | 17 +++++++++++------
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/R/get_outputs.R b/R/get_outputs.R
index e9163970..2db57e92 100644
--- a/R/get_outputs.R
+++ b/R/get_outputs.R
@@ -143,7 +143,7 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 		country.obj <- get.country.object(country, mcmc.set$meta, index=TRUE)
 		if (country.obj$index %in% mcmc.set$meta$extra)
 		{
-		  nr.points.cs <- floor(mcmc.set$meta$extra_iter[country.obj$index] / mcmc.set$meta$extra_thin[country.obj$index])
+		  nr.points.cs <- floor((mcmc.set$meta$extra_iter[country.obj$index]-burnin) / mcmc.set$meta$extra_thin[country.obj$index])
 		  nr.points.cs <- nr.points.cs * length(mcmc.set$mcmc.list)
 		  if (nr.points.cs >= nr.points) thin.index.cs <- round(seq(1, nr.points.cs, length.out = nr.points))
 		  else 
@@ -167,6 +167,7 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 										compression.type=thinned.mcmc$compression.type)
 		}
 	}
+	
 	if (mcmc.set$meta$nr_countries > mcmc.set$meta$nr_countries_estimation) {
 		.update.thinned.extras(mcmc.set, (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries,
 								burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose)
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index 5dff9e84..e19ded9a 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -197,6 +197,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 	                               floor((mcmc.set$meta$extra_iter[country] - burn) / mcmc.set$meta$extra_thin[country]) * length(mcmc.set$mcmc.list))
 	    }
 	  }
+	  stored.iter <- stored.iter.extra
 	}
 	mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin))
 	if(!is.null(nr.traj) && !is.null(thin)) {
diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index c1c3c0fc..ba69fac9 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -430,7 +430,8 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 		if(uncertainty)
 		{
 		  mcmc.set$mcmc.list[[chain]] <- get.obs.estimate.diff.original(mcmc.set$mcmc.list[[chain]])
-		  mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, source.col.name=source.col.name)
+		  mcmc.set$mcmc.list[[chain]] <- estimate.bias.sd.original(mcmc.set$mcmc.list[[chain]], iso.unbiased, covariates, cont_covariates, 
+		                                                           source.col.name=source.col.name, countries = Eini$index)
 		  mcmc.set$mcmc.list[[chain]]$eps_unc <- list()
 		  if (is.null(mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra)) mcmc.set$mcmc.list[[chain]]$meta$raw_data_extra <- list()
 		  for (country in countries)
@@ -477,9 +478,9 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 	}
 	if(uncertainty)
 	{
-	  if (!dir.exists(file.path(meta$output.dir, 'extra.meta'))) dir.create(file.path(meta$output.dir, 'extra.meta'))
-	  if (!dir.exists(file.path(meta$output.dir, 'extra.meta', countries[1]))) dir.create(file.path(meta$output.dir, 'extra.meta', countries[1]))
-	  store.bayesTFR.meta.object(meta, file.path(meta$output.dir, 'extra.meta', countries[1]))
+	  #if (!dir.exists(file.path(meta$output.dir, 'extra.meta'))) dir.create(file.path(meta$output.dir, 'extra.meta'))
+	  #if (!dir.exists(file.path(meta$output.dir, 'extra.meta', countries[1]))) dir.create(file.path(meta$output.dir, 'extra.meta', countries[1]))
+	  #store.bayesTFR.meta.object(meta, file.path(meta$output.dir, 'extra.meta', countries[1]))
 	  for (name in c("country.ind.by.year", "ind.by.year", "id_phase1_by_year", "id_phase2_by_year", "id_phase3_by_year", 
 	                 "id_phase3", "nr.countries"))
 	  {
@@ -503,8 +504,13 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 	    }
 	  }
 	  meta[['extra']] <- sort(unique(meta[['extra']]))
-	}
-	store.bayesTFR.meta.object(meta, file.path(meta$output.dir))
+	  # unload parent for storing purposes
+	  parent <- meta$parent
+	  meta[["parent"]] <- NULL
+	} else parent <- NULL
+	
+	store.bayesTFR.meta.object(meta, meta$output.dir)
+	meta[["parent"]] <- parent
 	mcmc.set$meta <- meta
 	cat('\n')
 	invisible(mcmc.set)
diff --git a/R/updated_functions.R b/R/updated_functions.R
index a86ebd5d..6394293f 100644
--- a/R/updated_functions.R
+++ b/R/updated_functions.R
@@ -128,7 +128,7 @@ estimate.bias.sd.raw <- function(mcmc)
 }
 
 estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('source', 'method'), 
-                                      cont_covariates=NULL, source.col.name="source")
+                                      cont_covariates=NULL, source.col.name="source", countries = NULL)
 {
   mcmc$meta$raw_data.original$bias <- NA
   mcmc$meta$raw_data.original$std <- NA
@@ -137,7 +137,8 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
     mcmc$meta$bias_model <- list()
     mcmc$meta$std_model <- list()
   }
-  for(country in 1:mcmc$meta$nr_countries)
+  if(is.null(countries)) countries <- 1:mcmc$meta$nr_countries
+  for(country in countries)
   {
     ISO.code <- get.country.object(country, meta=mcmc$meta, index=TRUE)
     if (is.na(ISO.code$code)) next
@@ -168,10 +169,10 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
       }
     }
     
-    m1 <- lm(as.formula(paste0('diff ~ ', regressor)))
+    m1 <- lm(as.formula(paste0('diff ~ ', regressor)), model = FALSE)
     bias <- predict(m1)
     abs.residual <- abs(residuals(m1))
-    m2 <- lm(as.formula(paste0('abs.residual ~ ', regressor)))
+    m2 <- lm(as.formula(paste0('abs.residual ~ ', regressor)), model = FALSE)
     std <- predict(m2) * sqrt(pi/2)
     std[std < 1e-6] <- 0.1
     std[std < (abs(bias) / 2)] <- abs(bias[std < (abs(bias) / 2)]) 
@@ -192,11 +193,15 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
         warning("Covariate source not found. iso.unbiased is not used.\n")
       }
     }
+    # to save space
+    attr(m1$terms, ".Environment") <- NULL
+    attr(m2$terms, ".Environment") <- NULL
+    m1$fitted.values <- NULL
+    m2$fitted.values <- NULL
+    
     mcmc$meta$bias_model[[country]] <- m1
     mcmc$meta$std_model[[country]] <- m2
   }
-  
-  
   return(mcmc)
 }
 

From 950691d107458f29257e9e7bbe3dbc4135a9a56f Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 14 Apr 2021 16:31:47 -0700
Subject: [PATCH 26/48] option of storing extra country to different directory;
 removed burnin and thin from tfr.trajectories.plot

---
 R/get_outputs.R    | 13 ++++++++-----
 R/plot_functions.R | 10 +++-------
 R/predict_tfr.R    | 14 +++++++++-----
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/R/get_outputs.R b/R/get_outputs.R
index 2db57e92..1a1c7606 100644
--- a/R/get_outputs.R
+++ b/R/get_outputs.R
@@ -86,7 +86,8 @@ get.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0) {
 	return(NULL)
 }
 	
-create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, verbose=TRUE, uncertainty=FALSE) {
+create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL, verbose=TRUE, uncertainty=FALSE,
+                                    update.with.countries = NULL) {
 	#Return a thinned mcmc.set object with burnin removed and all chanins collapsed into one
   mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin))
 	thin <- max(c(thin, mcthin))
@@ -139,7 +140,8 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 	if(verbose) cat('done.\nStoring country-specific parameters ...')
 	par.names.cs <- tfr.parameter.names.cs(trans=FALSE)
 	if (uncertainty) par.names.cs <- c(par.names.cs, 'tfr')
-	for (country in mcmc.set$meta$id_DL){
+	DLcntries <- if(!is.null(update.with.countries)) update.with.countries[update.with.countries %in% mcmc.set$meta$id_DL] else mcmc.set$meta$id_DL
+	for (country in DLcntries){
 		country.obj <- get.country.object(country, mcmc.set$meta, index=TRUE)
 		if (country.obj$index %in% mcmc.set$meta$extra)
 		{
@@ -167,10 +169,11 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 										compression.type=thinned.mcmc$compression.type)
 		}
 	}
-	
 	if (mcmc.set$meta$nr_countries > mcmc.set$meta$nr_countries_estimation) {
-		.update.thinned.extras(mcmc.set, (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries,
-								burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose)
+	    cindex <- if(!is.null(update.with.countries)) update.with.countries[update.with.countries > mcmc.set$meta$nr_countries_estimation] else 
+	        (mcmc.set$meta$nr_countries_estimation+1):mcmc.set$meta$nr_countries
+	    if(length(cindex) > 0)
+		    .update.thinned.extras(mcmc.set, cindex, burnin=burnin, nr.points=nr.points, dir=outdir.thin.mcmc, verbose=verbose)
 	}
 	invisible(structure(list(meta=meta, mcmc.list=list(thinned.mcmc)), class='bayesTFR.mcmc.set'))
 }
diff --git a/R/plot_functions.R b/R/plot_functions.R
index f4cff6c5..b40c24c7 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -516,7 +516,7 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
                                   xlim=NULL, ylim=NULL, type='b', 
                                   xlab='Year', ylab='TFR', main=NULL, lwd=c(2,2,2,2,2,1), 
                                   col=c('black', 'green', 'red', 'red', 'blue', '#00000020'),
-                                  show.legend=TRUE, add=FALSE, uncertainty=FALSE, thin=NULL, burnin=NULL, col_unc="purple", ...
+                                  show.legend=TRUE, add=FALSE, uncertainty=FALSE, col_unc="purple", ...
 ) {
   # lwd/col is a vector of 6 line widths/colors for: 
   #	1. observed data, 2. imputed missing data, 3. median, 4. quantiles, 5. +- 0.5 child, 6. trajectories
@@ -529,12 +529,8 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
   }
   if (uncertainty)
   {
-    sim.dir <- file.path(tfr.pred$output.directory, "..")
-    if (missing(thin) || is.null(thin)) thin <- tfr.pred$thin
-    if (missing(burnin) || is.null(burnin)) burnin <- tfr.pred$burnin
-    tfr.object <- get.tfr.estimation(mcmc.list=NULL, country.code=country, ISO.code=NULL, sim.dir=sim.dir, 
-                                     burnin=burnin, thin=thin, probs=sort(c((1-pi/100)/2, 0.5, pi/100 + (1-pi/100)/2)))
-    
+    tfr.object <- get.tfr.estimation(mcmc.list=tfr.pred$mcmc.set, country.code=country, ISO.code=NULL, 
+                                     probs=sort(c((1-pi/100)/2, 0.5, pi/100 + (1-pi/100)/2)))
   }
   col <- .match.colors.with.default(col, c('black', 'green', 'red', 'red', 'blue', '#00000020'))
   country.obj <- get.country.object(country, tfr.pred$mcmc.set$meta)
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index e19ded9a..d9a0cac9 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -88,7 +88,8 @@ get.burnin.nrtraj.from.diagnostics <- function(sim.dir, ...) {
 }
 
 tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), 
-					prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE) {
+					prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE,
+					all.countries.required = TRUE) {
 	# Run prediction for given countries/regions (as codes). If they are not given it will be set to countries 
 	# for which there are MCMC results but no prediction.
 	# It is to be used after running run.tfr.mcmc.extra
@@ -121,7 +122,8 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 									use.correlation=pred$use.correlation, mu=pred$mu, rho=pred$rho, sigmaAR1=pred$sigmaAR1, 
 									min.tfr=pred$min.tfr, countries=countries.idx, save.as.ascii=0, output.dir=prediction.dir,
 									force.creating.thinned.mcmc=TRUE,
-									write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, uncertainty=uncertainty)
+									write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, 
+									uncertainty=uncertainty, all.countries.required = all.countries.required)
 									
 	# merge the two predictions
 	code.other.countries <- setdiff(pred$mcmc.set$meta$regions$country_code, 
@@ -152,7 +154,8 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 	if (uncertainty) countries.save <- countries
 	do.convert.trajectories(pred=bayesTFR.prediction, n=save.as.ascii, output.dir=pred$output.dir, countries = countries.save,
 							verbose=verbose)
-	tfr.write.projection.summary.and.parameters(pred=bayesTFR.prediction, output.dir=pred$output.dir)
+	if(all.countries.required)
+	    tfr.write.projection.summary.and.parameters(pred=bayesTFR.prediction, output.dir=pred$output.dir)
 	
 	cat('\nPrediction stored into', pred$output.dir, '\n')
 	invisible(bayesTFR.prediction)
@@ -168,7 +171,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 							    save.as.ascii=0, output.dir = NULL, write.summary.files=TRUE, 
 							    is.mcmc.set.thinned=FALSE, force.creating.thinned.mcmc=FALSE,
 							    write.trajectories=TRUE, 
-							    verbose=verbose, uncertainty=FALSE){
+							    verbose=verbose, uncertainty=FALSE, all.countries.required = TRUE){
 	# if 'countries' is given, it is an index
 	# sigmaAR1 can be a vector. The last element will be repeated up to nr.projections
   meta <- mcmc.set$meta
@@ -238,7 +241,8 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 	unblock.gtk('bDem.TFRpred')
 	load.mcmc.set <- if(has.thinned.mcmc && !force.creating.thinned.mcmc) thinned.mcmc
 					 else create.thinned.tfr.mcmc(mcmc.set, thin=thin, burnin=burnin, 
-					 							output.dir=output.dir, verbose=verbose, uncertainty=uncertainty)
+					 							output.dir=output.dir, verbose=verbose, uncertainty=uncertainty,
+					 							update.with.countries = if(all.countries.required) NULL else countries)
 	nr_simu <- load.mcmc.set$mcmc.list[[1]]$finished.iter
 
 	if (verbose) cat('Load variance parameters.\n')

From 5e1c97ead196682a06e1bd3e1874d699d9f2ba7c Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 15 Apr 2021 10:25:46 -0700
Subject: [PATCH 27/48] added option of averaging gamma cov for extra countries

---
 R/mcmc_ini.R | 5 +++--
 R/run_mcmc.R | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index fed1e57b..af867c7e 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -504,7 +504,8 @@ mcmc.ini <- function(chain.id, mcmc.meta, iter=100,
 
 mcmc.meta.ini.extra <- function(mcmc.set, countries=NULL, my.tfr.file = NULL, 
 									my.locations.file=NULL, burnin = 200, verbose=FALSE, uncertainty=FALSE, 
-									my.tfr.raw.file=ifelse(uncertainty, file.path(find.package("bayesTFR"), "data", "rawTFR.csv"), NULL)) {
+									my.tfr.raw.file=ifelse(uncertainty, file.path(find.package("bayesTFR"), "data", "rawTFR.csv"), NULL),
+									average.gammas.cov = TRUE) {
 	update.regions <- function(reg, ereg, id.replace, is.new, is.old) {
 		nreg <- list()
 		for (name in c('code', 'area_code', 'country_code')) {
@@ -536,7 +537,7 @@ mcmc.meta.ini.extra <- function(mcmc.set, countries=NULL, my.tfr.file = NULL,
 		has.mock.suppl <- TRUE
 	}
 	Emeta <- do.meta.ini(meta, tfr.with.regions=tfr.with.regions, 
-								use.average.gammas.cov=TRUE, burnin=burnin,
+								use.average.gammas.cov=average.gammas.cov, burnin=burnin,
 						 		verbose=verbose, uncertainty=uncertainty, my.tfr.raw.file = my.tfr.raw.file)
 			 		
 	# join the new meta with the existing one
diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index ba69fac9..6db57a08 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -368,7 +368,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 								my.locations.file = NULL,
 								uncertainty=FALSE, my.tfr.raw.file=NULL, iso.unbiased=NULL, 
 								covariates=c('source', 'method'), cont_covariates=NULL, source.col.name="source",
-								verbose=FALSE, verbose.iter=100, ...) {
+								average.gammas.cov = TRUE, verbose=FALSE, verbose.iter=100, ...) {
   mcmc.set <- get.tfr.mcmc(sim.dir)
   meta.old <- mcmc.set$meta
   
@@ -380,7 +380,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
   
   Eini <- mcmc.meta.ini.extra(mcmc.set, countries=countries, my.tfr.file=my.tfr.file, my.locations.file=my.locations.file, 
 												burnin=burnin, verbose=verbose, uncertainty=uncertainty, 
-												my.tfr.raw.file=my.tfr.raw.file)
+												my.tfr.raw.file=my.tfr.raw.file, average.gammas.cov = average.gammas.cov)
 	if(length(Eini$index) <= 0) {
 		cat('\nNothing to be done.\n')
 		return(invisible(mcmc.set))

From 04b83d1fc6c97eb621c726a1f4151e7373046dc1 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 15 Apr 2021 11:43:40 -0700
Subject: [PATCH 28/48] help files updated

---
 DESCRIPTION                 |  2 +-
 man/get.thinned.tfr.mcmc.Rd | 10 ++++++----
 man/plot-trajectories.Rd    |  4 +---
 man/run-mcmc.Rd             |  6 ++++--
 man/run.tfr.mcmc.extra.Rd   |  3 +++
 man/tfr.predict.extra.Rd    |  6 ++++--
 6 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e37bf371..1f96e362 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: bayesTFR
-Version: 7.0-4.9002
+Version: 7.0-4.9003
 Date: 2021-04-09
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
diff --git a/man/get.thinned.tfr.mcmc.Rd b/man/get.thinned.tfr.mcmc.Rd
index 6e517145..4bc91bc2 100644
--- a/man/get.thinned.tfr.mcmc.Rd
+++ b/man/get.thinned.tfr.mcmc.Rd
@@ -8,14 +8,15 @@ Creating and Accessing Thinned MCMCs
 }
 \description{
 The function \code{get.thinned.tfr.mcmc} accesses 
-a thinned and burned version of the given Phase II MCMC set. \code{create.thinned.tfr.mcmc} creates such a set.
+a thinned and burned version of the given Phase II MCMC set. \code{create.thinned.tfr.mcmc} creates or updates such a set.
 }
 
 \usage{
 get.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0)
 
 create.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0, 
-    output.dir = NULL, verbose = TRUE, uncertainty = FALSE)
+    output.dir = NULL, verbose = TRUE, uncertainty = FALSE,
+    update.with.countries = NULL)
 }
 \arguments{
   \item{mcmc.set}{Object of class \code{\link{bayesTFR.mcmc.set}} of Phase II.}
@@ -23,13 +24,14 @@ create.thinned.tfr.mcmc(mcmc.set, thin = 1, burnin = 0,
   \item{output.dir}{Output directory. It is only used if the output goes to a non-standard directory.}
   \item{verbose}{Logical switching log messages on and off.}
   \item{uncertainty}{If users want to save the thinned estimated TFR in the new mcmc object, this parameter should be set \code{TRUE}.}
+  \item{update.with.countries}{If an existing set is to be updated, this should be a vector of country indices for the update.}
 }
 \details{
-The function \code{create.thinned.tfr.mcmc} is called from \code{\link{tfr.predict}} and thus, the resulting object contains exactly the same MCMCs used for generating projections. In addition, it can be also called from \code{\link{tfr.diagnose}} if desired, so that the projection process can re-use such a set that lead to a convergence. 
+The function \code{create.thinned.tfr.mcmc} is called from \code{\link{tfr.predict}} and thus, the resulting object contains exactly the same MCMCs used for generating projections. In addition, it can be also called from \code{\link{tfr.diagnose}} if desired, so that the projection process can re-use such a set that leads to a convergence. 
 
 The thinning is done as follows: The given \code{burnin} is removed from the beginning of each chain in the original MCMC set. Then each chain is thinned by \code{thin} using equal spacing and all chains are collapsed into one single chain per parameter. They are stored in the main simulation directory under the name \file{thinned_mcmc_\emph{t}_\emph{b}} where \emph{t} is the value  of \code{thin} and \emph{b} the value of \code{burnin}.   
 
-If \code{uncertainty=TRUE}, the estimated TFR is thinned and saved either.
+If \code{uncertainty=TRUE}, the estimated TFR is thinned and saved as well.
 }
 \value{
 Both functions return an object of class \code{\link{bayesTFR.mcmc.set}}. \code{get.thinned.tfr.mcmc} returns \code{NULL} if such object does not exist.
diff --git a/man/plot-trajectories.Rd b/man/plot-trajectories.Rd
index 8f24acd0..349e5d4d 100644
--- a/man/plot-trajectories.Rd
+++ b/man/plot-trajectories.Rd
@@ -18,7 +18,7 @@ tfr.trajectories.plot(tfr.pred, country, pi = c(80, 95),
     main = NULL, lwd = c(2, 2, 2, 2, 2, 1), 
     col=c('black', 'green', 'red', 'red', 'blue', '#00000020'),
     show.legend = TRUE, add = FALSE, uncertainty = FALSE, 
-    thin = NULL, burnin = NULL, col_unc = "purple", \dots)
+    col_unc = "purple", \dots)
 	
 tfr.trajectories.plot.all(tfr.pred, 
     output.dir = file.path(getwd(), 'TFRtrajectories'),
@@ -47,8 +47,6 @@ tfr.trajectories.table(tfr.pred, country, pi = c(80, 95),
   \item{main}{Main title for the plot(s). In \code{tfr.trajectories.plot.all} any occurence of the string \dQuote{XXX} is replaced by the country name.}
   \item{verbose}{Logical switching log messages on and off.}
   \item{uncertainty}{Logical: \code{TRUE} means uncertainty of past TFR should be plotted with the same level of uncertainty interval.}
-  \item{thin}{Thinning for past TFR estimation.}
-  \item{burnin}{Burn-in for past TFR estimation.}
   \item{col_unc}{Color of past TFR estimation uncertainty plot.}
 }
 \details{
diff --git a/man/run-mcmc.Rd b/man/run-mcmc.Rd
index b7a89e34..224ea8d0 100644
--- a/man/run-mcmc.Rd
+++ b/man/run-mcmc.Rd
@@ -103,7 +103,7 @@ continue.tfr.mcmc(iter, chain.ids = NULL,
   \item{proposal_cov_gammas}{Proposal for the gamma covariance matrices for each country. It should be a list with two values: \code{values} and \code{country_codes}. The structure corresponds to the object returned by the function \code{\link{get.cov.gammas}}. By default the covariance matrices are obtained using \code{data(proposal_cov_gammas_cii)}. This argument overwrite the defaults for countries contained the argument.}
   \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. Only used if \code{uncertainty = TRUE}. See details below.}
   \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty = TRUE}. See details below.}
-  \item{source.col.name}{Specify the name for source column used for iso.unbiased variable. }
+  \item{source.col.name}{If \code{uncertainty} is \code{TRUE} this is a column name within the given covariates that determines the data source. It is used if \code{iso.unbiased} is given to identify the vital registration records.}
   \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.}
   \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. If it is \code{TRUE}, the package \pkg{\link[snowFT]{snowFT}} is required. In such a case further arguments can be passed, see description of \dots.}
   \item{nr.nodes}{Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes for running the simulation in parallel. By default it equals to the number of chains.}
@@ -131,10 +131,12 @@ Optionally, \code{my.tfr.file} can contain a column called \code{last.observed}
 
 The package contains a dataset called \file{my_tfr_template} (in \file{extdata} directory) which is a template for user-specified \code{my.tfr.file}.
 
-The parameter \code{uncertainty} is set to control whether past TFR is considered to be precise (\code{FALSE}), or need to be estimated from the raw data (\code{TRUE}). In the latter case, the raw TFR observations are taken either from the \code{\link{rawTFR}} dataset (default) or from a file given by the \code{my.tfr.raw.file} argument.  The Bayesian hierarchical model considers the past TFR as unknown, estimates it and stores in \code{output.dir}. Details can be found in Liu and Raftery (2020). The \code{covariates}, \code{cont_covariates} arguments are for listing categorical and continuous features for estimating bias and standard deviation of past TFR observations. If a country is known to have unbiased vital registration (VR) records, one can include it in the  \code{iso.unbiased} argument as those countries will estimate their past VR records to have 0 bias and 0.0161 standard deviation.
+The parameter \code{uncertainty} is set to control whether past TFR is considered to be precise (\code{FALSE}), or need to be estimated from the raw data (\code{TRUE}). In the latter case, the raw TFR observations are taken either from the \code{\link{rawTFR}} dataset (default) or from a file given by the \code{my.tfr.raw.file} argument.  The Bayesian hierarchical model considers the past TFR as unknown, estimates it and stores in \code{output.dir}. Details can be found in Liu and Raftery (2020). The \code{covariates}, \code{cont_covariates} arguments are for listing categorical and continuous features for estimating bias and standard deviation of past TFR observations. If a country is known to have unbiased vital registration (VR) records, one can include it in the  \code{iso.unbiased} argument as those countries will estimate their past VR records to have 0 bias and 0.0161 standard deviation. The VR records are identified as having \dQuote{VR} in the column given by \code{source.col.name} (\dQuote{source} by default).
 
 If \code{annual=TRUE}, which implies using annual data for training the model, the parameter \code{ar.phase2} will be activated. If \code{ar.phase2} is set to \code{TRUE}, then the model of Phase II will change from \eqn{d_{c,t} = g_{c,t} + \epsilon_{c,t}} to \eqn{d_{c,t}-g_{c,t} = \phi(d_{c,t-1}-g_{c,t-1}) + \epsilon_{c,t}}. \eqn{\phi} is considered as country-independent and is called \code{rho_phase2}.
 
+Furthermore, if \code{annual} is \code{TRUE} and \code{my.tfr.file} is given, the data in the file must be on annual basis and no matching with the WPP dataset takes place. 
+
 
 }
 \value{
diff --git a/man/run.tfr.mcmc.extra.Rd b/man/run.tfr.mcmc.extra.Rd
index fb4e2f98..7974364a 100644
--- a/man/run.tfr.mcmc.extra.Rd
+++ b/man/run.tfr.mcmc.extra.Rd
@@ -14,6 +14,7 @@ run.tfr.mcmc.extra(sim.dir = file.path(getwd(), "bayesTFR.output"),
     parallel = FALSE, nr.nodes = NULL,  my.locations.file = NULL,
     uncertainty = FALSE, my.tfr.raw.file = NULL, iso.unbiased = NULL, 
     covariates = c('source', 'method'), cont_covariates = NULL, 
+    source.col.name = "source", average.gammas.cov = TRUE, 
     verbose = FALSE, verbose.iter = 100, \dots)
 }
 \arguments{
@@ -45,6 +46,8 @@ Relevant only if \code{parallel} is \code{TRUE}. It gives the number of nodes fo
 \item{my.tfr.raw.file}{File name of the raw TFR used when uncertainty is \code{TRUE}. See details in \code{\link{run.tfr.mcmc}}.}
 \item{iso.unbiased}{Codes of countries for which the vital registration TFR estimates are considered unbiased. See details in \code{\link{run.tfr.mcmc}}.}
 \item{covariates, cont_covariates}{Categorical and continuous features used in estimating bias and standard deviation if \code{uncertainty} is \code{TRUE}. See details in \code{\link{run.tfr.mcmc}}.}
+\item{source.col.name}{If \code{uncertainty} is \code{TRUE} this is a column name within the given covariates that determines the data source. It is used if \code{iso.unbiased} is given to identify the vital registration records.}
+\item{average.gammas.cov}{Set this to \code{FALSE} if the processed country has been included in the main simulation. In such a case the proposal gamma covariance matrix is taken from the \code{proposal_cov_gammas_cii} dataset. By default, the matrix is taken as an average from all countries.}
 \item{verbose}{Logical switching log messages on and off.}
 \item{verbose.iter}{Integer determining how often (in number of iterations) log messages are outputted during the estimation.}
 \item{\dots}{Additional parameters to be passed to the function \code{\link[snowFT]{performParallel}}, if \code{parallel} is \code{TRUE}.}
diff --git a/man/tfr.predict.extra.Rd b/man/tfr.predict.extra.Rd
index 2eb0a87f..3cf0fecc 100644
--- a/man/tfr.predict.extra.Rd
+++ b/man/tfr.predict.extra.Rd
@@ -11,7 +11,8 @@ Using the posterior parameter samples the function  generates posterior trajecto
 \usage{
 tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'), 
     prediction.dir = sim.dir, countries = NULL, 
-    save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE)
+    save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE,
+    all.countries.required = TRUE)
 	}
 \arguments{
   \item{sim.dir}{Directory with the MCMC simulation results.}
@@ -19,7 +20,8 @@ tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'),
   \item{countries}{Vector of country codes for which the prediction should be made. If it is \code{NULL}, the prediction is run for all countries that are included in the MCMC object but for which no prediction was generated.}
   \item{save.as.ascii}{Either a number determining how many trajectories should be converted into an ascii file, or \dQuote{all} in which case all trajectories are converted. It should be set to 0, if no converions is desired. Note that the convertion is done on all countries.}
   \item{verbose}{Logical switching log messages on and off.}
-  \item{uncertainty}{Logical. If the MCMC steps has considered uncertainty of past TFR and \code{uncertainty=TRUE}, starting point of prediction trajectories will be the last estimated trajectories of TFR. Otherwise, it will use last observed TFR as starting point of prediction.}
+  \item{uncertainty}{Logical. If the MCMC steps considered uncertainty of past TFR and \code{uncertainty=TRUE}, starting point of prediction trajectories will be the last estimated trajectories of TFR. Otherwise, it will use the last observed TFR as starting point of prediction.}
+  \item{all.countries.required}{If \code{FALSE} it is not required that MCMCs of all countries are present.}
   }
 \details{
 In order to use this function, a prediction object must exist, i.e. the function \code{\link{tfr.predict}} must have been processed prior to using this function.

From 83e81d1b753d6e67432823e4716e53b26ed4481a Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Sun, 18 Apr 2021 17:28:42 -0400
Subject: [PATCH 29/48] update for iso.unbiased

---
 R/updated_functions.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/updated_functions.R b/R/updated_functions.R
index 6394293f..d02ab5fb 100644
--- a/R/updated_functions.R
+++ b/R/updated_functions.R
@@ -184,7 +184,7 @@ estimate.bias.sd.original <- function(mcmc, iso.unbiased=NULL, covariates=c('sou
       if (source.col.name %in% covariates)
       {
         index.by.country.vr.estimate <- which((mcmc$meta$raw_data.original$country_code == ISO.code$code) & 
-                                                (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR", 'Estimate')))
+                                                (mcmc$meta$raw_data.original[, source.col.name] %in% c("VR")))
         mcmc$meta$raw_data.original$bias[index.by.country.vr.estimate] <- 0
         mcmc$meta$raw_data.original$std[index.by.country.vr.estimate] <- 0.016
       }

From a7f8eb550000206a7e89c39954a291e7c99d8ce5 Mon Sep 17 00:00:00 2001
From: Peiran Liu <prliu@uw.edu>
Date: Tue, 22 Jun 2021 21:10:02 -0400
Subject: [PATCH 30/48] fix bug for San Marino run

---
 R/run_mcmc.R | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/R/run_mcmc.R b/R/run_mcmc.R
index 6db57a08..db34993e 100755
--- a/R/run_mcmc.R
+++ b/R/run_mcmc.R
@@ -385,10 +385,13 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 		cat('\nNothing to be done.\n')
 		return(invisible(mcmc.set))
 	}
+  #TODO: we should consider in the future that in the original run, no MCMC3 steps are conducted, 
+  #but when running for specific countries we have that. This could possibly break the simulation.
 	if (uncertainty && has.tfr3.mcmc(sim.dir))
 	{
 	  mcmc3.set <- get.tfr3.mcmc(sim.dir)
-	  Eini$meta[['id_phase3']] <- intersect(mcmc3.set$meta$id_phase3, which(mcmc.set$meta$regions$country_code %in% countries))
+	  Eini$meta[['id_phase3']] <- intersect(which(Eini$meta$lambda_c < Eini$meta$T_end_c), 
+	                                        which(Eini$meta$regions$country_code %in% countries))
 	  for (par.name in tfr3.parameter.names())
 	  {
 	    for (suffix in c('prior.range', 'ini', 'ini.range'))
@@ -402,7 +405,6 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 	meta <- Eini$meta
 	chain.ids <- names(mcmc.set$mcmc.list)
 	mcthin <- 1
-	
 	if(verbose) cat('\n')
 	for (chain in chain.ids) { # update meta in each chain
 		if(verbose) cat('Updating meta in chain', chain, '\n')
@@ -437,7 +439,7 @@ run.tfr.mcmc.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 		  for (country in countries)
 		  {
 		    df_country <- mcmc.set$mcmc.list[[chain]]$meta$raw_data.original
-		    country.obj <- get.country.object(country, meta.old)
+		    country.obj <- get.country.object(country, Eini$meta)
 		    if (!is.null(country.obj$index))
 		    {
 		      df_country <- df_country[df_country$country_code == country,]

From e4314f681c06b14a63823d86a78863faefed88ae Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 29 Oct 2021 14:53:17 -0700
Subject: [PATCH 31/48] resolved conflict

---
 R/predict_tfr.R | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index c62b0781..fe4e7f35 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -189,24 +189,8 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 	burn <- if(is.mcmc.set.thinned) 0 else burnin
 	total.iter <- get.total.iterations(mcmc.set$mcmc.list, burn)
 	stored.iter <- get.stored.mcmc.length(mcmc.set$mcmc.list, burn)
-<<<<<<< HEAD
-	if (!is.null(mcmc.set$meta$extra) && !is.null(countries)) 
-	{
-	  stored.iter.extra <- stored.iter
-	  for (country in mcmc.set$meta$extra)
-	  {
-	    if (country %in% countries)
-	    {
-	      stored.iter.extra <- min(stored.iter.extra, 
-	                               floor((mcmc.set$meta$extra_iter[country] - burn) / mcmc.set$meta$extra_thin[country]) * length(mcmc.set$mcmc.list))
-	    }
-	  }
-	  stored.iter <- stored.iter.extra
-	}
-=======
 	stored.iter <- min(stored.iter, get.stored.mcmc.length.extra(mcmc.set$meta, countries, 
 	                                                             nr.chains =length(mcmc.set$mcmc.list), burnin = burn), na.rm = TRUE)
->>>>>>> master
 	mcthin <- max(sapply(mcmc.set$mcmc.list, function(x) x$thin))
 	if(!is.null(nr.traj) && !is.null(thin)) {
 		warning('Both nr.traj and thin are given. Argument thin will be ignored.')

From 7988e361316ef4e1bdc6b8df2eba9978ab8cfa79 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Mon, 19 Feb 2024 18:12:34 -0800
Subject: [PATCH 32/48] rgdal changes; bug fix in ar.phase2

---
 ChangeLog                       |  8 +++++++
 DESCRIPTION                     |  6 ++---
 R/plot_functions.R              | 11 +++++----
 R/predict_tfr.R                 |  7 ++++--
 man/convert.trajectories.Rd     | 13 ++++++-----
 man/write.projection.summary.Rd | 40 ++++++++++++++++-----------------
 6 files changed, 48 insertions(+), 37 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 800fc593..e7f8e12f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+7.4-2.9001 (2024-02-19)
+-----
+Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng).
+
+7.4-1/2 (2023-10-17)
+-----
+Removed rgdal from suggested packages.
+
 7.4-0 (2023-09-14)
 -----
 Annual subnational projections are now possible, controlled by the argument "annual" 
diff --git a/DESCRIPTION b/DESCRIPTION
index b0bb03fb..1d9becfe 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,12 +1,12 @@
 Package: bayesTFR
-Version: 7.4-0
-Date: 2023-09-14
+Version: 7.4-2.9001
+Date: 2024-02-19
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>
 Depends: R (>= 3.5.0)
 Imports: mvtnorm, MASS, coda, graphics, grDevices, stats, utils, wpp2019, data.table, lifecycle
-Suggests: rworldmap, snowFT, googleVis, rgdal, sp, wpp2017, wpp2015, wpp2012, wpp2010, ggplot2, sf, spData, scales
+Suggests: rworldmap, snowFT, googleVis, sp, wpp2017, wpp2015, wpp2012, wpp2010, ggplot2, sf, spData, scales
 Description: Making probabilistic projections of total fertility rate for all countries of the world, using a Bayesian hierarchical model <doi:10.1007/s13524-011-0040-5> <doi:10.18637/jss.v106.i08>. Subnational probabilistic projections are also supported <doi:10.4054/DemRes.2018.38.60>.
 License: GPL-3 | file LICENSE
 URL: https://bayespop.csss.washington.edu
diff --git a/R/plot_functions.R b/R/plot_functions.R
index ddf2d1e3..f6e74785 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -1210,11 +1210,10 @@ tfr.map <- function(pred, quantile=0.5, year=NULL, par.name=NULL, adjusted=FALSE
     tfr <- data.frame(cbind(un=data.period$country.codes, tfr=data))
     map <- rworldmap::getMap(resolution=resolution)
     #first get countries excluding Antarctica which crashes spTransform (says the help page for joinCountryData2Map)
-    sPDF <- map[-which(map$ADMIN=='Antarctica'), ]	
-    if(requireNamespace("rgdal", quietly=TRUE)) {
-        #transform map to the Robinson projection
-        sPDF <- sp::spTransform(sPDF, CRSobj=sp::CRS("+proj=robin +ellps=WGS84"))
-    }
+    sPDF <- map[-which(map$ADMIN=='Antarctica'), ]
+    #transform map to the Robinson projection
+    sPDF <- sp::spTransform(sPDF, CRSobj = sp::CRS("+proj=robin +ellps=WGS84"))
+
     ## recode missing UN codes and UN member states
     sPDF$UN <- sPDF$ISO_N3
     ## N. Cyprus -> assign to Cyprus
@@ -1223,7 +1222,7 @@ tfr.map <- function(pred, quantile=0.5, year=NULL, par.name=NULL, adjusted=FALSE
     sPDF$UN[sPDF$ISO3=="KOS"] <- 688
     ## W. Sahara -> no UN numerical code assigned in Natural Earth map (its ISO3 changed in rworlmap 1.3.6)
     sPDF$UN[sPDF$ISO3=="ESH"] <- 732
-    ## Somaliland -> assign to Somalia -> fixed in rworlmap version 1.3.6
+    ## Somaliland -> assign to Somalia (SOM) -> fixed in rworlmap version 1.3.6
     #sPDF$UN[sPDF$ISO3=="SOL"] <- 706
     
     #mtfr <- joinCountryData2Map(tfr, joinCode='UN', nameJoinColumn='un')
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index 80260f38..36881cac 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -392,10 +392,13 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 	  }
 	  else
 	  {
-	    for(year in 1:fps.end.obs.index) 
+	      last.not.na <- 1
+	    for(year in 1:fps.end.obs.index) {
 	      all.f_ps[icountry,year,] <- all.tfr.list[[prediction.countries[icountry]]][all.T_end.min+year-1]
+	      if(!is.na(all.f_ps[icountry,year,1])) last.not.na <- year
+	    }
 	    if (!is.null(mcmc.set$meta$ar.phase2) && (mcmc.set$meta$ar.phase2))
-	      f_ps_previous[, icountry] <- all.tfr.list[[prediction.countries[icountry]]][all.T_end.min+year-2]
+	      f_ps_previous[, icountry] <- all.tfr.list[[prediction.countries[icountry]]][all.T_end.min+last.not.na-2]
 	  }
 		first.two.na <- which(is.na(all.f_ps[icountry,,1]))[1:2]
 		which.Wsecond[icountry] <- first.two.na[2]
diff --git a/man/convert.trajectories.Rd b/man/convert.trajectories.Rd
index d8a188db..5795d384 100644
--- a/man/convert.trajectories.Rd
+++ b/man/convert.trajectories.Rd
@@ -19,12 +19,13 @@ convert.tfr.trajectories(dir = file.path(getwd(), 'bayesTFR.output'),
 }
 \details{
 The function creates two files. One is called \dQuote{ascii_trajectories.csv}, it is a comma-separated table with the following columns: 
-\itemize{\item{\dQuote{LocID}}{country code} 
-		\item{\dQuote{Period}}{prediction interval, e.g. 2015-2020} 
-		\item{\dQuote{Year}}{middle year of the prediction interval}
-		\item{\dQuote{Trajectory}}{identifier of the trajectory}
-		\item{\dQuote{TF}}{total fertility rate}
-}
+\describe{
+    \item{LocID}{country code} 
+		\item{Period}{prediction interval, e.g. 2015-2020} 
+		\item{Year}{middle year of the prediction interval}
+		\item{Trajectory}{identifier of the trajectory}
+		\item{TF}{total fertility rate}
+    }
 
 The second file is called \dQuote{ascii_trajectories_wide.csv}, it is also a comma-separated table and it contains the same information as above but in a \sQuote{transposed} format. I.e. the data for one country are ordered in columns, thus, there is one column per country. The country columns are ordered alphabetically. 
 
diff --git a/man/write.projection.summary.Rd b/man/write.projection.summary.Rd
index fd8d96d9..8580981e 100644
--- a/man/write.projection.summary.Rd
+++ b/man/write.projection.summary.Rd
@@ -22,32 +22,32 @@ write.projection.summary(dir = file.path(getwd(), "bayesTFR.output"),
 }
 \details{
 The first file that the function creates is called \file{projection_summary_user_friendly.csv} (or \file{projection_summary_user_friendly_adjusted.csv} if \code{adjusted=TRUE}), it is a comma-separated table with the following columns: 
-\itemize{\item{\dQuote{country_name}: }{country name}
-		 \item{\dQuote{country_code}: }{country code}
-		 \item{\dQuote{variant}: }{name of the variant, such as \dQuote{median}, \dQuote{lower 80}, \dQuote{upper 80}, \dQuote{lower 95}, \dQuote{upper 95}, \dQuote{-0.5child}, \dQuote{+0.5child}, \dQuote{constant}}
-		\item{period1: }{e.g. \dQuote{2005-2010}: TFR for the first time period. If \code{est.uncertainty} is \code{TRUE}, the first time period is the first observed time period. Otherwise it is the last observed one.}
-		\item{period2: }{e.g. \dQuote{2010-2015}: TFR for the second time period}
-		\item{\dots }{further columns with TFR projections}
+\describe{\item{country_name}{country name}
+		 \item{country_code}{country code}
+		 \item{variant}{name of the variant, such as \dQuote{median}, \dQuote{lower 80}, \dQuote{upper 80}, \dQuote{lower 95}, \dQuote{upper 95}, \dQuote{-0.5child}, \dQuote{+0.5child}, \dQuote{constant}}
+		\item{\emph{period1:}}{e.g. \dQuote{2005-2010}: TFR for the first time period. If \code{est.uncertainty} is \code{TRUE}, the first time period is the first observed time period. Otherwise it is the last observed one.}
+		\item{\emph{period2:}}{e.g. \dQuote{2010-2015}: TFR for the second time period}
+		\item{\dots}{further columns with TFR projections}
 	}
 
 The second file, called \file{projection_summary.csv} (or \file{projection_summary_adjusted.csv} if \code{adjusted=TRUE}), also comma-separated table, contains the same information as above in a UN-specific format:
-\itemize{\item{\dQuote{RevID}: }{revision number, passed to the function as an argument}
-		  \item{\dQuote{VarID}: }{variant identifier, extracted from the \code{\link{UN_variants}} dataset}
-		  \item{\dQuote{LocID}: }{country code}
-		  \item{\dQuote{TimeID}: }{time identifier, extracted from the \code{\link{UN_time}} dataset}
-		  \item{\dQuote{TFR}: }{the total fertility rate for this variant, location and time period}
+\describe{\item{RevID}{revision number, passed to the function as an argument}
+		  \item{VarID}{variant identifier, extracted from the \code{\link{UN_variants}} dataset}
+		  \item{LocID}{country code}
+		  \item{TimeID}{time identifier, extracted from the \code{\link{UN_time}} dataset}
+		  \item{TFR}{the total fertility rate for this variant, location and time period}
 }
 
 The third comma-separated file, called \file{projection_summary_parameters.csv} contains the following columns:
-\itemize{\item{\dQuote{country_name}: }{country name}
-		 \item{\dQuote{country_code}: }{country code}
-		 \item{\dQuote{TF_time_start_decline}: }{start period of TFR decline}
-		 \item{\dQuote{TF_max}: }{TFR at the onset of the fertitility transition (median of the \eqn{U_c} parameter)}
-		 \item{\dQuote{TF_max_decrement}: }{maximum decrement of TFR decline (median of the \eqn{d_c} parameter)}
-		 \item{\dQuote{TF_end_level}: }{median of the end level of the fertility transition (\eqn{\Delta_{c4}}{Triangle_c4} parameter)}
-		 \item{\dQuote{TF_end_level_low}: }{2.5 percentile of the \eqn{\Delta_{c4}}{Triangle_c4} distribution}
-		 \item{\dQuote{TF_end_level_high}: }{97.5 percentile of the \eqn{\Delta_{c4}}{Triangle_c4} distribution}
-		 \item{\dQuote{TF_time_end_decline}: }{period of the end decline, measured using the prediction median}
+\describe{\item{country_name}{country name}
+		 \item{country_code}{country code}
+		 \item{TF_time_start_decline}{start period of TFR decline}
+		 \item{TF_max}{TFR at the onset of the fertitility transition (median of the \eqn{U_c} parameter)}
+		 \item{TF_max_decrement}{maximum decrement of TFR decline (median of the \eqn{d_c} parameter)}
+		 \item{TF_end_level}{median of the end level of the fertility transition (\eqn{\Delta_{c4}}{Triangle_c4} parameter)}
+		 \item{TF_end_level_low}{2.5 percentile of the \eqn{\Delta_{c4}}{Triangle_c4} distribution}
+		 \item{TF_end_level_high}{97.5 percentile of the \eqn{\Delta_{c4}}{Triangle_c4} distribution}
+		 \item{TF_time_end_decline}{period of the end decline, measured using the prediction median}
 	}
 Note that this file is not created if \code{adjusted=TRUE}.
 }

From ea2c4e9e36874468bc5af1fa488a2e1f7fb6ae32 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 5 Jun 2024 17:56:58 -0700
Subject: [PATCH 33/48] added mean variant to summaries

---
 ChangeLog                       |   4 +++-
 DESCRIPTION                     |   4 ++--
 R/get_outputs.R                 |   4 +++-
 R/plot_functions.R              |  13 +++++++++++++
 R/predict_tfr.R                 |  17 +++++++++++------
 data-raw/UN_variants.txt        |   1 +
 data/UN_variants.rda            | Bin 605 -> 617 bytes
 man/get.tfr.estimation.Rd       |   6 +++---
 man/write.projection.summary.Rd |   6 +++---
 9 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index e7f8e12f..683d78e4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,9 @@
-7.4-2.9001 (2024-02-19)
+7.4-2.90xx (2024-06-05)
 -----
 Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng).
 
+Added the mean variant into summary outputs.
+
 7.4-1/2 (2023-10-17)
 -----
 Removed rgdal from suggested packages.
diff --git a/DESCRIPTION b/DESCRIPTION
index 1d9becfe..ea93a2cc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.4-2.9001
-Date: 2024-02-19
+Version: 7.4-2.9002
+Date: 2024-06-05
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>
diff --git a/R/get_outputs.R b/R/get_outputs.R
index 10fd7ac6..1c998703 100644
--- a/R/get_outputs.R
+++ b/R/get_outputs.R
@@ -365,7 +365,9 @@ get.tfr.estimation <- function(mcmc.list=NULL, country=NULL, sim.dir=NULL,
   output <- list(tfr_table=tfr_table, country.obj = country.obj)
   if (!is.null(probs))
   {
-    tfr_quantile <- apply(tfr_table, 2, quantile, probs = probs)
+      if(length(probs) == 1 && probs == "mean"){
+          tfr_quantile <- apply(tfr_table, 2, mean)
+      } else tfr_quantile <- apply(tfr_table, 2, quantile, probs = probs)
     if (is.null(dim(tfr_quantile))) tfr_quantile <- matrix(tfr_quantile, nrow = 1)
     tfr_quantile <- data.table::data.table(t(tfr_quantile))
     start.year <- mcmc.list$meta$start.year
diff --git a/R/plot_functions.R b/R/plot_functions.R
index f6e74785..5ca80e6a 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -369,6 +369,19 @@ get.median.from.prediction <- function(tfr.pred, country.index, country.code=NUL
 	return(get.quantile.from.prediction(tfr.pred, quantile=0.5, country.index=country.index, 
 										country.code=country.code, adjusted=adjusted, ...))
 }
+get.mean.from.prediction <- function(tfr.pred, country.index, country.code=NULL, adjusted=TRUE,
+                                         est.uncertainty = FALSE) {
+    mean.values <- tfr.pred$traj.mean.sd[country.index, 1,]
+    if(est.uncertainty && has.est.uncertainty(tfr.pred$mcmc.set)){ # get the right value for present year
+        tfr.est <- get.tfr.estimation(mcmc.list=tfr.pred$mcmc.set, country = country.code, probs="mean", adjust = adjusted)
+        unc.last.time <- which(tfr.est$tfr_quantile$year == dimnames(tfr.pred$quantiles)[[3]][1])
+        mean.values[1] <- unlist(tfr.est$tfr_quantile[unc.last.time, 1])
+    }
+    if (!adjusted) return(mean.values)
+    shift <- get.tfr.shift(country.code, tfr.pred)
+    if(!is.null(shift)) mean.values <- mean.values + shift
+    return(mean.values)
+}
 	
 get.traj.quantiles <- function(tfr.pred, country.index, country.code, trajectories=NULL, pi=80, 
                                adjusted=TRUE, est.uncertainty = FALSE) {
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index 36881cac..51ab44c7 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -1118,12 +1118,12 @@ get.projection.summary.header.bayesTFR.prediction <- function(pred, ...)
 
 "get.UN.variant.names" <- function(pred, ...) UseMethod("get.UN.variant.names")
 get.UN.variant.names.bayesTFR.prediction <- function(pred, ...) 
-	return(c('BHM median', 'BHM80 lower',  'BHM80 upper', 'BHM95 lower',  'BHM95 upper', 'Low', 
+	return(c('BHM median', 'BHM80 lower',  'BHM80 upper', 'BHM95 lower',  'BHM95 upper', 'BHM mean', 'Low', 
 					'High', 'Constant fertility'))
 					
 "get.friendly.variant.names" <- function(pred, ...) UseMethod("get.friendly.variant.names")
 get.friendly.variant.names.bayesTFR.prediction <- function(pred, ...)
-	return(c('median', 'lower 80', 'upper 80', 'lower 95', 'upper 95', '-0.5child', '+0.5child', 'constant'))
+	return(c('median', 'lower 80', 'upper 80', 'lower 95', 'upper 95', 'mean', '-0.5child', '+0.5child', 'constant'))
 
 get.wpp.revision.number <- function(pred) {
 	wpps <- c(2008, 2010, 2012, seq(2015, by = 2, length = 3), seq(2022, by = 2, length = 10))
@@ -1173,7 +1173,7 @@ do.write.projection.summary <- function(pred, output.dir, revision=NULL, indicat
 	UN.variant.names <- get.UN.variant.names(pred)
 	friendly.variant.names <- get.friendly.variant.names(pred)
 	# the code is dependent on the following order of the variants (it's presumed):
-	# median, lower 80, upper 80, lower 95, upper 95, -1/2child, +1/2 child, constant
+	# median, lower 80, upper 80, lower 95, upper 95, mean, -1/2child, +1/2 child, constant
 	nr.var <- length(UN.variant.names)
 	result1 <- result2 <- NULL
 	
@@ -1184,7 +1184,10 @@ do.write.projection.summary <- function(pred, output.dir, revision=NULL, indicat
 		if(est.uncertainty){ # add uncertainty
 		    est.object <- get.tfr.estimation(mcmc.list = pred$mcmc.set, country = country.obj$code, 
 		                                     probs = c(0.5, 0.1, 0.9, 0.025, 0.975), adjust = adjusted)
+		    est.object.mean <- get.tfr.estimation(mcmc.list = pred$mcmc.set, country = country.obj$code, 
+		                                     probs = "mean", adjust = adjusted)
 		    this.tfr.unc <- as.matrix(est.object$tfr_quantile)[1:ltfr, -ncol(est.object$tfr_quantile)]
+		    this.tfr.unc <- cbind(this.tfr.unc, mean = unlist(est.object.mean$tfr_quantile[1:ltfr, 1]))
 		    #this.tfr <- unlist(est.object$tfr_quantile[,1])[1:ltfr]
 		}
 		this.result1 <- cbind(
@@ -1194,15 +1197,18 @@ do.write.projection.summary <- function(pred, output.dir, revision=NULL, indicat
 		# prediction
 		median <- get.median.from.prediction(pred, country.obj$index, country.obj$code, adjusted=adjusted, 
 		                                     est.uncertainty = est.uncertainty)
+		mean <- get.mean.from.prediction(pred, country.obj$index, country.obj$code, adjusted=adjusted, 
+		                                     est.uncertainty = est.uncertainty)
 		proj.result <- rbind(median, 
 							   get.traj.quantiles(pred, country.obj$index, country.obj$code, pi=80, adjusted=adjusted, 
 							                      est.uncertainty = est.uncertainty),
 							   get.traj.quantiles(pred, country.obj$index, country.obj$code, pi=95, adjusted=adjusted, 
-							                      est.uncertainty = est.uncertainty))
+							                      est.uncertainty = est.uncertainty),
+							   mean)
 		if(any(friendly.variant.names == '-0.5child'))
 			proj.result <- rbind(proj.result,
 					   get.half.child.variant(median))
-		#browser()
+
 		proj.result <- round(rbind(proj.result,
 							   		rep(median[1], nr.proj)), precision) # constant variant
 		if(est.uncertainty){ # user-friendly output contains observed years as well
@@ -1219,7 +1225,6 @@ do.write.projection.summary <- function(pred, output.dir, revision=NULL, indicat
 		result1 <- rbind(result1, this.result1)
 		for(ivar in 1:nr.var) {
 		    this.obs.tfr <- if(est.uncertainty) obs.tfr[ivar,] else this.tfr
-		    #browser()
 			result2 <- rbind(result2, cbind(revision=rep(revision, nr.proj.all), 
 								   variant=rep(e$UN_variants[e$UN_variants$Vshort==UN.variant.names[ivar],'VarID'], nr.proj.all),
 								   country=rep(country.obj$code, nr.proj.all),
diff --git a/data-raw/UN_variants.txt b/data-raw/UN_variants.txt
index 26602326..7bc15975 100644
--- a/data-raw/UN_variants.txt
+++ b/data-raw/UN_variants.txt
@@ -22,3 +22,4 @@
 13	204	"BHM80 lower"	"BHM80 lower"	"Standard"
 13	206	"BHM95 upper"	"BHM95 upper"	"Standard"
 13	207	"BHM95 lower"	"BHM95 lower"	"Standard"
+13	208	"BHM mean"	"BHM mean"	"Standard"
diff --git a/data/UN_variants.rda b/data/UN_variants.rda
index af9da415454005d2a34074ad9e4a96351b3a3d51..3399da64d2c587104ed9f2b52d80b5ac19f50648 100644
GIT binary patch
literal 617
zcmV-v0+#(kT4*^jL0KkKS#k$K2LJ+I|HuFT+U!DSQeZu28&I$B-=IJM03E;p06+i%
zzyeh`)_{hRMvVY8XaE2J01XCeXaH&I1d^xfnI5CjGMWLP000^q0004@pyNhNj3W>X
zCW8Q)4Fe`k88R3^VI)Y?Q%0Eto@pl3Xgxq}K*;oD)Bwo!4KgtuQqhg4mFtX%TdwG`
z!4?Ua6zl+a&oW4$$|jMCS3#D{<bX^+B!MlrcK!$R>)*!tBkb@(F@id*;DQinvR5gs
z+^M;Lq6K2Z%0RwF6m!&9M+vZ6QcYq~qzH@?yp=1G%FluR<YCIO8j7!k8!QMm5DA$8
zG6W}NIC-7eAju?!_+_z>B|89BRaCydgIK`GVzJcW0t;!h`-JLh7{^OuyCUHUvESH?
zq^C0q^9@xMD^Jzmh-CQ&AUb!$mzSFU!?4QXK)3+xun*7?Rh31c3MxP<iMRkL<A`W5
z9f={)#YvB(6%ZHOv_Gr!x&=A?(vi4FKD}V193m{nM3sj0r$Q5ZR@Ke6ejE;FNp$8s
zi3CWC!0FOr7le2dw2)nzL<ZvzdNl=M)MRoZD<mU%!C~l0m|~HRF%+aZBT?22e>#@n
ziR<UFc{ale48mkGd;+X6Lm`12LaBDb*%P7&V9Y86UN?x*Qf9C(b_m%yFj_e1VmVxC
z(TjeB5h9RIjVEB@)HK9K5fpdB5@SRWr5$Dwmkx}?LPY^`(8^^C=0Ic?S>4V9h_qoT
z)|jV9akIc8i%%4)L0cjtC2(n`Gzgg|2qJ7i5DP4@K_Vi%o-gJYZ}E2|Q-ui!AaoFL
Dbxj0o

literal 605
zcmV-j0;2swT4*^jL0KkKS(EU8tN;Q_f5rd*+U!DSQeZu28&I$B-=IJM03CoJ00F=P
zRXWW;VI?4^=|gE5sisDThJXMuG-7N<qtpY`^#VyxRKXsJsK^Zf0005%Gynh^4jO5K
z84WbZ$%F$<0AVy~lK==B1xX}nsF-L2NC0RCfEqN=0h2>ai>|Pm5VrC>!y<c=IxR3k
zf`&m$03I~ZBvi;Baf-fyH4JzM%8A6RWb?}&Z?C_~Ca3poL`*54;m`ycES2h|?p0p;
z>710Z@)MSV-El4f!Wb!0!g&a@2n-awaZE`gJ_q}ahb%y9tUefQG62^SLoy1I64e$v
z_O%yML{2)K6ePz%qeT{Cugt7qWHD@sMh-f@Up$D_iCDX3u3DI!ceSeGMWP4IkyK#F
zTMF1dbmI#$1VB%0FyZm5idf`n<OH%tOX?+d)y(26xD~0l)C$3v&Se@|97jfxuf&X(
zC)Z_fC+Cvpe@}}H9EjoI;3fwgGV8P#(x<JzqYg&#+=!s=bliWun0W)pzz(NUVFm;j
zQ;cde@nE@LbKxGdNabwVd^lJ`V#L+_NmS2S=tg{tFmJhve>rC%d(Fd^cfI#&!*MDq
zTTwL4OSYPk;!PRSGhoLTMH`8c8V!Z{G-Xy(L!BP%Y)s277wz1vajntG3)i8JH=OGx
z`mrxkFOJErDZ6xUM>ADZue-Rs!H~|(ZMMxO(NN2mt<)BVWhBQpua=IjbuKTA&gL30
r)2O{tXDb#a>@i7V=*6rV$3mw*bc6-No{#1jZ}E2|Q-ui$KL`52Pkj!`

diff --git a/man/get.tfr.estimation.Rd b/man/get.tfr.estimation.Rd
index cad29870..760c5aa9 100644
--- a/man/get.tfr.estimation.Rd
+++ b/man/get.tfr.estimation.Rd
@@ -17,17 +17,17 @@ get.tfr.estimation(mcmc.list = NULL, country = NULL,
   \item{sim.dir}{Directory with the MCMC simulation results. Only used if \code{mcmc.list} is \code{NULL}.}
   \item{burnin}{Burn-in for getting trajectories and quantiles. A positive burn-in \eqn{x} will remove first \eqn{x} iterations from each chain.}
   \item{thin}{Thin for getting trajectories and quantiles. Thinning level \eqn{x} greater than 1 will store one iteration per \eqn{x} samples.}
-  \item{probs}{A vector of numbers between \eqn{[0,1]} specifying which estimation quantiles should be outputted. If it is set to \code{NULL} no quantiles are returned.}
+  \item{probs}{A vector of numbers between \eqn{[0,1]} specifying which estimation quantiles should be outputted. If it is set to \code{NULL} no quantiles are returned. It can be set to the word \dQuote{mean}, in which case the estimation mean is outputted.}
   \item{adjust}{Logical indicating whether the adjusted median and trajectories should be returned.}
   \item{country.code, ISO.code}{Deprecated arguments. Use argument \code{country} instead.}
 }
 \details{
-This function is used to obtain the TFR estimation trajectories as well as corresponding quantiles if the \code{mcmc.list} has been obtained while taking account for uncertainty about the past, i.e. \code{uncertainty=TRUE} in \code{\link{run.tfr.mcmc}}. Quantiles are included in the results if \code{probs} is not \code{NULL}.
+This function is used to obtain the TFR estimation trajectories as well as corresponding quantiles if the \code{mcmc.list} has been obtained while taking account for uncertainty about the past, i.e. \code{uncertainty=TRUE} in \code{\link{run.tfr.mcmc}}. Quantiles or the mean are included in the results if \code{probs} is not \code{NULL}.
 }
 \value{
   \item{tfr_table}{Table storing the trajectories. It is a matrix with rows equal to number of trajectories, and column equal to number of time periods.}
   \item{country.obj}{A list storing information about the country which the trajectories and quantiles correspond to. It corresponds to the output of \code{\link{get.country.object}}.}
-  \item{tfr_quantile}{Optional. A data.table object, storing the quantiles of estimates for each time period as specified by the \code{probs} argument. The time periods are contained in the \code{year} column.}
+  \item{tfr_quantile}{Optional. A data.table object, storing the quantiles or the mean of estimates for each time period as specified by the \code{probs} argument. The time periods are contained in the \code{year} column.}
 }
 \author{
 Peiran Liu, Hana Sevcikova
diff --git a/man/write.projection.summary.Rd b/man/write.projection.summary.Rd
index 8580981e..4900bb17 100644
--- a/man/write.projection.summary.Rd
+++ b/man/write.projection.summary.Rd
@@ -1,12 +1,12 @@
 \name{write.projection.summary}
-\Rdversion{1.1}
+%\Rdversion{1.1}
 \alias{write.projection.summary}
 %- Also NEED an '\alias' for EACH other topic documented here.
 \title{
 Writing Projection Summary Files
 }
 \description{
-The function creates two files containing projection summaries, such as the median, the lower and upper bound of the 80 and 90\% probability intervals, respectively, the +/- 0.5 child variant and the constant variant. One file is in a user-friendly format, whereas the other is in a UN-specific format with internal coding of the time and the variants. In addition, a file containing  some of the model parameters is created.
+The function creates two files containing projection summaries, such as the median, mean, the lower and upper bound of the 80 and 90\% probability intervals, respectively, the +/- 0.5 child variant and the constant variant. One file is in a user-friendly format, whereas the other is in a UN-specific format with internal coding of the time and the variants. In addition, a file containing  some of the model parameters is created.
 }
 \usage{
 write.projection.summary(dir = file.path(getwd(), "bayesTFR.output"), 
@@ -24,7 +24,7 @@ write.projection.summary(dir = file.path(getwd(), "bayesTFR.output"),
 The first file that the function creates is called \file{projection_summary_user_friendly.csv} (or \file{projection_summary_user_friendly_adjusted.csv} if \code{adjusted=TRUE}), it is a comma-separated table with the following columns: 
 \describe{\item{country_name}{country name}
 		 \item{country_code}{country code}
-		 \item{variant}{name of the variant, such as \dQuote{median}, \dQuote{lower 80}, \dQuote{upper 80}, \dQuote{lower 95}, \dQuote{upper 95}, \dQuote{-0.5child}, \dQuote{+0.5child}, \dQuote{constant}}
+		 \item{variant}{name of the variant, such as \dQuote{median}, \dQuote{lower 80}, \dQuote{upper 80}, \dQuote{lower 95}, \dQuote{upper 95}, \dQuote{mean}, \dQuote{-0.5child}, \dQuote{+0.5child}, \dQuote{constant}}
 		\item{\emph{period1:}}{e.g. \dQuote{2005-2010}: TFR for the first time period. If \code{est.uncertainty} is \code{TRUE}, the first time period is the first observed time period. Otherwise it is the last observed one.}
 		\item{\emph{period2:}}{e.g. \dQuote{2010-2015}: TFR for the second time period}
 		\item{\dots}{further columns with TFR projections}

From a93ad81e71944ecc829b9abe5faa4f62d16cd3e4 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 5 Jun 2024 20:30:01 -0700
Subject: [PATCH 34/48] arguments traj.index and show.median added to
 tfr.trajectories.plot

---
 ChangeLog                |  2 ++
 DESCRIPTION              |  2 +-
 R/plot_functions.R       | 17 ++++++++++++++++-
 man/plot-trajectories.Rd |  3 +++
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 683d78e4..c1df6ce1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,8 @@ Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shun
 
 Added the mean variant into summary outputs.
 
+tfr.trajectories.plot() got an argument to plot the means, as well as selected trajectories.
+
 7.4-1/2 (2023-10-17)
 -----
 Removed rgdal from suggested packages.
diff --git a/DESCRIPTION b/DESCRIPTION
index ea93a2cc..4a426e70 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: bayesTFR
-Version: 7.4-2.9002
+Version: 7.4-2.9003
 Date: 2024-06-05
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
diff --git a/R/plot_functions.R b/R/plot_functions.R
index 5ca80e6a..43fe2ee5 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -564,7 +564,8 @@ tfr.estimation.plot <- function(mcmc.list = NULL, country = NULL, sim.dir = NULL
 tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95), 
                                   half.child.variant=TRUE, nr.traj=NULL,
                                   adjusted.only = TRUE, typical.trajectory=FALSE,
-                                  mark.estimation.points=FALSE,
+                                  mark.estimation.points=FALSE, 
+                                  traj.index = NULL, show.mean = FALSE,
                                   xlim=NULL, ylim=NULL, type='b', 
                                   xlab='Year', ylab='TFR', main=NULL, lwd=c(2,2,2,2,2,1), 
                                   col=c('black', 'green', 'red', 'red', 'blue', '#00000020'),
@@ -608,7 +609,11 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
   }
   x1 <- as.integer(c(names(y1.part1), names(y1.part2)))
   x2 <- as.numeric(dimnames(tfr.pred$quantiles)[[3]])
+
+  if(!is.null(traj.index)) nr.traj <- length(traj.index)
   trajectories <- get.trajectories(tfr.pred, country$code, nr.traj, typical.trajectory=typical.trajectory)
+  if(!is.null(traj.index) && !is.null(trajectories$trajectories)) trajectories$index <- traj.index
+  
   # plot historical data: observed
   if (!add) {
     if(is.null(xlim)) xlim <- c(min(x1,x2), max(x1,x2))
@@ -681,6 +686,7 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
         }
     }
   }
+
   if (uncertainty)
   {
     col_median <- length(pi)+1
@@ -713,6 +719,15 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
   legend <- c(legend, median.legend, if(length(pi) > 0) paste0(pi, '% PI') else c())
   cols <- c(cols, col[3], rep(col[4], length(pi)))
   lwds <- c(lwds, lwd[3], rep(lwd[4], length(pi)))
+  if(show.mean){
+      # plot mean
+      tfr.mean <- get.mean.from.prediction(tfr.pred, country$index, country$code)
+      lines(x2, tfr.mean, type='l', col=col[3], lwd=1, lty=max(lty)+1)
+      legend <- c(legend, 'mean')
+      cols <- c(cols, col[3])
+      lwds <- c(lwds, 1)
+      lty <- c(max(lty)+1, lty)
+  }
   if (half.child.variant) {
     lty <- c(lty, max(lty)+1)
     llty <- length(lty)
diff --git a/man/plot-trajectories.Rd b/man/plot-trajectories.Rd
index 6ce45c3b..e3732ade 100644
--- a/man/plot-trajectories.Rd
+++ b/man/plot-trajectories.Rd
@@ -14,6 +14,7 @@ tfr.trajectories.plot(tfr.pred, country, pi = c(80, 95),
     half.child.variant = TRUE, nr.traj = NULL, 
     adjusted.only = TRUE, typical.trajectory = FALSE, 
     mark.estimation.points = FALSE,
+    traj.index = NULL, show.mean = FALSE,
     xlim = NULL, ylim = NULL, type = 'b', xlab = 'Year', ylab = 'TFR', 
     main = NULL, lwd = c(2, 2, 2, 2, 2, 1), 
     col=c('black', 'green', 'red', 'red', 'blue', '#00000020'),
@@ -37,6 +38,8 @@ tfr.trajectories.table(tfr.pred, country, pi = c(80, 95),
   \item{adjusted.only}{Logical. By default, if the projection or estimation median is adjusted using e.g. \code{\link{tfr.median.set}} or \code{\link{tfr.median.set.all}}, the function plots the adjusted median. If \code{adjusted.only=FALSE} the original (non-adjusted) median is plotted as well.}
   \item{typical.trajectory}{Logical. If \code{TRUE} one trajectory is shown that has the smallest distance to the median.}
   \item{mark.estimation.points}{Logical. If \code{TRUE}, points that were not used in the estimation (phase I) are shown in lighter color than points in phase II and III.}
+  \item{traj.index}{Vector of trajectory indices to show. If not given, the trajectories are selected using equidistant spacing.}
+  \item{show.mean}{Logical indicating if the mean of the distribution should be shown.}
   \item{xlim, ylim, type, xlab, ylab}{Graphical parameters passed to the \code{plot} function.}
     \item{lwd, col}{Vector of six elements giving the line width and color for: 1. observed data, 2. imputed missing data, 3. median, 4. quantiles, 5. half-child variant, 6. trajectories.}
   \item{show.legend}{Logical controlling whether the legend should be drawn.}

From bfc2b75d8a82f4cc1e40bc77363c5230d2dfed07 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 5 Jun 2024 20:59:49 -0700
Subject: [PATCH 35/48] export mean function

---
 NAMESPACE | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NAMESPACE b/NAMESPACE
index c1717b9a..4977dc3c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -102,6 +102,7 @@ export(
 	tfr.country.dlcurves,
 	tfr.dl.coverage,
 	get.median.from.prediction,
+	get.mean.from.prediction,
 	bdem.parameter.traces,
 	get.traj.ascii.header,
 	get.nr.countries,

From ebc1bc8d60fd7cef2e185881aef26834dff58c6c Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 5 Jun 2024 21:04:26 -0700
Subject: [PATCH 36/48] minor edits

---
 ChangeLog | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index c1df6ce1..ed16bae6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,7 +4,8 @@ Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shun
 
 Added the mean variant into summary outputs.
 
-tfr.trajectories.plot() got an argument to plot the means, as well as selected trajectories.
+tfr.trajectories.plot() got arguments to plot the means (show.mean), 
+as well as selected trajectories (traj.index).
 
 7.4-1/2 (2023-10-17)
 -----

From 2cf3d4bdeeff36f049dbdfd1b895d4024a17a967 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 5 Jun 2024 21:07:43 -0700
Subject: [PATCH 37/48] internal function

---
 man/bayesTFR-internal.Rd | 1 +
 1 file changed, 1 insertion(+)

diff --git a/man/bayesTFR-internal.Rd b/man/bayesTFR-internal.Rd
index 92fe91ad..362cbd42 100644
--- a/man/bayesTFR-internal.Rd
+++ b/man/bayesTFR-internal.Rd
@@ -43,6 +43,7 @@
 \alias{get.mcmc.list.list}
 \alias{get.mcmc.meta}
 \alias{get.median.from.prediction}
+\alias{get.mean.from.prediction}
 \alias{get.meta.only}
 \alias{get.newTFRcolnames}
 \alias{get.nr.countries}

From 67a883009ef30e753e9586a53254c10fac10f1d4 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 20 Jun 2024 17:49:49 -0700
Subject: [PATCH 38/48] argument show.median added

---
 ChangeLog                |   4 +-
 R/plot_functions.R       | 102 +++++++++++++++++++++++++++------------
 man/plot-trajectories.Rd |   4 +-
 3 files changed, 74 insertions(+), 36 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index ed16bae6..7ccef417 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,10 +1,10 @@
-7.4-2.90xx (2024-06-05)
+7.4-2.90xx (2024-06-20)
 -----
 Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng).
 
 Added the mean variant into summary outputs.
 
-tfr.trajectories.plot() got arguments to plot the means (show.mean), 
+tfr.trajectories.plot() got arguments to plot the means (show.mean), medians (show.median),
 as well as selected trajectories (traj.index).
 
 7.4-1/2 (2023-10-17)
diff --git a/R/plot_functions.R b/R/plot_functions.R
index 43fe2ee5..a155f7a1 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -565,7 +565,7 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
                                   half.child.variant=TRUE, nr.traj=NULL,
                                   adjusted.only = TRUE, typical.trajectory=FALSE,
                                   mark.estimation.points=FALSE, 
-                                  traj.index = NULL, show.mean = FALSE,
+                                  traj.index = NULL, show.mean = FALSE, show.median = TRUE,
                                   xlim=NULL, ylim=NULL, type='b', 
                                   xlab='Year', ylab='TFR', main=NULL, lwd=c(2,2,2,2,2,1), 
                                   col=c('black', 'green', 'red', 'red', 'blue', '#00000020'),
@@ -666,79 +666,117 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
       lines(x2, trajectories$trajectories[,trajectories$index[i]], type='l', col=col[6], lwd=lwd[6])
     }
   }
-  # plot median
-  tfr.median <- get.median.from.prediction(tfr.pred, country$index, country$code)
-  if(uncertainty) {
+  if(uncertainty) 
       unc.last.time <- which(tfr.object$tfr_quantile$year == x2[1])
-      tfr.median[1] <- unlist(tfr.object$tfr_quantile[unc.last.time,])[length(pi)+1]
+  
+  # extract median & mean
+  tfr.median <- tfr.mean <- tfr.main.proj <- NULL
+  if(show.median)
+    tfr.median <- get.median.from.prediction(tfr.pred, country$index, country$code)
+  if(show.mean)
+    tfr.mean <- get.mean.from.prediction(tfr.pred, country$index, country$code)      
+  
+  # set the main projection (solid line)
+  main.proj.name <- ""
+  if(!is.null(tfr.median)){
+      tfr.main.proj <- tfr.median
+      main.proj.name <- "median"
+  } else {
+      if(!is.null(tfr.mean)){
+        tfr.main.proj <- tfr.mean
+        main.proj.name <- "mean"
+      }
   }
-  lines(x2, tfr.median, type='l', col=col[3], lwd=lwd[3])
-  lty <- 1
+  lty <- c()
+  if(!is.null(tfr.main.proj)){
+    if(uncertainty) # replace last observed with estimated median
+        tfr.main.proj[1] <- unlist(tfr.object$tfr_quantile[unc.last.time,])[length(pi)+1]
+    # draw main projection
+    lines(x2, tfr.main.proj, type='l', col=col[3], lwd=lwd[3])
+    lty <- 1
+  }
+  
   # plot given CIs
   if(length(pi) > 0){
-    lty <- c(lty, 2:(length(pi)+1))
+      pi.lty <- 2:(length(pi)+1)
+    lty <- c(lty, pi.lty)
     for (i in 1:length(pi)) {
         cqp <- get.traj.quantiles(tfr.pred, country$index, country$code, trajectories$trajectories, pi[i], 
                               est.uncertainty = uncertainty)
         if (!is.null(cqp)) {
-            lines(x2, cqp[1,], type='l', col=col[4], lty=lty[i+1], lwd=lwd[4])
-            lines(x2, cqp[2,], type='l', col=col[4], lty=lty[i+1], lwd=lwd[4])
+            lines(x2, cqp[1,], type='l', col=col[4], lty=pi.lty[i], lwd=lwd[4])
+            lines(x2, cqp[2,], type='l', col=col[4], lty=pi.lty[i], lwd=lwd[4])
         }
     }
   }
-
+  max.lty <- if(length(lty) == 0) 1 else max(lty)
+  
   if (uncertainty)
   {
     col_median <- length(pi)+1
     lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, col_median], type='l', col=col_unc, lwd=lwd[3]) 
     if(!adjusted.only) { # plot unadjusted estimation median
-        unadj.lty <- max(lty)+1
+        unadj.lty <- max.lty+1
         tfr.object.unadj <- get.tfr.estimation(mcmc.list=tfr.pred$mcmc.set, country = country.obj$code, 
                                          probs=0.5, adjust = FALSE)
-        lines(tfr.object.unadj$tfr_quantile$year, as.data.frame(tfr.object.unadj$tfr_quantile)$V1, type='l', col=col_unc, lwd=lwd[3], lty = max(lty)+1) 
+        lines(tfr.object.unadj$tfr_quantile$year, as.data.frame(tfr.object.unadj$tfr_quantile)$V1, type='l', col=col_unc, lwd=lwd[3], lty = unadj.lty) 
     }
     if(length(pi) > 0) {
         for (i in 1:length(pi)) {
-            lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, length(pi)+1-i], type='l', col=col_unc, lty=lty[i+1], lwd=lwd[4])
-            lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, length(pi)+1+i], type='l', col=col_unc, lty=lty[i+1], lwd=lwd[4])
+            lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, length(pi)+1-i], type='l', col=col_unc, lty=pi.lty[i], lwd=lwd[4])
+            lines(tfr.object$tfr_quantile$year, as.data.frame(tfr.object$tfr_quantile)[, length(pi)+1+i], type='l', col=col_unc, lty=pi.lty[i], lwd=lwd[4])
         }
     }
   }
   legend <- c()
   cols <- c()
   lwds <- c()
-  if(!adjusted.only) { # plot unadjusted median
-    bhm.median <- get.median.from.prediction(tfr.pred, country$index, country$code, adjusted=FALSE)
-    lines(x2, bhm.median, type='l', col=col[3], lwd=lwd[3], lty=max(lty)+1)
-    legend <- c(legend, 'BHM median')
+  if(!adjusted.only) { # plot unadjusted median & mean
+      if(main.proj.name == "mean"){
+          bhm.main <- get.mean.from.prediction(tfr.pred, country$index, country$code, adjusted=FALSE)
+          bhm.main.name <- 'BHM mean'
+      } else {
+          bhm.main <- get.median.from.prediction(tfr.pred, country$index, country$code, adjusted=FALSE)
+          bhm.main.name <- 'BHM median'
+      }
+    lines(x2, bhm.main, type='l', col=col[3], lwd=lwd[3], lty=max.lty+1)
+    legend <- c(legend, bhm.main.name)
+    cols <- c(cols, col[3])
+    lwds <- c(lwds, lwd[3])
+    lty <- c(max.lty+1, lty)
+    max.lty <- max(lty)
+  }
+  main.legend <- ""
+  if(main.proj.name != ""){
+    main.legend <- if(adjusted.only) main.proj.name else paste('adj.', main.proj.name)
+    legend <- c(legend, main.legend)
     cols <- c(cols, col[3])
     lwds <- c(lwds, lwd[3])
-    lty <- c(max(lty)+1, lty)
   }
-  median.legend <- if(adjusted.only) 'median' else 'adj. median'
-  legend <- c(legend, median.legend, if(length(pi) > 0) paste0(pi, '% PI') else c())
-  cols <- c(cols, col[3], rep(col[4], length(pi)))
-  lwds <- c(lwds, lwd[3], rep(lwd[4], length(pi)))
-  if(show.mean){
-      # plot mean
-      tfr.mean <- get.mean.from.prediction(tfr.pred, country$index, country$code)
+  legend <- c(legend, if(length(pi) > 0) paste0(pi, '% PI') else c())
+  cols <- c(cols, rep(col[4], length(pi)))
+  lwds <- c(lwds, rep(lwd[4], length(pi)))
+  if(show.median && show.mean){
+      # plot mean in addition to median
       lines(x2, tfr.mean, type='l', col=col[3], lwd=1, lty=max(lty)+1)
       legend <- c(legend, 'mean')
       cols <- c(cols, col[3])
       lwds <- c(lwds, 1)
-      lty <- c(max(lty)+1, lty)
+      lty <- c(lty, max.lty+1)
+      max.lty <- max(lty)
   }
-  if (half.child.variant) {
-    lty <- c(lty, max(lty)+1)
+  if (half.child.variant && !is.null(tfr.main.proj)) {
+    lty <- c(lty, max.lty+1)
+    max.lty <- max(lty)
     llty <- length(lty)
-    up.low <- get.half.child.variant(median=tfr.median)
+    up.low <- get.half.child.variant(median=tfr.main.proj)
     if(uncertainty) {
         up.low <- up.low[,-1]
         x2t <- x2[-1]
     } else x2t <- x2
     lines(x2t, up.low[1,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
     lines(x2t, up.low[2,], type='l', col=col[5], lty=lty[llty], lwd=lwd[5])
-    legend <- c(legend, '+/- 0.5 child')
+    legend <- c(legend, paste(main.proj.name, '+/- 0.5 child'))
     cols <- c(cols, col[5])
     lwds <- c(lwds, lwd[5])
   }
diff --git a/man/plot-trajectories.Rd b/man/plot-trajectories.Rd
index e3732ade..ff14bf1e 100644
--- a/man/plot-trajectories.Rd
+++ b/man/plot-trajectories.Rd
@@ -14,7 +14,7 @@ tfr.trajectories.plot(tfr.pred, country, pi = c(80, 95),
     half.child.variant = TRUE, nr.traj = NULL, 
     adjusted.only = TRUE, typical.trajectory = FALSE, 
     mark.estimation.points = FALSE,
-    traj.index = NULL, show.mean = FALSE,
+    traj.index = NULL, show.mean = FALSE, show.median = TRUE,
     xlim = NULL, ylim = NULL, type = 'b', xlab = 'Year', ylab = 'TFR', 
     main = NULL, lwd = c(2, 2, 2, 2, 2, 1), 
     col=c('black', 'green', 'red', 'red', 'blue', '#00000020'),
@@ -39,7 +39,7 @@ tfr.trajectories.table(tfr.pred, country, pi = c(80, 95),
   \item{typical.trajectory}{Logical. If \code{TRUE} one trajectory is shown that has the smallest distance to the median.}
   \item{mark.estimation.points}{Logical. If \code{TRUE}, points that were not used in the estimation (phase I) are shown in lighter color than points in phase II and III.}
   \item{traj.index}{Vector of trajectory indices to show. If not given, the trajectories are selected using equidistant spacing.}
-  \item{show.mean}{Logical indicating if the mean of the distribution should be shown.}
+  \item{show.mean, show.median}{Logical indicating if the mean or/and the median of the distribution should be shown.}
   \item{xlim, ylim, type, xlab, ylab}{Graphical parameters passed to the \code{plot} function.}
     \item{lwd, col}{Vector of six elements giving the line width and color for: 1. observed data, 2. imputed missing data, 3. median, 4. quantiles, 5. half-child variant, 6. trajectories.}
   \item{show.legend}{Logical controlling whether the legend should be drawn.}

From 6898f9993662e424eda7b9f6c3caaca75f843070 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 11 Sep 2024 14:31:52 -0700
Subject: [PATCH 39/48] added include_2024

---
 ChangeLog           | 4 +++-
 DESCRIPTION         | 4 ++--
 R/plot_functions.R  | 1 -
 data/include_2024.R | 6 ++++++
 4 files changed, 11 insertions(+), 4 deletions(-)
 create mode 100644 data/include_2024.R

diff --git a/ChangeLog b/ChangeLog
index 7ccef417..2e06dfd9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,7 @@
-7.4-2.90xx (2024-06-20)
+7.4-2.90xx (2024-09-11)
 -----
+Added dataset include_2024.
+
 Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng).
 
 Added the mean variant into summary outputs.
diff --git a/DESCRIPTION b/DESCRIPTION
index 4a426e70..d77a2111 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.4-2.9003
-Date: 2024-06-05
+Version: 7.4-2.9004
+Date: 2024-09-11
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>
diff --git a/R/plot_functions.R b/R/plot_functions.R
index a155f7a1..3cc32148 100644
--- a/R/plot_functions.R
+++ b/R/plot_functions.R
@@ -746,7 +746,6 @@ tfr.trajectories.plot <- function(tfr.pred, country, pi=c(80, 95),
     lty <- c(max.lty+1, lty)
     max.lty <- max(lty)
   }
-  main.legend <- ""
   if(main.proj.name != ""){
     main.legend <- if(adjusted.only) main.proj.name else paste('adj.', main.proj.name)
     legend <- c(legend, main.legend)
diff --git a/data/include_2024.R b/data/include_2024.R
new file mode 100644
index 00000000..a6af5cc8
--- /dev/null
+++ b/data/include_2024.R
@@ -0,0 +1,6 @@
+include_2024 <- local({
+    e <- new.env()
+    load("include_2022.rda", envir= e)
+    e$include_2022
+})
+

From f1ab941aa518b1e42479841bc1d1df6265b64da9 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Wed, 25 Sep 2024 16:24:08 -0700
Subject: [PATCH 40/48] fix setting lambda if last observed < T

---
 DESCRIPTION  | 4 ++--
 R/mcmc_ini.R | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d77a2111..49d5353d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.4-2.9004
-Date: 2024-09-11
+Version: 7.4-2.9005
+Date: 2024-09-25
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>
diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index f06719bf..d1ae8d34 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -95,8 +95,8 @@ find.lambda.for.one.country <- function(tfr, T_end, annual = FALSE) {
 	if(annual)  # convert lambda from 5-year scale to annual scale
 	{
 	  lambda <- min(which(year.bin == lambda) + 2, Tendorig)
-	  if (length(year.bin) - lambda < 5) { # if in the last time period, set it to the end of the period
-	      lambda <- length(year.bin) 
+	  if (Tendorig - lambda < 5) { # if in the last (observed) time period, set it to the end of the period
+	      lambda <- Tendorig
 	      while(is.na(tfrorig[lambda])) lambda <- lambda - 1 # move it before the last NA if any
 	  }
 	}

From ff61dd613357185937f676f43200bb641bfdb8dc Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 26 Sep 2024 10:35:45 -0700
Subject: [PATCH 41/48] removed unnecessary while loop

---
 DESCRIPTION  | 4 ++--
 R/mcmc_ini.R | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 49d5353d..b94dacd0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.4-2.9005
-Date: 2024-09-25
+Version: 7.4-2.9006
+Date: 2024-09-26
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>
diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R
index d1ae8d34..e4ec7a00 100755
--- a/R/mcmc_ini.R
+++ b/R/mcmc_ini.R
@@ -97,7 +97,7 @@ find.lambda.for.one.country <- function(tfr, T_end, annual = FALSE) {
 	  lambda <- min(which(year.bin == lambda) + 2, Tendorig)
 	  if (Tendorig - lambda < 5) { # if in the last (observed) time period, set it to the end of the period
 	      lambda <- Tendorig
-	      while(is.na(tfrorig[lambda])) lambda <- lambda - 1 # move it before the last NA if any
+	      #while(is.na(tfrorig[lambda])) lambda <- lambda - 1 # move it before the last NA if any - not needed anymore
 	  }
 	}
 

From 867d2e6843d5c2591302a5d27eb0df4551b54737 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 26 Sep 2024 14:59:10 -0700
Subject: [PATCH 42/48] adjust mean

---
 R/tfr_median_set_new.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/tfr_median_set_new.R b/R/tfr_median_set_new.R
index 755922ed..f10f8e43 100644
--- a/R/tfr_median_set_new.R
+++ b/R/tfr_median_set_new.R
@@ -167,7 +167,7 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL)
   invisible(output)
 }
 
-.do.shift.prediction.to.wpp <- function(pred, wpp.dataset, wpp.year = NULL, verbose = TRUE){
+.do.shift.prediction.to.wpp <- function(pred, wpp.dataset, stat = "median", wpp.year = NULL, verbose = TRUE){
   country_code <- NULL # for CRAN check not to complain
   meta <- pred$mcmc.set$meta
   wpp.year <- if(!is.null(wpp.year)) wpp.year else meta$wpp.year
@@ -192,8 +192,10 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL)
       }
     }
     cntry <- countries[icntry]
+    if(!stat %in% c("median", "mean")) stop("Argument 'stat' must be 'median' or 'mean', but is ", stat, ".")
     to.match <- merge(data.table::data.table(year = pred.years, median = pred$quantiles[icntry, "0.5", ]), 
                           wppdatal[country_code == cntry], by = "year", all.x = TRUE)
+    if(stat == "mean") to.match[["median"]] <- to.match[["median"]] + pred$traj.mean.sd[icntry, 1, ] - to.match[["median"]] # difference between the mean and median
     to.match$wpp[is.na(to.match$wpp)] <- to.match$median[is.na(to.match$wpp)] # no shift for years that don't match
     to.match$shift <- to.match$wpp - to.match$median
     if(sum(to.match$shift) != 0)

From 882c7549300909b86e639cd304b761eb07d2a0c2 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 26 Sep 2024 16:26:27 -0700
Subject: [PATCH 43/48] version updated

---
 ChangeLog   | 4 +++-
 DESCRIPTION | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 2e06dfd9..b409f744 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,7 @@
-7.4-2.90xx (2024-09-11)
+7.4-2.90xx (2024-09-26)
 -----
+Added option of shifting projection to wpp by matching means. 
+
 Added dataset include_2024.
 
 Fixed bug related to a combination of ar.phase2 and missing data (thanks to Shunqi Cheng).
diff --git a/DESCRIPTION b/DESCRIPTION
index b94dacd0..7ee45681 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: bayesTFR
-Version: 7.4-2.9006
+Version: 7.4-2.9007
 Date: 2024-09-26
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)

From 63c877b5cd7040819b57f1b0674fea084888a248 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 27 Sep 2024 12:07:31 -0700
Subject: [PATCH 44/48] allow to have multiple predictions in one sim.dir

---
 NAMESPACE              |  1 +
 R/get_outputs.R        | 23 ++++++++++++++----
 R/predict_tfr.R        | 53 ++++++++++++++++++++++--------------------
 R/predict_tfr_subnat.R |  4 ++--
 R/tfr_median_set_new.R | 51 ++++++++++++++++++++++------------------
 5 files changed, 77 insertions(+), 55 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 4977dc3c..ce920a48 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -49,6 +49,7 @@ export(
 	get.thinned.tfr.mcmc,
 	get.tfr.prediction,
 	has.tfr.prediction,
+	available.tfr.predictions,
 	get.tfr.convergence.all,
 	get.tfr.convergence,
 	get.tfr3.convergence.all,
diff --git a/R/get_outputs.R b/R/get_outputs.R
index 1c998703..34306f13 100644
--- a/R/get_outputs.R
+++ b/R/get_outputs.R
@@ -222,14 +222,23 @@ create.thinned.tfr.mcmc <- function(mcmc.set, thin=1, burnin=0, output.dir=NULL,
 	return(thin.mcmc.set)
 }
 
-has.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL) {
+has.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL, subdir = "predictions") {
 	if (!is.null(mcmc)) sim.dir <- if(is.character(mcmc)) mcmc else mcmc$meta$output.dir
 	if (is.null(sim.dir)) stop('Either mcmc or directory must be given.')
-	if(file.exists(file.path(sim.dir, 'predictions', 'prediction.rda'))) return(TRUE)
+	if(file.exists(file.path(sim.dir, subdir, 'prediction.rda'))) return(TRUE)
 	return(FALSE)
 }
 
-get.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL, mcmc.dir=NULL) {
+available.tfr.predictions <- function(mcmc=NULL, sim.dir=NULL, full.names = FALSE){
+    if (!is.null(mcmc)) sim.dir <- if(is.character(mcmc)) mcmc else mcmc$meta$output.dir
+    if (is.null(sim.dir)) stop('Either mcmc or directory must be given.')
+    all.dirs <- list.dirs(sim.dir, recursive = FALSE, full.names = TRUE)
+    pred.dirs <- names(unlist(sapply(all.dirs, function(x) list.files(x, pattern = "^prediction.rda$"))))
+    if(length(pred.dirs) == 0 || full.names == TRUE) return(pred.dirs)
+    return(sapply(pred.dirs, basename, USE.NAMES = FALSE))
+}
+
+get.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL, mcmc.dir=NULL, subdir = "predictions") {
 	############
 	# Returns an object of class bayesTFR.prediction
 	# Set mcmc.dir to NA, if the prediction object should not have a pointer 
@@ -238,11 +247,15 @@ get.tfr.prediction <- function(mcmc=NULL, sim.dir=NULL, mcmc.dir=NULL) {
 	if (!is.null(mcmc)) 
 		sim.dir <- if(is.character(mcmc)) mcmc else mcmc$meta$output.dir
 	if (is.null(sim.dir)) stop('Either mcmc or directory must be given.')
-	output.dir <- file.path(sim.dir, 'predictions')
+	output.dir <- file.path(sim.dir, subdir)
 	pred.file <- file.path(output.dir, 'prediction.rda')
 	if(!file.exists(pred.file)) {
 		warning('File ', pred.file, ' does not exist.')
-		return(NULL)
+	    if(length((alt.preds <- available.tfr.predictions(sim.dir = sim.dir))) > 0){
+	        output.dir <- file.path(sim.dir, alt.preds[1])
+	        pred.file <- file.path(output.dir, 'prediction.rda')
+	        warning('Extracting predictions from ', alt.preds[1])
+	    } else return(NULL)
 	}
 	load(file=pred.file)
 	bayesTFR.prediction$output.directory <- normalizePath(output.dir)
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index 51ab44c7..b0095d8f 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -6,7 +6,7 @@ tfr.predict <- function(mcmc.set=NULL, end.year=2100,
 						use.tfr3=TRUE, burnin3=2000,
 						mu=2.1, rho=0.8859, sigmaAR1=0.1016,
 						min.tfr=0.5, use.correlation=FALSE,
-						save.as.ascii=0, output.dir = NULL,
+						save.as.ascii=0, output.dir = NULL, subdir = "predictions", 
 						low.memory=TRUE,
 						seed=NULL, verbose=TRUE, uncertainty=FALSE, ...) {
 	if(!is.null(mcmc.set)) {
@@ -55,7 +55,7 @@ tfr.predict <- function(mcmc.set=NULL, end.year=2100,
 					start.year=start.year, nr.traj=nr.traj, burnin=burnin, thin=thin, use.tfr3=has.phase3, burnin3=burnin3,
 					mu=mu, rho=rho,  sigmaAR1 = sigmaAR1, min.tfr=min.tfr, use.correlation=use.correlation,
 					save.as.ascii=save.as.ascii,
-					output.dir=output.dir, verbose=verbose, uncertainty=uncertainty, ...))			
+					output.dir=output.dir, subdir = subdir, verbose=verbose, uncertainty=uncertainty, ...))			
 }
 
 .find.burnin.nr.traj.from.diag <- function(diag.list, verbose = FALSE) {
@@ -88,7 +88,8 @@ get.burnin.nrtraj.from.diagnostics <- function(sim.dir, ...) {
 }
 
 tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'), 
-					prediction.dir=sim.dir, countries = NULL, save.as.ascii=0, verbose=TRUE, uncertainty=FALSE,
+					prediction.dir=sim.dir, subdir = "predictions", countries = NULL, save.as.ascii=0, 
+					verbose=TRUE, uncertainty=FALSE,
 					all.countries.required = TRUE, use.correlation = NULL) {
 	# Run prediction for given countries/regions (as codes). If they are not given it will be set to countries 
 	# for which there are MCMC results but no prediction.
@@ -96,9 +97,9 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
     mcmc.set <- get.tfr.mcmc(sim.dir)
 	if(is.null(mcmc.set))
 		stop('Error in "sim.dir" argument.')
-	pred <- get.tfr.prediction(sim.dir=prediction.dir)
+	pred <- get.tfr.prediction(sim.dir=prediction.dir, subdir = subdir)
 	if(is.null(pred))
-		stop('Error in "prediction.dir" argument.')
+		stop('Error in "sim.dir", "prediction.dir" or/and "subdir" argument. Use available.tfr.predictions() to check on valid predictions directories.')
 	if(length(setdiff(pred$mcmc.set$meta$regions$country_code, mcmc.set$meta$regions$country_code)) > 0)
 		stop('Prediction is inconsistent with the mcmc results. Use tfr.predict.')
 	if(is.null(countries)) {
@@ -125,7 +126,7 @@ tfr.predict.extra <- function(sim.dir=file.path(getwd(), 'bayesTFR.output'),
 									use.correlation=use.correlation, 
 									mu=pred$mu, rho=pred$rho, sigmaAR1=pred$sigmaAR1, 
 									min.tfr=pred$min.tfr, countries=countries.idx, save.as.ascii=0, output.dir=prediction.dir,
-									force.creating.thinned.mcmc=TRUE,
+									subdir = subdir, force.creating.thinned.mcmc=TRUE,
 									write.summary.files=FALSE, write.trajectories=TRUE, verbose=verbose, 
 									uncertainty=uncertainty, all.countries.required = all.countries.required)
 									
@@ -173,7 +174,8 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 								use.correlation=FALSE, countries = NULL,
 								adj.factor1=NA, adj.factor2=0, forceAR1=FALSE,
 								boost.first.period.in.phase2=TRUE,
-							    save.as.ascii=0, output.dir = NULL, write.summary.files=TRUE, 
+							    save.as.ascii=0, output.dir = NULL, subdir = "predictions", 
+								write.summary.files=TRUE, 
 							    is.mcmc.set.thinned=FALSE, force.creating.thinned.mcmc=FALSE,
 							    write.trajectories=TRUE, 
 							    verbose=verbose, uncertainty=FALSE, all.countries.required = TRUE){
@@ -213,10 +215,10 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 	
 	#setup output directory
 	if (is.null(output.dir)) output.dir <- meta$output.dir
-	outdir <- file.path(output.dir, 'predictions')
+	outdir <- file.path(output.dir, subdir)
 	
 	if(is.null(countries)) {
-		if(!replace.output && has.tfr.prediction(sim.dir=output.dir))
+		if(!replace.output && has.tfr.prediction(sim.dir=output.dir, subdir = subdir))
 			stop('Prediction in ', outdir,
 			' already exists.\nSet replace.output=TRUE if you want to overwrite existing projections.')
 		unlink(outdir, recursive=TRUE)
@@ -1005,11 +1007,11 @@ do.convert.trajectories <- function(pred, n, output.dir, countries=NULL, verbose
 
 
 convert.tfr.trajectories <- function(dir=file.path(getwd(), 'bayesTFR.output'), 
-								 n=1000, output.dir=NULL, 
+								 n=1000, subdir = "predictions", output.dir=NULL, 
 								 verbose=FALSE) {
 	# Converts all trajectory rda files into UN ascii, selecting n trajectories by equal spacing.
 	if(n <= 0) return()
-	pred <- get.tfr.prediction(sim.dir=dir)
+	pred <- get.tfr.prediction(sim.dir=dir, subdir = subdir)
 	if (is.null(output.dir)) output.dir <- pred$output.directory
 	if(!file.exists(output.dir)) dir.create(output.dir, recursive=TRUE)
 	cat('Converting trajectories from', dir, '\n')
@@ -1028,11 +1030,11 @@ convert.tfr.trajectories <- function(dir=file.path(getwd(), 'bayesTFR.output'),
 }
 
 write.projection.summary <- function(dir=file.path(getwd(), 'bayesTFR.output'), 
-									 output.dir=NULL, revision=NULL, adjusted=FALSE, 
+                                     subdir = "predictions", output.dir=NULL, revision=NULL, adjusted=FALSE, 
 									 est.uncertainty = FALSE, ...) {
 # Writes three prediction summary files, one in a user-friendly format, one in a UN-format,
 # and one parameter file.
-	pred <- get.tfr.prediction(sim.dir=dir)
+	pred <- get.tfr.prediction(sim.dir=dir, subdir = subdir)
 	if (is.null(output.dir)) output.dir <- pred$output.directory
 	if(!file.exists(output.dir)) dir.create(output.dir, recursive=TRUE)
 	tfr.write.projection.summary.and.parameters(pred, output.dir, revision=revision, adjusted=adjusted, 
@@ -1302,19 +1304,19 @@ get.tfr.shift <- function(country.code, pred) {
 	return(pred)
 }
 
-tfr.median.reset <- function(sim.dir, countries = NULL) {
+tfr.median.reset <- function(sim.dir, countries = NULL, ...) {
     if(is.null(countries)) {
-        pred <- get.tfr.prediction(sim.dir)
+        pred <- get.tfr.prediction(sim.dir, ...)
         pred$median.shift <- NULL
         store.bayesTFR.prediction(pred)
         cat('\nMedians for all countries reset.\n')
     } else
-	    for(country in countries) pred <- tfr.median.shift(sim.dir, country, reset=TRUE)
+	    for(country in countries) pred <- tfr.median.shift(sim.dir, country, reset=TRUE, ...)
 	invisible(pred)
 }
 
-tfr.median.shift <- function(sim.dir, country, reset=FALSE, shift=0, from=NULL, to=NULL) {
-	pred <- get.tfr.prediction(sim.dir)
+tfr.median.shift <- function(sim.dir, country, reset=FALSE, shift=0, from=NULL, to=NULL, ...) {
+	pred <- get.tfr.prediction(sim.dir, ...)
 	pred <- .bdem.median.shift(pred, type='tfr', country=country, reset=reset, 
 				shift=shift, from=from, to=to)
 	store.bayesTFR.prediction(pred)
@@ -1354,16 +1356,17 @@ tfr.median.shift <- function(sim.dir, country, reset=FALSE, shift=0, from=NULL,
 	return(pred)
 }
 
-tfr.median.set <- function(sim.dir, country, values, years=NULL) {
-	pred <- get.tfr.prediction(sim.dir)
+tfr.median.set <- function(sim.dir, country, values, years=NULL, ...) {
+	pred <- get.tfr.prediction(sim.dir, ...)
 	pred <- .bdem.median.set(pred, 'tfr', country=country, values=values, years=years)
 	store.bayesTFR.prediction(pred)
 	invisible(pred)
 }
 
-tfr.median.adjust <- function(sim.dir, countries, factor1=2/3, factor2=1/3, forceAR1=FALSE) {
-	pred <- get.tfr.prediction(sim.dir)
-	if (is.null(pred)) stop('No valid prediction in ', sim.dir)
+tfr.median.adjust <- function(sim.dir, countries, factor1=2/3, factor2=1/3, forceAR1=FALSE, subdir = "predictions") {
+	pred <- get.tfr.prediction(sim.dir, subdir = subdir)
+	if (is.null(pred)) stop('Prediction not found in ', file.path(sim.dir, subdir), 
+	                        '. Check available.tfr.predictions() and use the subdir argument to set non-standard prediction subdirectory.')
 	mcmc.set <- pred$mcmc.set
 	if(is.null(countries)) {
 		cat('\nNo countries given. Nothing to be done.\n')
@@ -1382,7 +1385,7 @@ tfr.median.adjust <- function(sim.dir, countries, factor1=2/3, factor2=1/3, forc
 									use.tfr3=pred$use.tfr3, mcmc3.set=m3.set, burnin3=pred$burnin3,
 									mu=pred$mu, rho=pred$rho, sigmaAR1=pred$sigmaAR1, min.tfr=pred$min.tfr,
 									countries=countries.idx, adj.factor1=factor1, adj.factor2=factor2,
-									forceAR1=forceAR1, save.as.ascii=0, output.dir=sim.dir,
+									forceAR1=forceAR1, save.as.ascii=0, output.dir=sim.dir, subdir = subdir,
 									write.summary.files=FALSE, is.mcmc.set.thinned=TRUE, 
 									write.trajectories=FALSE, verbose=FALSE)
 	new.means <- new.pred$traj.mean.sd[,1,2:dim(new.pred$traj.mean.sd)[3]]
@@ -1390,7 +1393,7 @@ tfr.median.adjust <- function(sim.dir, countries, factor1=2/3, factor2=1/3, forc
 		tfr.median.set(sim.dir, countries[icountry], new.means[get.country.object(countries[icountry], mcmc.set$meta)$index,])
 	}
 	# reload adjusted prediction
-	invisible(get.tfr.prediction(sim.dir))
+	invisible(get.tfr.prediction(sim.dir, subdir = subdir))
 }
 
 tfr.correlation <- function(meta, cor.pred=NULL, low.coeffs=c(0.11, 0.26, 0.05, 0.09),
diff --git a/R/predict_tfr_subnat.R b/R/predict_tfr_subnat.R
index 3d441db8..e83108fc 100644
--- a/R/predict_tfr_subnat.R
+++ b/R/predict_tfr_subnat.R
@@ -1,11 +1,11 @@
 tfr.predict.subnat <- function(countries, my.tfr.file, sim.dir=file.path(getwd(), 'bayesTFR.output'),
-                               end.year=2100, start.year=NULL, output.dir = NULL, annual = NULL,
+                               end.year=2100, start.year=NULL, subdir = "predictions", output.dir = NULL, annual = NULL,
                                nr.traj=NULL, seed = NULL, min.tfr = 0.5, ar.pars = NULL, 
                                save.as.ascii = 0, verbose = TRUE) {
   # Run subnational projections, using the Scale AR(1) model applied to a national bayesTFR simulation 
   # sim.dir is the world-national simulation. Set output.dir to store results somewhere else.  
 
-  wpred <- get.tfr.prediction(sim.dir) # contains national projections
+  wpred <- get.tfr.prediction(sim.dir, subdir = subdir) # contains national projections
   wdata <- wpred$tfr_matrix_reconstructed
   wmeta <- wpred$mcmc.set$meta
   if(!is.null(seed)) set.seed(seed)
diff --git a/R/tfr_median_set_new.R b/R/tfr_median_set_new.R
index f10f8e43..c9ca7318 100644
--- a/R/tfr_median_set_new.R
+++ b/R/tfr_median_set_new.R
@@ -30,9 +30,9 @@ tfr.shift.estimation.to.wpp <- function(sim.dir, ..., verbose = TRUE){
       meta$median.shift.estimation[[as.character(cntry)]] <- merged$shift
   }
   store.bayesTFR.meta.object(meta, meta$output.dir)
-  has.predictions <- has.tfr.prediction(sim.dir = sim.dir)
-  if(has.predictions) { # need to save this also in the collapsed chain of the prediction
-    pred <- get.tfr.prediction(sim.dir = sim.dir)
+  predictions <- available.tfr.predictions(sim.dir = sim.dir)
+  for(pred.subdir in predictions) { # need to save this also in the collapsed chain of the prediction
+    pred <- get.tfr.prediction(sim.dir = sim.dir, subdir = pred.subdir)
     pred$mcmc.set$meta$median.shift.estimation <- meta$median.shift.estimation
     store.bayesTFR.prediction(pred)
     store.bayesTFR.meta.object(pred$mcmc.set$meta, pred$mcmc.set$meta$output.dir)
@@ -47,12 +47,13 @@ get.tfr.shift.estimation <- function(country.code, meta)
   return(meta$median.shift.estimation[[as.character(country.code)]])
 }
 
-tfr.median.set.all <- function(sim.dir, country, values, years=NULL, burnin = 0, thin = 1)
+tfr.median.set.all <- function(sim.dir, country, values, years=NULL, burnin = 0, thin = 1, 
+                               subdir = "predictions")
 {
   mcmc.set <- get.tfr.mcmc(sim.dir)
   meta <- mcmc.set$meta
   country.obj <- get.country.object(country, meta=meta)
-  has.predictions <- has.tfr.prediction(sim.dir = sim.dir)
+  has.predictions <- has.tfr.prediction(sim.dir = sim.dir, subdir = subdir)
   # browser()
   bdem.shift.estimation <- do.call('get.tfr.shift.estimation', list(country.obj$code, meta))
   estimation.years <- as.numeric(dimnames(meta$tfr_matrix)[[1]])
@@ -63,7 +64,7 @@ tfr.median.set.all <- function(sim.dir, country, values, years=NULL, burnin = 0,
   years.missing <- is.null(years)
   if (has.predictions)
   {
-    pred <- get.tfr.prediction(sim.dir = sim.dir)
+    pred <- get.tfr.prediction(sim.dir = sim.dir, subdir = subdir)
     pred.years <- as.numeric(dimnames(pred$quantiles)[[3]])
     bdem.shift <- do.call('get.tfr.shift', list(country.obj$code, pred))
     nr.proj <- pred$nr.projections + 1
@@ -130,8 +131,10 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL)
 {
   mcmc.set <- get.tfr.mcmc(sim.dir)
   meta <- mcmc.set$meta
-  has.prediction <- has.tfr.prediction(sim.dir = sim.dir)
-  if (has.prediction) pred <- get.tfr.prediction(sim.dir = sim.dir)
+  has.prediction <- has.tfr.prediction(sim.dir = sim.dir, subdir = subdir)
+  #if (has.prediction) pred <- get.tfr.prediction(sim.dir = sim.dir)
+  predictions <- available.tfr.predictions(sim.dir = sim.dir)
+
   has.estimation <- (!is.null(mcmc.set$mcmc.list[[1]]$uncertainty) && mcmc.set$mcmc.list[[1]]$uncertainty)
   output <- list()
   if (has.estimation && !is.null(meta$median.shift.estimation)) 
@@ -146,23 +149,25 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL)
     }
     output[['meta']] <- meta
     store.bayesTFR.meta.object(meta, meta$output.dir)
-    if (has.prediction){
+    for(pred.subdir in predictions) { # need to save this also in the prediction mcmc object
+        pred <- get.tfr.prediction(sim.dir = sim.dir, subdir = pred.subdir)
         pred$mcmc.set$meta$median.shift.estimation <- meta$median.shift.estimation
         store.bayesTFR.prediction(pred)
     }
   }
-  if (has.prediction && !is.null(pred$median.shift)) 
-  {
-    if(is.null(countries)) pred$median.shift <- NULL # reset all countries
-    else {
-      for (country in countries) 
-      {
-        country.obj <- get.country.object(country, meta=meta)
-        pred$median.shift[[as.character(country.obj$code)]] <- NULL
-      }
-    }
-    output[['pred']] <- pred
-    store.bayesTFR.prediction(pred)
+    for(pred.subdir in predictions) {
+        pred <- get.tfr.prediction(sim.dir = sim.dir, subdir = pred.subdir)
+        if(is.null(pred$median.shift)) next
+        if(is.null(countries)) pred$median.shift <- NULL # reset all countries
+        else {
+            for (country in countries) 
+            {
+                country.obj <- get.country.object(country, meta=meta)
+                pred$median.shift[[as.character(country.obj$code)]] <- NULL
+            }
+        }
+        output[['pred']] <- pred # this just returns the last prediction object which is not correct if there are more than one prediction directories
+        store.bayesTFR.prediction(pred)
   }
   invisible(output)
 }
@@ -206,8 +211,8 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL)
 }
 
 
-tfr.shift.prediction.to.wpp <- function(sim.dir, ...){
-  pred <- get.tfr.prediction(sim.dir)
+tfr.shift.prediction.to.wpp <- function(sim.dir, subdir = "predictions", ...){
+  pred <- get.tfr.prediction(sim.dir, subdir = subdir)
   new.pred <- .do.shift.prediction.to.wpp(pred, wpp.dataset = "tfrprojMed", ...)
   store.bayesTFR.prediction(new.pred)
   invisible(new.pred)

From 8c8ee15a3e81a5ef476205b550a55d97d0500937 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 27 Sep 2024 15:08:39 -0700
Subject: [PATCH 45/48] fixed documentation

---
 ChangeLog                       |  3 +++
 DESCRIPTION                     |  4 ++--
 R/predict_tfr.R                 |  2 +-
 R/tfr_median_set_new.R          |  3 ---
 man/convert.trajectories.Rd     |  3 ++-
 man/get.tfr.prediction.Rd       | 21 +++++++++++++++++----
 man/include.Rd                  |  2 ++
 man/predict-tfr.Rd              |  6 ++++--
 man/tfr.median.set.Rd           | 16 +++++++++-------
 man/tfr.median.set.all.Rd       |  3 ++-
 man/tfr.predict.extra.Rd        |  3 ++-
 man/tfr.predict.subnat.Rd       |  7 ++++---
 man/write.projection.summary.Rd |  4 +++-
 13 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b409f744..4d6ad835 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 7.4-2.90xx (2024-09-26)
 -----
+Added option of keeping multiple predictions directories in one simulation directory
+(argument "subdir" added to various functions).
+
 Added option of shifting projection to wpp by matching means. 
 
 Added dataset include_2024.
diff --git a/DESCRIPTION b/DESCRIPTION
index 7ee45681..4d8da610 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.4-2.9007
-Date: 2024-09-26
+Version: 7.4-2.9008
+Date: 2024-09-27
 Title: Bayesian Fertility Projection
 Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
 Maintainer: Hana Sevcikova <hanas@uw.edu>
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index b0095d8f..54d1f9d2 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -215,7 +215,7 @@ make.tfr.prediction <- function(mcmc.set, start.year=NULL, end.year=2100, replac
 	
 	#setup output directory
 	if (is.null(output.dir)) output.dir <- meta$output.dir
-	outdir <- file.path(output.dir, subdir)
+	outdir <- file.path(output.dir, basename(subdir))
 	
 	if(is.null(countries)) {
 		if(!replace.output && has.tfr.prediction(sim.dir=output.dir, subdir = subdir))
diff --git a/R/tfr_median_set_new.R b/R/tfr_median_set_new.R
index c9ca7318..1dd17d5a 100644
--- a/R/tfr_median_set_new.R
+++ b/R/tfr_median_set_new.R
@@ -131,10 +131,7 @@ tfr.median.reset.estimation <- function(sim.dir, countries = NULL)
 {
   mcmc.set <- get.tfr.mcmc(sim.dir)
   meta <- mcmc.set$meta
-  has.prediction <- has.tfr.prediction(sim.dir = sim.dir, subdir = subdir)
-  #if (has.prediction) pred <- get.tfr.prediction(sim.dir = sim.dir)
   predictions <- available.tfr.predictions(sim.dir = sim.dir)
-
   has.estimation <- (!is.null(mcmc.set$mcmc.list[[1]]$uncertainty) && mcmc.set$mcmc.list[[1]]$uncertainty)
   output <- list()
   if (has.estimation && !is.null(meta$median.shift.estimation)) 
diff --git a/man/convert.trajectories.Rd b/man/convert.trajectories.Rd
index 5795d384..52d0d765 100644
--- a/man/convert.trajectories.Rd
+++ b/man/convert.trajectories.Rd
@@ -9,11 +9,12 @@ Converts TFR trajectories stored in a binary format into two CSV files of a UN-s
 }
 \usage{
 convert.tfr.trajectories(dir = file.path(getwd(), 'bayesTFR.output'), 
-    n = 1000, output.dir = NULL, verbose = FALSE)
+    n = 1000, subdir = "predictions", output.dir = NULL, verbose = FALSE)
 }
 \arguments{
   \item{dir}{Directory containing the prediction object. It should correspond to the \code{output.dir} argument of the \code{\link{tfr.predict}} function.}
   \item{n}{Number of trajectories to be stored. It can be either a single number or the word \dQuote{all} in which case all trajectories are stored.}
+  \item{subdir}{Name of subdirectory of \code{dir} containing the prediction.}
   \item{output.dir}{Directory in which the resulting files will be stored. If \code{NULL} the same directory is used as for the prediction.}
   \item{verbose}{Logical switching log messages on and off.}
 }
diff --git a/man/get.tfr.prediction.Rd b/man/get.tfr.prediction.Rd
index 5bd42d96..da24f52f 100644
--- a/man/get.tfr.prediction.Rd
+++ b/man/get.tfr.prediction.Rd
@@ -2,30 +2,43 @@
 \Rdversion{1.1}
 \alias{get.tfr.prediction}
 \alias{has.tfr.prediction}
+\alias{available.tfr.predictions}
 
 \title{
 Accessing a Prediction Object
 }
 \description{
-Function \code{get.tfr.prediction} retrieves results of a prediction and creates an object of class \code{\link{bayesTFR.prediction}}. Function \code{has.tfr.prediction} checks an existence of such results.
+Function \code{get.tfr.prediction} retrieves results of a prediction and creates an object of class \code{\link{bayesTFR.prediction}}. Function \code{has.tfr.prediction} checks an existence of such results. Function \code{available.tfr.predictions} lists predictions available in the given simulation directory.
 }
 \usage{
-get.tfr.prediction(mcmc = NULL, sim.dir = NULL, mcmc.dir = NULL)
+get.tfr.prediction(mcmc = NULL, sim.dir = NULL, mcmc.dir = NULL,
+                    subdir = "predictions")
 
-has.tfr.prediction(mcmc = NULL, sim.dir = NULL)
+has.tfr.prediction(mcmc = NULL, sim.dir = NULL, subdir = "predictions")
+
+available.tfr.predictions(mcmc = NULL, sim.dir = NULL, full.names = FALSE)
 }
 
 \arguments{
   \item{mcmc}{Object of class \code{\link{bayesTFR.mcmc.set}} used to make the prediction. It must correspond to a Phase II MCMC. If it is \code{NULL}, the prediction is loaded from directory given by \code{sim.dir}.}
   \item{sim.dir}{Directory where the prediction is stored.  It should correspond to the value of the \code{output.dir} argument used in the \code{\link{tfr.predict}} function.  Only relevant if \code{mcmc} is \code{NULL}.}
   \item{mcmc.dir}{Optional argument to be used only in a special case when the mcmc object contained in the prediction object was estimated in different directory than in the one to which it points to (for example due to moving or renaming the original directory). The argument causes that the mcmc is redirected to the given directory. It can be set to \code{NA} if no loading of the mcmc object is desired.}
+  \item{subdir}{Subdirectory of \code{sim.dir} for this particular prediction.}
+  \item{full.names}{Logical. If \code{TRUE}, the directory names are given as full paths, otherwise (default) only the base names.}
 }
 \details{
-If \code{mcmc} is not \code{NULL}, the search directory is set to \code{mcmc$meta$output.dir}. This approach assumes that the prediction was stored in the same directory as the MCMC simulation, i.e. the \code{output.dir} argument of the \code{\link{tfr.predict}} function was set to \code{NULL}. If it is not the case, the argument \code{mcmc.dir} should be used.}
+If \code{mcmc} is not \code{NULL}, the search directory is set to \code{mcmc$meta$output.dir}. This approach assumes that the prediction was stored in the same directory as the MCMC simulation, i.e. the \code{output.dir} argument of the \code{\link{tfr.predict}} function was set to \code{NULL}. If it is not the case, the argument \code{mcmc.dir} should be used.
+
+Usually, all predictions are stored in the subdirectory \dQuote{predictions} of the simulation directory. If the subdirectory has a different name, the argument \code{subdir} should be used. This allows to keep multiple predictions in one (MCMC) simulation directory.
+
+The function \code{available.tfr.predictions} can be used to view all available predictions in the simulation directory.  
+}
 
 \value{
 	Function \code{has.tfr.prediction} returns a logical indicating if a prediction exists for the given \code{mcmc}. 
 	
+	Function \code{available.tfr.predictions} returns a vector of directory names containing TFR predictions. 
+	
 	Function \code{get.tfr.prediction} returns an
 object of class \code{\link{bayesTFR.prediction}}.
 }
diff --git a/man/include.Rd b/man/include.Rd
index 991f181b..6735cbab 100644
--- a/man/include.Rd
+++ b/man/include.Rd
@@ -6,6 +6,7 @@
 \alias{include_2017}
 \alias{include_2019}
 \alias{include_2022}
+\alias{include_2024}
 
 \docType{data}
 \title{
@@ -15,6 +16,7 @@ Inclusion Codes
 Data sets containing codes that determine which countries are to be included into a simulation or/and projections. 
 }
 \usage{
+data(include_2024)
 data(include_2022)
 data(include_2019)
 data(include_2017)
diff --git a/man/predict-tfr.Rd b/man/predict-tfr.Rd
index 5030f1c7..d850dc45 100644
--- a/man/predict-tfr.Rd
+++ b/man/predict-tfr.Rd
@@ -17,7 +17,8 @@ tfr.predict(mcmc.set = NULL, end.year = 2100,
     use.diagnostics = FALSE, use.tfr3 = TRUE, burnin3 = 2000,
     mu = 2.1, rho = 0.8859, sigmaAR1 = 0.1016, min.tfr = 0.5,
     use.correlation = FALSE, save.as.ascii = 0, output.dir = NULL, 
-    low.memory = TRUE, seed = NULL, verbose = TRUE, uncertainty = FALSE, \dots)
+    subdir = "predictions", low.memory = TRUE, seed = NULL, 
+    verbose = TRUE, uncertainty = FALSE, \dots)
 }
 
 \arguments{
@@ -34,6 +35,7 @@ tfr.predict(mcmc.set = NULL, end.year = 2100,
   \item{burnin3}{Burnin used for phase III MCMCs. Only relevant if \code{use.tfr3} is \code{TRUE}.}
   \item{save.as.ascii}{Either a number determining how many trajectories should be converted into an ASCII file, or \dQuote{all} in which case all trajectories are converted. It should be set to 0, if no conversion is desired.}
   \item{output.dir}{Directory into which the resulting prediction object and the trajectories are stored. If it is \code{NULL}, it is set to either \code{sim.dir}, or to \code{output.dir} of \code{mcmc.set$meta} if \code{mcmc.set} is given.}
+  \item{subdir}{Subdirectory of \code{output.dir} to store the predictions. It is defined relative to \code{output.dir} and can only have one level.}
   \item{low.memory}{Logical indicating if the prediction should run in a low-memory mode. If it is \code{FALSE}, the whole traces of all parameters, including the burnin, are loaded into memory. Otherwise, burnins are discarded and parameters are loaded as they are needed and are not kept in the memory.}
   \item{mu}{Long-term mean \eqn{\mu}{mu} in the AR(1) projection model. Only used if \code{use.tfr3} is \code{FALSE}.}
   \item{rho}{Autoregressive parameter \eqn{\rho}{rho} in AR(1) projection model. If it is \code{NULL} it is estimated from the data. Only used if \code{use.tfr3} is \code{FALSE}.}
@@ -52,7 +54,7 @@ If Phase III is projected using a BHM (i.e. if \code{use.tfr3} is \code{TRUE}),
 
 The projection is run for all missing values before the present year, if any. Medians over the trajectories are used as  imputed values and the trajectories are discarded. The process then continues by projecting the future values where all generated trajectories are kept.
 
-The resulting prediction object is saved into \file{\{output.dir\}/predictions}. Trajectories for all countries are saved into the same directory in a binary format, one file per country. At the end of the projection, if \code{save.as.ascii} is larger than 0, the function converts the given number of trajectories into a CSV file of a UN-specific format. They are selected by equal spacing (see function \code{\link{convert.tfr.trajectories}} for more details on the conversion). In addition, two summary files are created: one in a user-friendly format, the other using a UN-specific coding of the variants and time (see \code{\link{write.projection.summary}} for more details).
+The resulting prediction object is saved into \file{\{output.dir\}/\{subdir\}}. Trajectories for all countries are saved into the same directory in a binary format, one file per country. At the end of the projection, if \code{save.as.ascii} is larger than 0, the function converts the given number of trajectories into a CSV file of a UN-specific format. They are selected by equal spacing (see function \code{\link{convert.tfr.trajectories}} for more details on the conversion). In addition, two summary files are created: one in a user-friendly format, the other using a UN-specific coding of the variants and time (see \code{\link{write.projection.summary}} for more details).
 }
 
 \value{
diff --git a/man/tfr.median.set.Rd b/man/tfr.median.set.Rd
index f0bbdf13..71df3be0 100644
--- a/man/tfr.median.set.Rd
+++ b/man/tfr.median.set.Rd
@@ -12,16 +12,17 @@ Editing Medians of the Projection
 These functions are to be used by expert analysts. They allow to change the projection medians either to specific values, including the WPP values, or shift the medians by a given constant, or by a specific adjusting procedure.
 }
 \usage{
-tfr.median.set(sim.dir, country, values, years = NULL)
+tfr.median.set(sim.dir, country, values, years = NULL, \dots)
 
 tfr.median.shift(sim.dir, country, reset = FALSE, shift = 0, 
-    from = NULL, to = NULL)
+    from = NULL, to = NULL, \dots)
     
-tfr.median.adjust(sim.dir, countries, factor1 = 2/3, factor2 = 1/3, forceAR1 = FALSE)
+tfr.median.adjust(sim.dir, countries, factor1 = 2/3, factor2 = 1/3, forceAR1 = FALSE,
+    subdir = "predictions")
 
-tfr.median.reset(sim.dir, countries = NULL)
+tfr.median.reset(sim.dir, countries = NULL, \dots)
 
-tfr.shift.prediction.to.wpp(sim.dir, ...)
+tfr.shift.prediction.to.wpp(sim.dir, subdir = "predictions", \dots)
 }
 \arguments{
   \item{sim.dir}{Directory containing the prediction object.}
@@ -36,13 +37,14 @@ tfr.shift.prediction.to.wpp(sim.dir, ...)
   \item{to}{Year until which the offset/reset should be done. By default, it is set to the last prediction period.}
   \item{factor1, factor2}{Adjusting factors for the first and second projection period, respectively (see below).}
   \item{forceAR1}{Logical. If \code{TRUE}, the given countries are forced to enter Phase III (i.e. the AR(1) process) in the first projection period.}
-  \item{\dots}{Additional arguments passed to the underlying adjustment function. It can be \code{verbose} to show/hide the progress of the adjustment and \code{wpp.year} to adjust it to if it differs from the wpp year of the simulation.}
+  \item{subdir}{Subdirectory of \code{sim.dir} containing the predictions.}
+  \item{\dots}{Additional arguments passed to the underlying adjustment function. For \code{tfr.shift.prediction.to.wpp} it can be \code{stat} with values \dQuote{median} (default) or \dQuote{mean} to specify which statistics should be adjusted; \code{verbose} to show/hide the progress of the adjustment and \code{wpp.year} to adjust it to if it differs from the wpp year of the simulation. For the other functions it can be \code{subdir} to specify the location of the prediction.}
 }
 \details{
 The function \code{tfr.median.set} can be used to set the medians of the given country to specific values. Function \code{tfr.median.shift} can be used to offset the medians by a specific constant, or to reset the medians to their original BHM values.
 Function \code{tfr.median.adjust} runs the prediction procedure for the given countries with an additional decrement in the model in the first two projection periods. In the first projection period it is computed as \code{factor1*S} where \code{S} is a difference between observed decrement and the expected decrement (by the double logistic function) in the last observed period. In the second projection period, in the formula \code{factor1} is replaced by \code{factor2}. If \code{forceAR1} is set to \code{TRUE}, we recommend to set \code{factor1} and \code{factor2} to 0. The function then calls \code{tfr.median.set} in order to store the new median for each country.
 
-Function\code{tfr.shift.prediction.to.wpp} shifts the projected medians so that they correspond to the values found in the \code{tfrprojMed} datasets of the \pkg{wpp} package that either corresponds to the package used for the simulation itself or is given by the \code{wpp.year} argument. If using \pkg{wpp2022}, the dataset name is automatically adjusted depending if it is an annual or a 5-year simulation.
+Function\code{tfr.shift.prediction.to.wpp} shifts the projected medians or means (if \code{stat} is \dQuote{mean}), so that they correspond to the values found in the \code{tfrprojMed} datasets of the \pkg{wpp} package that either corresponds to the package used for the simulation itself or is given by the \code{wpp.year} argument. If using \pkg{wpp2022} or higher, the dataset name is automatically adjusted depending if it is an annual or a 5-year simulation. Note that regardless if it is an adjustment of the median or mean, the corresponding offset is always converted to a shift of the median. 
 
 Function \code{tfr.median.reset} resets medians of the given countries to the original values. By default it deletes adjustments for all countries.
 
diff --git a/man/tfr.median.set.all.Rd b/man/tfr.median.set.all.Rd
index 325cbaca..d85a4782 100644
--- a/man/tfr.median.set.all.Rd
+++ b/man/tfr.median.set.all.Rd
@@ -12,7 +12,7 @@ These functions are to be used by expert analysts. They allow to change the esti
 }
 \usage{
 tfr.median.set.all(sim.dir, country, values, years = NULL, 
-    burnin = 0, thin = 1)
+    burnin = 0, thin = 1, subdir = "predictions")
 
 tfr.median.reset.estimation(sim.dir, countries = NULL)
 
@@ -27,6 +27,7 @@ tfr.shift.estimation.to.wpp(sim.dir, \dots, verbose = TRUE)
   \item{years}{Numeric vector giving years which \code{values} correspond to. Ideally it should be of the same length as \code{values}.}
   \item{burnin}{Burnin to use when computing the estimation median.}
   \item{thin}{Thinning interval to use when computing the estimation median.}
+  \item{subdir}{Subdirectory of \code{sim.dir} containing the predictions.}
   \item{\dots}{Can be used to pass \code{burnin} \code{thin} to the underlying funcions.}
   \item{verbose}{Logical. If \code{TRUE} a progress of the adjustment is shown.}
 }
diff --git a/man/tfr.predict.extra.Rd b/man/tfr.predict.extra.Rd
index c41c84c0..3fc0f880 100644
--- a/man/tfr.predict.extra.Rd
+++ b/man/tfr.predict.extra.Rd
@@ -10,13 +10,14 @@ Using the posterior parameter samples the function  generates posterior trajecto
 }
 \usage{
 tfr.predict.extra(sim.dir = file.path(getwd(), 'bayesTFR.output'), 
-    prediction.dir = sim.dir, countries = NULL, 
+    prediction.dir = sim.dir, subdir = "predictions", countries = NULL, 
     save.as.ascii = 0, verbose = TRUE, uncertainty=FALSE,
     all.countries.required = TRUE, use.correlation = NULL)
 	}
 \arguments{
   \item{sim.dir}{Directory with the MCMC simulation results.}
   \item{prediction.dir}{Directory where the prediction object and the trajectories are stored.}
+  \item{subdir}{Subdirectory of \code{prediction.dir} containing the predictions.}
   \item{countries}{Vector of country codes for which the prediction should be made. If it is \code{NULL}, the prediction is run for all countries that are included in the MCMC object but for which no prediction was generated.}
   \item{save.as.ascii}{Either a number determining how many trajectories should be converted into an ascii file, or \dQuote{all} in which case all trajectories are converted. It should be set to 0, if no conversion is desired. Note that the conversion is done on all countries.}
   \item{verbose}{Logical switching log messages on and off.}
diff --git a/man/tfr.predict.subnat.Rd b/man/tfr.predict.subnat.Rd
index 9af7f788..5c5e40b4 100644
--- a/man/tfr.predict.subnat.Rd
+++ b/man/tfr.predict.subnat.Rd
@@ -10,9 +10,9 @@ Generates posterior trajectories of the total fertility rate for subregions of g
 \usage{
 tfr.predict.subnat(countries, my.tfr.file, 
                 sim.dir = file.path(getwd(), "bayesTFR.output"), 
-                end.year = 2100, start.year = NULL, output.dir = NULL, 
-                annual = NULL, nr.traj = NULL, seed = NULL, min.tfr = 0.5, 
-                ar.pars = NULL, save.as.ascii = 0, verbose = TRUE)
+                end.year = 2100, start.year = NULL, subdir = "predictions", 
+                output.dir = NULL, annual = NULL, nr.traj = NULL, seed = NULL, 
+                min.tfr = 0.5, ar.pars = NULL, save.as.ascii = 0, verbose = TRUE)
 }
 
 \arguments{
@@ -21,6 +21,7 @@ tfr.predict.subnat(countries, my.tfr.file,
   \item{sim.dir}{Simulation directory with the national projections generated using \code{\link{tfr.predict}}.}
  \item{end.year}{End year of the projections.}
  \item{start.year}{Start year of the projections. By default, projections start at the same time point as the national projections.} 
+   \item{subdir}{Subdirectory of \code{sim.dir} containing the national predictions.}
    \item{output.dir}{Directory into which the resulting prediction objects and the trajectories are stored. See below for details.}
 \item{annual}{Logical indicating if the subnational projection should be on an annual scale or a 5-year scale. By default, 
 the scale is matched to the national simulation. If the subnational and national scales are not the same, the national trajectories are either interpolated (if \code{annual = TRUE} and the national simulation is not annual) or averaged to 5-year values (if \code{annual = FALSE} and the national simulation is annual).}
diff --git a/man/write.projection.summary.Rd b/man/write.projection.summary.Rd
index 4900bb17..c90e3ac1 100644
--- a/man/write.projection.summary.Rd
+++ b/man/write.projection.summary.Rd
@@ -10,10 +10,12 @@ The function creates two files containing projection summaries, such as the medi
 }
 \usage{
 write.projection.summary(dir = file.path(getwd(), "bayesTFR.output"), 
-    output.dir = NULL, revision = NULL, adjusted = FALSE, est.uncertainty = FALSE, \dots)
+    subdir = "predictions", output.dir = NULL, revision = NULL, adjusted = FALSE, 
+    est.uncertainty = FALSE, \dots)
 }
 \arguments{
   \item{dir}{Directory containing the prediction object. It should correspond to the \code{output.dir} argument of the \code{\link{tfr.predict}} function.}
+  \item{subdir}{Subdirectory of \code{dir} containing the predictions.}
   \item{output.dir}{Directory in which the resulting file will be stored. If \code{NULL} the same directory is used as for the prediction.}
   \item{revision}{UN WPP revision number. If \code{NULL} it is determined from the corresponding WPP year: WPP 2008 corresponds to revision 13, every subsequent WPP increases the revision number by one. Used as a constant in the second file only.}
   \item{adjusted}{Logical. By default the function writes summary using the original BHM projections. If the projection medians are adjusted (using e.g. \code{\link{tfr.median.set}}), setting this argument to \code{TRUE} causes writing the adjusted projections.}

From f01d309e20899b6d660539039cce8adf67a7fa00 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 25 Oct 2024 17:39:34 -0700
Subject: [PATCH 46/48] version 7.4-3

---
 DESCRIPTION       | 29 +++++++++++++++++++++++++----
 tests/run_tests.R |  4 ++--
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4d8da610..8ae96cd8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,9 +1,30 @@
 Package: bayesTFR
-Version: 7.4-2.9008
-Date: 2024-09-27
+Version: 7.4-3
+Date: 2024-10-25
 Title: Bayesian Fertility Projection
-Author: Hana Sevcikova (hanas@uw.edu), Leontine Alkema (alkema@nus.edu.sg), Peiran Liu (prliu@uw.edu), Adrian Raftery (raftery@uw.edu), Bailey Fosdick (bfosdick@uw.edu), Patrick Gerland (gerland@un.org)
-Maintainer: Hana Sevcikova <hanas@uw.edu>
+Authors@R: c(person(given = "Hana",
+                    family = "Sevcikova",
+                    role = c("cre", "aut"),
+                    email = "hanas@uw.edu"),
+            person(given = "Leontine",
+                      family = "Alkema",
+                      role = "aut"),
+            person(given = "Peiran",
+                      family = "Liu",
+                      role = "aut"),
+             person(given = "Adrian",
+                      family = "Raftery",
+                      role = "aut",
+                      email = "raftery@uw.edu"),
+            person(given = "Bailey",
+                      family = "Fosdick",
+                      role = "aut"),
+            person(given = "Patrick",
+                      family = "Gerland",
+                      role = "aut",
+                      email = "gerland@un.org"
+                      )
+           )
 Depends: R (>= 3.5.0)
 Imports: mvtnorm, MASS, coda, graphics, grDevices, stats, utils, wpp2019, data.table, lifecycle
 Suggests: rworldmap, snowFT, googleVis, sp, wpp2017, wpp2015, wpp2012, wpp2010, ggplot2, sf, spData, scales
diff --git a/tests/run_tests.R b/tests/run_tests.R
index 6250a03a..7daa33cc 100644
--- a/tests/run_tests.R
+++ b/tests/run_tests.R
@@ -28,13 +28,13 @@ if(!cran) {
 	test.plot.all()
 	test.reproduce.simulation()
 	test.subnational.predictions()
-	for (wpp in c(2019, 2022)){
+	for (wpp in c(2019, 2024)){
 	    test.run.mcmc.simulation(wpp.year = wpp)
 	    test.run.mcmc.simulation.with.uncertainty(wpp.year = wpp)
 	    test.thinned.simulation(wpp.year = wpp)
 	    test.run.annual.simulation(wpp.year = wpp)
 	}
-	for(wpp in rev(c(2010, 2012, 2015, 2017, 2022))) { # these are either suggested packages or not on CRAN
+	for(wpp in rev(c(2010, 2012, 2015, 2017, 2024))) { # these are either suggested packages or not on CRAN
 	    test.load.UNtfr(wpp)
 	    test.load.UNlocations(wpp)
 	    test.create.tfr.matrix(wpp)

From d8144452e196cfed494e85a79c5032718ba1a220 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Thu, 31 Oct 2024 17:50:43 -0700
Subject: [PATCH 47/48] minor print statement change

---
 DESCRIPTION     | 4 ++--
 R/predict_tfr.R | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8ae96cd8..d11b493b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.4-3
-Date: 2024-10-25
+Version: 7.4-3.9001
+Date: 2024-10-31
 Title: Bayesian Fertility Projection
 Authors@R: c(person(given = "Hana",
                     family = "Sevcikova",
diff --git a/R/predict_tfr.R b/R/predict_tfr.R
index 54d1f9d2..2c0f755a 100644
--- a/R/predict_tfr.R
+++ b/R/predict_tfr.R
@@ -1014,7 +1014,7 @@ convert.tfr.trajectories <- function(dir=file.path(getwd(), 'bayesTFR.output'),
 	pred <- get.tfr.prediction(sim.dir=dir, subdir = subdir)
 	if (is.null(output.dir)) output.dir <- pred$output.directory
 	if(!file.exists(output.dir)) dir.create(output.dir, recursive=TRUE)
-	cat('Converting trajectories from', dir, '\n')
+	cat('Converting trajectories from', file.path(dir, subdir), '\n')
 	if (is.null(pred$na.index)) {
 		if(verbose) cat('Finding NA values in each country ...\n')
 		for (country in 1:pred$mcmc.set$meta$nr_countries) {

From 9c7116a00e8f6cb59956c859bee6659b65a574a4 Mon Sep 17 00:00:00 2001
From: Hana Sevcikova <hanas@uw.edu>
Date: Fri, 1 Nov 2024 10:58:12 -0700
Subject: [PATCH 48/48] two countries re-classified as small

---
 ChangeLog                  |   6 +-
 DESCRIPTION                |   4 +-
 data-raw/create_includes.R |   3 +
 data-raw/include_2024.txt  | 287 +++++++++++++++++++++++++++++++++++++
 data/include_2024.R        |   6 -
 data/include_2024.rda      | Bin 0 -> 818 bytes
 6 files changed, 297 insertions(+), 9 deletions(-)
 create mode 100644 data-raw/include_2024.txt
 delete mode 100644 data/include_2024.R
 create mode 100644 data/include_2024.rda

diff --git a/ChangeLog b/ChangeLog
index 4d6ad835..2edf0427 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,8 @@
-7.4-2.90xx (2024-09-26)
+7.4-4 (2024-11-01)
+-----
+Two countries added to small countries in include_2024.
+
+7.4-3 (2024-10-31)
 -----
 Added option of keeping multiple predictions directories in one simulation directory
 (argument "subdir" added to various functions).
diff --git a/DESCRIPTION b/DESCRIPTION
index d11b493b..2160ce19 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bayesTFR
-Version: 7.4-3.9001
-Date: 2024-10-31
+Version: 7.4-4
+Date: 2024-11-01
 Title: Bayesian Fertility Projection
 Authors@R: c(person(given = "Hana",
                     family = "Sevcikova",
diff --git a/data-raw/create_includes.R b/data-raw/create_includes.R
index 72362bdf..7d51eb96 100644
--- a/data-raw/create_includes.R
+++ b/data-raw/create_includes.R
@@ -3,6 +3,9 @@ library(usethis)
 
 # Run this from the data-raw directory
 
+include_2024 <- as.data.frame(fread("include_2024.txt", sep = "\t")[, .(country_code, include_code)])
+use_data(include_2024, overwrite = TRUE)
+
 include_2022 <- as.data.frame(fread("include_2022.txt", sep = "\t")[, .(country_code, include_code)])
 use_data(include_2022, overwrite = TRUE)
 
diff --git a/data-raw/include_2024.txt b/data-raw/include_2024.txt
new file mode 100644
index 00000000..5bdb28ac
--- /dev/null
+++ b/data-raw/include_2024.txt
@@ -0,0 +1,287 @@
+country_code	name	include_code
+900	World	0
+1834	Sub-Saharan Africa	0
+1833	Northern Africa and Western Asia	0
+1831	Central and Southern Asia	0
+1832	Eastern and South-Eastern Asia	0
+1830	Latin America and the Caribbean	0
+1835	Oceania (excluding Australia and New Zealand)	0
+1836	Australia/New Zealand	0
+1829	Europe and Northern America	0
+901	More developed regions	0
+902	Less developed regions	0
+941	Least developed countries	0
+934	Less developed regions, excluding least developed countries	0
+948	Less developed regions, excluding China	0
+1636	Land-locked Developing Countries (LLDC)	0
+1637	Small Island Developing States (SIDS)	0
+1503	High-income countries	0
+1517	Middle-income countries	0
+1502	Upper-middle-income countries	0
+1501	Lower-middle-income countries	0
+1500	Low-income countries	0
+1518	No income group available	0
+903	Africa	0
+910	Eastern Africa	0
+108	Burundi	2
+174	Comoros	2
+262	Djibouti	2
+232	Eritrea	2
+231	Ethiopia	2
+404	Kenya	2
+450	Madagascar	2
+454	Malawi	2
+480	Mauritius	2
+175	Mayotte	2
+508	Mozambique	2
+638	Reunion	2
+646	Rwanda	2
+690	Seychelles	2
+706	Somalia	2
+728	South Sudan	2
+800	Uganda	2
+834	United Republic of Tanzania	2
+894	Zambia	2
+716	Zimbabwe	2
+911	Middle Africa	0
+24	Angola	2
+120	Cameroon	2
+140	Central African Republic	2
+148	Chad	2
+178	Congo	2
+180	Democratic Republic of the Congo	2
+226	Equatorial Guinea	2
+266	Gabon	2
+678	Sao Tome and Principe	2
+912	Northern Africa	0
+12	Algeria	2
+818	Egypt	2
+434	Libya	2
+504	Morocco	2
+729	Sudan	2
+788	Tunisia	2
+732	Western Sahara	2
+913	Southern Africa	0
+72	Botswana	2
+748	Eswatini	2
+426	Lesotho	2
+516	Namibia	2
+710	South Africa	2
+914	Western Africa	0
+204	Benin	2
+854	Burkina Faso	2
+132	Cabo Verde	2
+384	Cote d'Ivoire	2
+270	Gambia	2
+288	Ghana	2
+324	Guinea	2
+624	Guinea-Bissau	2
+430	Liberia	2
+466	Mali	2
+478	Mauritania	2
+562	Niger	2
+566	Nigeria	2
+654	Saint Helena	1
+686	Senegal	2
+694	Sierra Leone	2
+768	Togo	2
+935	Asia	0
+5500	Central Asia	0
+398	Kazakhstan	2
+417	Kyrgyzstan	2
+762	Tajikistan	2
+795	Turkmenistan	2
+860	Uzbekistan	2
+906	Eastern Asia	0
+156	China	2
+344	China, Hong Kong SAR	2
+446	China, Macao SAR	2
+158	China, Taiwan Province of China	2
+408	Dem. People's Republic of Korea	2
+392	Japan	2
+496	Mongolia	2
+410	Republic of Korea	2
+5501	Southern Asia	0
+4	Afghanistan	2
+50	Bangladesh	2
+64	Bhutan	2
+356	India	2
+364	Iran (Islamic Republic of)	2
+462	Maldives	2
+524	Nepal	2
+586	Pakistan	2
+144	Sri Lanka	2
+920	South-Eastern Asia	0
+96	Brunei Darussalam	2
+116	Cambodia	2
+360	Indonesia	2
+418	Lao People's Democratic Republic	2
+458	Malaysia	2
+104	Myanmar	2
+608	Philippines	2
+702	Singapore	2
+764	Thailand	2
+626	Timor-Leste	2
+704	Viet Nam	2
+922	Western Asia	0
+51	Armenia	2
+31	Azerbaijan	2
+48	Bahrain	2
+196	Cyprus	2
+268	Georgia	2
+368	Iraq	2
+376	Israel	2
+400	Jordan	2
+414	Kuwait	2
+422	Lebanon	2
+512	Oman	2
+634	Qatar	2
+682	Saudi Arabia	2
+275	State of Palestine	2
+760	Syrian Arab Republic	2
+792	Turkiye	2
+784	United Arab Emirates	2
+887	Yemen	2
+908	Europe	0
+923	Eastern Europe	0
+112	Belarus	2
+100	Bulgaria	2
+203	Czechia	2
+348	Hungary	2
+616	Poland	2
+498	Republic of Moldova	2
+642	Romania	2
+643	Russian Federation	2
+703	Slovakia	2
+804	Ukraine	2
+924	Northern Europe	0
+208	Denmark	2
+233	Estonia	2
+234	Faroe Islands	1
+246	Finland	2
+831	Guernsey	1
+352	Iceland	2
+372	Ireland	2
+833	Isle of Man	1
+832	Jersey	2
+428	Latvia	2
+440	Lithuania	2
+578	Norway	2
+752	Sweden	2
+826	United Kingdom	2
+925	Southern Europe	0
+8	Albania	2
+20	Andorra	1
+70	Bosnia and Herzegovina	2
+191	Croatia	2
+292	Gibraltar	1
+300	Greece	2
+336	Holy See	1
+380	Italy	2
+412	Kosovo (under UNSC res. 1244)	2
+470	Malta	2
+499	Montenegro	2
+807	North Macedonia	2
+620	Portugal	2
+674	San Marino	1
+688	Serbia	2
+705	Slovenia	2
+724	Spain	2
+926	Western Europe	0
+40	Austria	2
+56	Belgium	2
+250	France	2
+276	Germany	2
+438	Liechtenstein	1
+442	Luxembourg	2
+492	Monaco	1
+528	Netherlands	2
+756	Switzerland	2
+904	Latin America and the Caribbean	0
+915	Caribbean	0
+660	Anguilla	1
+28	Antigua and Barbuda	2
+533	Aruba	2
+44	Bahamas	2
+52	Barbados	2
+535	Bonaire, Sint Eustatius and Saba	1
+92	British Virgin Islands	1
+136	Cayman Islands	1
+192	Cuba	2
+531	Curacao	2
+212	Dominica	1
+214	Dominican Republic	2
+308	Grenada	2
+312	Guadeloupe	2
+332	Haiti	2
+388	Jamaica	2
+474	Martinique	2
+500	Montserrat	1
+630	Puerto Rico	2
+652	Saint-Barthelemy	1
+659	Saint Kitts and Nevis	1
+662	Saint Lucia	2
+663	Saint Martin (French part)	1
+670	Saint Vincent and the Grenadines	2
+534	Sint Maarten (Dutch part)	1
+780	Trinidad and Tobago	2
+796	Turks and Caicos Islands	1
+850	United States Virgin Islands	1
+916	Central America	0
+84	Belize	2
+188	Costa Rica	2
+222	El Salvador	2
+320	Guatemala	2
+340	Honduras	2
+484	Mexico	2
+558	Nicaragua	2
+591	Panama	2
+931	South America	0
+32	Argentina	2
+68	Bolivia (Plurinational State of)	2
+76	Brazil	2
+152	Chile	2
+170	Colombia	2
+218	Ecuador	2
+238	Falkland Islands (Malvinas)	1
+254	French Guiana	2
+328	Guyana	2
+600	Paraguay	2
+604	Peru	2
+740	Suriname	2
+858	Uruguay	2
+862	Venezuela (Bolivarian Republic of)	2
+905	Northern America	0
+60	Bermuda	1
+124	Canada	2
+304	Greenland	1
+666	Saint Pierre and Miquelon	1
+840	United States of America	2
+909	Oceania	0
+927	Australia/New Zealand	0
+36	Australia	2
+554	New Zealand	2
+928	Melanesia	0
+242	Fiji	2
+540	New Caledonia	2
+598	Papua New Guinea	2
+90	Solomon Islands	2
+548	Vanuatu	2
+954	Micronesia	0
+316	Guam	2
+296	Kiribati	2
+584	Marshall Islands	1
+583	Micronesia (Fed. States of)	2
+520	Nauru	1
+580	Northern Mariana Islands	1
+585	Palau	1
+957	Polynesia	0
+16	American Samoa	1
+184	Cook Islands	1
+258	French Polynesia	2
+570	Niue	1
+882	Samoa	2
+772	Tokelau	1
+776	Tonga	2
+798	Tuvalu	1
+876	Wallis and Futuna Islands	1
diff --git a/data/include_2024.R b/data/include_2024.R
deleted file mode 100644
index a6af5cc8..00000000
--- a/data/include_2024.R
+++ /dev/null
@@ -1,6 +0,0 @@
-include_2024 <- local({
-    e <- new.env()
-    load("include_2022.rda", envir= e)
-    e$include_2022
-})
-
diff --git a/data/include_2024.rda b/data/include_2024.rda
new file mode 100644
index 0000000000000000000000000000000000000000..e416e64168ce41f977635c604288f5ebbbeef32e
GIT binary patch
literal 818
zcmV-21I_$GT4*^jL0KkKS(e4~000CZ|NsB@PtC{uT~zh-)m86j-}hhl{onbQ)mQZ$
zRd>|)T~)vWoIr;;1u8a>18NMK10kW14Lw69hJzuH0gwiP>H|P%(?HPCrhou33?`Wx
zVrcaQQKMA#KT2eJjT&eG0009+KmY&$0004?000000002>KU5J<+Ek550000D003k(
z8UPHM00000007aZjQ{`|X_Fu{0FzY`Kn(=ICYS&K08JQ}00LkD000dD0GLbw0GI#*
zV?+fZX`)dDQX)kn%C-SQB7!l9g2@&NBL#r4f`k?VD<KOVHX3T{a0=#)t0@ij2GN=9
z&31#M`fYbN?j>bqzUgb=P|qnEa%W8lsb?hB;Pxt5nC)ye(DsV0m2(s}+Bt<C<-)Y8
z7Hyzo!O_d^iHnwt-CwD8LfP;HAP09_rex!PZGfGP4hI8q**dI^ERQ(TRV0@j#Y0`I
z%~4KO3%i%1VOZ}Ic^MUNZq26RHtf<JFkQ=w6Ltk>YErg90E7Vu8bAd0>zPLi%+kJK
zeeF4|1WH$rj(dDkXC)LZgd_4!1^}<0n-&AN2evbTw^|`Q-1<h|zXL3e`KuurnU4J&
z|BXbG0RRCOQ=?q_(Q*!q_aF2`<T6k?<R5k2_5G~O871TNs;#Wsvu_1V{|f`#B|0?4
zmp*La_|>Aq`-ru!@gV*vAVLfB?7{#Wn!jeI&%5?}YlNTv5{U>%S(ris8z#v}QppUI
zjD&}+g2_G1Dl$k)St<hxj3p?cAh1xOlpzHc0*eJ%Lkbxd0{haol}HdPNhAiUfQ3d$
zO*8?sskWwysm@6nk+@B!+$cmKX*fa$K>~7-Du&1Q#?GBFH2Mop;&jSs<0foD4Fklt
z43)sd>ZLgI#47udjM0qXIUp#8IVu#w&Bx^6W(lNG;Y;)i1!5Vd2ry7ch=QRWdS*75
zf<*-*F;NIWM5(J-nXH5$npOg}3Iu`*Mt8yHx#_5saMU(PGDZ+GM+j~kV+dgY+z<dm
w95yi^8*FF>(oi4~03;xQE-={$y^^?uO3>LL0H7rQLjQ}oBAh5lOJeze0O|-(p#T5?

literal 0
HcmV?d00001