From 2c627c526f9c3edb080be08794c09e746ea3aa3c Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Sun, 24 Mar 2024 20:36:23 -0700 Subject: [PATCH] added argument use.wpp.data --- ChangeLog | 2 ++ DESCRIPTION | 2 +- R/run_mcmc.R | 22 +++++++++++++++------- R/wpp_data.R | 16 ++++++++++------ man/run.e0.mcmc.Rd | 8 +++++--- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1a4a291..b302b99 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,8 @@ Fixed bug in e0.joint.plot for annual prediction object. Fixed bug in setting time index when imputation is present in e0.predict.subnat(). +Added argument use.wpp.data to run.e0.mcmc(). + 5.2-0 (09/15/2023) ----- Annual subnational projections are now possible, via the argument "annual" diff --git a/DESCRIPTION b/DESCRIPTION index 962c58d..8be3f3f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: bayesLife Type: Package Title: Bayesian Projection of Life Expectancy -Version: 5.2-0.9004 +Version: 5.2-0.9005 Date: 2024-03-24 Author: Hana Sevcikova, Adrian Raftery, Jennifer Chunn Maintainer: Hana Sevcikova diff --git a/R/run_mcmc.R b/R/run_mcmc.R index c58e761..11d0921 100644 --- a/R/run_mcmc.R +++ b/R/run_mcmc.R @@ -70,7 +70,7 @@ run.e0.mcmc <- function(sex=c("Female", "Male"), nr.chains = 3, iter = 160000, output.dir = file.path(getwd(), 'bayesLife.output'), thin = 10, replace.output = FALSE, annual = FALSE, start.year = 1873, present.year = 2020, wpp.year = 2019, - my.e0.file = NULL, my.locations.file = NULL, + my.e0.file = NULL, my.locations.file = NULL, use.wpp.data = TRUE, constant.variance = FALSE, seed = NULL, parallel = FALSE, nr.nodes = nr.chains, compression.type = 'None', verbose = FALSE, verbose.iter = 100, mcmc.options = NULL, ...) 
{ @@ -120,7 +120,9 @@ run.e0.mcmc <- function(sex=c("Female", "Male"), nr.chains = 3, iter = 160000, my.e0.file = my.e0.file, my.locations.file = my.locations.file, output.dir = output.dir, mcmc.options = mcoptions, constant.variance = constant.variance, - compression.type = compression.type, verbose = verbose) + compression.type = compression.type, + use.wpp.data = use.wpp.data, + verbose = verbose) store.bayesLife.meta.object(bayesLife.mcmc.meta, output.dir) starting.values <- match.ini.to.chains(nr.chains, annual = annual) iter <- .match.length.to.nr.chains(iter, nr.chains, "iter") @@ -466,19 +468,24 @@ e0.mcmc.run.chain.extra <- function(chain.id, mcmc.list, countries, posterior.sa e0.mcmc.meta.ini <- function(sex = "F", nr.chains = 1, start.year = 1950, present.year = 2020, wpp.year = 2019, my.e0.file = NULL, my.locations.file = NULL, annual.simulation = FALSE, output.dir = file.path(getwd(), 'bayesLife.output'), - mcmc.options = NULL, ..., verbose=FALSE) { + mcmc.options = NULL, use.wpp.data = TRUE, ..., verbose=FALSE) { mcmc.input <- c(list(sex = sex, nr.chains = nr.chains, start.year = start.year, present.year = present.year, wpp.year = wpp.year, my.e0.file = my.e0.file, annual.simulation = annual.simulation, - output.dir = output.dir, mcmc.options = mcmc.options), list(...)) + use.wpp.data = use.wpp.data, output.dir = output.dir, + mcmc.options = mcmc.options), list(...)) if(present.year - 3 > wpp.year) - warning("present.year is much larger then wpp.year. Make sure WPP data for present.year are available.") + warning("present.year is much larger then wpp.year. Make sure WPP data for present.year are available.") + if(!use.wpp.data && is.null(my.e0.file)) { + warning("If use.wpp.data is set to FALSE, my.e0.file should be given. 
The simulation will use default WPP data.") + use.wpp.data <- TRUE + } data <- get.wpp.e0.data (sex, start.year = start.year, present.year = present.year, wpp.year = wpp.year, my.e0.file = my.e0.file, include.hiv = mcmc.options$include.hiv.countries, my.locations.file = my.locations.file, - annual = annual.simulation, verbose = verbose) + annual = annual.simulation, use.wpp.data = use.wpp.data, verbose = verbose) part.ini <- .do.part.e0.mcmc.meta.ini(data, mcmc.input) new.meta <- c(mcmc.input, part.ini) if(!is.null(mcmc.options$meta.ini.fun)) @@ -587,7 +594,8 @@ e0.mcmc.meta.ini.extra <- function(mcmc.set, countries = NULL, my.e0.file = NULL #create e0 matrix only for the extra countries e0.with.regions <- set.e0.wpp.extra(meta, countries=countries, my.e0.file = my.e0.file, my.locations.file = my.locations.file, - annual = meta$annual.simulation, verbose = verbose) + annual = meta$annual.simulation, use.wpp.data = meta$use.wpp.data, + verbose = verbose) if(is.null(e0.with.regions)) return(list(meta = meta, index = c())) # join old and new country.overwrites option; remove possible duplicates if(!is.null(country.overwrites)) { diff --git a/R/wpp_data.R b/R/wpp_data.R index 06c9424..0c35db1 100644 --- a/R/wpp_data.R +++ b/R/wpp_data.R @@ -1,6 +1,7 @@ get.wpp.e0.data <- function(sex = 'M', start.year = 1950, present.year = 2015, wpp.year = 2017, my.e0.file = NULL, include.hiv = FALSE, - my.locations.file = NULL, annual = FALSE, verbose = FALSE) { + my.locations.file = NULL, annual = FALSE, use.wpp.data = TRUE, + verbose = FALSE) { sex <- toupper(sex) if(sex != 'M' && sex != 'F') stop('Allowed values for argument "sex" are "M" and "F".') @@ -9,7 +10,7 @@ get.wpp.e0.data <- function(sex = 'M', start.year = 1950, present.year = 2015, ######################################## un.object <- read.UNe0(sex=sex, wpp.year=wpp.year, my.e0.file=my.e0.file, present.year=present.year, annual = annual, - verbose=verbose) + use.wpp.data = use.wpp.data, verbose=verbose) data <- 
un.object$data.object$data # get region and area data locations <- bayesTFR:::read.UNlocations(data, wpp.year=wpp.year, my.locations.file=my.locations.file, @@ -90,10 +91,11 @@ read.UNe0 <- function(sex, wpp.year, my.e0.file=NULL, annual = FALSE, ...) { } set.e0.wpp.extra <- function(meta, countries=NULL, my.e0.file=NULL, my.locations.file=NULL, - annual = FALSE, verbose=FALSE) { + annual = FALSE, verbose=FALSE, use.wpp.data = TRUE) { #'countries' is a vector of country or region codes un.object <- read.UNe0(sex=meta$sex, wpp.year=meta$wpp.year, my.e0.file=my.e0.file, - present.year=meta$present.year, annual = annual, verbose=verbose) + present.year=meta$present.year, annual = annual, + use.wpp.data = use.wpp.data, verbose=verbose) data <- un.object$data.object extra.wpp <- bayesTFR:::.extra.matrix.regions(data=data, countries=countries, meta=meta, package="bayesLife", my.locations.file=my.locations.file, @@ -117,7 +119,8 @@ set.e0.wpp.extra <- function(meta, countries=NULL, my.e0.file=NULL, my.locations return(extra.wpp) } -get.wpp.e0.data.for.countries <- function(meta, sex='M', my.e0.file=NULL, my.locations.file=NULL, verbose=FALSE) { +get.wpp.e0.data.for.countries <- function(meta, sex='M', my.e0.file=NULL, + my.locations.file=NULL, verbose=FALSE) { sex <- toupper(sex) if(sex != 'M' && sex != 'F') stop('Allowed values for argument "sex" are "M" and "F".') @@ -125,7 +128,8 @@ get.wpp.e0.data.for.countries <- function(meta, sex='M', my.e0.file=NULL, my.loc # set data and match with areas ######################################## un.object <- read.UNe0(sex=sex, wpp.year=meta$wpp.year, present.year=meta$present.year, - my.e0.file=my.e0.file, annual = meta$annual.simulation, verbose=verbose) + my.e0.file=my.e0.file, annual = meta$annual.simulation, + use.wpp.data = meta$use.wpp.data, verbose=verbose) data <- un.object$data.object$data # get region and area data locations <- bayesTFR:::read.UNlocations(data, wpp.year=meta$wpp.year, diff --git a/man/run.e0.mcmc.Rd 
b/man/run.e0.mcmc.Rd index f8bcc39..0324e8e 100644 --- a/man/run.e0.mcmc.Rd +++ b/man/run.e0.mcmc.Rd @@ -14,8 +14,9 @@ run.e0.mcmc(sex = c("Female", "Male"), nr.chains = 3, iter = 160000, output.dir = file.path(getwd(), "bayesLife.output"), thin = 10, replace.output = FALSE, annual = FALSE, start.year = 1873, present.year = 2020, wpp.year = 2019, - my.e0.file = NULL, my.locations.file = NULL, constant.variance = FALSE, - seed = NULL, parallel = FALSE, nr.nodes = nr.chains, compression.type = 'None', + my.e0.file = NULL, my.locations.file = NULL, use.wpp.data = TRUE, + constant.variance = FALSE, seed = NULL, + parallel = FALSE, nr.nodes = nr.chains, compression.type = 'None', verbose = FALSE, verbose.iter = 100, mcmc.options = NULL, \dots) continue.e0.mcmc(iter, chain.ids = NULL, @@ -37,6 +38,7 @@ continue.e0.mcmc(iter, chain.ids = NULL, \item{wpp.year}{Year for which WPP data is used. The functions loads a package called \pkg{wpp}\eqn{x} where \eqn{x} is the \code{wpp.year} and uses the \code{\link[wpp2019]{e0}*} datasets.} \item{my.e0.file}{File name containing user-specified e0 time series for one or more countries. See Details below.} \item{my.locations.file}{File name containing user-specified locations. See Details below.} + \item{use.wpp.data}{Logical indicating if default WPP data should be used, i.e. if \code{my.e0.file} will be matched with the WPP data in terms of time periods and locations. If \code{FALSE}, it is assumed that the \code{my.e0.file} contains all locations and time periods to be included in the simulation.} \item{constant.variance}{Logical indicating if the model should be estimated using constant variance. It should only be used if the standard deviation lowess is to be analysed, see \code{\link{compute.loess}}.} \item{seed}{Seed of the random number generator. If \code{NULL} no seed is set. It can be used to generate reproducible results.} \item{parallel}{Logical determining if the simulation should run multiple chains in parallel. 
If it is \code{TRUE}, the package \pkg{snowFT} is required.} @@ -56,7 +58,7 @@ The function \code{run.e0.mcmc} creates an object of class \code{\link{bayesLife Using the function \code{continue.e0.mcmc} one can continue simulating an existing MCMCs by \code{iter} iterations for either all or selected chains. The global options used for generating the existing MCMCs will be used. Only the \code{auto.conf} option can be overwritten by passing the new value as an argument. -The function loads observed data (further denoted as WPP dataset), depending on the specified sex, from the \code{\link[wpp2019]{e0F}} (\code{\link[wpp2019]{e0M}}) and \code{\link[wpp2019]{e0F_supplemental}} (\code{\link[wpp2019]{e0M_supplemental}}) datasets in a \pkg{wpp}\eqn{x} package where \eqn{x} is the \code{wpp.year}. It is then merged with the \code{\link{include}} dataset that corresponds to the same \code{wpp.year}. The argument \code{my.e0.file} can be used to overwrite those default data. Such a file can include a subset of countries contained in the WPP dataset, as well as a set of new countries. In the former case, +The function loads observed data (further denoted as WPP dataset), depending on the specified sex, from the \code{\link[wpp2019]{e0F}} (\code{\link[wpp2019]{e0M}}) and \code{\link[wpp2019]{e0F_supplemental}} (\code{\link[wpp2019]{e0M_supplemental}}) datasets in a \pkg{wpp}\eqn{x} package where \eqn{x} is the \code{wpp.year}. It is then merged with the \code{\link{include}} dataset that corresponds to the same \code{wpp.year}. The argument \code{my.e0.file} can be used to overwrite those default data. If \code{use.wpp.data} is \code{FALSE}, it fully replaces the default dataset. Otherwise (by default), such a file can include a subset of countries contained in the WPP dataset, as well as a set of new countries. In the former case, the function replaces the corresponding country data from the WPP dataset with values in this file. 
Only columns are replaced that match column names of the WPP dataset, and in addition, columns \sQuote{last.observed} and \sQuote{include_code} are used, if present. Countries are merged with WPP using the column \sQuote{country_code}. In addition, in order for the countries to be included in the simulation, in both cases (whether they are included in the WPP dataset or not), they must be contained in the table of locations (\code{\link[wpp2019]{UNlocations}}). In addition, their corresponding \sQuote{include_code} must be set to 2. If the column \sQuote{include_code} is present in \code{my.e0.file}, its value overwrites the default include code, unless it is -1. If \code{annual} is \code{TRUE}, the default WPP dataset is not used and the \code{my.e0.file} argument must provide the dataset to be used for estimation. Its time-related columns should be single years.