From 059f10b0fe372d2f87a83caf49deb551b1e81332 Mon Sep 17 00:00:00 2001 From: Hana Sevcikova Date: Thu, 7 Nov 2024 15:49:54 -0800 Subject: [PATCH] adjust to wpp function --- DESCRIPTION | 6 +- NAMESPACE | 2 + R/bayesMig.R | 2 +- R/mcmc_ini.R | 4 +- R/mig_adjust.R | 175 +++++++++++++++++++++++++++++++++++++++++++++ R/predict_mig.R | 115 +---------------------------- man/internal.Rd | 2 +- man/mig.adjust.Rd | 19 ++++- man/mig.predict.Rd | 5 ++ 9 files changed, 208 insertions(+), 122 deletions(-) create mode 100644 R/mig_adjust.R diff --git a/DESCRIPTION b/DESCRIPTION index ae9d83a..eec41e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,12 @@ Package: bayesMig -Version: 0.4-6.9004 -Date: 2024-06-07 +Version: 0.4-6.9005 +Date: 2024-11-07 Title: Bayesian Projection of Migration Authors@R: c(person("Jon", "Azose", role = "aut"), person("Hana", "Sevcikova", email = "hanas@uw.edu", role = c("aut", "cre")), person("Adrian", "Raftery", role = c("aut"))) Depends: R (>= 3.5.0), bayesTFR -Imports: coda, truncnorm, wpp2019 +Imports: coda, truncnorm, wpp2019, data.table Suggests: Description: Producing probabilistic projections of net migration rate for all countries of the world or for subnational units using a Bayesian hierarchical model by Azose an Raftery (2015) . diff --git a/NAMESPACE b/NAMESPACE index ef09f34..5f06250 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -70,6 +70,7 @@ export(mig.partraces.cs.plot) export(mig.partraces.plot) export(mig.predict) export(mig.raftery.diag) +export(mig.shift.prediction.to.wpp) export(mig.trajectories.plot) export(mig.trajectories.plot.all) export(mig.trajectories.table) @@ -79,6 +80,7 @@ export(store.bayesMig.convergence) export(store.bayesMig.prediction) import(bayesTFR) import(coda) +import(data.table) import(grDevices) import(graphics) import(stats) diff --git a/R/bayesMig.R b/R/bayesMig.R index 7c332a3..2a75920 100644 --- a/R/bayesMig.R +++ b/R/bayesMig.R @@ -12,7 +12,7 @@ #' Maintainer: Hana Sevcikova #' #' @docType package -#' @import coda truncnorm wpp2019 +#' @import coda truncnorm wpp2019 data.table #' @import grDevices graphics stats utils bayesTFR #' @importFrom utils data #' diff --git a/R/mcmc_ini.R b/R/mcmc_ini.R index 0ed4f47..03e03f0 100644 --- a/R/mcmc_ini.R +++ b/R/mcmc_ini.R @@ -25,11 +25,11 @@ do.meta.ini <- function(meta, burnin=200, verbose=FALSE) { annual <- meta$annual.simulation #If the user input their own migration file: if(is.null(my.mig.file)){ - if(! wpp.year %in% c(2017, 2019, 2022)){ + if(! (wpp.year %in% c(2017, 2019) || wpp.year >= 2022)){ stop("Only 2017, 2019 and 2022 revisions of WPP are currently supported by bayesMig.") } if(annual && wpp.year < 2022) - warning("If annual is TRUE and wpp.year is not 2022, 5-year data will be interpolated. Otherwise supply annual my.mig.file.") + warning("If annual is TRUE and wpp.year is not >= 2022, 5-year data will be interpolated. Otherwise supply annual my.mig.file.") } migdata <- get.wpp.mig.data (start.year = start.year, present.year = present.year, wpp.year = wpp.year, my.mig.file = my.mig.file, diff --git a/R/mig_adjust.R b/R/mig_adjust.R new file mode 100644 index 0000000..4d595a2 --- /dev/null +++ b/R/mig_adjust.R @@ -0,0 +1,175 @@ +#' @title Adjusting the Projection Medians +#' +#' @description These functions are to be used by expert analysts. They allow to +#' change the projection medians either to specific values, or shift the medians +#' by a given constant or align one projection object with another. +#' +#' @param sim.dir Directory containing the prediction object. +#' @param country Name or numerical code of a country. +#' @param values Vector of the new median values. +#' @param years Numeric vector giving years for which to change the median. +#' In \code{mig.median.set} it gives years which \code{values} correspond to. +#' Ideally it should be of the same length as \code{values}. If it is \code{NULL}, +#' \code{values} are set starting from the first prediction time period. +#' If \code{values} correspond to consecutive years, only the first year might be given here. +#' In \code{mig.align.predictions} it gives years for which the medians should be aligned. +#' @param \dots Additional arguments passed to the underlying adjustment functions, such as +#' \code{verbose} to show/hide the progress of the adjustment. +#' For \code{mig.shift.prediction.to.wpp} it can be \code{stat} with values \dQuote{median} (default) +#' or \dQuote{mean} to specify which statistics should be adjusted; +#' \code{wpp.year} to adjust it to if it differs from the wpp year of the simulation. +#' +#' @details The function \code{mig.median.set} can be used to set the medians of the +#' given country to specific values. +#' +#' @return All functions return an updated object of class \code{\link{bayesMig.prediction}}. +#' @export +#' @rdname mig.adjust +mig.median.set <- function(sim.dir, country, values, years=NULL, ...) { + pred <- get.mig.prediction(sim.dir) + new.pred <- bayesTFR:::.bdem.median.set(pred, type='mig', country=country, + values=values, years=years, ...) + store.bayesMig.prediction(new.pred) + invisible(new.pred) +} + +#' @export +#' @keywords internal +#' @rdname internal +#' +get.mig.shift <- function(country.code, pred) return(bayesTFR::get.tfr.shift(country.code, pred)) + +#' @param reset Logical. If \code{TRUE} medians in a range of \code{from} and \code{to} are +#' reset to their original values. +#' @param shift Constant by which the medians should be offset. It is not used if \code{reset} is \code{TRUE}. +#' @param from Year from which the offset/reset should start. By default, it starts at the first prediction period. +#' @param to Year until which the offset/reset should be done. By default, it is set to the last prediction period. +#' +#' @details Function \code{mig.median.shift} can be used to offset the medians by a specific constant, or to reset +#' the medians to their original values. +#' @export +#' @rdname mig.adjust +mig.median.shift <- function(sim.dir, country, reset = FALSE, shift = 0, + from = NULL, to = NULL) { + pred <- get.mig.prediction(sim.dir) + new.pred <- bayesTFR:::.bdem.median.shift(pred, type='mig', country=country, reset=reset, + shift=shift, from=from, to=to) + store.bayesMig.prediction(new.pred) + invisible(new.pred) +} + +#' @param countries Vector of country names or codes. If this argument is \code{NULL} (default), +#' the reset is done for all countries. +#' +#' @details Function \code{mig.median.reset} resets medians of the given countries +#' to the original values. By default it deletes adjustments for all countries. +#' @export +#' @rdname mig.adjust +mig.median.reset <- function(sim.dir, countries = NULL) { + if(is.null(countries)) { + pred <- get.mig.prediction(sim.dir) + pred$median.shift <- NULL + store.bayesMig.prediction(pred) + cat('\nMedians for all countries reset.\n') + } else + for(country in countries) pred <- mig.median.shift(sim.dir, country, reset=TRUE) + invisible(pred) +} + +#' @param sim.dir1 Directory with the bayesMig prediction object to be adjusted. +#' @param sim.dir2 Directory with the bayesMig prediction object used to align the medians from \code{sim.dir1} to. +#' @param country.codes Numerical codes of countries to adjust. By default all countries +#' found in \code{sim.dir2} are adjusted in \code{sim.dir1}. +#' @details Function \code{mig.align.predictions} shifts medians stored in \code{sim.dir1} to match +#' the medians in \code{sim.dir1}. +#' +#' In all cases, if a median is modified, the corresponding offset is stored in the prediction object +#' (element \code{median.shift}). All functions write the updated prediction object back to disk. All +#' functions in the package that use trajectories and trajectory statistics use the \code{median.shift} +#' values to offset the results correspondingly, i.e. trajectories are shifted the same way as the +#' medians. +#' @export +#' @rdname mig.adjust +mig.align.predictions <- function(sim.dir1, sim.dir2, country.codes = NULL, + years = NULL, ...){ + pred1 <- get.mig.prediction(sim.dir1) + pred2 <- get.mig.prediction(sim.dir2) + cntries1 <- if(is.null(country.codes)) get.countries.table(pred1)$code else country.codes + cntries2 <- get.countries.table(pred2)$code + avail.years <- dimnames(pred2$quantiles)[[3]] + if(is.null(years)) + years <- avail.years + else years <- intersect(as.character(years), avail.years) + for(cntry in cntries1){ + cidx <- which(cntries2 == cntry) + if(length(cidx) == 0){ + warning("Country ", cntry, " not found in sim.dir2. No adjustment for this country.") + next + } + adjust.to <- get.median.from.prediction(pred2, cidx, cntry)[years] + pred <- mig.median.set(sim.dir1, cntry, values = adjust.to, + years = as.integer(names(adjust.to)), ...) + } + invisible(pred) +} + +.do.mig.shift.prediction.to.wpp <- function(pred, stat = "median", wpp.year = NULL, verbose = TRUE){ + country_code <- year <- NULL # for CRAN check not to complain + meta <- pred$mcmc.set$meta + wpp.year <- if(!is.null(wpp.year)) wpp.year else meta$wpp.year + countries <- get.countries.table(pred$mcmc.set)$code + if(wpp.year < 2024) stop("Function not implemented for WPP < 2024.") + n <- if(meta$annual.simulation) "1" else "5" + migcounts.wpp <- bayesTFR:::load.bdem.dataset(paste0("migproj", n, "dt"), wpp.year = wpp.year, verbose = verbose) + popcounts.wpp <- bayesTFR:::load.bdem.dataset(paste0("popproj", n, "dt"), wpp.year = wpp.year, verbose = verbose) + if(!meta$annual.simulation) migcounts.wpp[, year := year + 2] # align migration and pop years + + migrates.wpp <- merge(migcounts.wpp[, c("country_code", "name", "year", "mig"), with = FALSE], + popcounts.wpp[, c("country_code", "name", "year", "pop"), with = FALSE], + by = c("country_code", "name", "year") + ) + migrates.wpp$wpp <- migrates.wpp$mig / (migrates.wpp$pop - migrates.wpp$mig) + if(!meta$annual.simulation) migrates.wpp[, year := year - 2] + + pred.years <- as.numeric(dimnames(pred$quantiles)[[3]]) + pred$median.shift <- NULL + + if(verbose) cat("\n") + for(icntry in seq_along(countries)) { + if (verbose) { + if(interactive()) cat("\rAdjusting countries' prediction to wpp", wpp.year, " ... ", round(icntry/length(countries) * 100), ' %') + else { + if (icntry == 1) + cat("Adjusting countries' prediction to wpp", wpp.year, " ... ") + cat(icntry, ", ") + } + } + cntry <- countries[icntry] + if(!stat %in% c("median", "mean")) stop("Argument 'stat' must be 'median' or 'mean', but is ", stat, ".") + to.match <- merge(data.table::data.table(year = pred.years, median = pred$quantiles[icntry, "0.5", ]), + migrates.wpp[country_code == cntry, c("year", "wpp"), with = FALSE], + by = "year", all.x = TRUE) + if(stat == "mean") to.match[["median"]] <- to.match[["median"]] + pred$traj.mean.sd[icntry, 1, ] - to.match[["median"]] # difference between the mean and median + to.match$wpp[is.na(to.match$wpp)] <- to.match$median[is.na(to.match$wpp)] # no shift for years that don't match + to.match$shift <- to.match$wpp - to.match$median + if(sum(to.match$shift) != 0) + pred$median.shift[[as.character(cntry)]] <- to.match$shift + } + if(verbose) cat("\n") + return(pred) +} + +#' @details Function \code{mig.shift.prediction.to.wpp} shifts the projected medians or means +#' (if \code{stat} is \dQuote{mean}), so that they correspond to the values found in the \code{migproj1dt} or \code{migproj5dt} +#' datasets of the \pkg{wpp} package that either corresponds to the package used for the simulation itself +#' or is given by the \code{wpp.year} argument. Currently, the function only works for \pkg{wpp2024}. +#' Note that regardless if it is an adjustment of the median or mean, the corresponding offset is always +#' converted to a shift of the median. +#' @export +#' @rdname mig.adjust +mig.shift.prediction.to.wpp <- function(sim.dir, ...){ + pred <- get.mig.prediction(sim.dir) + new.pred <- .do.mig.shift.prediction.to.wpp(pred, ...) + store.bayesMig.prediction(new.pred) + invisible(new.pred) +} diff --git a/R/predict_mig.R b/R/predict_mig.R index 7b55255..2d94ed6 100644 --- a/R/predict_mig.R +++ b/R/predict_mig.R @@ -38,6 +38,9 @@ #' the historical cummulative thresholds for non-GCC countries. If this argument is \code{TRUE}, this distinction is not made. #' It is important to set it to \code{TRUE} in a sub-national simulation to avoid any random overlaps #' of UN codes and user-defined codes. +#' @param fixed.thresholds List with optional elements \dQuote{lower} and \dQuote{upper}. Each of them is a list defining +#' lower and upper bounds of the future migration rate for specific locations. The name of each item is the location code +#' and the value is one number defining the corresponding threshold. #' @param post.last.observed If a user-specific data file was used during estimation and the data #' contained the \dQuote{last.observed} column, this argument determines how to treat the time periods #' between the last observed point and the start year of the prediction, for locations where there is @@ -578,115 +581,3 @@ get.migration.thresholds <- function(meta, nperiods=6, ignore.gcc = FALSE) { return(df) } - -#' @title Adjusting the Projection Medians -#' -#' @description These functions are to be used by expert analysts. They allow to -#' change the projection medians either to specific values, or shift the medians -#' by a given constant or align one projection object with another. -#' -#' @param sim.dir Directory containing the prediction object. -#' @param country Name or numerical code of a country. -#' @param values Vector of the new median values. -#' @param years Numeric vector giving years for which to change the median. -#' In \code{mig.median.set} it gives years which \code{values} correspond to. -#' Ideally it should be of the same length as \code{values}. If it is \code{NULL}, -#' \code{values} are set starting from the first prediction time period. -#' If \code{values} correspond to consecutive years, only the first year might be given here. -#' In \code{mig.align.predictions} it gives years for which the medians should be aligned. -#' @param \dots Additional arguments passed to the underlying adjustment function. -#' It can be \code{verbose} to show/hide the progress of the adjustment. -#' -#' @details The function \code{mig.median.set} can be used to set the medians of the -#' given country to specific values. -#' -#' @return All functions return an updated object of class \code{\link{bayesMig.prediction}}. -#' @export -#' @rdname mig.adjust -mig.median.set <- function(sim.dir, country, values, years=NULL, ...) { - pred <- get.mig.prediction(sim.dir) - new.pred <- bayesTFR:::.bdem.median.set(pred, type='mig', country=country, - values=values, years=years, ...) - store.bayesMig.prediction(new.pred) - invisible(new.pred) -} - -#' @export -#' @keywords internal -#' @rdname internal -#' -get.mig.shift <- function(country.code, pred) return(bayesTFR::get.tfr.shift(country.code, pred)) - -#' @param reset Logical. If \code{TRUE} medians in a range of \code{from} and \code{to} are -#' reset to their original values. -#' @param shift Constant by which the medians should be offset. It is not used if \code{reset} is \code{TRUE}. -#' @param from Year from which the offset/reset should start. By default, it starts at the first prediction period. -#' @param to Year until which the offset/reset should be done. By default, it is set to the last prediction period. -#' -#' @details Function \code{mig.median.shift} can be used to offset the medians by a specific constant, or to reset -#' the medians to their original values. -#' @export -#' @rdname mig.adjust -mig.median.shift <- function(sim.dir, country, reset = FALSE, shift = 0, - from = NULL, to = NULL) { - pred <- get.mig.prediction(sim.dir) - new.pred <- bayesTFR:::.bdem.median.shift(pred, type='mig', country=country, reset=reset, - shift=shift, from=from, to=to) - store.bayesMig.prediction(new.pred) - invisible(new.pred) -} - -#' @param countries Vector of country names or codes. If this argument is \code{NULL} (default), -#' the reset is done for all countries. -#' -#' @details Function \code{mig.median.reset} resets medians of the given countries -#' to the original values. By default it deletes adjustments for all countries. -#' @export -#' @rdname mig.adjust -mig.median.reset <- function(sim.dir, countries = NULL) { - if(is.null(countries)) { - pred <- get.mig.prediction(sim.dir) - pred$median.shift <- NULL - store.bayesMig.prediction(pred) - cat('\nMedians for all countries reset.\n') - } else - for(country in countries) pred <- mig.median.shift(sim.dir, country, reset=TRUE) - invisible(pred) -} - -#' @param sim.dir1 Directory with the bayesMig prediction object to be adjusted. -#' @param sim.dir2 Directory with the bayesMig prediction object used to align the medians from \code{sim.dir1} to. -#' @param country.codes Numerical codes of countries to adjust. By default all countries -#' found in \code{sim.dir2} are adjusted in \code{sim.dir1}. -#' @details Function \code{mig.align.predictions} shifts medians stored in \code{sim.dir1} to match -#' the medians in \code{sim.dir1}. -#' -#' In all cases, if a median is modified, the corresponding offset is stored in the prediction object -#' (element \code{median.shift}). All functions write the updated prediction object back to disk. All -#' functions in the package that use trajectories and trajectory statistics use the \code{median.shift} -#' values to offset the results correspondingly, i.e. trajectories are shifted the same way as the -#' medians. -#' @export -#' @rdname mig.adjust -mig.align.predictions <- function(sim.dir1, sim.dir2, country.codes = NULL, - years = NULL, ...){ - pred1 <- get.mig.prediction(sim.dir1) - pred2 <- get.mig.prediction(sim.dir2) - cntries1 <- if(is.null(country.codes)) get.countries.table(pred1)$code else country.codes - cntries2 <- get.countries.table(pred2)$code - avail.years <- dimnames(pred2$quantiles)[[3]] - if(is.null(years)) - years <- avail.years - else years <- intersect(as.character(years), avail.years) - for(cntry in cntries1){ - cidx <- which(cntries2 == cntry) - if(length(cidx) == 0){ - warning("Country ", cntry, " not found in sim.dir2. No adjustment for this country.") - next - } - adjust.to <- get.median.from.prediction(pred2, cidx, cntry)[years] - pred <- mig.median.set(sim.dir1, cntry, values = adjust.to, - years = as.integer(names(adjust.to)), ...) - } - invisible(pred) -} \ No newline at end of file diff --git a/man/internal.Rd b/man/internal.Rd index 0b9d574..f582148 100644 --- a/man/internal.Rd +++ b/man/internal.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcmc_storage.R, R/predict_mig.R +% Please edit documentation in R/mcmc_storage.R, R/mig_adjust.R \name{store.bayesMig.convergence} \alias{store.bayesMig.convergence} \alias{store.bayesMig.prediction} diff --git a/man/mig.adjust.Rd b/man/mig.adjust.Rd index 78c8a48..e241f14 100644 --- a/man/mig.adjust.Rd +++ b/man/mig.adjust.Rd @@ -1,10 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/predict_mig.R +% Please edit documentation in R/mig_adjust.R \name{mig.median.set} \alias{mig.median.set} \alias{mig.median.shift} \alias{mig.median.reset} \alias{mig.align.predictions} +\alias{mig.shift.prediction.to.wpp} \title{Adjusting the Projection Medians} \usage{ mig.median.set(sim.dir, country, values, years = NULL, ...) @@ -27,6 +28,8 @@ mig.align.predictions( years = NULL, ... ) + +mig.shift.prediction.to.wpp(sim.dir, ...) } \arguments{ \item{sim.dir}{Directory containing the prediction object.} @@ -42,8 +45,11 @@ Ideally it should be of the same length as \code{values}. If it is \code{NULL}, If \code{values} correspond to consecutive years, only the first year might be given here. In \code{mig.align.predictions} it gives years for which the medians should be aligned.} -\item{\dots}{Additional arguments passed to the underlying adjustment function. -It can be \code{verbose} to show/hide the progress of the adjustment.} +\item{\dots}{Additional arguments passed to the underlying adjustment functions, such as +\code{verbose} to show/hide the progress of the adjustment. +For \code{mig.shift.prediction.to.wpp} it can be \code{stat} with values \dQuote{median} (default) +or \dQuote{mean} to specify which statistics should be adjusted; +\code{wpp.year} to adjust it to if it differs from the wpp year of the simulation.} \item{reset}{Logical. If \code{TRUE} medians in a range of \code{from} and \code{to} are reset to their original values.} @@ -90,4 +96,11 @@ Function \code{mig.align.predictions} shifts medians stored in \code{sim.dir1} t functions in the package that use trajectories and trajectory statistics use the \code{median.shift} values to offset the results correspondingly, i.e. trajectories are shifted the same way as the medians. + +Function \code{mig.shift.prediction.to.wpp} shifts the projected medians or means + (if \code{stat} is \dQuote{mean}), so that they correspond to the values found in the \code{migproj1dt} or \code{migproj5dt} + datasets of the \pkg{wpp} package that either corresponds to the package used for the simulation itself + or is given by the \code{wpp.year} argument. Currently, the function only works for \pkg{wpp2024}. + Note that regardless if it is an adjustment of the median or mean, the corresponding offset is always + converted to a shift of the median. } diff --git a/man/mig.predict.Rd b/man/mig.predict.Rd index 655207a..53641c5 100644 --- a/man/mig.predict.Rd +++ b/man/mig.predict.Rd @@ -16,6 +16,7 @@ mig.predict( burnin = 20000, use.cummulative.threshold = FALSE, ignore.gcc.in.threshold = FALSE, + fixed.thresholds = NULL, post.last.observed = c("obsdata", "alldata", "impute"), save.as.ascii = 0, output.dir = NULL, @@ -68,6 +69,10 @@ the historical cummulative thresholds for non-GCC countries. If this argument is It is important to set it to \code{TRUE} in a sub-national simulation to avoid any random overlaps of UN codes and user-defined codes.} +\item{fixed.thresholds}{List with optional elements \dQuote{lower} and \dQuote{upper}. Each of them is a list defining +lower and upper bounds of the future migration rate for specific locations. The name of each item is the location code +and the value is one number defining the corresponding threshold.} + \item{post.last.observed}{If a user-specific data file was used during estimation and the data contained the \dQuote{last.observed} column, this argument determines how to treat the time periods between the last observed point and the start year of the prediction, for locations where there is