diff --git a/DESCRIPTION b/DESCRIPTION index 68b013bf..998a1c6e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,6 +56,7 @@ Imports: jsonlite, loo, purrr, + pracma, qtl, reshape2, usethis, diff --git a/NAMESPACE b/NAMESPACE index afb7a5e9..d66db97f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,10 +2,13 @@ export(extract_seromodel_summary) export(fit_seromodel) +export(generate_sim_data) export(get_cohort_ages) export(get_exposure_matrix) export(get_foi_central_estimates) export(get_prev_expanded) +export(get_sim_n_seropositive) +export(get_sim_probability) export(get_table_rhats) export(plot_foi) export(plot_info_table) diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R index 933e8f27..b828957a 100644 --- a/R/seroprevalence_data.R +++ b/R/seroprevalence_data.R @@ -22,7 +22,9 @@ #' \tabular{ll}{ #' \code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr #' \code{sample_size} \tab The size of the sample \cr \tab \cr -#' \code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +#' \code{birth_year} \tab Years in which the subjects were borned +#' according to the age group marker \code{age_mean_f}\cr \tab \cr +#' according to the age group marker \code{age_mean_f}\cr \tab \cr #' \code{prev_obs} \tab Observed prevalence \cr \tab \cr #' \code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr #' \code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr @@ -37,21 +39,24 @@ prepare_serodata <- function(serodata = serodata, #Check that serodata has the right columns stopifnot("serodata must contain the right columns" = all(c("survey", "total", "counts", "age_min", "age_max", "tsur", - "country","test","antibody" + "country", "test", "antibody" ) %in% colnames(serodata) + ) | + all(c("survey", "total", "counts", "age_mean_f", "tsur", + "country", "test", "antibody") %in% + colnames(serodata) ) - ) + ) if(!any(colnames(serodata) == "age_mean_f")){ serodata <- serodata %>% dplyr::mutate(age_mean_f = floor((age_min + age_max) / 2), sample_size = sum(total)) - } + } if(!any(colnames(serodata) == "birth_year")){ serodata <- serodata %>% dplyr::mutate(birth_year = .data$tsur - .data$age_mean_f) } - serodata <- serodata %>% cbind( Hmisc::binconf( @@ -76,8 +81,8 @@ prepare_serodata <- function(serodata = serodata, #' Function that prepares a pre-processed serological survey dataset to plot the binomial confidence intervals of the seroprevalence grouped by #' age group #' -#' This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals -#' of its corresponding seroprevalence grouped by age group. +#' This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals +#' of its corresponding seroprevalence grouped by age group. #' @inheritParams run_seromodel #' @return data set with the binomial confidence intervals #' @examples @@ -115,4 +120,190 @@ prepare_bin_data <- function(serodata) { p_obs_bin_u = .data$Upper ) return(xx) -} \ No newline at end of file +} + +#' Function that generates the probabilities of being previously exposed to a pathogen. +#' +#' @param sim_data A dataframe object containing the following columns: +#' \tabular{ll}{ +#' \code{tsur} \tab Year of the survey\cr \tab \cr +#' \code{age_mean_f} \tab Age \cr \tab \cr +#' \code{birth_year} \tab Years in which the subjects were borned +#' according to the age group marker \code{age_mean_f}\cr \tab \cr +#' } +#' @param foi Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present +#' @param seed The seed for random number generation. +#' @return A dataframe containing the following columns: +#' \tabular{ll}{ +#' \code{age} \tab Exposure ages \cr \tab \cr +#' \code{probability} \tab Probability to obtain a seropositive case for each age according to the provided FoI\cr \tab \cr +#' } +#' @export +get_sim_probability <- function(sim_data, foi) { + cohort_ages <- get_cohort_ages(sim_data) + exposure_ages <- rev(cohort_ages$age) + exposure_matrix <- get_exposure_matrix(sim_data) + probabilities <- purrr::map_dbl(exposure_ages, ~1-exp(-pracma::dot(exposure_matrix[., ], foi))) + + sim_probability <- data.frame( + age = exposure_ages, + probability = probabilities + ) + return(sim_probability) +} + +#' Function that generates a sample of counts of seropositive individuals by sampling from a binomial distribution +#' +#' @param sim_data A dataframe object containing the following columns: +#' \tabular{ll}{ +#' \code{tsur} \tab Year of the survey\cr \tab \cr +#' \code{age_mean_f} \tab Age \cr \tab \cr +#' \code{birth_year} \tab Years in which the subjects were borned +#' according to the age group marker \code{age_mean_f}\cr \tab \cr +#' } +#' @param foi Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present +#' @param sample_size_by_age Sample size for each age group: either a single integer indicating that the sample size is +#' the same for all ages or a vector of sample sizes the same length as +#' This corresponds to the number of trials \code{size} in \link[stats]{rbinom}. +#' @param seed The seed for random number generation. +#' @return A dataframe containing the following columns: +#' \tabular{ll}{ +#' \code{age} \tab Age by the time of the survey \cr \tab \cr +#' \code{n_seropositive} \tab Number of positive cases sampled according to the provided FoI \cr \tab \cr +#' } +#' simulated list of counts following a binomial distribution in accordance with a given +#' force of infection and age class sizes. +#' @examples +#'\dontrun{ +#' foi <- rep(0.02, 50) +#' sim_data <- generate_sim_data(foi = foi, +#' sample_size_by_age = 5, +#' tsur = 2050, +#' birth_year_min = 2000, +#' survey_label = 'foi_sim') +#' sim_n_seropositive <- get_sim_n_seropositive(sim_data = sim_data, +#' foi = foi) +#' } +#' @export +get_sim_n_seropositive <- function(sim_data, foi, sample_size_by_age, seed = 1234) { + sim_probability <- get_sim_probability(sim_data = sim_data, foi = foi) + + set.seed(seed = seed) + n_seropositive <- purrr::map_int(sim_probability$probability, ~rbinom(1, sample_size_by_age, .)) + + sim_n_seropositive <- data.frame( + age = sim_probability$age, + n_seropositive = n_seropositive + ) + return(sim_n_seropositive) +} + +#' Function that generates a simulated serosurvey according to the specified FoI +#' +#' @param foi Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present +#' @param sample_size_by_age Size of each age group specified by either an atomic +#' vector of the same size as \code{foi} or an integer. +#' This corresponds to the number of trials \code{size} in \link[stats]{rbinom}. +#' @param tsur Year in which the serosurvey was conducted. +#' @param birth_year_min Minimum age of year in the simulated serosurvey. +#' @param survey_label Label for the resulting simulated serosurvey. +#' @return Dataframe object containing the simulated data generated from \code{foi} +#' @examples +#'\dontrun{ +#' sample_size_by_age = 5 +#' foi <- rep(0.02, 50) +#' sim_data <- generate_sim_data(foi = foi, +#' sample_size_by_age = sample_size_by_age, +#' tsur = 2050, +#' birth_year_min = 2000, +#' survey_label = 'sim_constant_foi') +#' } +#' @export +generate_sim_data <- function(foi, + sample_size_by_age, + tsur, + birth_year_min, + survey_label, + test = "fake", + antibody = "IgG", + seed = 1234 + ){ + sim_data <- data.frame(birth_year = c(birth_year_min:(tsur - 1))) %>% + mutate(tsur = tsur, + country = 'None', + test = test, + antibody = antibody, + survey = survey_label, + age_mean_f = tsur - birth_year) + sim_n_seropositive <- get_sim_n_seropositive(sim_data, foi, sample_size_by_age, seed = seed) + sim_data <- sim_data %>% + mutate(counts = sim_n_seropositive$n_seropositive, + total = sample_size_by_age) %>% + prepare_serodata() + + return(sim_data) +} + +#' Method for constructing age-group variable from age column +#' +#' This function was taken from \link[vaccineff]{get_age_group}. +#' This method splits an age interval from age_min to age_max into +#' (age_max-age_min)/step intervals. +#' By default age_min is set 0, however it can be assigned by +#' convenience. +#' If the method finds ages greater or equal than age_max +#' it assigns the string ">{age_max}". +#' To avoid errors it is necessary to set step < age_max. +#' It is also suggested to choose the step such +#' that age_max%%(step+1) = 0. +#' @param age vector containing age information +#' @param step step used to split the age interval +#' @return age_group factor variable grouping \code{age} by the age intervals +#' specified by \code{min(age)}, \code{max(age)}. +get_age_group <- function(age, step) { + age_min <- min(age) + age_max <- max(age) + n_steps <- as.integer((age_max - age_min) / step) + 1 + limits_low <- c(as.integer(seq(age_min, + age_max, + length.out = n_steps))) + limits_hgh <- limits_low + step + lim_labels <- paste(as.character(limits_low), as.character(limits_hgh), + sep = "-") + lim_labels[length(lim_labels)] <- paste0("+", + limits_low[length(limits_low)]) + lim_breaks <- c(-Inf, limits_low[2:length(limits_low)] - 1, Inf) + + age_group <- cut(age, + breaks = lim_breaks, + labels = lim_labels, + # this is for the intervals to be closed to the left and open to the right + right = FALSE) + return(age_group) +} + +#' Function that groups a simulated serological dataset by age +#' +#' @param sim_data Dataframe with the same structure as the output of \code{\link{generate_sim_data}}. +#' @param col_age name of the column containing the age information +#' @param step step used to split the age interval +#' @return Dataframe object containing grouped simulated data generated from \code{foi} +group_sim_data <- function(sim_data, + col_age = "age_mean_f", + step = 5) { + age <- sim_data[[col_age]] + sim_data$age_group <- get_age_group(age = age, step = step) + sim_data_grouped <- sim_data %>% + group_by(age_group) %>% + dplyr::summarise(total = sum(total), counts = sum(counts)) %>% + mutate(tsur = sim_data$tsur[1], + country = "None", + survey = sim_data$survey[1], + test = sim_data$test[1], + antibody = sim_data$antibody[1]) %>% + mutate(age_min = as.integer(sub("\\-.*", "", age_group)), + age_max = as.integer(sub(".*\\-", "", age_group))) %>% + prepare_serodata() + + return(sim_data_grouped) +} diff --git a/R/visualisation.R b/R/visualisation.R index 6c26051f..755b89fb 100644 --- a/R/visualisation.R +++ b/R/visualisation.R @@ -27,7 +27,7 @@ plot_seroprev <- function(serodata, #' Function that generates a seropositivity plot corresponding to the specified fitted serological model #' #' This function generates a seropositivity plot of the specified serological model object. This includes the original data grouped by age -#' as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its +#' as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its #' corresponding confidence interval. #' @inheritParams get_foi_central_estimates #' @inheritParams run_seromodel @@ -108,7 +108,7 @@ plot_seroprev_fitted <- function(seromodel_object, #' Function that generates a Force-of-Infection plot corresponding to the specified fitted serological model #' #' This function generates a Force-of-Infection plot from the results obtained by fitting a serological model. -#' This includes the corresponding binomial confidence interval. +#' This includes the corresponding binomial confidence interval. #' The x axis corresponds to the decades covered by the survey the y axis to the Force-of-Infection. #' @inheritParams get_foi_central_estimates #' @param size_text Text size use in the theme of the graph returned by the function. @@ -138,7 +138,6 @@ plot_foi <- function(seromodel_object, foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] - #-------- This bit is to get the actual length of the foi data foi_data <- get_foi_central_estimates(seromodel_object = seromodel_object, cohort_ages = cohort_ages) @@ -168,16 +167,23 @@ plot_foi <- function(seromodel_object, ggplot2::xlab("Year") #TODO Add warning for foi_sim of different length than exposure years if (!is.null(foi_sim)){ - foi_data_length <- nrow(foi_data) - foi_sim_length <- length(foi_sim) - remove_x_values <- foi_sim_length - foi_data_length - - foi_sim_data <- data.frame(year = foi_data$year, - foi_sim = foi_sim[-c(1:remove_x_values)]) - foi_plot <- foi_plot + - ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim), - colour = "#b30909", - size = size_text/8) + if (nrow(foi_data) != length(foi_sim)) { + remove_x_values <- length(foi_sim) - nrow(foi_data) + foi_sim_data <- data.frame(year = foi_data$year, + foi_sim = foi_sim[-c(1:remove_x_values)]) + foi_plot <- foi_plot + + ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim), + colour = "#b30909", + size = size_text / 8) + } + else{ + foi_sim_data <- data.frame(year = foi_data$year, + foi_sim = foi_sim) + foi_plot <- foi_plot + + ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim), + colour = "#b30909", + size = size_text / 8) + } } } } else { @@ -208,8 +214,8 @@ plot_foi <- function(seromodel_object, #' Function that generates a plot of the R-hat estimates of the specified fitted serological model #' -#' This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}. -#' The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats. +#' This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}. +#' The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats. #' All rhats must be smaller than 1 to ensure convergence (for further details check \link[bayesplot]{rhat}). #' @inheritParams get_foi_central_estimates #' @param size_text Text size use in the theme of the graph returned by the function. @@ -322,7 +328,7 @@ plot_seromodel <- function(seromodel_object, model_summary <- extract_seromodel_summary(seromodel_object = seromodel_object, serodata = serodata) summary_table <- t( - dplyr::select(model_summary, + dplyr::select(model_summary, c('foi_model', 'dataset', 'elpd', 'se', 'converged'))) summary_plot <- plot_info_table(summary_table, size_text = size_text) @@ -333,7 +339,7 @@ plot_seromodel <- function(seromodel_object, foi_plot, rhats_plot, ncol = 1, - nrow = 4, + nrow = 4, rel_heights = c(0.5, 1, 1, 1) ) } @@ -358,7 +364,7 @@ plot_seromodel <- function(seromodel_object, ggplot2::ylab(" ") + ggplot2::xlab(" ") g1 <- g0 - # TODO: This + # TODO: This g0 <- g0 + ggplot2::labs(subtitle = seromodel_object$model_name) + ggplot2::theme(plot.title = ggplot2::element_text(size = 10)) @@ -369,7 +375,7 @@ plot_seromodel <- function(seromodel_object, return(plot_arrange) } -# TODO Improve documentation of @return. +# TODO Improve documentation of @return. # TODO Give more details about the generated plot #' Function that generates a plot for a given table #' @@ -399,4 +405,4 @@ plot_info_table <- function(info, size_text) { fontface = "bold") return(p) -} \ No newline at end of file +} diff --git a/man/generate_sim_data.Rd b/man/generate_sim_data.Rd new file mode 100644 index 00000000..0f68f61d --- /dev/null +++ b/man/generate_sim_data.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/seroprevalence_data.R +\name{generate_sim_data} +\alias{generate_sim_data} +\title{Function that generates a simulated serosurvey according to the specified FoI} +\usage{ +generate_sim_data( + foi, + sample_size_by_age, + tsur, + birth_year_min, + survey_label, + test = "fake", + antibody = "IgG", + seed = 1234 +) +} +\arguments{ +\item{foi}{Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present} + +\item{sample_size_by_age}{Size of each age group specified by either an atomic +vector of the same size as \code{foi} or an integer. +This corresponds to the number of trials \code{size} in \link[stats]{rbinom}.} + +\item{tsur}{Year in which the serosurvey was conducted.} + +\item{birth_year_min}{Minimum age of year in the simulated serosurvey.} + +\item{survey_label}{Label for the resulting simulated serosurvey.} +} +\value{ +Dataframe object containing the simulated data generated from \code{foi} +} +\description{ +Function that generates a simulated serosurvey according to the specified FoI +} +\examples{ +\dontrun{ +sample_size_by_age = 5 +foi <- rep(0.02, 50) +sim_data <- generate_sim_data(foi = foi, + sample_size_by_age = sample_size_by_age, + tsur = 2050, + birth_year_min = 2000, + survey_label = 'sim_constant_foi') +} +} diff --git a/man/get_age_group.Rd b/man/get_age_group.Rd new file mode 100644 index 00000000..9df6f8db --- /dev/null +++ b/man/get_age_group.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/seroprevalence_data.R +\name{get_age_group} +\alias{get_age_group} +\title{Method for constructing age-group variable from age column} +\usage{ +get_age_group(age, step) +} +\arguments{ +\item{age}{vector containing age information} + +\item{step}{step used to split the age interval} +} +\value{ +age_group factor variable grouping \code{age} by the age intervals +specified by \code{min(age)}, \code{max(age)}. +} +\description{ +This function was taken from \link[vaccineff]{get_age_group}. +This method splits an age interval from age_min to age_max into +(age_max-age_min)/step intervals. +By default age_min is set 0, however it can be assigned by +convenience. +If the method finds ages greater or equal than age_max +it assigns the string ">{age_max}". +To avoid errors it is necessary to set step < age_max. +It is also suggested to choose the step such +that age_max%%(step+1) = 0. +} diff --git a/man/get_sim_n_seropositive.Rd b/man/get_sim_n_seropositive.Rd new file mode 100644 index 00000000..10b1a5ea --- /dev/null +++ b/man/get_sim_n_seropositive.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/seroprevalence_data.R +\name{get_sim_n_seropositive} +\alias{get_sim_n_seropositive} +\title{Function that generates a sample of counts of seropositive individuals by sampling from a binomial distribution} +\usage{ +get_sim_n_seropositive(sim_data, foi, sample_size_by_age, seed = 1234) +} +\arguments{ +\item{sim_data}{A dataframe object containing the following columns: +\tabular{ll}{ +\code{tsur} \tab Year of the survey\cr \tab \cr +\code{age_mean_f} \tab Age \cr \tab \cr +\code{birth_year} \tab Years in which the subjects were borned +according to the age group marker \code{age_mean_f}\cr \tab \cr +}} + +\item{foi}{Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present} + +\item{sample_size_by_age}{Sample size for each age group: either a single integer indicating that the sample size is +the same for all ages or a vector of sample sizes the same length as +This corresponds to the number of trials \code{size} in \link[stats]{rbinom}.} + +\item{seed}{The seed for random number generation.} +} +\value{ +A dataframe containing the following columns: +\tabular{ll}{ +\code{age} \tab Age by the time of the survey \cr \tab \cr +\code{n_seropositive} \tab Number of positive cases sampled according to the provided FoI \cr \tab \cr +} +simulated list of counts following a binomial distribution in accordance with a given +force of infection and age class sizes. +} +\description{ +Function that generates a sample of counts of seropositive individuals by sampling from a binomial distribution +} +\examples{ +\dontrun{ +foi <- rep(0.02, 50) +sim_data <- generate_sim_data(foi = foi, + sample_size_by_age = 5, + tsur = 2050, + birth_year_min = 2000, + survey_label = 'foi_sim') +sim_n_seropositive <- get_sim_n_seropositive(sim_data = sim_data, + foi = foi) +} +} diff --git a/man/get_sim_probability.Rd b/man/get_sim_probability.Rd new file mode 100644 index 00000000..624fe1ed --- /dev/null +++ b/man/get_sim_probability.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/seroprevalence_data.R +\name{get_sim_probability} +\alias{get_sim_probability} +\title{Function that generates the probabilities of being previously exposed to a pathogen.} +\usage{ +get_sim_probability(sim_data, foi) +} +\arguments{ +\item{sim_data}{A dataframe object containing the following columns: +\tabular{ll}{ +\code{tsur} \tab Year of the survey\cr \tab \cr +\code{age_mean_f} \tab Age \cr \tab \cr +\code{birth_year} \tab Years in which the subjects were borned +according to the age group marker \code{age_mean_f}\cr \tab \cr +}} + +\item{foi}{Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present} + +\item{seed}{The seed for random number generation.} +} +\value{ +A dataframe containing the following columns: +\tabular{ll}{ +\code{age} \tab Exposure ages \cr \tab \cr +\code{probability} \tab Probability to obtain a seropositive case for each age according to the provided FoI\cr \tab \cr +} +} +\description{ +Function that generates the probabilities of being previously exposed to a pathogen. +} diff --git a/man/group_sim_data.Rd b/man/group_sim_data.Rd new file mode 100644 index 00000000..cbe1f391 --- /dev/null +++ b/man/group_sim_data.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/seroprevalence_data.R +\name{group_sim_data} +\alias{group_sim_data} +\title{Function that groups a simulated serological dataset by age} +\usage{ +group_sim_data(sim_data, col_age = "age_mean_f", step = 5) +} +\arguments{ +\item{sim_data}{Dataframe with the same structure as the output of \code{\link{generate_sim_data}}.} + +\item{col_age}{name of the column containing the age information} + +\item{step}{step used to split the age interval} +} +\value{ +Dataframe object containing grouped simulated data generated from \code{foi} +} +\description{ +Function that groups a simulated serological dataset by age +} diff --git a/man/plot_foi.Rd b/man/plot_foi.Rd index 8dd29d7d..158837ce 100644 --- a/man/plot_foi.Rd +++ b/man/plot_foi.Rd @@ -28,7 +28,7 @@ A ggplot2 object containing the Force-of-infection-vs-time including the corresp } \description{ This function generates a Force-of-Infection plot from the results obtained by fitting a serological model. -This includes the corresponding binomial confidence interval. +This includes the corresponding binomial confidence interval. The x axis corresponds to the decades covered by the survey the y axis to the Force-of-Infection. } \examples{ diff --git a/man/plot_rhats.Rd b/man/plot_rhats.Rd index eb206112..00de4a0d 100644 --- a/man/plot_rhats.Rd +++ b/man/plot_rhats.Rd @@ -17,8 +17,8 @@ plot_rhats(seromodel_object, cohort_ages, size_text = 25) The rhats-convergence plot of the selected model. } \description{ -This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}. -The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats. +This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}. +The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats. All rhats must be smaller than 1 to ensure convergence (for further details check \link[bayesplot]{rhat}). } \examples{ diff --git a/man/plot_seroprev_fitted.Rd b/man/plot_seroprev_fitted.Rd index 837765a5..48a27081 100644 --- a/man/plot_seroprev_fitted.Rd +++ b/man/plot_seroprev_fitted.Rd @@ -37,7 +37,7 @@ A ggplot object containing the seropositivity-vs-age graph including the data, t } \description{ This function generates a seropositivity plot of the specified serological model object. This includes the original data grouped by age -as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its +as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its corresponding confidence interval. } \examples{ diff --git a/man/prepare_bin_data.Rd b/man/prepare_bin_data.Rd index e7c8c1bd..52804508 100644 --- a/man/prepare_bin_data.Rd +++ b/man/prepare_bin_data.Rd @@ -33,7 +33,7 @@ The last six colums can be added to \code{serodata} by means of the function \co data set with the binomial confidence intervals } \description{ -This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals +This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals of its corresponding seroprevalence grouped by age group. } \examples{ diff --git a/man/prepare_serodata.Rd b/man/prepare_serodata.Rd index 8e3c316a..837e080b 100644 --- a/man/prepare_serodata.Rd +++ b/man/prepare_serodata.Rd @@ -28,7 +28,9 @@ serodata with additional columns necessary for the analysis. These columns are: \tabular{ll}{ \code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr \code{sample_size} \tab The size of the sample \cr \tab \cr -\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +\code{birth_year} \tab Years in which the subjects were borned +according to the age group marker \code{age_mean_f}\cr \tab \cr +according to the age group marker \code{age_mean_f}\cr \tab \cr \code{prev_obs} \tab Observed prevalence \cr \tab \cr \code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr \code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr diff --git a/tests/testthat/test_get_sim.R b/tests/testthat/test_get_sim.R new file mode 100644 index 00000000..222c44d6 --- /dev/null +++ b/tests/testthat/test_get_sim.R @@ -0,0 +1,44 @@ +test_that("test get_sim functionalities", { + + library(serofoi) + library(dplyr) + + tsur <- 2050 + birth_year_min <- 2000 + survey_label <- "foi_sim" + country = 'None' + test = "test" + antibody = "IgG" + + sim_data <- data.frame(birth_year = c(birth_year_min:(tsur - 1))) %>% + mutate(tsur = tsur, + country = country, + test = test, + antibody = antibody, + survey = survey_label, + age_mean_f = tsur - birth_year) + + #----- Test function generate_sim_probability + n_years <- 50 + foi_sim <- rep(0.02, n_years) + sim_probability <- get_sim_probability(sim_data = sim_data, + foi = foi_sim) + + exposure_matrix <- matrix(1, n_years, n_years) + exposure_matrix[lower.tri(exposure_matrix)] <- 0 + probabilities <- purrr::map_dbl(1:n_years, ~1-exp(-pracma::dot(exposure_matrix[., ], foi_sim))) + + expect_s3_class(sim_probability, "data.frame") + expect_type(sim_probability$age, "integer") + expect_type(sim_probability$probability, "double") + expect_true(all(probabilities == sim_probability$probability)) + + #----- Test function generate_sim_n_seropositivity + sample_size_by_age <- 5 + sim_n_seropositive <- get_sim_n_seropositive(sim_data = sim_data, + foi = foi_sim, + sample_size_by_age = sample_size_by_age) + expect_s3_class(sim_n_seropositive, "data.frame") + expect_type(sim_n_seropositive$age, "integer") + expect_type(sim_n_seropositive$n_seropositive, "integer") +}) diff --git a/tests/testthat/test_sim_data.R b/tests/testthat/test_sim_data.R new file mode 100644 index 00000000..3ae6440d --- /dev/null +++ b/tests/testthat/test_sim_data.R @@ -0,0 +1,51 @@ +test_that("simulated data", { + library(dplyr) + library(serofoi) + + seed <- 1234 + sample_size_by_age <- 10^7 + tsur <- 2050 + birth_year_min <- 2000 + + #----- Test for constant FoI + + foi_values <- c(0.001, 0.01, 0.1, 0.3, 0.4) + for(foi_value in foi_values) { + foi_sim <- rep(foi_value, tsur - birth_year_min) + #----- Test function generate_sim_data + sim_data <- generate_sim_data(foi = foi_sim, + sample_size_by_age = sample_size_by_age, + tsur = tsur, + birth_year_min = birth_year_min, + survey_label = 'foi_sim_constant', + seed = seed) + prev_exact <- 1 - exp(- foi_value * sim_data$age_mean_f) + + expect_s3_class(sim_data, "data.frame") + expect_length(sim_data$birth_year, tsur - birth_year_min) + expect_equal(sim_data$prev_obs, prev_exact, tolerance = TRUE) + + #----- Test function group_sim_data + sim_data <- sim_data %>% mutate(age_min = age_mean_f, age_max = age_mean_f) + sim_data_grouped <- group_sim_data(sim_data = sim_data) + expect_s3_class(sim_data_grouped, "data.frame") + expect_s3_class(sim_data_grouped$age_group, "factor") + } + + #----- Test for time-varying FoI + no_transm <- 0.0000000001 + foi_sim <- c(rep(0.2, 25), rep(0.1, 10), rep(no_transm, 15)) + + sim_data <- generate_sim_data(foi = foi_sim, + sample_size_by_age = sample_size_by_age, + tsur = tsur, + birth_year_min = birth_year_min, + survey_label = 'sw_dec_foi', + seed = seed) + + prev_exact <- 1 - exp(-cumsum(rev(foi_sim))) + + expect_s3_class(sim_data, "data.frame") + expect_length(sim_data$birth_year, tsur - birth_year_min) + expect_equal(sim_data$prev_obs, prev_exact, tolerance = TRUE) +})