Skip to content

Commit

Permalink
Add functions to simulate datasets to the seroprevalence_data module. (
Browse files Browse the repository at this point in the history
…#58)

* fix: add exception to function plot_foi() to plot a FOI trend with different length along with the data for the case when their sizes don't coincide

* clean test_visualisation

* feature: add three functions to simulate datasets. get_sim_counts() generates a list with simulated counts by age following a binomial distribution. generate_sim_data() uses the counts generated by get_sim_counts() to create a dataframe with the necessary structure to use other functions of the package. group_sim_data() serves to group the previously generated dataset by age group; right now it groups the data by periods of 5 years.

* add test_simulate_data to test the data simulation functions in the seroprevalence_data module
  • Loading branch information
ntorresd committed Oct 10, 2023
1 parent 1dd6d5d commit 8304d04
Show file tree
Hide file tree
Showing 7 changed files with 343 additions and 10 deletions.
127 changes: 127 additions & 0 deletions R/seroprevalence_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,131 @@ prepare_bin_data <- function(serodata) {
p_obs_bin_u = .data$Upper
)
return(xx)
}

#' Draw simulated seropositive counts for every exposure age
#'
#' For each age returned by \code{get_exposure_ages(sim_data)}, the matching
#' row of \code{get_exposure_matrix(sim_data)} is combined with \code{foi}
#' through a dot product, and the seropositivity probability is computed as
#' one minus the exponential of minus that product. A single binomial draw
#' with \code{size_age_class} trials is then taken for each probability.
#'
#' The function calls \code{set.seed(seed)}, so the state of the global random
#' number generator is reset on every call.
#'
#' @param sim_data A dataframe object containing the following columns:
#' \tabular{ll}{
#' \code{birth_year} \tab Years in which the subjects were born \cr \tab \cr
#' \code{tsur} \tab Year of the survey \cr \tab \cr
#' \code{country} \tab Country label \cr \tab \cr
#' \code{survey} \tab Survey label \cr \tab \cr
#' \code{age_mean_f} \tab Age \cr \tab \cr
#' }
#' @param foi Numeric vector with the Force-of-Infection values. It enters a
#' dot product with rows of the exposure matrix, so its length has to match
#' the length of those rows.
#' @param size_age_class Number of trials used for the binomial draw of every
#' age class.
#' @param seed Value handed to \code{set.seed} before the counts are drawn.
#' Defaults to 1234.
#' @return An integer vector with one simulated count per exposure age,
#' following a binomial distribution in accordance with the given force of
#' infection and age class size.
#' @examples
#'\dontrun{
#' sim_data <- data.frame(birth_year = 2000:2049,
#'                        tsur = 2050,
#'                        country = "None",
#'                        survey = "sim_constant_foi",
#'                        age_mean_f = 2050 - 2000:2049)
#' sim_counts <- get_sim_counts(sim_data,
#'                              foi = rep(0.02, 50),
#'                              size_age_class = 5)
#' }
#' @export
get_sim_counts <- function(sim_data, foi, size_age_class, seed = 1234) {
  ages <- get_exposure_ages(sim_data)
  exposure <- get_exposure_matrix(sim_data)

  set.seed(seed = seed)

  # Probability of being seropositive at a given exposure age
  seropositive_probability <- function(age) {
    cumulative_foi <- pracma::dot(exposure[age, ], foi)
    1 - exp(-cumulative_foi)
  }
  probabilities <- purrr::map_dbl(ages, seropositive_probability)

  # One binomial draw per age class, taken in the order of `probabilities`
  draw_count <- function(probability) {
    rbinom(1, size_age_class, probability)
  }
  purrr::map_int(probabilities, draw_count)
}

#' Function that generates simulated data from a given Force-of-Infection
#'
#' Builds one row per birth year between \code{birth_year_min} and
#' \code{tsur - 1}, attaches the simulated counts produced by
#' \code{\link{get_sim_counts}} together with the age class size, and passes
#' the result through \code{prepare_serodata(add_age_mean_f = FALSE)}.
#'
#' @param foi Numeric atomic vector corresponding to the desired
#' Force-of-Infection. It is forwarded unchanged to
#' \code{\link{get_sim_counts}}.
#' @param size_age_class Sample size of every age class. It is used both as
#' the number of binomial trials in \code{\link{get_sim_counts}} and as the
#' \code{total} column of the output.
#' @param tsur Year of the survey (single numeric value).
#' @param birth_year_min Earliest birth year to simulate (single numeric
#' value). Must be earlier than \code{tsur}.
#' @param survey_label Label stored in the \code{survey} column.
#' @param test Label stored in the \code{test} column. Defaults to "fake".
#' @param antibody Label stored in the \code{antibody} column. Defaults to
#' "IgG".
#' @param seed Seed forwarded to \code{\link{get_sim_counts}}. Defaults to
#' 1234.
#' @return Dataframe object containing the simulated data generated from
#' \code{foi}, as returned by \code{prepare_serodata}. The \code{country}
#' column is always set to "None".
#' @examples
#'\dontrun{
#' size_age_class = 5
#' foi <- rep(0.02, 50)
#' sim_data <- generate_sim_data(foi = foi,
#' size_age_class = size_age_class,
#' tsur = 2050,
#' birth_year_min = 2000,
#' survey_label = 'sim_constant_foi')
#' }
#' @export
generate_sim_data <- function(foi,
                              size_age_class,
                              tsur,
                              birth_year_min,
                              survey_label,
                              test = "fake",
                              antibody = "IgG",
                              seed = 1234) {
  stopifnot(
    is.numeric(tsur), length(tsur) == 1,
    is.numeric(birth_year_min), length(birth_year_min) == 1
  )
  # `a:b` counts downwards when a > b, which would silently create cohorts
  # born in or after the survey year (age 0 or negative ages).
  if (birth_year_min >= tsur) {
    stop("`birth_year_min` must be earlier than `tsur`", call. = FALSE)
  }

  sim_data <- data.frame(birth_year = birth_year_min:(tsur - 1)) %>%
    dplyr::mutate(
      tsur = tsur,
      country = "None",
      test = test,
      antibody = antibody,
      survey = survey_label,
      age_mean_f = tsur - birth_year
    )

  # NOTE(review): the counts are attached positionally, while the rows above
  # are ordered by ascending birth year (i.e. descending age). Confirm that
  # get_sim_counts() returns its values in that same row order.
  sim_counts <- get_sim_counts(sim_data, foi, size_age_class, seed = seed)

  sim_data %>%
    dplyr::mutate(counts = sim_counts, total = size_age_class) %>%
    prepare_serodata(add_age_mean_f = FALSE)
}

#' Function that generates grouped simulated data from a given Force-of-Infection
#'
#' Assigns every row of \code{sim_data} to one of the age groups
#' 01-04, 05-09, ..., 40-44, 45-50 according to its \code{age_mean_f}, sums
#' \code{total} and \code{counts} within each group and passes the result
#' through \code{prepare_serodata()}.
#'
#' Rows whose age does not fall into any of these groups are collected under
#' the literal group label "NA" (with missing \code{age_min} and
#' \code{age_max}); a warning is raised when this happens.
#'
#' @param sim_data Dataframe with the structure of the output of
#' \code{\link{generate_sim_data}}. The columns \code{age_mean_f},
#' \code{total}, \code{counts} and \code{tsur} are used.
#' @param foi Currently unused by this function.
#' @param size_age_class Currently unused by this function.
#' @param tsur Currently unused by this function; the survey year of the
#' output is taken from the first row of \code{sim_data}.
#' @param birth_year_min Currently unused by this function.
#' @param survey_label Label stored in the \code{survey} column.
#' @param test Label stored in the \code{test} column. Defaults to "fake".
#' @param antibody Label stored in the \code{antibody} column. Defaults to
#' "IgG".
#' @param seed Currently unused by this function.
#' @return Dataframe object containing the simulated data grouped by age
#' group, as returned by \code{prepare_serodata}. The \code{country} column is
#' always set to "None".
#' @examples
#'\dontrun{
#' size_age_class = 5
#' foi <- rep(0.02, 50)
#' sim_data <- generate_sim_data(foi = foi,
#' size_age_class = size_age_class,
#' tsur = 2050,
#' birth_year_min = 2000,
#' survey_label = 'sim_constant_foi')
#' sim_data_grouped <- group_sim_data(sim_data = sim_data,
#' foi = foi,
#' size_age_class = size_age_class,
#' tsur = 2050,
#' birth_year_min = 2000,
#' survey_label = 'sim_constant_foi_grouped')
#' }
#' @export
group_sim_data <- function(sim_data,
                           foi,
                           size_age_class,
                           tsur,
                           birth_year_min,
                           survey_label,
                           test = "fake",
                           antibody = "IgG",
                           seed = 1234) {
  # Age groups used for the aggregation. The last group spans six years
  # (45 to 50) so that the oldest simulated age is not left on its own.
  age_groups <- data.frame(
    label = c(
      "01-04", "05-09", "10-14", "15-19", "20-24",
      "25-29", "30-34", "35-39", "40-44", "45-50"
    ),
    age_min = c(1, 5, 10, 15, 20, 25, 30, 35, 40, 45),
    age_max = c(4, 9, 14, 19, 24, 29, 34, 39, 44, 50),
    stringsAsFactors = FALSE
  )

  sim_data <- sim_data %>%
    dplyr::mutate(age_group = "NA", age = age_mean_f) %>%
    dplyr::arrange(age)

  # Groups are applied in ascending order with open bounds
  # (age_min - 1, age_max + 1); a later group overrides an earlier one.
  for (i in seq_len(nrow(age_groups))) {
    in_group <- sim_data$age > age_groups$age_min[i] - 1 &
      sim_data$age < age_groups$age_max[i] + 1
    sim_data$age_group[which(in_group)] <- age_groups$label[i]
  }

  if (any(sim_data$age_group == "NA")) {
    warning(
      "Some ages in `sim_data` fall outside the age groups 01-04 to 45-50 ",
      "and were collected under the group \"NA\"",
      call. = FALSE
    )
  }

  survey_year <- sim_data$tsur[1]

  sim_data %>%
    dplyr::group_by(age_group) %>%
    dplyr::summarise(total = sum(total), counts = sum(counts)) %>%
    dplyr::mutate(
      tsur = survey_year,
      country = "None",
      survey = survey_label,
      test = test,
      antibody = antibody
    ) %>%
    dplyr::mutate(
      # Bounds come from the lookup table; the "NA" group has no match and
      # therefore gets missing bounds.
      age_min = age_groups$age_min[match(age_group, age_groups$label)],
      age_max = age_groups$age_max[match(age_group, age_groups$label)]
    ) %>%
    prepare_serodata()
}
27 changes: 17 additions & 10 deletions R/visualisation.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,16 +168,23 @@ plot_foi <- function(seromodel_object,
ggplot2::xlab("Year")
#TODO Add warning for foi_sim of different length than exposure years
if (!is.null(foi_sim)){
foi_data_length <- nrow(foi_data)
foi_sim_length <- length(foi_sim)
remove_x_values <- foi_sim_length - foi_data_length

foi_sim_data <- data.frame(year = foi_data$year,
foi_sim = foi_sim[-c(1:remove_x_values)])
foi_plot <- foi_plot +
ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim),
colour = "#b30909",
size = size_text/8)
if (nrow(foi_data) != length(foi_sim)) {
remove_x_values <- length(foi_sim) - nrow(foi_data)
foi_sim_data <- data.frame(year = foi_data$year,
foi_sim = foi_sim[-c(1:remove_x_values)])
foi_plot <- foi_plot +
ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim),
colour = "#b30909",
size = size_text / 8)
}
else{
foi_sim_data <- data.frame(year = foi_data$year,
foi_sim = foi_sim)
foi_plot <- foi_plot +
ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim),
colour = "#b30909",
size = size_text / 8)
}
}
}
} else {
Expand Down
11 changes: 11 additions & 0 deletions tests/testthat/extdata/constant_foi_sim_data_grouped.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"age_group","total","counts","tsur","country","survey","test","antibody","age_min","age_max","age_mean_f","sample_size","birth_year","prev_obs","prev_obs_lower","prev_obs_upper"
"01-04",20,1,2050,"None","foi_model","fake","IgG",1,4,2,250,2048,0.05,0.00126508949794981,0.248732762772028
"05-09",25,2,2050,"None","foi_model","fake","IgG",5,9,7,250,2043,0.08,0.00983959001879751,0.260305842105214
"10-14",25,7,2050,"None","foi_model","fake","IgG",10,14,12,250,2038,0.28,0.120716688504067,0.493876821806256
"15-19",25,5,2050,"None","foi_model","fake","IgG",15,19,17,250,2033,0.2,0.068311464012484,0.407037432278677
"20-24",25,9,2050,"None","foi_model","fake","IgG",20,24,22,250,2028,0.36,0.179716820583655,0.57479365044615
"25-29",25,7,2050,"None","foi_model","fake","IgG",25,29,27,250,2023,0.28,0.120716688504067,0.493876821806256
"30-34",25,8,2050,"None","foi_model","fake","IgG",30,34,32,250,2018,0.32,0.14949542261357,0.535000717497372
"35-39",25,11,2050,"None","foi_model","fake","IgG",35,39,37,250,2013,0.44,0.244023665147208,0.650718366008664
"40-44",25,16,2050,"None","foi_model","fake","IgG",40,44,42,250,2008,0.64,0.425206349553849,0.820283179416345
"45-50",30,18,2050,"None","foi_model","fake","IgG",45,50,47,250,2003,0.6,0.406034930051819,0.773442351171406
51 changes: 51 additions & 0 deletions tests/testthat/extdata/constant_foi_sim_data_no_grouped.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"birth_year","tsur","country","test","antibody","survey","age_mean_f","counts","total","prev_obs","prev_obs_lower","prev_obs_upper","age_min","age_max"
2049,2050,"None","fake","IgG","foi_sim",1,0,5,0,0,0.521823750104981,1,1
2048,2050,"None","fake","IgG","foi_sim",2,0,5,0,0,0.521823750104981,2,2
2047,2050,"None","fake","IgG","foi_sim",3,0,5,0,0,0.521823750104981,3,3
2046,2050,"None","fake","IgG","foi_sim",4,1,5,0.2,0.00505076337946806,0.716417936118089,4,4
2045,2050,"None","fake","IgG","foi_sim",5,0,5,0,0,0.521823750104981,5,5
2044,2050,"None","fake","IgG","foi_sim",6,0,5,0,0,0.521823750104981,6,6
2043,2050,"None","fake","IgG","foi_sim",7,1,5,0.2,0.00505076337946806,0.716417936118089,7,7
2042,2050,"None","fake","IgG","foi_sim",8,0,5,0,0,0.521823750104981,8,8
2041,2050,"None","fake","IgG","foi_sim",9,1,5,0.2,0.00505076337946806,0.716417936118089,9,9
2040,2050,"None","fake","IgG","foi_sim",10,1,5,0.2,0.00505076337946806,0.716417936118089,10,10
2039,2050,"None","fake","IgG","foi_sim",11,2,5,0.4,0.0527449505263169,0.853367200365327,11,11
2038,2050,"None","fake","IgG","foi_sim",12,4,5,0.8,0.283582063881911,0.994949236620532,12,12
2037,2050,"None","fake","IgG","foi_sim",13,0,5,0,0,0.521823750104981,13,13
2036,2050,"None","fake","IgG","foi_sim",14,0,5,0,0,0.521823750104981,14,14
2035,2050,"None","fake","IgG","foi_sim",15,2,5,0.4,0.0527449505263169,0.853367200365327,15,15
2034,2050,"None","fake","IgG","foi_sim",16,0,5,0,0,0.521823750104981,16,16
2033,2050,"None","fake","IgG","foi_sim",17,1,5,0.2,0.00505076337946806,0.716417936118089,17,17
2032,2050,"None","fake","IgG","foi_sim",18,1,5,0.2,0.00505076337946806,0.716417936118089,18,18
2031,2050,"None","fake","IgG","foi_sim",19,1,5,0.2,0.00505076337946806,0.716417936118089,19,19
2030,2050,"None","fake","IgG","foi_sim",20,1,5,0.2,0.00505076337946806,0.716417936118089,20,20
2029,2050,"None","fake","IgG","foi_sim",21,0,5,0,0,0.521823750104981,21,21
2028,2050,"None","fake","IgG","foi_sim",22,3,5,0.6,0.146632799634673,0.947255049473683,22,22
2027,2050,"None","fake","IgG","foi_sim",23,3,5,0.6,0.146632799634673,0.947255049473683,23,23
2026,2050,"None","fake","IgG","foi_sim",24,2,5,0.4,0.0527449505263169,0.853367200365327,24,24
2025,2050,"None","fake","IgG","foi_sim",25,3,5,0.6,0.146632799634673,0.947255049473683,25,25
2024,2050,"None","fake","IgG","foi_sim",26,1,5,0.2,0.00505076337946806,0.716417936118089,26,26
2023,2050,"None","fake","IgG","foi_sim",27,0,5,0,0,0.521823750104981,27,27
2022,2050,"None","fake","IgG","foi_sim",28,1,5,0.2,0.00505076337946806,0.716417936118089,28,28
2021,2050,"None","fake","IgG","foi_sim",29,2,5,0.4,0.0527449505263169,0.853367200365327,29,29
2020,2050,"None","fake","IgG","foi_sim",30,2,5,0.4,0.0527449505263169,0.853367200365327,30,30
2019,2050,"None","fake","IgG","foi_sim",31,1,5,0.2,0.00505076337946806,0.716417936118089,31,31
2018,2050,"None","fake","IgG","foi_sim",32,1,5,0.2,0.00505076337946806,0.716417936118089,32,32
2017,2050,"None","fake","IgG","foi_sim",33,2,5,0.4,0.0527449505263169,0.853367200365327,33,33
2016,2050,"None","fake","IgG","foi_sim",34,2,5,0.4,0.0527449505263169,0.853367200365327,34,34
2015,2050,"None","fake","IgG","foi_sim",35,1,5,0.2,0.00505076337946806,0.716417936118089,35,35
2014,2050,"None","fake","IgG","foi_sim",36,3,5,0.6,0.146632799634673,0.947255049473683,36,36
2013,2050,"None","fake","IgG","foi_sim",37,1,5,0.2,0.00505076337946806,0.716417936118089,37,37
2012,2050,"None","fake","IgG","foi_sim",38,3,5,0.6,0.146632799634673,0.947255049473683,38,38
2011,2050,"None","fake","IgG","foi_sim",39,3,5,0.6,0.146632799634673,0.947255049473683,39,39
2010,2050,"None","fake","IgG","foi_sim",40,2,5,0.4,0.0527449505263169,0.853367200365327,40,40
2009,2050,"None","fake","IgG","foi_sim",41,3,5,0.6,0.146632799634673,0.947255049473683,41,41
2008,2050,"None","fake","IgG","foi_sim",42,2,5,0.4,0.0527449505263169,0.853367200365327,42,42
2007,2050,"None","fake","IgG","foi_sim",43,4,5,0.8,0.283582063881911,0.994949236620532,43,43
2006,2050,"None","fake","IgG","foi_sim",44,5,5,1,0.478176249895019,1,44,44
2005,2050,"None","fake","IgG","foi_sim",45,3,5,0.6,0.146632799634673,0.947255049473683,45,45
2004,2050,"None","fake","IgG","foi_sim",46,2,5,0.4,0.0527449505263169,0.853367200365327,46,46
2003,2050,"None","fake","IgG","foi_sim",47,3,5,0.6,0.146632799634673,0.947255049473683,47,47
2002,2050,"None","fake","IgG","foi_sim",48,3,5,0.6,0.146632799634673,0.947255049473683,48,48
2001,2050,"None","fake","IgG","foi_sim",49,3,5,0.6,0.146632799634673,0.947255049473683,49,49
2000,2050,"None","fake","IgG","foi_sim",50,4,5,0.8,0.283582063881911,0.994949236620532,50,50
11 changes: 11 additions & 0 deletions tests/testthat/extdata/sw_dec_foi_sim_data_grouped.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"age_group","total","counts","tsur","country","survey","test","antibody","age_min","age_max","age_mean_f","sample_size","birth_year","prev_obs","prev_obs_lower","prev_obs_upper"
"01-04",20,0,2050,"None","foi_model","fake","IgG",1,4,2,250,2048,0,0,0.168433470983085
"05-09",25,0,2050,"None","foi_model","fake","IgG",5,9,7,250,2043,0,0,0.137185171530713
"10-14",25,0,2050,"None","foi_model","fake","IgG",10,14,12,250,2038,0,0,0.137185171530713
"15-19",25,3,2050,"None","foi_model","fake","IgG",15,19,17,250,2033,0.12,0.0254653966477332,0.312190307286235
"20-24",25,7,2050,"None","foi_model","fake","IgG",20,24,22,250,2028,0.28,0.120716688504067,0.493876821806256
"25-29",25,21,2050,"None","foi_model","fake","IgG",25,29,27,250,2023,0.84,0.639171545540728,0.95462054762829
"30-34",25,25,2050,"None","foi_model","fake","IgG",30,34,32,250,2018,1,0.862814828469287,1
"35-39",25,23,2050,"None","foi_model","fake","IgG",35,39,37,250,2013,0.92,0.739694157894786,0.990160409981202
"40-44",25,25,2050,"None","foi_model","fake","IgG",40,44,42,250,2008,1,0.862814828469287,1
"45-50",30,30,2050,"None","foi_model","fake","IgG",45,50,47,250,2003,1,0.884296691777972,1
51 changes: 51 additions & 0 deletions tests/testthat/extdata/sw_dec_foi_sim_data_no_grouped.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"birth_year","tsur","country","test","antibody","survey","age_mean_f","counts","total","prev_obs","prev_obs_lower","prev_obs_upper","age_min","age_max"
2049,2050,"None","fake","IgG","foi_sim",1,0,5,0,0,0.521823750104981,1,1
2048,2050,"None","fake","IgG","foi_sim",2,0,5,0,0,0.521823750104981,2,2
2047,2050,"None","fake","IgG","foi_sim",3,0,5,0,0,0.521823750104981,3,3
2046,2050,"None","fake","IgG","foi_sim",4,0,5,0,0,0.521823750104981,4,4
2045,2050,"None","fake","IgG","foi_sim",5,0,5,0,0,0.521823750104981,5,5
2044,2050,"None","fake","IgG","foi_sim",6,0,5,0,0,0.521823750104981,6,6
2043,2050,"None","fake","IgG","foi_sim",7,0,5,0,0,0.521823750104981,7,7
2042,2050,"None","fake","IgG","foi_sim",8,0,5,0,0,0.521823750104981,8,8
2041,2050,"None","fake","IgG","foi_sim",9,0,5,0,0,0.521823750104981,9,9
2040,2050,"None","fake","IgG","foi_sim",10,0,5,0,0,0.521823750104981,10,10
2039,2050,"None","fake","IgG","foi_sim",11,0,5,0,0,0.521823750104981,11,11
2038,2050,"None","fake","IgG","foi_sim",12,0,5,0,0,0.521823750104981,12,12
2037,2050,"None","fake","IgG","foi_sim",13,0,5,0,0,0.521823750104981,13,13
2036,2050,"None","fake","IgG","foi_sim",14,0,5,0,0,0.521823750104981,14,14
2035,2050,"None","fake","IgG","foi_sim",15,0,5,0,0,0.521823750104981,15,15
2034,2050,"None","fake","IgG","foi_sim",16,0,5,0,0,0.521823750104981,16,16
2033,2050,"None","fake","IgG","foi_sim",17,1,5,0.2,0.00505076337946806,0.716417936118089,17,17
2032,2050,"None","fake","IgG","foi_sim",18,1,5,0.2,0.00505076337946806,0.716417936118089,18,18
2031,2050,"None","fake","IgG","foi_sim",19,1,5,0.2,0.00505076337946806,0.716417936118089,19,19
2030,2050,"None","fake","IgG","foi_sim",20,2,5,0.4,0.0527449505263169,0.853367200365327,20,20
2029,2050,"None","fake","IgG","foi_sim",21,0,5,0,0,0.521823750104981,21,21
2028,2050,"None","fake","IgG","foi_sim",22,1,5,0.2,0.00505076337946806,0.716417936118089,22,22
2027,2050,"None","fake","IgG","foi_sim",23,1,5,0.2,0.00505076337946806,0.716417936118089,23,23
2026,2050,"None","fake","IgG","foi_sim",24,3,5,0.6,0.146632799634673,0.947255049473683,24,24
2025,2050,"None","fake","IgG","foi_sim",25,2,5,0.4,0.0527449505263169,0.853367200365327,25,25
2024,2050,"None","fake","IgG","foi_sim",26,4,5,0.8,0.283582063881911,0.994949236620532,26,26
2023,2050,"None","fake","IgG","foi_sim",27,5,5,1,0.478176249895019,1,27,27
2022,2050,"None","fake","IgG","foi_sim",28,5,5,1,0.478176249895019,1,28,28
2021,2050,"None","fake","IgG","foi_sim",29,5,5,1,0.478176249895019,1,29,29
2020,2050,"None","fake","IgG","foi_sim",30,5,5,1,0.478176249895019,1,30,30
2019,2050,"None","fake","IgG","foi_sim",31,5,5,1,0.478176249895019,1,31,31
2018,2050,"None","fake","IgG","foi_sim",32,5,5,1,0.478176249895019,1,32,32
2017,2050,"None","fake","IgG","foi_sim",33,5,5,1,0.478176249895019,1,33,33
2016,2050,"None","fake","IgG","foi_sim",34,5,5,1,0.478176249895019,1,34,34
2015,2050,"None","fake","IgG","foi_sim",35,4,5,0.8,0.283582063881911,0.994949236620532,35,35
2014,2050,"None","fake","IgG","foi_sim",36,5,5,1,0.478176249895019,1,36,36
2013,2050,"None","fake","IgG","foi_sim",37,4,5,0.8,0.283582063881911,0.994949236620532,37,37
2012,2050,"None","fake","IgG","foi_sim",38,5,5,1,0.478176249895019,1,38,38
2011,2050,"None","fake","IgG","foi_sim",39,5,5,1,0.478176249895019,1,39,39
2010,2050,"None","fake","IgG","foi_sim",40,5,5,1,0.478176249895019,1,40,40
2009,2050,"None","fake","IgG","foi_sim",41,5,5,1,0.478176249895019,1,41,41
2008,2050,"None","fake","IgG","foi_sim",42,5,5,1,0.478176249895019,1,42,42
2007,2050,"None","fake","IgG","foi_sim",43,5,5,1,0.478176249895019,1,43,43
2006,2050,"None","fake","IgG","foi_sim",44,5,5,1,0.478176249895019,1,44,44
2005,2050,"None","fake","IgG","foi_sim",45,5,5,1,0.478176249895019,1,45,45
2004,2050,"None","fake","IgG","foi_sim",46,5,5,1,0.478176249895019,1,46,46
2003,2050,"None","fake","IgG","foi_sim",47,5,5,1,0.478176249895019,1,47,47
2002,2050,"None","fake","IgG","foi_sim",48,5,5,1,0.478176249895019,1,48,48
2001,2050,"None","fake","IgG","foi_sim",49,5,5,1,0.478176249895019,1,49,49
2000,2050,"None","fake","IgG","foi_sim",50,5,5,1,0.478176249895019,1,50,50
Loading

0 comments on commit 8304d04

Please sign in to comment.