diff --git a/DESCRIPTION b/DESCRIPTION
index 68b013bf..998a1c6e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -56,6 +56,7 @@ Imports:
     jsonlite,
     loo,
     purrr,
+    pracma,
     qtl,
     reshape2,
     usethis,
diff --git a/NAMESPACE b/NAMESPACE
index afb7a5e9..d66db97f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,10 +2,13 @@
 
 export(extract_seromodel_summary)
 export(fit_seromodel)
+export(generate_sim_data)
 export(get_cohort_ages)
 export(get_exposure_matrix)
 export(get_foi_central_estimates)
 export(get_prev_expanded)
+export(get_sim_n_seropositive)
+export(get_sim_probability)
 export(get_table_rhats)
 export(plot_foi)
 export(plot_info_table)
diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R
index 933e8f27..b828957a 100644
--- a/R/seroprevalence_data.R
+++ b/R/seroprevalence_data.R
@@ -22,7 +22,9 @@
 #' \tabular{ll}{
 #' \code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr
 #' \code{sample_size} \tab The size of the sample \cr \tab \cr
-#' \code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr
+#' \code{birth_year} \tab Years in which the subjects were borned
+#' according to the age group marker \code{age_mean_f}\cr \tab \cr
+#' according to the age group marker \code{age_mean_f}\cr \tab \cr
 #' \code{prev_obs} \tab Observed prevalence \cr \tab \cr
 #' \code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr
 #' \code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr
@@ -37,21 +39,24 @@ prepare_serodata <- function(serodata = serodata,
   #Check that serodata has the right columns
   stopifnot("serodata must contain the right columns" =
             all(c("survey", "total", "counts", "age_min", "age_max", "tsur",
-                  "country","test","antibody"
+                  "country", "test", "antibody"
                   ) %in%
                   colnames(serodata)
+                ) |
+            all(c("survey", "total", "counts", "age_mean_f", "tsur",
+                  "country", "test", "antibody") %in%
+                  colnames(serodata)
                 )
-            )
+  )
   if(!any(colnames(serodata) == "age_mean_f")){
     serodata <- serodata %>%
       dplyr::mutate(age_mean_f = floor((age_min + age_max) / 2), sample_size = sum(total))
-  }  
+  }
 
   if(!any(colnames(serodata) == "birth_year")){
     serodata <- serodata %>%
       dplyr::mutate(birth_year = .data$tsur - .data$age_mean_f)
   }
-  
   serodata <- serodata %>%
     cbind(
       Hmisc::binconf(
@@ -76,8 +81,8 @@ prepare_serodata <- function(serodata = serodata,
 #' Function that prepares a pre-processed serological survey dataset to plot the binomial confidence intervals of the seroprevalence grouped by
 #' age group
 #'
-#' This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals 
-#' of its corresponding seroprevalence grouped by age group. 
+#' This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals
+#' of its corresponding seroprevalence grouped by age group.
 #' @inheritParams run_seromodel
 #' @return data set with the binomial confidence intervals
 #' @examples
@@ -115,4 +120,190 @@ prepare_bin_data <- function(serodata) {
     p_obs_bin_u = .data$Upper
   )
   return(xx)
-}
\ No newline at end of file
+}
+
+#' Function that generates the probabilities of being previously exposed to a pathogen.
+#'
+#' @param sim_data A dataframe object containing the following columns:
+#' \tabular{ll}{
+#' \code{tsur} \tab Year of the survey\cr \tab \cr
+#' \code{age_mean_f} \tab Age \cr \tab \cr
+#' \code{birth_year} \tab Years in which the subjects were borned
+#' according to the age group marker \code{age_mean_f}\cr \tab \cr
+#' }
+#' @param foi Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present
+#' @param seed The seed for random number generation.
+#' @return A dataframe containing the following columns:
+#' \tabular{ll}{
+#' \code{age} \tab Exposure ages \cr \tab \cr
+#' \code{probability} \tab Probability to obtain a seropositive case for each age according to the provided FoI\cr \tab \cr
+#' }
+#' @export
+get_sim_probability <- function(sim_data, foi) {
+  cohort_ages <- get_cohort_ages(sim_data)
+  exposure_ages <- rev(cohort_ages$age)
+  exposure_matrix <- get_exposure_matrix(sim_data)
+  probabilities <- purrr::map_dbl(exposure_ages, ~1-exp(-pracma::dot(exposure_matrix[., ], foi)))
+
+  sim_probability <- data.frame(
+    age = exposure_ages,
+    probability = probabilities
+  )
+  return(sim_probability)
+}
+
+#' Function that generates a sample of counts of seropositive individuals by sampling from a binomial distribution
+#'
+#' @param sim_data A dataframe object containing the following columns:
+#' \tabular{ll}{
+#' \code{tsur} \tab Year of the survey\cr \tab \cr
+#' \code{age_mean_f} \tab Age \cr \tab \cr
+#' \code{birth_year} \tab Years in which the subjects were borned
+#' according to the age group marker \code{age_mean_f}\cr \tab \cr
+#' }
+#' @param foi Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present
+#' @param sample_size_by_age Sample size for each age group: either a single integer indicating that the sample size is 
+#' the same for all ages or a vector of sample sizes the same length as
+#' This corresponds to the number of trials \code{size} in \link[stats]{rbinom}.
+#' @param seed The seed for random number generation.
+#' @return A dataframe containing the following columns:
+#' \tabular{ll}{
+#' \code{age} \tab Age by the time of the survey \cr \tab \cr
+#' \code{n_seropositive} \tab Number of positive cases sampled according to the provided FoI \cr \tab \cr
+#' }
+#' simulated list of counts following a binomial distribution in accordance with a given
+#' force of infection and age class sizes.
+#' @examples
+#'\dontrun{
+#' foi <- rep(0.02, 50)
+#' sim_data <- generate_sim_data(foi = foi,
+#'                               sample_size_by_age = 5,
+#'                               tsur = 2050,
+#'                               birth_year_min = 2000,
+#'                               survey_label = 'foi_sim')
+#' sim_n_seropositive <- get_sim_n_seropositive(sim_data = sim_data,
+#'                                              foi = foi)
+#' }
+#' @export
+get_sim_n_seropositive <- function(sim_data, foi, sample_size_by_age, seed = 1234) {
+  sim_probability <- get_sim_probability(sim_data = sim_data, foi = foi)
+
+  set.seed(seed = seed)
+  n_seropositive <- purrr::map_int(sim_probability$probability, ~rbinom(1, sample_size_by_age, .))
+
+  sim_n_seropositive <- data.frame(
+    age = sim_probability$age,
+    n_seropositive = n_seropositive
+  )
+  return(sim_n_seropositive)
+}
+
+#' Function that generates a simulated serosurvey according to the specified FoI
+#'
+#' @param foi Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present
+#' @param sample_size_by_age Size of each age group specified by either an atomic
+#' vector of the same size as \code{foi} or an integer.
+#' This corresponds to the number of trials \code{size} in \link[stats]{rbinom}.
+#' @param tsur Year in which the serosurvey was conducted.
+#' @param birth_year_min Minimum age of year in the simulated serosurvey.
+#' @param survey_label Label for the resulting simulated serosurvey.
+#' @return Dataframe object containing the simulated data generated from \code{foi}
+#' @examples
+#'\dontrun{
+#' sample_size_by_age = 5
+#' foi <- rep(0.02, 50)
+#' sim_data <- generate_sim_data(foi = foi,
+#'                               sample_size_by_age = sample_size_by_age,
+#'                               tsur = 2050,
+#'                               birth_year_min = 2000,
+#'                               survey_label = 'sim_constant_foi')
+#' }
+#' @export
+generate_sim_data <- function(foi,
+                              sample_size_by_age,
+                              tsur,
+                              birth_year_min,
+                              survey_label,
+                              test = "fake",
+                              antibody = "IgG",
+                              seed = 1234
+                              ){
+    sim_data <- data.frame(birth_year = c(birth_year_min:(tsur - 1))) %>%
+        mutate(tsur = tsur,
+            country = 'None',
+            test = test,
+            antibody = antibody,
+            survey = survey_label,
+            age_mean_f = tsur - birth_year)
+    sim_n_seropositive <- get_sim_n_seropositive(sim_data, foi, sample_size_by_age, seed = seed)
+    sim_data <- sim_data %>%
+        mutate(counts = sim_n_seropositive$n_seropositive,
+               total = sample_size_by_age) %>%
+        prepare_serodata()
+
+    return(sim_data)
+}
+
+#' Method for constructing age-group variable from age column
+#'
+#' This function was taken from \link[vaccineff]{get_age_group}.
+#' This method splits an age interval from age_min to age_max into
+#' (age_max-age_min)/step intervals.
+#' By default age_min is set 0, however it can be assigned by
+#' convenience.
+#' If the method finds ages greater or equal than age_max
+#' it assigns the string ">{age_max}".
+#' To avoid errors it is necessary to set step < age_max.
+#' It is also suggested to choose the step such
+#' that age_max%%(step+1) = 0.
+#' @param age vector containing age information
+#' @param  step step used to split the age interval
+#' @return age_group factor variable grouping \code{age} by the age intervals 
+#' specified by \code{min(age)}, \code{max(age)}.
+get_age_group <- function(age, step) {
+  age_min <- min(age)
+  age_max <- max(age)
+  n_steps <- as.integer((age_max - age_min) / step) + 1
+  limits_low <- c(as.integer(seq(age_min,
+                                 age_max,
+                                 length.out = n_steps)))
+  limits_hgh <- limits_low + step
+  lim_labels <- paste(as.character(limits_low), as.character(limits_hgh),
+                      sep = "-")
+  lim_labels[length(lim_labels)] <- paste0("+",
+                                           limits_low[length(limits_low)])
+  lim_breaks <- c(-Inf, limits_low[2:length(limits_low)] - 1, Inf)
+
+  age_group <- cut(age,
+                   breaks = lim_breaks,
+                   labels = lim_labels,
+                   # this is for the intervals to be closed to the left and open to the right
+                   right = FALSE)
+  return(age_group)
+}
+
+#' Function that groups a simulated serological dataset by age
+#'
+#' @param sim_data Dataframe with the same structure as the output of \code{\link{generate_sim_data}}.
+#' @param col_age name of the column containing the age information
+#' @param step step used to split the age interval
+#' @return Dataframe object containing grouped simulated data generated from \code{foi}
+group_sim_data <- function(sim_data,
+                          col_age = "age_mean_f",
+                          step = 5) {
+    age <- sim_data[[col_age]]
+    sim_data$age_group <-  get_age_group(age = age, step = step)
+    sim_data_grouped <- sim_data %>%
+      group_by(age_group) %>%
+    dplyr::summarise(total = sum(total), counts = sum(counts)) %>%
+      mutate(tsur = sim_data$tsur[1],
+              country = "None",
+              survey = sim_data$survey[1],
+              test = sim_data$test[1],
+              antibody = sim_data$antibody[1]) %>%
+      mutate(age_min = as.integer(sub("\\-.*", "", age_group)),
+              age_max = as.integer(sub(".*\\-", "", age_group))) %>%
+      prepare_serodata()
+
+    return(sim_data_grouped)
+}
diff --git a/R/visualisation.R b/R/visualisation.R
index 6c26051f..755b89fb 100644
--- a/R/visualisation.R
+++ b/R/visualisation.R
@@ -27,7 +27,7 @@ plot_seroprev <- function(serodata,
 #' Function that generates a seropositivity plot corresponding to the specified fitted serological model
 #'
 #' This function generates a seropositivity plot of the specified serological model object. This includes the original data grouped by age
-#' as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its 
+#' as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its
 #' corresponding confidence interval.
 #' @inheritParams get_foi_central_estimates
 #' @inheritParams run_seromodel
@@ -108,7 +108,7 @@ plot_seroprev_fitted <- function(seromodel_object,
 #' Function that generates a Force-of-Infection plot corresponding to the specified fitted serological model
 #'
 #' This function generates a Force-of-Infection plot from the results obtained by fitting a serological model.
-#' This includes the corresponding binomial confidence interval. 
+#' This includes the corresponding binomial confidence interval.
 #' The x axis corresponds to the decades covered by the survey the y axis to the Force-of-Infection.
 #' @inheritParams get_foi_central_estimates
 #' @param size_text Text size use in the theme of the graph returned by the function.
@@ -138,7 +138,6 @@ plot_foi <- function(seromodel_object,
       foi <- rstan::extract(seromodel_object,
                             "foi",
                             inc_warmup = FALSE)[[1]]
-
       #-------- This bit is to get the actual length of the foi data
       foi_data <- get_foi_central_estimates(seromodel_object = seromodel_object,
                                             cohort_ages = cohort_ages)
@@ -168,16 +167,23 @@ plot_foi <- function(seromodel_object,
         ggplot2::xlab("Year")
       #TODO Add warning for foi_sim of different length than exposure years
       if (!is.null(foi_sim)){
-        foi_data_length <- nrow(foi_data)
-        foi_sim_length <- length(foi_sim)
-        remove_x_values <- foi_sim_length - foi_data_length
-
-        foi_sim_data <- data.frame(year = foi_data$year, 
-                                  foi_sim = foi_sim[-c(1:remove_x_values)])    
-        foi_plot <- foi_plot + 
-          ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim),
-                            colour = "#b30909",
-                            size = size_text/8)
+        if (nrow(foi_data) != length(foi_sim)) {
+          remove_x_values <- length(foi_sim) - nrow(foi_data)
+          foi_sim_data <- data.frame(year = foi_data$year,
+                                    foi_sim = foi_sim[-c(1:remove_x_values)])
+          foi_plot <- foi_plot +
+            ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim),
+                              colour = "#b30909",
+                              size = size_text / 8)
+        }
+        else{
+          foi_sim_data <- data.frame(year = foi_data$year,
+                                    foi_sim = foi_sim)
+          foi_plot <- foi_plot +
+            ggplot2::geom_line(data = foi_sim_data, ggplot2::aes(x = year, y = foi_sim),
+                              colour = "#b30909",
+                              size = size_text / 8)
+        }
       }
     }
   } else {
@@ -208,8 +214,8 @@ plot_foi <- function(seromodel_object,
 
 #' Function that generates a plot of the R-hat estimates of the specified fitted serological model
 #'
-#' This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}. 
-#' The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats. 
+#' This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}.
+#' The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats.
 #' All rhats must be smaller than 1 to ensure convergence (for further details check \link[bayesplot]{rhat}).
 #' @inheritParams get_foi_central_estimates
 #' @param size_text Text size use in the theme of the graph returned by the function.
@@ -322,7 +328,7 @@ plot_seromodel <- function(seromodel_object,
       model_summary <- extract_seromodel_summary(seromodel_object = seromodel_object,
                                                  serodata = serodata)
       summary_table <- t(
-        dplyr::select(model_summary, 
+        dplyr::select(model_summary,
         c('foi_model', 'dataset', 'elpd', 'se', 'converged')))
       summary_plot <-
         plot_info_table(summary_table, size_text = size_text)
@@ -333,7 +339,7 @@ plot_seromodel <- function(seromodel_object,
         foi_plot,
         rhats_plot,
         ncol = 1,
-        nrow = 4, 
+        nrow = 4,
         rel_heights = c(0.5, 1, 1, 1)
       )
     }
@@ -358,7 +364,7 @@ plot_seromodel <- function(seromodel_object,
       ggplot2::ylab(" ") +
       ggplot2::xlab(" ")
     g1 <- g0
-    # TODO: This 
+    # TODO: This
     g0 <- g0 + ggplot2::labs(subtitle = seromodel_object$model_name) +
       ggplot2::theme(plot.title = ggplot2::element_text(size = 10))
 
@@ -369,7 +375,7 @@ plot_seromodel <- function(seromodel_object,
   return(plot_arrange)
 }
 
-# TODO Improve documentation of @return. 
+# TODO Improve documentation of @return.
 # TODO Give more details about the generated plot
 #' Function that generates a plot for a given table
 #'
@@ -399,4 +405,4 @@ plot_info_table <- function(info, size_text) {
                        fontface = "bold")
 
   return(p)
-}
\ No newline at end of file
+}
diff --git a/man/generate_sim_data.Rd b/man/generate_sim_data.Rd
new file mode 100644
index 00000000..0f68f61d
--- /dev/null
+++ b/man/generate_sim_data.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/seroprevalence_data.R
+\name{generate_sim_data}
+\alias{generate_sim_data}
+\title{Function that generates a simulated serosurvey according to the specified FoI}
+\usage{
+generate_sim_data(
+  foi,
+  sample_size_by_age,
+  tsur,
+  birth_year_min,
+  survey_label,
+  test = "fake",
+  antibody = "IgG",
+  seed = 1234
+)
+}
+\arguments{
+\item{foi}{Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present}
+
+\item{sample_size_by_age}{Size of each age group specified by either an atomic
+vector of the same size as \code{foi} or an integer.
+This corresponds to the number of trials \code{size} in \link[stats]{rbinom}.}
+
+\item{tsur}{Year in which the serosurvey was conducted.}
+
+\item{birth_year_min}{Minimum age of year in the simulated serosurvey.}
+
+\item{survey_label}{Label for the resulting simulated serosurvey.}
+}
+\value{
+Dataframe object containing the simulated data generated from \code{foi}
+}
+\description{
+Function that generates a simulated serosurvey according to the specified FoI
+}
+\examples{
+\dontrun{
+sample_size_by_age = 5
+foi <- rep(0.02, 50)
+sim_data <- generate_sim_data(foi = foi,
+                              sample_size_by_age = sample_size_by_age,
+                              tsur = 2050,
+                              birth_year_min = 2000,
+                              survey_label = 'sim_constant_foi')
+}
+}
diff --git a/man/get_age_group.Rd b/man/get_age_group.Rd
new file mode 100644
index 00000000..9df6f8db
--- /dev/null
+++ b/man/get_age_group.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/seroprevalence_data.R
+\name{get_age_group}
+\alias{get_age_group}
+\title{Method for constructing age-group variable from age column}
+\usage{
+get_age_group(age, step)
+}
+\arguments{
+\item{age}{vector containing age information}
+
+\item{step}{step used to split the age interval}
+}
+\value{
+age_group factor variable grouping \code{age} by the age intervals 
+specified by \code{min(age)}, \code{max(age)}.
+}
+\description{
+This function was taken from \link[vaccineff]{get_age_group}.
+This method splits an age interval from age_min to age_max into
+(age_max-age_min)/step intervals.
+By default age_min is set 0, however it can be assigned by
+convenience.
+If the method finds ages greater or equal than age_max
+it assigns the string ">{age_max}".
+To avoid errors it is necessary to set step < age_max.
+It is also suggested to choose the step such
+that age_max%%(step+1) = 0.
+}
diff --git a/man/get_sim_n_seropositive.Rd b/man/get_sim_n_seropositive.Rd
new file mode 100644
index 00000000..10b1a5ea
--- /dev/null
+++ b/man/get_sim_n_seropositive.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/seroprevalence_data.R
+\name{get_sim_n_seropositive}
+\alias{get_sim_n_seropositive}
+\title{Function that generates a sample of counts of seropositive individuals by sampling from a binomial distribution}
+\usage{
+get_sim_n_seropositive(sim_data, foi, sample_size_by_age, seed = 1234)
+}
+\arguments{
+\item{sim_data}{A dataframe object containing the following columns:
+\tabular{ll}{
+\code{tsur} \tab Year of the survey\cr \tab \cr
+\code{age_mean_f} \tab Age \cr \tab \cr
+\code{birth_year} \tab Years in which the subjects were borned
+according to the age group marker \code{age_mean_f}\cr \tab \cr
+}}
+
+\item{foi}{Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present}
+
+\item{sample_size_by_age}{Sample size for each age group: either a single integer indicating that the sample size is 
+the same for all ages or a vector of sample sizes the same length as
+This corresponds to the number of trials \code{size} in \link[stats]{rbinom}.}
+
+\item{seed}{The seed for random number generation.}
+}
+\value{
+A dataframe containing the following columns:
+\tabular{ll}{
+\code{age} \tab Age by the time of the survey \cr \tab \cr
+\code{n_seropositive} \tab Number of positive cases sampled according to the provided FoI \cr \tab \cr
+}
+simulated list of counts following a binomial distribution in accordance with a given
+force of infection and age class sizes.
+}
+\description{
+Function that generates a sample of counts of seropositive individuals by sampling from a binomial distribution
+}
+\examples{
+\dontrun{
+foi <- rep(0.02, 50)
+sim_data <- generate_sim_data(foi = foi,
+                              sample_size_by_age = 5,
+                              tsur = 2050,
+                              birth_year_min = 2000,
+                              survey_label = 'foi_sim')
+sim_n_seropositive <- get_sim_n_seropositive(sim_data = sim_data,
+                                             foi = foi)
+}
+}
diff --git a/man/get_sim_probability.Rd b/man/get_sim_probability.Rd
new file mode 100644
index 00000000..624fe1ed
--- /dev/null
+++ b/man/get_sim_probability.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/seroprevalence_data.R
+\name{get_sim_probability}
+\alias{get_sim_probability}
+\title{Function that generates the probabilities of being previously exposed to a pathogen.}
+\usage{
+get_sim_probability(sim_data, foi)
+}
+\arguments{
+\item{sim_data}{A dataframe object containing the following columns:
+\tabular{ll}{
+\code{tsur} \tab Year of the survey\cr \tab \cr
+\code{age_mean_f} \tab Age \cr \tab \cr
+\code{birth_year} \tab Years in which the subjects were borned
+according to the age group marker \code{age_mean_f}\cr \tab \cr
+}}
+
+\item{foi}{Numeric atomic vector corresponding to the desired Force-of-Infection ordered from past to present}
+
+\item{seed}{The seed for random number generation.}
+}
+\value{
+A dataframe containing the following columns:
+\tabular{ll}{
+\code{age} \tab Exposure ages \cr \tab \cr
+\code{probability} \tab Probability to obtain a seropositive case for each age according to the provided FoI\cr \tab \cr
+}
+}
+\description{
+Function that generates the probabilities of being previously exposed to a pathogen.
+}
diff --git a/man/group_sim_data.Rd b/man/group_sim_data.Rd
new file mode 100644
index 00000000..cbe1f391
--- /dev/null
+++ b/man/group_sim_data.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/seroprevalence_data.R
+\name{group_sim_data}
+\alias{group_sim_data}
+\title{Function that groups a simulated serological dataset by age}
+\usage{
+group_sim_data(sim_data, col_age = "age_mean_f", step = 5)
+}
+\arguments{
+\item{sim_data}{Dataframe with the same structure as the output of \code{\link{generate_sim_data}}.}
+
+\item{col_age}{name of the column containing the age information}
+
+\item{step}{step used to split the age interval}
+}
+\value{
+Dataframe object containing grouped simulated data generated from \code{foi}
+}
+\description{
+Function that groups a simulated serological dataset by age
+}
diff --git a/man/plot_foi.Rd b/man/plot_foi.Rd
index 8dd29d7d..158837ce 100644
--- a/man/plot_foi.Rd
+++ b/man/plot_foi.Rd
@@ -28,7 +28,7 @@ A ggplot2 object containing the Force-of-infection-vs-time including the corresp
 }
 \description{
 This function generates a Force-of-Infection plot from the results obtained by fitting a serological model.
-This includes the corresponding binomial confidence interval. 
+This includes the corresponding binomial confidence interval.
 The x axis corresponds to the decades covered by the survey the y axis to the Force-of-Infection.
 }
 \examples{
diff --git a/man/plot_rhats.Rd b/man/plot_rhats.Rd
index eb206112..00de4a0d 100644
--- a/man/plot_rhats.Rd
+++ b/man/plot_rhats.Rd
@@ -17,8 +17,8 @@ plot_rhats(seromodel_object, cohort_ages, size_text = 25)
 The rhats-convergence plot of the selected model.
 }
 \description{
-This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}. 
-The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats. 
+This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}.
+The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats.
 All rhats must be smaller than 1 to ensure convergence (for further details check \link[bayesplot]{rhat}).
 }
 \examples{
diff --git a/man/plot_seroprev_fitted.Rd b/man/plot_seroprev_fitted.Rd
index 837765a5..48a27081 100644
--- a/man/plot_seroprev_fitted.Rd
+++ b/man/plot_seroprev_fitted.Rd
@@ -37,7 +37,7 @@ A ggplot object containing the seropositivity-vs-age graph including the data, t
 }
 \description{
 This function generates a seropositivity plot of the specified serological model object. This includes the original data grouped by age
-as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its 
+as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its
 corresponding confidence interval.
 }
 \examples{
diff --git a/man/prepare_bin_data.Rd b/man/prepare_bin_data.Rd
index e7c8c1bd..52804508 100644
--- a/man/prepare_bin_data.Rd
+++ b/man/prepare_bin_data.Rd
@@ -33,7 +33,7 @@ The last six colums can be added to \code{serodata} by means of the function \co
 data set with the binomial confidence intervals
 }
 \description{
-This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals 
+This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals
 of its corresponding seroprevalence grouped by age group.
 }
 \examples{
diff --git a/man/prepare_serodata.Rd b/man/prepare_serodata.Rd
index 8e3c316a..837e080b 100644
--- a/man/prepare_serodata.Rd
+++ b/man/prepare_serodata.Rd
@@ -28,7 +28,9 @@ serodata with additional columns necessary for the analysis. These columns are:
 \tabular{ll}{
 \code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr
 \code{sample_size} \tab The size of the sample \cr \tab \cr
-\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr
+\code{birth_year} \tab Years in which the subjects were borned
+according to the age group marker \code{age_mean_f}\cr \tab \cr
+according to the age group marker \code{age_mean_f}\cr \tab \cr
 \code{prev_obs} \tab Observed prevalence \cr \tab \cr
 \code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr
 \code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr
diff --git a/tests/testthat/test_get_sim.R b/tests/testthat/test_get_sim.R
new file mode 100644
index 00000000..222c44d6
--- /dev/null
+++ b/tests/testthat/test_get_sim.R
@@ -0,0 +1,44 @@
+test_that("test get_sim functionalities", {
+
+  library(serofoi)
+  library(dplyr)
+
+  tsur <- 2050
+  birth_year_min <- 2000
+  survey_label <- "foi_sim"
+  country = 'None'
+  test = "test"
+  antibody = "IgG"
+
+  sim_data <- data.frame(birth_year = c(birth_year_min:(tsur - 1))) %>%
+    mutate(tsur = tsur,
+           country = country,
+           test = test,
+           antibody = antibody,
+           survey = survey_label,
+           age_mean_f = tsur - birth_year)
+
+  #----- Test function generate_sim_probability
+  n_years <- 50
+  foi_sim <- rep(0.02, n_years)
+  sim_probability <- get_sim_probability(sim_data = sim_data,
+                                         foi = foi_sim)
+
+  exposure_matrix <- matrix(1, n_years, n_years)
+  exposure_matrix[lower.tri(exposure_matrix)] <- 0
+  probabilities <- purrr::map_dbl(1:n_years, ~1-exp(-pracma::dot(exposure_matrix[., ], foi_sim)))
+
+  expect_s3_class(sim_probability, "data.frame")
+  expect_type(sim_probability$age, "integer")
+  expect_type(sim_probability$probability, "double")
+  expect_true(all(probabilities == sim_probability$probability))
+
+  #----- Test function generate_sim_n_seropositivity
+  sample_size_by_age <- 5
+  sim_n_seropositive <- get_sim_n_seropositive(sim_data = sim_data,
+                                               foi = foi_sim,
+                                               sample_size_by_age = sample_size_by_age)
+  expect_s3_class(sim_n_seropositive, "data.frame")
+  expect_type(sim_n_seropositive$age, "integer")
+  expect_type(sim_n_seropositive$n_seropositive, "integer")
+})
diff --git a/tests/testthat/test_sim_data.R b/tests/testthat/test_sim_data.R
new file mode 100644
index 00000000..3ae6440d
--- /dev/null
+++ b/tests/testthat/test_sim_data.R
@@ -0,0 +1,51 @@
+test_that("simulated data", {
+    library(dplyr)
+    library(serofoi)
+
+    seed <- 1234
+    sample_size_by_age <- 10^7
+    tsur <- 2050
+    birth_year_min <- 2000
+
+    #----- Test for constant FoI
+
+    foi_values <- c(0.001, 0.01, 0.1, 0.3, 0.4)
+    for(foi_value in foi_values) {
+      foi_sim <- rep(foi_value, tsur - birth_year_min)
+      #----- Test function generate_sim_data
+      sim_data <- generate_sim_data(foi = foi_sim,
+                                    sample_size_by_age = sample_size_by_age,
+                                    tsur = tsur,
+                                    birth_year_min = birth_year_min,
+                                    survey_label = 'foi_sim_constant',
+                                    seed = seed)
+      prev_exact <- 1 - exp(- foi_value * sim_data$age_mean_f)
+
+      expect_s3_class(sim_data, "data.frame")
+      expect_length(sim_data$birth_year, tsur - birth_year_min)
+      expect_equal(sim_data$prev_obs, prev_exact, tolerance = TRUE)
+
+      #----- Test function group_sim_data
+      sim_data <- sim_data %>% mutate(age_min = age_mean_f, age_max = age_mean_f)
+      sim_data_grouped <- group_sim_data(sim_data = sim_data)
+      expect_s3_class(sim_data_grouped, "data.frame")
+      expect_s3_class(sim_data_grouped$age_group, "factor")
+    }
+
+    #----- Test for time-varying FoI
+    no_transm <- 0.0000000001
+    foi_sim <- c(rep(0.2, 25), rep(0.1, 10), rep(no_transm, 15))
+
+    sim_data <- generate_sim_data(foi = foi_sim,
+                                  sample_size_by_age = sample_size_by_age,
+                                  tsur = tsur,
+                                  birth_year_min = birth_year_min,
+                                  survey_label = 'sw_dec_foi',
+                                  seed = seed)
+
+    prev_exact <- 1 - exp(-cumsum(rev(foi_sim)))
+
+    expect_s3_class(sim_data, "data.frame")
+    expect_length(sim_data$birth_year, tsur - birth_year_min)
+    expect_equal(sim_data$prev_obs, prev_exact, tolerance = TRUE)
+})