Merge pull request #11 from robboyd/master

Pull in Rob B's updates to reporting rate to allow coarser time period

AugustT authored Oct 11, 2022
2 parents 84594ee + 8898a78 commit 02eed2a
Showing 10 changed files with 166 additions and 54 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,8 +1,8 @@
 Package: sparta
 Type: Package
 Title: Trend Analysis for Unstructured Data
-Version: 0.2.14
-Date: 2020-02-15
+Version: 0.2.18
+Date: 2020-03-27
 Authors@R: c(person("Tom", "August", role = c("aut", "cre"), email = "[email protected]"),
              person("Gary", "Powney", role = c("aut")),
              person("Charlie", "Outhwaite", role = c("aut")),
8 changes: 6 additions & 2 deletions R/dataGaps.r
@@ -4,6 +4,10 @@ dataGaps<-function(years, minmod, maxmod, mindat, maxdat){
   actualyears <- (years-1) + minmod
   middleyears <- subset(actualyears,actualyears %in% seq(mindat,maxdat,1))
   middleyears <- sort(unique(middleyears))
-  gap_middle <- max(diff(middleyears)-1)
+  if(length(middleyears)>1){
+    gap_middle <- max(diff(middleyears)-1)
+  } else {
+    gap_middle <- NA
+  }
   return(list(gap_start=gap_start,gap_end=gap_end,gap_middle=gap_middle))
 }
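The guard added here matters because R's diff() and max() behave badly on very short vectors. A minimal sketch of the failure mode the old line hit when only one "middle" year was present:

    middleyears <- c(2005)
    diff(middleyears)           # numeric(0): no pairs to difference
    max(diff(middleyears) - 1)  # -Inf, with a "no non-missing arguments to max" warning

Returning NA instead makes the no-gap-measurable case explicit for downstream code.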
67 changes: 54 additions & 13 deletions R/occDetFunc.r
@@ -50,6 +50,8 @@
 #' Other options are `EqualWt` or `HighSpec`, which define the application of "rules of thumb" defined in Pocock et al 2019.
 #' Defaults to 1, in which case the model is applied so long as there is a single record of the focal species.
 #' @param provenance An optional text string allowing the user to identify the dataset.
+#' @param rem_aggs_with_missing_regions An option which, if TRUE, will remove all aggregates which contain at least one region with no data.
+#' If FALSE, only aggregates where ALL regions in that aggregate contain no data are dropped. Defaults to TRUE.
 #'
 #' @details \code{modeltype} is used to choose the model as well as the associated initial values,
 #' and parameters to monitor. Elements to choose from can be separated into the following components:
@@ -179,7 +181,8 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
                         seed = NULL, model.function = NULL, regional_codes = NULL,
                         region_aggs = NULL, additional.parameters = NULL,
                         additional.BUGS.elements = NULL, additional.init.values = NULL,
-                        return_data = FALSE, criterion = 1, provenance = NULL, saveMatrix = FALSE){
+                        return_data = FALSE, criterion = 1, provenance = NULL, saveMatrix = FALSE,
+                        rem_aggs_with_missing_regions = TRUE){
 
   ################## BASIC CHECKS
   # first run the error checks
@@ -192,8 +195,8 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
   # Check the taxa_name is one of my species
   if(!taxa_name %in% colnames(spp_vis)) stop('taxa_name is not the name of a taxa in spp_vis')
   ##################
-  min_year <- min(occDetdata$TP)
 
+  min_year_original <- min_year <- min(occDetdata$TP)
   # only include sites which have more than nyr of records
   yps <- rowSums(acast(occDetdata, site ~ TP, length, value.var = 'L') > 0)
   sites_to_include <- names(yps[yps >= nyr])
@@ -357,6 +360,13 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
   # need to get a measure of whether the species was on that site in that year, unequivocally, in zst
   zst <- acast(occDetdata, id ~ factor(TP), value.var = 'focal', max, fill = 0) # initial values for the latent state = observed state
 
+  # Calculate min year. We're doing it now as it's fine if we've dropped the first year(s) of data and nothing in the middle
+  min_year <- min(occDetdata$TP)
+  if(min_year != min_year_original){
+    warning(paste0('\nThe first year of the data has changed, as the first years of data were dropped.\n',
+                   'The original first year was ', min_year_original, '. It is now ', min_year, '\n'))
+  }
+
   # if the max_year is not null, edit the zst table to add the additional years required
   if(!is.null(max_year)){
 
@@ -493,16 +503,47 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
     region_names <- setdiff(region_names, zero_sites)
 
     # remove region aggregates
-    rem_aggs <- unlist(lapply(region_aggs, FUN = function(x) any(zero_sites %in% x)))
-    rem_aggs_names <- names(region_aggs)[rem_aggs]
-
-    # remove aggs if you need to
-    if(length(rem_aggs_names) > 0){
-      warning(paste('The following region aggregates have to be removed as they contain a region with no data:',
-                    paste(rem_aggs_names, collapse = ', '),
-                    '- These region aggregates will not be included in the model'))
-      region_aggs <- region_aggs[!names(region_aggs) %in% rem_aggs_names]
-      parameters <- parameters[!parameters %in% paste0('psi.fs.r_', rem_aggs_names)]
-    }
+    if(rem_aggs_with_missing_regions){
+      rem_aggs <- unlist(lapply(region_aggs, FUN = function(x) any(zero_sites %in% x)))
+      rem_aggs_names <- names(region_aggs)[rem_aggs]
+
+      # remove aggs if you need to
+      if(length(rem_aggs_names) > 0){
+        warning(paste('The following region aggregates have to be removed as they contain a region with no data:',
+                      paste(rem_aggs_names, collapse = ', '),
+                      '- These region aggregates will not be included in the model\n',
+                      'If you want to keep aggregates with one or more missing regions,',
+                      'set rem_aggs_with_missing_regions=FALSE'))
+        region_aggs <- region_aggs[!names(region_aggs) %in% rem_aggs_names]
+        parameters <- parameters[!parameters %in% paste0('psi.fs.r_', rem_aggs_names)]
+      }
+    } else {
+      rem_aggs <- unlist(lapply(region_aggs, FUN = function(x) all(x %in% zero_sites)))
+      rem_aggs_names <- names(region_aggs)[rem_aggs]
+      edit_aggs <- unlist(lapply(region_aggs, FUN = function(x) any(zero_sites %in% x)))
+      edit_aggs_names <- names(region_aggs)[edit_aggs & !(rem_aggs)]
+
+      # edit aggs if you need to
+      if(length(edit_aggs_names) > 0){
+        warning(paste('The following region aggregates have to be edited as they contain regions with no data:',
+                      paste(edit_aggs_names, collapse = ', '),
+                      '\n- These region aggregates will still be included in the model\n'))
+        # Recreate aggs, removing regions without data
+        region_aggs_new <- lapply(region_aggs, FUN = function(agg){
+          agg[!(agg %in% zero_sites)]
+        })
+        names(region_aggs_new) <- names(region_aggs)
+        region_aggs <- region_aggs_new
+      }
+
+      # remove aggs completely if you need to
+      if(length(rem_aggs_names) > 0){
+        warning(paste('The following region aggregates have to be removed as all regions within them have no data:',
+                      paste(rem_aggs_names, collapse = ', '),
+                      '- These region aggregates will not be included in the model'))
+        region_aggs <- region_aggs[!names(region_aggs) %in% rem_aggs_names]
+        parameters <- parameters[!parameters %in% paste0('psi.fs.r_', rem_aggs_names)]
+      }
+    }
   }
 
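To illustrate the two behaviours of the new rem_aggs_with_missing_regions switch, here is a minimal sketch using hypothetical region names (not from the package's test data):

    zero_sites  <- c('region2')                          # regions with no data
    region_aggs <- list(agg1 = c('region1', 'region2'),  # partly empty aggregate
                        agg2 = c('region2'))             # entirely empty aggregate

    # TRUE (default): any aggregate touching an empty region is dropped
    unlist(lapply(region_aggs, function(x) any(zero_sites %in% x)))  # agg1 TRUE, agg2 TRUE

    # FALSE: only fully empty aggregates are dropped; the rest are trimmed
    unlist(lapply(region_aggs, function(x) all(x %in% zero_sites)))  # agg1 FALSE, agg2 TRUE

Under the FALSE setting, agg1 would be kept but reduced to c('region1').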
23 changes: 19 additions & 4 deletions R/occDetModel.r
@@ -10,6 +10,13 @@
 #' @param site A character vector of site names, as long as the number of observations.
 #' @param survey A vector as long as the number of observations.
 #' This must be a Date if includeJDay = \code{TRUE}
+#' @param replicate An optional vector to identify replicate samples (visits) per survey. Need not be globally unique (e.g. can be 1, 2, ..., n within surveys).
+#' @param closure_period An optional vector of integers specifying the closure period.
+#' If \code{NULL} then closure_period will be extracted as the year from the survey.
+#' @param criterion Determines whether the model should be run. If an integer then this defines the threshold number of records (50 in Outhwaite et al 2019).
+#' Other options are `EqualWt` or `HighSpec`, which define the application of "rules of thumb" defined in Pocock et al 2019.
+#' Defaults to 1, in which case the model is applied so long as there is a single record of the focal species.
+#' @param provenance An optional text string allowing the user to identify the dataset.
 #' @param species_list A character vector of taxa names for which models should be run. This is
 #' optional and by default models will be run for all taxa
 #' @param write_results logical, should results be saved to \code{output_dir}. This is
@@ -168,11 +175,15 @@
 #'   region_aggs = list(agg1 = c('region1', 'region2')))
 #' }
 #' @export
-#' @references Roy, H.E., Adriaens, T., Isaac, N.J.B. et al. (2012) Invasive alien predator
-#' causes rapid declines of native European ladybirds. Diversity & Distributions,
-#' 18, 717-725.
+#' @references Isaac, N.J.B., van Strien, A.J., August, T.A., de Zeeuw, M.P. and Roy, D.B. (2014).
+#' Statistics for citizen science: extracting signals of change from noisy ecological data.
+#' \emph{Methods in Ecology and Evolution}, 5: 1052-1060.
+#' @references Outhwaite, C.L., Chandler, R.E., Powney, G.D., Collen, B., Gregory, R.D. & Isaac, N.J.B. (2018).
+#' Prior specification in Bayesian occupancy modelling improves analysis of species occurrence data.
+#' \emph{Ecological Indicators}, 93: 333-343.
 
-occDetModel <- function(taxa, site, survey,
+occDetModel <- function(taxa, site, survey,
+                        replicate = NULL, closure_period = NULL, criterion = 1, provenance = NULL,
                         species_list = unique(taxa), write_results = TRUE,
                         output_dir = getwd(), nyr = 2, n_iterations = 5000,
                         burnin = 1500, thinning = 3, n_chains = 3,
@@ -202,6 +213,8 @@ occDetModel <- function(taxa, site, survey,
   visitData <- formatOccData(taxa = taxa,
                              site = site,
                              survey = survey,
+                             replicate = replicate,
+                             closure_period = closure_period,
                              includeJDay = includeJDay)
 
   ### loop through the species list running the Bayesian occupancy model function ###
@@ -228,6 +241,8 @@ occDetModel <- function(taxa, site, survey,
                          additional.parameters = additional.parameters,
                          additional.BUGS.elements = additional.BUGS.elements,
                          additional.init.values = additional.init.values,
+                         provenance = provenance,
+                         criterion = criterion,
                          return_data = return_data)
   }
 
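A hedged usage sketch of the extended occDetModel() signature; myData and its column names are illustrative only, not from the package:

    results <- occDetModel(taxa = myData$species,
                           site = myData$site,
                           survey = myData$date,
                           replicate = myData$visit,        # visit IDs, unique within surveys
                           closure_period = myData$period,  # e.g. 1, 1, 2, 2, ... for multi-year periods
                           criterion = 1,
                           provenance = 'my recording scheme',
                           write_results = FALSE)

When closure_period is left NULL the year is extracted from the survey dates, reproducing the previous annual behaviour.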
33 changes: 23 additions & 10 deletions R/plot_DetectionOverTime.R
@@ -21,8 +21,18 @@
 #' @export
 
 
-plot_DetectionOverTime <- function(model, spname = NULL, min.yr = NULL){
+plot_DetectionOverTime <- function(model, spname = NULL, min.yr = NULL, CI = 95){
+
+  # convert the CI into quantiles
+  # first check that CI is sensible
+  if((CI > 100) | (CI <= 0)) stop("Credible intervals must be between 0 and 100")
+
+  CI2q <- function(CI) {
+    q <- (1 - CI/100)/2
+    return(c(q, 1-q))
+  }
+  quant <- CI2q(CI)
+
   sims_list <- model$BUGSoutput$sims.list
 
   # the base: alpha.p is common to all models:
@@ -47,11 +57,14 @@ plot_DetectionOverTime <- function(model, spname = NULL, min.yr = NULL, CI = 95){
     # the model was fitted with categorical list length
     pDet2 <- pDet1 + sims_list$dtype2.p[,1]
     pDet4 <- pDet1 + sims_list$dtype3.p[,1]
-  }
-  # there is also an option to ignore list length,
-  # in which case the probability of detection is assumed to be constant across surveys
-  # i.e. if the survey was systematic
-
+  } else {
+    # there is also an option to ignore list length,
+    # in which case the probability of detection is assumed to be constant across surveys
+    # i.e. if the survey was systematic
+    # in this case there are no estimates for the other pDet values.
+    pDet2 <- pDet4 <- NA
+  }
 
   pDet <- melt(list(pDet1, pDet2, pDet4))
   names(pDet) <- c("it", "year", "lgt_pDet", "ListLength")
   pDet$ListLength[pDet$ListLength==3] <- 4 # the "third" category is for a list of length 4
@@ -61,9 +74,9 @@ plot_DetectionOverTime <- function(model, spname = NULL, min.yr = NULL, CI = 95){
   # now summarize these posterior distributions
   pDet_summary <- ddply(
     pDet, .(year, ListLength), summarise,
-    mean_pDet = mean(pDet),
-    lower95CI = quantile(pDet, 0.025),
-    upper95CI = quantile(pDet, 0.975))
+    mean_pDet = mean(pDet, na.rm = TRUE),
+    lower95CI = quantile(pDet, quant[1], na.rm = TRUE),
+    upper95CI = quantile(pDet, quant[2], na.rm = TRUE))
 
   # if the user has supplied a year then switch the x axis to start at that minimum
   if(!is.null(min.yr)) pDet_summary$year <- pDet_summary$year + min.yr - 1
@@ -75,7 +88,7 @@ plot_DetectionOverTime <- function(model, spname = NULL, min.yr = NULL, CI = 95){
     ylab("Detection probability") +
     ggtitle(spname) +
     theme_bw()
-  gp
+  gp
 
 }
 
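For the new CI argument, the conversion to quantiles works as in CI2q() above: an 80% credible interval maps to the 0.1 and 0.9 quantiles, since (1 - 80/100)/2 = 0.1. A hedged example call, assuming model is an object returned by occDetFunc:

    plot_DetectionOverTime(model, spname = 'Example species', min.yr = 1990, CI = 80)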
31 changes: 18 additions & 13 deletions R/reportingRateModel.r
@@ -5,9 +5,12 @@
 #' @param taxa A character vector of taxon names, as long as the number of observations.
 #' @param site A character vector of site names, as long as the number of observations.
 #' @param time_period A numeric vector of user defined time periods, or a date vector,
-#' as long as the number of observations.
+#' as long as the number of observations. Dates should be used where one wants to fit models at the "visit"
+#' (typically one day) level. To fit models at coarser temporal resolutions, \code{time_period}
+#' should be numeric. Note that duplicate site x time_period x species records are dropped, so
+#' your data will be substantially degraded if broad time periods are used.
 #' @param list_length Logical, if \code{TRUE} then list length is added to the models as a fixed
-#' effect. Note that since list_length is a property of each visit the model will run as
+#' effect. Note that since list_length is a property of each visit (or coarser period) the model will run as
 #' a binomial model rather than as a Bernoulli model.
 #' @param site_effect Logical, if \code{TRUE} then site is added to the models as a random
 #' effect.
@@ -91,7 +94,7 @@

 reportingRateModel <- function(taxa, site, time_period, list_length = FALSE, site_effect = FALSE,
                                species_to_include = unique(taxa), overdispersion = FALSE,
-                               verbose = FALSE, family = 'Binomial', print_progress = FALSE) {
+                               verbose = FALSE, family = 'Binomial', print_progress = TRUE) {
 
   # Do error checks
   errorChecks(taxa = taxa, site = site, time_period = time_period, list_length = list_length,
@@ -100,12 +103,12 @@ reportingRateModel <- function(taxa, site, time_period, list_length = FALSE, sit

   # Create dataframe from vectors
   taxa_data <- distinct(data.frame(taxa, site, time_period))
 
   # Reshape the data so that it is suitable for model
   space_time <- dcast(taxa_data, time_period + site ~ ., value.var='taxa',
                       fun.aggregate = function(x) length(unique(x)))
   names(space_time)[ncol(space_time)] <- 'listLength'
 
   # time_period could be a numeric or a date. If it is a date extract the year
   if('POSIXct' %in% class(time_period) | 'Date' %in% class(time_period)){
 
@@ -114,20 +117,22 @@ reportingRateModel <- function(taxa, site, time_period, list_length = FALSE, sit
     intercept_year <- median(unique(as.numeric(space_time$year)))
     space_time$year <- space_time$year - intercept_year
 
-  } else{
-
-    stop('non-dates not yet supported for time_period')
-
+  } else {
+    # note that the term "year" is used loosely; may be a time period of any length
+    space_time$year <- space_time$time_period
+    intercept_year <- median(unique(space_time$year))
+    space_time$year <- space_time$year - intercept_year
+
   }
 
   model_formula <- formulaBuilder(family,
                                   list_length,
                                   site_effect,
                                   overdispersion)
 
   counter <- data.frame(species = species_to_include,
                         count = 1:length(species_to_include))
 
   # Run an apply across all species which undertakes the modelling
   all_coefs <- lapply(species_to_include, function(species_name){ # the sort ensures species are done in order
 
@@ -156,10 +161,10 @@ reportingRateModel <- function(taxa, site, time_period, list_length = FALSE, sit
     }
 
   })
 
   # Bind the results of the apply into a data.frame
   coef_df <- do.call(rbind.fill, all_coefs)
 
   # Get some attributes of the dataset
   attributes(coef_df) <- c(attributes(coef_df), list(intercept_year = intercept_year,
                                                      min_year = min(unique(as.numeric(space_time$year))),
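A hedged sketch of the headline feature of this PR, fitting reporting-rate models at a coarser temporal resolution by passing numeric time periods; taxa_vec and site_vec are illustrative placeholders:

    # five closure periods instead of visit-level dates
    period <- rep(1:5, length.out = length(taxa_vec))
    rr <- reportingRateModel(taxa = taxa_vec,
                             site = site_vec,
                             time_period = period,  # numeric => coarser than visit level
                             list_length = TRUE,
                             site_effect = TRUE)
    # note: duplicate site x time_period x species records are dropped internally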
6 changes: 5 additions & 1 deletion man/occDetFunc.Rd

Some generated files are not rendered by default.