
Commit

FrancescaMancini committed Apr 10, 2019
2 parents cafb6e2 + f699487 commit 34cd906
Showing 23 changed files with 439 additions and 349 deletions.
3 changes: 2 additions & 1 deletion .Rbuildignore
@@ -8,4 +8,5 @@ pre_vignette
logo.png
frescalo.exe
^docs$
vignettes\sparta_vignette.Rmd
vignettes\sparta_vignette.Rmd
misc/
4 changes: 3 additions & 1 deletion .gitignore
@@ -8,4 +8,6 @@

# etc
frescalo.exe
sparta.Rproj
sparta.Rproj

misc/
7 changes: 4 additions & 3 deletions DESCRIPTION
100755 → 100644
@@ -1,8 +1,8 @@
Package: sparta
Type: Package
Title: Trend Analysis for Unstructured Data
Version: 0.1.48
Date: 2019-01-15
Version: 0.1.49
Date: 2019-03-07
Authors@R: c(person("Tom", "August", role = c("aut", "cre"), email = "[email protected]"),
person("Gary", "Powney", role = c("aut")),
person("Charlie", "Outhwaite", role = c("aut")),
@@ -26,7 +26,8 @@ Imports:
gdata,
R2jags,
LearnBayes,
rmarkdown
rmarkdown,
boot
Suggests:
testthat (>= 0.8.0),
roxygen2,
3 changes: 3 additions & 0 deletions NAMESPACE
@@ -9,6 +9,7 @@ export(WSS)
export(createWeights)
export(dataDiagnostics)
export(date2timeperiod)
export(detection_phenology)
export(formatOccData)
export(frescalo)
export(getBugsData)
@@ -24,6 +25,7 @@ export(occDetModel)
export(occurrenceChange)
export(recsOverTime)
export(reportingRateModel)
export(simOccData)
export(siteSelection)
export(siteSelectionMinL)
export(siteSelectionMinTP)
@@ -35,6 +37,7 @@ import(ggplot2)
import(lme4)
import(rmarkdown)
import(sp)
importFrom(boot,inv.logit)
importFrom(dplyr,distinct)
importFrom(dplyr,group_by)
importFrom(dplyr,summarise)
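The new boot import supplies inv.logit(), the inverse-logit back-transform used by detection_phenology() below; a quick sketch for reference:

library(boot)
inv.logit(0)           # 0.5
inv.logit(c(-2, 2))    # ~0.12 and ~0.88: maps logit-scale values back to probabilities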
269 changes: 0 additions & 269 deletions Occ_viz_html.html

This file was deleted.

84 changes: 84 additions & 0 deletions R/detection_phenology.R
@@ -0,0 +1,84 @@
#' Diagnostics for the detectability with respect to Julian Date
#'
#' Creates a plot of detectability over the season and calculates some simple statistics
#'
#' @param model a fitted sparta model of class \code{OccDet}.
#' @param spname optional name of the species (used for plotting)
#' @param bins number of points to estimate across the year. Defaults to 12
#'
#' @details
#' Takes an object of class \code{OccDet} that was fitted with the \code{jul_date} option.
#'
#' Calculates the phenology of detection and produces a plot of detectability over time for the reference data type.
#'
#' @return A \code{ggplot2} object showing the estimated detection probability across the season, with a ribbon for the 95\% credible interval.
#'
#' @references van Strien, A.J., Termaat, T., Groenendijk, D., Mensing, V. & Kéry, M. (2010)
#' Site-occupancy models may offer new opportunities for dragonfly monitoring based on daily species lists.
#' \emph{Basic and Applied Ecology}, 11, 495–503.
#' @export


###########################################
# calculate some stats
# test
# correct for starting on 1 July
###########################################


detection_phenology <- function(model, spname=NULL, bins=12){

require(sparta)
require(reshape2)
require(plyr)
require(boot)
require(ggplot2)

data <- model$BUGSoutput$sims.list

if(!"beta1" %in% names(data))
stop("no phenological effect was modelled!")

# the base: alpha.p is common to all models:
# it's the logit probability of detection on a single species list
# use the average value across years
pDet1 <- rowMeans(data$alpha.p)
# data$alpha.p is an array of dims (niter, nyr); pDet1 is a vector of length niter

# Julian dates run from 1 to 365: evaluate detectability at `bins` evenly spaced dates
jul_dates <- seq(from=1, to=365, length.out=bins)

cjd <- jul_dates - 182
# the model was run with the Julian Date option, with dates centred,
# so centre these dates on day 182 (approximately 1 July) before applying the phenology terms
pDet <- melt(sapply(cjd, function(jd){
pDet1 + jd * data$beta1[,1] + jd^2 * data$beta2[,1]
}))

names(pDet) <- c("it", "bin","lgt_pDet")
pDet$pDet <- inv.logit(pDet$lgt_pDet)

# now summarize these posterior distributions
pDet_summary <- ddply(
pDet, .(bin), summarise,
mean_pDet = mean(pDet),
lower95CI = quantile(pDet, 0.025),
upper95CI = quantile(pDet, 0.975))

# now convert the jds back to their equivalent Julian Dates
pDet_summary$cJulDate <- cjd[pDet_summary$bin]
pDet_summary$JulianDay <- jul_dates[pDet_summary$bin]

# now plot the detection over time
gp <- ggplot(data = pDet_summary) +
geom_line(aes(x=JulianDay, y=mean_pDet)) +
geom_ribbon(aes(x=JulianDay, ymin=lower95CI, ymax=upper95CI), alpha=0.2) +
ylab("Detection probability") +
ggtitle(spname) +
theme_bw()
gp

# TODO: calculate some simple summary statistics

}
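For orientation, a minimal usage sketch. The object name occ_model is illustrative: it stands for an OccDet model fitted by occDetFunc with the jul_date option, so that beta1 and beta2 (the linear and quadratic terms on centred Julian date) are present in the posterior samples:

library(sparta)

# plot the seasonal detectability curve for one species
pheno_plot <- detection_phenology(model = occ_model,   # OccDet object fitted with the 'jul_date' option
                                  spname = "Unicorn",  # used as the plot title
                                  bins = 52)           # weekly resolution rather than the default 12
pheno_plot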

18 changes: 14 additions & 4 deletions R/errorChecks.r
@@ -1,6 +1,6 @@
#' @importFrom dplyr distinct

errorChecks <- function(taxa = NULL, site = NULL, survey = NULL, closure_period = NULL, time_period = NULL,
errorChecks <- function(taxa = NULL, site = NULL, survey = NULL, replicate = NULL, closure_period = NULL, time_period = NULL,
startDate = NULL, endDate = NULL, Date = NULL,
time_periodsDF = NULL, dist = NULL, sim = NULL,
dist_sub = NULL, sim_sub = NULL, minSite = NULL, useIterations = NULL,
Expand All @@ -17,6 +17,7 @@ errorChecks <- function(taxa = NULL, site = NULL, survey = NULL, closure_period
site=site,
survey=survey,
closure_period=closure_period,
replicate=replicate,
time_period=time_period,
startDate=startDate,
endDate=endDate)
@@ -51,21 +52,30 @@ errorChecks <- function(taxa = NULL, site = NULL, survey = NULL, closure_period

}

###### Make sure there are no NAs

### Checks for taxa ###
if(!is.null(taxa)){
# Make sure there are no NAs
if(!all(!is.na(taxa))) stop('taxa must not contain NAs')
}

### Checks for site ###
if(!is.null(site)){
# Make sure there are no NAs
if(!all(!is.na(site))) stop('site must not contain NAs')
}

### Checks for closure period ###
if(!is.null(closure_period)){
if(!all(!is.na(closure_period))) stop('closure_period must not contain NAs')
}

### Checks for replicate ###
if(!is.null(replicate)){
if(!all(!is.na(replicate))) stop('replicate must not contain NAs')
}

### Checks for time_period ###
if(!is.null(time_period)){
# Make sure there are no NAs
if(!all(!is.na(time_period))) stop('time_period must not contain NAs')
}

26 changes: 18 additions & 8 deletions R/formatOccData.r
@@ -8,6 +8,7 @@
#' @param site A character vector of site names, as long as the number of observations.
#' @param survey A vector as long as the number of observations.
#' This must be a Date if either closure_period is not supplied or if includeJDay = \code{TRUE}
#' @param replicate An optional vector identifying replicate samples (visits) within a survey. Values need not be globally unique (e.g. they can be 1, 2, ..., n within each survey).
#' @param closure_period An optional vector of integers specifying the closure period.
#' If not supplied, the closure period will be extracted as the year from the survey.
#' @param includeJDay Logical. If \code{TRUE} a Julian day column is returned in the
@@ -85,17 +86,23 @@
#' site = site,
#' survey = survey,
#' closure_period = closure_period)
#' }
#'
#' # format the unicorns data
#' formatted_data <- formatOccData(taxa = unicorns$CONCEPT,
#' survey = unicorns$Date,
#' site = unicorns$kmsq)
#'}
#'
#' @export
#' @importFrom dplyr distinct
#' @importFrom dplyr summarise
#' @importFrom dplyr group_by
#' @importFrom reshape2 dcast

formatOccData <- function(taxa, site, survey, closure_period = NULL, includeJDay = FALSE){
formatOccData <- function(taxa, site, survey, replicate = NULL, closure_period = NULL, includeJDay = FALSE){

# Do error checks
errorChecks(taxa = taxa, site = site, survey = survey, closure_period = closure_period)
errorChecks(taxa = taxa, site = site, survey = survey, replicate = replicate, closure_period = closure_period)

# Additional error check whether survey is a date
if(!'POSIXct' %in% class(survey) & !'Date' %in% class(survey)){
@@ -109,8 +116,10 @@ formatOccData <- function(taxa, site, survey, closure_period = NULL, includeJDay
}
} # otherwise survey can be anything (including a character vector)

if(is.null(replicate)) replicate <- rep(1, length(survey)) # default: all records belong to a single replicate

# Create dataframe from vectors
taxa_data <- data.frame(taxa, site, survey)
taxa_data <- data.frame(taxa, site, survey, replicate)

# now add the time period (TP). It's defined by closure_period if supplied
# TP was formerly referred to as year
@@ -122,9 +131,10 @@ formatOccData <- function(taxa, site, survey, closure_period = NULL, includeJDay
taxa_data$TP <- lookup$TP[match(closure_period, lookup$cp)]

#check that surveys are nested within closure_periods
temp <- taxa_data
temp <- group_by(temp, survey)
temp <- summarise(temp, ns = length(unique(temp$TP)))
temp <- taxa_data %>%
group_by(survey, replicate) %>%
summarise(ns = length(unique(TP)))
if(any(temp$ns > 1)) {
# return a warning but assume they know what they're doing
warning(paste(as.numeric(table(temp$ns >1)[2]), 'survey identities appear in multiple closure periods'))
Expand All @@ -139,7 +149,7 @@ formatOccData <- function(taxa, site, survey, closure_period = NULL, includeJDay
taxa_data <- distinct(taxa_data)

# add list length column
taxa_data$visit <- paste(taxa_data$site, taxa_data$survey, sep="") # create a factor which combines date and site
taxa_data$visit <- paste(taxa_data$site, taxa_data$survey, taxa_data$replicate, sep="") # create a factor which combines site, survey & replicate
visit_Ls <- as.data.frame(table(taxa_data$visit))
names(visit_Ls) <- c("visit", "L")
taxa_data <- merge(taxa_data, visit_Ls)
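A minimal sketch of the new replicate argument in use; the input vectors are invented for illustration. Records sharing a site and survey date but differing in replicate are now kept as separate visits, because replicate forms part of the visit identifier:

library(sparta)

taxa   <- c("sp1", "sp2", "sp1")
site   <- c("A1",  "A1",  "A1")
survey <- as.Date(rep("2018-06-01", 3))
repl   <- c(1, 1, 2)   # two replicate visits to site A1 on the same date

formatted <- formatOccData(taxa = taxa, site = site,
                           survey = survey, replicate = repl)
# the first replicate visit has list length 2 (sp1 & sp2), the second has list length 1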
4 changes: 3 additions & 1 deletion R/getBugsData.R
@@ -55,7 +55,9 @@ getBugsData <- function(bugs_data, modeltype, verbose = FALSE,

jul_date = {
if(verbose) cat('Adding bugs_data elements for Julian Date\n')
JulDate <- occDetData$Jul_date

JulDate <- occDetData$Jul_date #- 182 # centering has already been done in formatOccData!

bugs_data <- c(bugs_data,
JulDate = list(as.numeric(JulDate)))
return(bugs_data)
17 changes: 14 additions & 3 deletions R/occDetFunc.r
@@ -168,7 +168,7 @@ occDetFunc <- function (taxa_name, occDetdata, spp_vis, n_iterations = 5000, nyr
seed = NULL, model.function = NULL, regional_codes = NULL,
region_aggs = NULL, additional.parameters = NULL,
additional.BUGS.elements = NULL, additional.init.values = NULL,
return_data = FALSE){
return_data = TRUE){

J_success <- requireNamespace("R2jags", quietly = TRUE)

@@ -205,7 +205,11 @@
if(!taxa_name %in% colnames(spp_vis)) stop('taxa_name is not the name of a taxa in spp_vis')

# Add the focal column (was the species recorded on the visit?). Use the spp_vis dataframe to extract this info
nrow1 <- nrow(occDetdata)
occDetdata <- merge(occDetdata, spp_vis[,c("visit", taxa_name)])

if(nrow1 != nrow(occDetdata)) stop('some visits have been lost')

names(occDetdata)[names(occDetdata) == taxa_name] <- "focal"

# If we are using regional codes do some checks
@@ -257,8 +261,15 @@

# look for missing years before time frame can be extended using max_year parameter
years <- (max(occDetdata$TP) - min(occDetdata$TP))+1
if(length(unique(occDetdata$TP)) != years) stop('It looks like you have years with no data. This will crash BUGS')

if(length(unique(occDetdata$TP)) != years) {
# find out which years have no data
missing_yrs <- with(occDetdata, setdiff(min(TP):max(TP), unique(TP)))
if(length(missing_yrs) ==1)
error_msg <- paste0('There are no visits in year ', missing_yrs,'. This will crash BUGS')
else
error_msg <- paste0('There are ', length(missing_yrs),' years with no visits, including ', missing_yrs[1],'. This will crash BUGS')
stop(error_msg)
}
# Record the max and min values of TP
min_year <- min(occDetdata$TP)

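The new missing-year check above can be illustrated on its own; the TP values here are invented:

TP <- c(2000, 2000, 2001, 2003, 2005)               # closure periods that contain visits
missing_yrs <- setdiff(min(TP):max(TP), unique(TP))
missing_yrs  # 2002 2004
# occDetFunc would now stop with:
# "There are 2 years with no visits, including 2002. This will crash BUGS"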
