new unicorns dataset; changes to vignette

DylanCarbone · May 13, 2024 · 7979f9f · 7979f9f
1 parent 2337f64
commit 7979f9f
Show file tree

Hide file tree

Showing 15 changed files with 249 additions and 33 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,7 +10,7 @@
 # etc
 frescalo.exe
 sparta.Rproj
-
 misc/
 /doc/
 /Meta/
+create_unicorn.r
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -52,6 +52,6 @@ VignetteBuilder:
     knitr
 Encoding: UTF-8
 LazyData: TRUE
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 SystemRequirements: JAGS
     (https://sourceforge.net/projects/mcmc-jags/files/JAGS/)
diff --git a/NAMESPACE b/NAMESPACE
@@ -52,3 +52,4 @@ importFrom(plyr,rbind.fill)
 importFrom(reshape2,acast)
 importFrom(reshape2,dcast)
 importFrom(reshape2,melt)
+importFrom(runjags,testjags)
diff --git a/R/detect_jags.r b/R/detect_jags.r
@@ -0,0 +1,5 @@
+# Internal helper to detect a JAGS installation: returns the JAGS.found element reported by runjags::testjags().
+#' @importFrom runjags testjags
+detect_jags <- function(){
+    return(suppressWarnings(runjags::testjags(silent = TRUE)$JAGS.found)) # silent = TRUE plus suppressWarnings() keeps the check quiet
+}
diff --git a/R/formatOccData.r b/R/formatOccData.r
@@ -87,13 +87,11 @@
 #'                                 survey = survey,
 #'                                 closure_period = closure_period)
 #'  
-#' # format the unicorns data
+#' # OR format the unicorns data
+#' formatted_data <- formatOccData(taxa = unicorns$species,
+#'                                survey = unicorns$start_date,
+#'                                site = unicorns$site)
 #'
-#' unicorns <- unicorns[complete.cases(unicorns$kmsq), ]
-#'
-#' formatted_data <- formatOccData(taxa = unicorns$CONCEPT,
-#'                                survey = unicorns$Date,
-#'                                site = unicorns$kmsq)
 #'}
 #' 
 #' @export

diff --git a/R/frescalo.R b/R/frescalo.R
@@ -151,17 +151,17 @@
 #' # Load data
 #' data(unicorns)
 #'
-#' # Run frescalo (data is save to the working directory as sinkdir is not given)
 #' fres_out <- frescalo(Data = unicorns,
+#'                      frespath = file.path(getwd(), "Frescalo.exe"),
 #'                      time_periods = data.frame(start=c(1980,1990),end=c(1989,1999)),
-#'                      site_col = 'hectad',
-#'                      sp_col = 'CONCEPT',
-#'                      start_col = 'TO_STARTDATE',
-#'                      end_col = 'Date')
+#'                      site_col = 'site',
+#'                      sp_col = 'species',
+#'                      start_col = 'start_date',
+#'                      end_col = 'end_date')
 #'}
 
 frescalo <-
-  function(Data,#your Data (.rdata files) as a file path (or list of file paths)
+  function(Data, #your Data as a dataframe object
            frespath, #path to the exe
            time_periods, #a list of vector pairs used in frescalo (ie 'c((1990,1995),(1996,2000))')
            site_col, # name of site column

diff --git a/R/frescalo_checks.r b/R/frescalo_checks.r
@@ -8,7 +8,7 @@ frescalo_checks <- function(site_col, sp_col, year_col, start_col, end_col,
   # Check column names are in the data
   new.colnames <- c(site_col,sp_col,year_col,start_col,end_col)
   missingColNames <- new.colnames[!new.colnames %in% names(Data)]
-  if(length(missingColNames) > 0) stop(paste(unlist(missingColNames),'is not the name of a column in data'))
+  if(length(missingColNames) > 0) stop(paste(unlist(missingColNames),'is not the name of a column in data\n\n'))
 
   # Remove columns that are not needed
   Data <- Data[,names(Data) %in% new.colnames]

diff --git a/R/unicorns-data.r b/R/unicorns-data.r
@@ -1,6 +1,6 @@
 #' @name unicorns
 #' @title A fictional dataset of unicorn sightings
-#' @description This is a fictional occurrence dataset of 70 species of unicorn in the UK.
+#' @description This is a fictional occurrence dataset of 20 species of unicorn in the UK. The dataset has four columns, start_date, end_date, site and species, which give the start datetime of an occurrence, the end datetime of the occurrence, the site ID and the species ID respectively.
 #' @docType data
 #' @usage unicorns
 #' @author Tom August, 2015-07-01

diff --git a/data/unicorns.rda b/data/unicorns.rda
diff --git a/man/formatOccData.Rd b/man/formatOccData.Rd
diff --git a/man/frescalo.Rd b/man/frescalo.Rd
diff --git a/man/sparta.Rd b/man/sparta.Rd
diff --git a/man/unicorns.Rd b/man/unicorns.Rd
diff --git a/pre_vignette/sparta_vignette.Rmd b/pre_vignette/sparta_vignette.Rmd
@@ -189,7 +189,7 @@ head(telfer_results)
 
 The reporting rates models in sparta are all either GLMs or GLMMs with year as a continuous covariate but are flexible, giving the user a number of options for their analyses. These options include the addition of covariates to account for biases in the data including a random site effect and fixed effect of list length.
 
-In [Isaac et al (2014)](http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12254/abstract) it was shown that reporting rate models can be susceptible to type 1 errors under certain scenarios and that with site and list length covariates the models performed better when the data were bias. These methods were found to out perform simple methods like Telfer.
+In [Isaac et al (2014)](http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12254/abstract) it was shown that reporting rate models can be susceptible to type 1 errors under certain scenarios and that with site and list length covariates the models performed better when the data were biased. These methods were found to outperform simple methods like Telfer.
 
 The common feature among these models is that the quantity under consideration is the 'probability of being recorded'. When binomial models are used (as is the default), it is the 'probability for an average visit'; for the Bernoulli version it is the probability of being recorded per time period.
 
@@ -281,7 +281,7 @@ nrow(myDataSubset)
 
 ### Running Reporting Rate Models
 
-Once you have subset your data using the above functions (or perhaps not at all) the reporting rate models can be applied using the function `reportingRateModel`. This function offers flexibility in the model you wish to fit, allowing the user to specify whether list length and site should be used as covariates, whether over-dispersion should be used, and whether the family should be binomial or Bernoulli. A number of these variants are presented in [Isaac et al (2014)](http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12254/abstract). While multi-species data is required it is not nessecary to model all species. In fact you can save a significant amount of time by only modelling hte species you are interested in.
+Once you have subset your data using the above functions (or perhaps not at all) the reporting rate models can be applied using the function `reportingRateModel`. This function offers flexibility in the model you wish to fit, allowing the user to specify whether list length and site should be used as covariates, whether over-dispersion should be used, and whether the family should be binomial or Bernoulli. A number of these variants are presented in [Isaac et al (2014)](http://onlinelibrary.wiley.com/doi/10.1111/2041-210X.12254/abstract). While multi-species data is required it is not necessary to model all species. In fact you can save a significant amount of time by only modelling the species you are interested in.
 
 ```{r,cache = TRUE}
 # Run the reporting rate model using list length as a fixed effect and 
@@ -321,7 +321,7 @@ with(RR_out,
 
 The returned object is a data frame with one row per species. Each column gives information on an element of the model output including covariate estimates, standard errors and p-values. This object also has some attributes giving the year that was chosen as the intercept, the number of visits in the dataset and the model formula used. 
 
-These models can take a long time to run when your data set is larg or you have a large number of species to model. To make this faster it is possible to parallelise this process across species which can significantly improve your run times. Here is an example of how we would parallelise the above example using hte R package snowfall.
+These models can take a long time to run when your data set is large or you have a large number of species to model. To make this faster it is possible to parallelise this process across species which can significantly improve your run times. Here is an example of how we would parallelise the above example using the R package snowfall.
 
 ```{r,cache = TRUE}
 # Load in snowfall
@@ -411,13 +411,31 @@ This function works in a very similar fashion to that of the previous functions
 # Here is our data
 str(myData)
 
+################################################
+# Create new entries for the year 1986
+new_dates <- seq(as.Date("1986-01-01"), as.Date("1986-12-31"), by = "1 month")
+
+# Create a dataframe with similar values for the new dates
+new_data <- data.frame(taxa = rep("x", length(new_dates)),
+                       site = rep("A99", length(new_dates)),
+                       time_period = new_dates,
+                       tp = NA)
+
+# Append the new data to the existing dataframe
+myData <- rbind(myData, new_data)
+
+# View the updated dataframe
+head(myData)
+
+################################################
+
 # Run an occupancy model for four species
 # Here we use a very small number of iterations
 # to avoid a long run time
 system.time({
 occ_out <- occDetModel(taxa = myData$taxa,
                        site = myData$site,
-                       time_period = myData$time_period,
+                       survey = myData$time_period,
                        species_list = c('a','b','c','d'),
                        write_results = FALSE,
                        n_iterations = 200,
@@ -447,7 +465,7 @@ head(occ_out$a$BUGSoutput$summary)
 plot(occ_out$a)
 ```
 
-He we have run a small example but in reality these models are usually run for many thousands of iterations, making the analysis of more than a handful of species impractical. For those with access to the necessary facilities it is possible to parallelise across species. To do this we use a pair of functions that are used internally by `occDetModel`. These are `formatOccData` which is used to format our occurrence data into the format needed by JAGS, and `occDetFunc`, the function which undertakes the modelling.
+Here we have run a small example but in reality these models are usually run for many thousands of iterations, making the analysis of more than a handful of species impractical. For those with access to the necessary facilities it is possible to parallelise across species. To do this we use a pair of functions that are used internally by `occDetModel`. These are `formatOccData` which is used to format our occurrence data into the format needed by JAGS, and `occDetFunc`, the function which undertakes the modelling.
 
 ```{r,cache = TRUE}
 # First format our data