Merge pull request #1 from BiologicalRecordsCentre/master

rebase
BiologicalRecordsCentre · Apr 11, 2019 · 13727d7 · 13727d7
2 parents ad0a09d + 8b17947
commit 13727d7
Show file tree

Hide file tree

Showing 6 changed files with 315 additions and 5 deletions.
diff --git a/R/errorChecks.r b/R/errorChecks.r
@@ -233,15 +233,15 @@ errorChecks <- function(taxa = NULL, site = NULL, survey = NULL, replicate = NUL
 
   ### check BUGS parameters ###
   if(!is.null(c(n_iterations, burnin, thinning, n_chains))){
-
-    if(burnin > n_iterations) stop('Burn in (burnin) must not be larger that the number of iteration (n_iterations)')
-    if(thinning > n_iterations) stop('thinning must not be larger that the number of iteration (n_iterations)')
-
     if(!is.numeric(n_iterations)) stop('n_iterations should be numeric')
     if(!is.numeric(burnin)) stop('burnin should be numeric')
     if(!is.numeric(thinning)) stop('thinning should be numeric')
     if(!is.numeric(n_chains)) stop('n_chains should be numeric')
 
+
+    if(burnin > n_iterations) stop('Burn in (burnin) must not be larger that the number of iteration (n_iterations)')
+    if(thinning > n_iterations) stop('thinning must not be larger that the number of iteration (n_iterations)')
+
   }
 
   if(!is.null(seed)){

diff --git a/R/occurrenceChange.r b/R/occurrenceChange.r
@@ -118,7 +118,7 @@ occurrenceChange <- function(firstYear, lastYear, bayesOut, change = 'growthrate
       data_table <- data.frame(occ = series[as.character(years)], year = (years - min(years) + 1))
 
       # run model
-      model <- glm(occ ~ year, data = data_table, family = 'binomial')
+      model <- glm(occ ~ year, data = data_table, family = 'quasibinomial')
 
       # create predicted values
       predicted <- plogis(predict(model))

diff --git a/inst/doc/sparta_vignette.pdf b/inst/doc/sparta_vignette.pdf
diff --git a/tests/testthat/testerrorChecks.R b/tests/testthat/testerrorChecks.R
@@ -0,0 +1,167 @@
+context("Test errorChecks")
+
+set.seed(seed = 128)
+
+# Create data
+n <- 3000 #size of dataset
+nyr <- 10 # number of years in data
+nSamples <- 30 # set number of dates
+nSites <- 15 # set number of sites
+
+# Create somes dates
+first <- as.POSIXct(strptime("2010/01/01", "%Y/%m/%d")) 
+last <- as.POSIXct(strptime(paste(2010+(nyr-1),"/12/31", sep=''), "%Y/%m/%d")) 
+dt <- last-first 
+rDates <- first + (runif(nSamples)*dt)
+
+# taxa are set as random letters
+taxa <- sample(letters, size = n, TRUE)
+
+# three sites are visited randomly
+site <- sample(paste('A', 1:nSites, sep=''), size = n, TRUE)
+
+# the date of visit is selected at random from those created earlier
+time_period <- sample(rDates, size = n, TRUE)
+time_period_missing <- sample(c(rDates, NA), size = n, TRUE)
+
+dist_sub <- rnorm(n, 10, 1)
+sim_sub <- rnorm(n, 10, 1)
+
+dist_sub_fac <- as.factor(rnorm(n, 10, 1))
+sim_sub_fac <- as.factor(rnorm(n, 10, 1))
+
+
+dist_sub_chr <- as.character(rnorm(n, 10, 1))
+sim_sub_chr <- as.character(rnorm(n, 10, 1))
+
+dist_sub_fac <- as.factor(rnorm(n, 10, 1))
+sim_sub_fac <- as.factor(rnorm(n, 10, 1))
+
+
+# combine this to a dataframe 
+df <- data.frame(taxa = taxa,
+                 site = site,
+                 time_period = as.character(time_period),
+                 time_period_missing = time_period_missing,
+                 dist_sub = dist_sub,
+                 sim_sub = sim_sub,
+                 dist_sub_chr = dist_sub_chr,
+                 sim_sub_chr = sim_sub_chr,
+                 dist_sub_fac = dist_sub_fac,
+                 sim_sub_fac = sim_sub_fac)
+
+
+useIterations_num <- 1
+useIterations_chr <- "TRUE"
+
+
+temp <- tempfile()
+
+dir.create(temp)
+
+test_that("Test errors", {
+  expect_error(errorChecks(startDate = df$time_period),
+               'startDate is not in a date format. This should be of class "Date" or "POSIXct"')
+
+  expect_error(errorChecks(startDate = df$time_period_missing),
+               'startDate must not contain NAs')
+
+  expect_error(errorChecks(Date = df$time_period),
+               'Date must be a data.frame or date vector')
+
+  expect_error(errorChecks(Date = df$time_period_missing),
+               'Date must not contain NAs')
+
+  expect_error(errorChecks(endDate = df$time_period),
+               'endDate is not in a date format. This should be of class "Date" or "POSIXct"')
+
+  expect_error(errorChecks(endDate = df$time_period_missing),
+               'endDate must not contain NAs')
+
+  expect_error(errorChecks(dist_sub = df$dist_sub_chr, sim_sub = df$sim_sub),
+               'dist_sub must be integer or numeric')
+
+  expect_error(errorChecks(dist_sub = df$dist_sub, sim_sub = df$sim_sub_chr),
+               'sim_sub must be integer or numeric')
+
+  expect_error(errorChecks(dist_sub = df$dist_sub_fac, sim_sub = df$sim_sub),
+               'dist_sub must be integer or numeric')
+
+  expect_error(errorChecks(dist_sub = df$dist_sub, sim_sub = df$sim_sub_fac),
+               'sim_sub must be integer or numeric')
+
+  expect_error(errorChecks(useIterations = useIterations_chr),
+               'useIterations must be logical')
+
+  expect_error(errorChecks(useIterations = useIterations_num),
+               'useIterations must be logical')
+
+  expect_error(errorChecks(iterations = "1000"),
+               'iterations must be numeric or integer')
+
+  expect_error(errorChecks(family = "Poisson"),
+               'family must be either Binomial or Bernoulli')
+
+  expect_error(errorChecks(n_iterations = 1000, burnin = 500, thinning = 1100, n_chains = 3),
+               'thinning must not be larger that the number of iteration (n_iterations)',
+               fixed = TRUE)
+
+  expect_error(errorChecks(n_iterations = "1000", burnin = 500, thinning = 5, n_chains = 3),
+               'n_iterations should be numeric')
+
+  expect_error(errorChecks(n_iterations = 1000, burnin = "500", thinning = 5, n_chains = 3),
+               'burnin should be numeric')
+
+  expect_error(errorChecks(n_iterations = 1000, burnin = 500, thinning = "5", n_chains = 3),
+               'thinning should be numeric')
+
+  expect_error(errorChecks(n_iterations = 1000, burnin = 500, thinning = 5, n_chains = "3"),
+               'n_chains should be numeric')
+
+  expect_error(errorChecks(seed = "1"),
+               'seed muct be numeric')
+
+  expect_error(errorChecks(year_col = NA, start_col = NA, end_col = time_period[1]),
+               'year_col or start_col and end_col must be given')
+
+  expect_error(errorChecks(year_col = NA, start_col = df$time_period[1], end_col = NA),
+               'year_col or start_col and end_col must be given')
+
+  expect_error(errorChecks(year_col = NA, start_col = df$time_period[1], end_col = df$time_period[1]),
+               'year_col cannot be used at the same time as start_col and end_col')
+
+  expect_error(errorChecks(phi = 0.1),
+               "phi is outside permitted range of 0.50 to 0.95")
+
+  expect_error(errorChecks(phi = 0.99),
+               "phi is outside permitted range of 0.50 to 0.95")
+
+  expect_error(errorChecks(alpha = 0.05),
+               "alpha is outside permitted range of 0.08 to 0.50")
+
+  expect_error(errorChecks(alpha = 0.99),
+               "alpha is outside permitted range of 0.08 to 0.50")
+
+  expect_error(errorChecks(non_benchmark_sp = c(1,5,10,12)),
+               'non_benchmark_sp must be a character vector')
+
+  expect_error(errorChecks(non_benchmark_sp = 12),
+               'non_benchmark_sp must be a character vector')
+
+  expect_error(errorChecks(fres_site_filter = c(1,5,10,12)),
+               'fres_site_filter must be a character vector')
+
+  expect_error(errorChecks(fres_site_filter = 12),
+               'fres_site_filter must be a character vector')
+
+  expect_error(errorChecks(time_periods = as.matrix(df$time_period)),
+               'time_periods should be a data.frame. e.g. "data.frame(start=c(1980,1990),end=c(1989,1999))"',
+               fixed = TRUE)
+
+  expect_error(errorChecks(frespath = temp),
+               "filepath is not the path to a '.exe' file")
+
+  expect_error(errorChecks(frespath = "file.exe"),
+               'file.exe does not exist')
+
+})
diff --git a/tests/testthat/testoccDetFunc.r b/tests/testthat/testoccDetFunc.r
@@ -456,6 +456,51 @@ test_that("Test occDetFunc using regions and region aggregates", {
                         regional_codes = regions,
                         region_aggs = list(agg1 = c('region1', 'region2'))),
                  'The following regions have no data and')
+
+  # test with regional_codes not as a dataframe
+  expect_error(results <- occDetFunc(taxa_name = 'a',
+                                       n_iterations = 50,
+                                       burnin = 15, 
+                                       occDetdata = visitData$occDetdata,
+                                       spp_vis = visitData$spp_vis,
+                                       write_results = FALSE,
+                                       seed = 111,
+                                       modeltype = c("ranwalk", "halfcauchy"),
+                                       regional_codes = as.matrix(regions),
+                                       region_aggs = list(agg1 = c('region1', 'region2'))),
+                 'regional_codes should be a data.frame')
+
+  # test with NAs in regional_codes
+  regionsNA <- regions
+  regionsNA[1,3] <- NA
+
+  expect_warning(results <- occDetFunc(taxa_name = 'a',
+                                     n_iterations = 50,
+                                     burnin = 15, 
+                                     occDetdata = visitData$occDetdata,
+                                     spp_vis = visitData$spp_vis,
+                                     write_results = FALSE,
+                                     seed = 111,
+                                     modeltype = c("ranwalk", "halfcauchy"),
+                                     regional_codes = regionsNA,
+                                     region_aggs = list(agg1 = c('region1', 'region2'))),
+               "NAs are present in regional_codes, these will be replaced with 0's")
+
+  # test with sites in multiple regions
+  regionsmulti <- regions
+  regionsmulti[1,3] <- 1
+
+  expect_error(results <- occDetFunc(taxa_name = 'a',
+                                       n_iterations = 50,
+                                       burnin = 15, 
+                                       occDetdata = visitData$occDetdata,
+                                       spp_vis = visitData$spp_vis,
+                                       write_results = FALSE,
+                                       seed = 111,
+                                       modeltype = c("ranwalk", "halfcauchy"),
+                                       regional_codes = regionsmulti,
+                                       region_aggs = list(agg1 = c('region1', 'region2'))),
+                  '1 sites are assigned to more than one region in regional_codes')
 
   sink()
 

diff --git a/tests/testthat/testoccurrenceChange.R b/tests/testthat/testoccurrenceChange.R
@@ -0,0 +1,98 @@
+context("Test occurrenceChange")
+
+# Create a small dataset
+n <- 15000 #size of dataset
+nyr <- 20 # number of years in data
+nSamples <- 100 # set number of dates
+nSites <- 50 # set number of sites
+set.seed(seed = 648)
+
+# Create somes dates
+first <- as.Date(strptime("1980/01/01", "%Y/%m/%d")) 
+last <- as.Date(strptime(paste(1980+(nyr-1),"/12/31", sep=''), "%Y/%m/%d")) 
+dt <- last-first 
+rDates <- first + (runif(nSamples)*dt)
+
+# taxa are set as random letters
+taxa <- sample(letters, size = n, TRUE)
+
+# three sites are visited randomly
+site <- sample(paste('A', 1:nSites, sep=''), size = n, TRUE)
+
+# the date of visit is selected at random from those created earlier
+survey <- sample(rDates, size = n, TRUE)
+
+# run with regions
+# Create region definitions
+regions <- data.frame(site = unique(site),
+                      region1 = c(rep(1, 20), rep(0, 30)),
+                      region2 = c(rep(0, 20), rep(1, 15), rep(0, 15)),
+                      region3 = c(rep(0, 20), rep(0, 15), rep(1, 15)))
+
+# run the model with these data for one species
+temp <- capture.output(results <- occDetModel(taxa = taxa,
+                       site = site,
+                       survey = survey,
+                       species_list = c('a'),
+                       write_results = FALSE,
+                       n_iterations = 200,
+                       burnin = 10,
+                       thinning = 2,
+                       regional_codes = regions))
+
+
+test_that("Test errors and warnings", {
+
+  expect_error(occurrenceChange(firstYear = 1900, lastYear = 1990, bayesOut = results$a),
+               'firstYear must be in the year range of the data')
+  expect_error(occurrenceChange(firstYear = 1990, lastYear = 2050, bayesOut = results$a),
+               'lastYear must be in the year range of the data')  
+  expect_error(occurrenceChange(firstYear = 1990, lastYear = 1990, bayesOut = results$a, change = results$a),
+               'Change must be a character string identifying the change metric.  Either: difference, percentdif, growthrate or lineargrowth')
+  expect_error(occurrenceChange(firstYear = 1990, lastYear = 1990, bayesOut = results$a, change = as.character('Galina')),
+               'The change metric must be one of the following: difference, percentdif, growthrate or lineargrowth')
+  expect_error(occurrenceChange(firstYear = 1990, lastYear = 1990, bayesOut = results$a, region = results$a),
+               'region must be a character string identifying the regional estimates that change is to be calculated for.')
+  expect_error(occurrenceChange(firstYear = 1990, lastYear = 1990, bayesOut = results$a, region = as.character('Galina')),
+               'region must match that used in the model output file, check spelling.')
+})
+
+# estimate the growthrate (default change measure) for one species ('a')      
+changeGrowthrate <- occurrenceChange(firstYear = 1990, lastYear = 1999, bayesOut = results$a, change = 'growthrate')
+
+# estimate the lineargrowth for one species ('a') 
+changeLineargrowth <- occurrenceChange(firstYear = 1990, lastYear = 1999, bayesOut = results$a, change = 'lineargrowth')
+
+# estimate the difference for one species ('a') 
+changeDifference <- occurrenceChange(firstYear = 1990, lastYear = 1999, bayesOut = results$a, change = 'difference')
+
+# estimate the percentdif for one species ('a') 
+changePercentdif <- occurrenceChange(firstYear = 1990, lastYear = 1999, bayesOut = results$a, change = 'percentdif')
+
+
+
+test_that("Test occurrenceChange functionality", {
+
+  # growthrate
+  expect_equal(nrow(changeGrowthrate$data[, 1]), nrow(changeGrowthrate$data[, 2]), nrow(changeGrowthrate$data[, 3]))
+  expect_true(all(changeGrowthrate$data[, 1:2] <= 1))
+  expect_equal(length(changeGrowthrate), 4)
+  expect_equal((((changeGrowthrate$data[,2]/changeGrowthrate$data[,1])^(1/10))-1)*100, changeGrowthrate$data[, 3])
+  # Lineargrowth
+  expect_equal(nrow(changeLineargrowth$data[, 1]), nrow(changeLineargrowth$data[, 2]), nrow(changeLineargrowth$data[, 3]))
+  expect_true(all(changeLineargrowth$data[, 1:2] <= 1))
+  expect_equal(length(changeLineargrowth), 4)
+  expect_equal(((changeLineargrowth$data[,2] - changeLineargrowth$data[,1]) / changeLineargrowth$data[,1]), changeLineargrowth$data[, 3])
+  # Difference
+  expect_equal(nrow(changeDifference$data[, 1]), nrow(changeDifference$data[, 2]), nrow(changeDifference$data[, 3]))
+  expect_true(all(changeDifference$data[, 1:2] <= 1))
+  expect_equal(length(changeDifference), 4)
+  expect_equal(changeDifference$data[,2] - changeDifference$data[,1], changeDifference$data[, 3])
+  # percentdif
+  expect_equal(nrow(changePercentdif$data[, 1]), nrow(changePercentdif$data[, 2]), nrow(changePercentdif$data[, 3]))
+  expect_true(all(changePercentdif$data[, 1:2] <= 1))
+  expect_equal(length(changePercentdif), 4)
+  expect_equal((((changePercentdif$data[,2] - changePercentdif$data[,1])/changePercentdif$data[,1])*100), changePercentdif$data[, 3])
+
+})
+