diff --git a/.Rbuildignore b/.Rbuildignore index 0dd08a2..17d3d7f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,5 @@ ^pkgdown$ ^CRAN-SUBMISSION$ ^CONTRIBUTING\.md$ +^inst/joss$ +^inst/tutorials$ diff --git a/DESCRIPTION b/DESCRIPTION index 278ae0e..24f5339 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: SSN2 Title: Spatial Modeling on Stream Networks -Version: 0.1.1 +Version: 0.2.0 Authors@R: c( person(given = "Michael", family = "Dumelle", @@ -26,7 +26,7 @@ License: GPL-3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Depends: R (>= 2.10) Imports: @@ -36,10 +36,10 @@ Imports: generics, tibble, graphics, - parallel, spmodel, RSQLite, - utils + utils, + withr Suggests: rmarkdown, knitr, diff --git a/NAMESPACE b/NAMESPACE index b49b317..a5edd8c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -94,7 +94,6 @@ S3method(model.frame,ssn_glm) S3method(model.frame,ssn_lm) S3method(model.matrix,ssn_glm) S3method(model.matrix,ssn_lm) -S3method(names,SSN) S3method(plot,Torgegram) S3method(plot,ssn_glm) S3method(plot,ssn_lm) @@ -132,6 +131,7 @@ export(Torgegram) export(augment) export(copy_lsn_to_temp) export(covmatrix) +export(create_netgeom) export(dispersion_initial) export(dispersion_params) export(euclid_initial) @@ -152,6 +152,7 @@ export(ssn_glm) export(ssn_import) export(ssn_import_predpts) export(ssn_lm) +export(ssn_names) export(ssn_put_data) export(ssn_rbeta) export(ssn_rbinom) @@ -205,10 +206,6 @@ importFrom(graphics,legend) importFrom(graphics,par) importFrom(graphics,points) importFrom(graphics,title) -importFrom(parallel,detectCores) -importFrom(parallel,makeCluster) -importFrom(parallel,parLapply) -importFrom(parallel,stopCluster) importFrom(sf,st_as_sf) importFrom(sf,st_bbox) importFrom(sf,st_centroid) @@ -290,4 +287,5 @@ importFrom(tibble,as_tibble) importFrom(tibble,tibble) importFrom(utils,read.table) importFrom(utils,tail) +importFrom(withr,local_dir) useDynLib(SSN2) diff --git a/NEWS.md b/NEWS.md index cc5b261..33c2ec4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,25 @@ +# SSN2 0.2.0 + +## Major Updates + +* Significant testing, documentation, and auxiliary (e.g., `README.md`) updates as part of a submission to *Journal of Open Source Software*. Relevant issues associated with the review are available at [#11](https://github.com/USEPA/SSN2/issues/11), [#12](https://github.com/USEPA/SSN2/issues/12), [#13](https://github.com/USEPA/SSN2/issues/13), [#14](https://github.com/USEPA/SSN2/issues/14), [#15](https://github.com/USEPA/SSN2/issues/15), [#16](https://github.com/USEPA/SSN2/issues/16), [#17](https://github.com/USEPA/SSN2/issues/17), [#20](https://github.com/USEPA/SSN2/issues/20), [#21](https://github.com/USEPA/SSN2/issues/21). The review is [linked here](https://github.com/openjournals/joss-reviews/issues/6389). +* Added support for geopackage file formats in the `.ssn` folder that is accessed when importing SSN objects via `ssn_import()`. + +## Minor Updates + +* Added `ssn_names()` to return column names in the `edges`, `obs`, and `preds` elements of an SSN object. +* Changed `Matrix::rankMatrix(X, method = "tolNorm2")` to `Matrix::rankMatrix(X, method = "qr")` to enhance stability when determining linear independence in `X`, the design matrix of explanatory variables. +* Replaced an error message with a warning message when `X` has perfect collinearities (i.e., is not full rank). 
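The rank-check bullets above can be seen end-to-end in a small, self-contained sketch (only base R plus the `Matrix` package, which SSN2 already imports, is needed; all object names are illustrative):

    library(Matrix)
    set.seed(1)
    x1 <- rnorm(10)
    X <- cbind(intercept = 1, x1 = x1, x2 = 2 * x1) # x2 is perfectly collinear with x1
    as.numeric(rankMatrix(X, method = "qr")) # 2, less than ncol(X) = 3
    # ssn_lm()/ssn_glm() compare this rank against ncol(X); a deficit now
    # produces a warning instead of stopping with an error.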
+* Removed `format_additive` argument from `ssn_import()` because of transition to geopackage support, which eliminates the need to convert additive function values to text. +* Added the `create_netgeom()` function to create the network geometry column for the `edges`, `obs`, and `preds` elements in an SSN object. +* Minor vignette updates. +* Minor documentation updates. + +## Bug Fixes + +* Fixed a bug in `SSN_to_SSN2()` that caused an error using `ssn_write()` with no prediction sites. +* Replaced `names.SSN()` with `ssn_names()`, as `names.SSN()` prevented proper naming of elements in the SSN object. + # SSN2 0.1.1 ## Minor Updates diff --git a/R/SSN2.R b/R/SSN2.R index 71a169a..b93c144 100644 --- a/R/SSN2.R +++ b/R/SSN2.R @@ -7,7 +7,6 @@ #' @importFrom RSQLite dbConnect dbDisconnect dbExistsTable dbListTables dbReadTable dbRemoveTable dbWriteTable SQLite #' @importFrom generics tidy glance augment #' @importFrom graphics abline legend par points title -#' @importFrom parallel detectCores makeCluster parLapply stopCluster #' @importFrom sf st_as_sf st_bbox st_centroid st_coordinates st_crs st_delete st_drop_geometry #' st_geometry_type st_intersects st_read st_set_crs st_set_geometry st_write #' @importFrom spmodel AICc covmatrix dispersion_initial dispersion_params glances loocv pseudoR2 randcov_initial randcov_params varcomp @@ -18,4 +17,7 @@ #' reformulate rbeta rgamma rnbinom rnorm rpois rstandard terms var vcov #' @importFrom tibble tibble as_tibble #' @importFrom utils read.table tail +#' @importFrom withr local_dir NULL + +# #' @importFrom parallel detectCores makeCluster parLapply stopCluster diff --git a/R/SSN_to_SSN2.R b/R/SSN_to_SSN2.R index 0ff9d9e..25e6138 100644 --- a/R/SSN_to_SSN2.R +++ b/R/SSN_to_SSN2.R @@ -4,12 +4,6 @@ #' created in the SSN package to an S3 \code{SSN} object used in the #' SSN2 package. #' @param object A SpatialStreamNetwork object -#' @param edge_additive A character vector of additive function value -#' column names found in edges. Default is NULL. See Details for -#' more information. -#' @param site_additive A character vector of additive function value -#' column names found in the observed sites and prediction -#' sites. See Details for more information. Default is NULL. #' @details \command{SSN_to_SSN2()} has been made available to help users #' migrate from the \code{SSN} package to the updated \code{SSN2} #' package. It is used to convert existing S4 \code{SpatialStreamNetwork} @@ -18,34 +12,12 @@ #' used to create an S3 \code{SSN} object from data stored locally in a .ssn #' directory. #' -#' Additive function values are used to generate spatial weights for -#' the tail-up covariance function used in \code{ssn_glm}. The range -#' of additive function values are restricted to \eqn{0 \le AFV \le -#' 1}. In the \code{SSN2} package, columns containing additive -#' function values are stored as text, rather than numeric -#' format. This prevents values less than 1 with more than 10 digits -#' from being truncated when writing/reading shapefiles (and their -#' .dbf tables). The columns containing additive function values are -#' specified using the \code{edge_additive} and \code{site_additive} arguments -#' and converted to character format in the \code{SSN} class object -#' returned. The arguments \code{edge_additive} and \code{site_additive} -#' accept a single column name in character format, or a vector -#' containing multiple column names. 
Note that, column names for -#' additive function values on the edges, sites, and prediction -#' sites may differ. If a column specified in \code{edge_additive} or -#' \code{site_additive} is not present, the function will return a -#' warning, rather than an error. Columns containing additive -#' function values can also be converted to text manually using the -#' \code{\link[base]{formatC}} function, which provides the -#' flexibility needed to store the values with their full precision. -#' -#' @return An S3 \code{SSN} class object, with additive function value -#' columns converted to text format. +#' @return An S3 \code{SSN} class object. #' #' @name SSN_to_SSN2 #' @export -SSN_to_SSN2 <- function(object, edge_additive = NULL, site_additive = NULL) { - +#' +SSN_to_SSN2 <- function(object) { .Deprecated(new = "ssn_import") if (!requireNamespace("sp", quietly = TRUE)) { @@ -65,41 +37,43 @@ SSN_to_SSN2 <- function(object, edge_additive = NULL, site_additive = NULL) { nl.coords <- object@network.line.coords - edges$netgeom <- paste("ENETWORK", - paste("(", - paste( - nl.coords$NetworkID, - nl.coords$SegmentID, - nl.coords$DistanceUpstream, - sep = " " - ), - ")", - sep = "" - ), - sep = " " - ) - - ## Convert additive function values to text - if (!is.null(edge_additive)) { - if (sum(edge_additive %in% colnames(edges)) != length(edge_additive)) { - warning(paste0( - "AFV column '", - edge_additive[!edge_additive %in% colnames(edges)], - "' not found in edges" - ), call. = FALSE) - } else { - for (j in seq_len(length(edge_additive))) { - tmp <- edges[, edge_additive[j]] - tmp <- st_set_geometry(tmp, NULL) - ## tmp<- 'st_geometry<-'(tmp, NULL) - ## edges[,edge_additive[j]]<- as.character(tmp[,1]) - edges[, edge_additive[j]] <- formatC(tmp[, 1], - digits = 254, format = "f", - drop0trailing = TRUE - ) - } - } - } + edges <- create_netgeom(edges, type = "LINESTRING", overwrite = TRUE) + + ## edges$netgeom <- paste("ENETWORK", + ## paste("(", + ## paste( + ## nl.coords$NetworkID, + ## nl.coords$SegmentID, + ## nl.coords$DistanceUpstream, + ## sep = " " + ## ), + ## ")", + ## sep = "" + ## ), + ## sep = " " + ## ) + + ## ## Convert additive function values to text + ## if (!is.null(edge_additive)) { + ## if (sum(edge_additive %in% colnames(edges)) != length(edge_additive)) { + ## warning(paste0( + ## "AFV column '", + ## edge_additive[!edge_additive %in% colnames(edges)], + ## "' not found in edges" + ## ), call. 
= FALSE) + ## } else { + ## for (j in seq_len(length(edge_additive))) { + ## tmp <- edges[, edge_additive[j]] + ## tmp <- st_set_geometry(tmp, NULL) + ## ## tmp<- 'st_geometry<-'(tmp, NULL) + ## ## edges[,edge_additive[j]]<- as.character(tmp[,1]) + ## edges[, edge_additive[j]] <- formatC(tmp[, 1], + ## digits = 254, format = "f", + ## drop0trailing = TRUE + ## ) + ## } + ## } + ## } ## ------------------------------------------------ @@ -119,47 +93,49 @@ SSN_to_SSN2 <- function(object, edge_additive = NULL, site_additive = NULL) { np.coords <- object@obspoints@SSNPoints[[1]]@network.point.coords np.coords <- cbind(np.coords, sites[, c("ratio", "locID")]) np.coords$pid <- rownames(object@obspoints@SSNPoints[[1]]@network.point.coords) - sites$netgeom <- paste( - "SNETWORK", - paste( - "(", - paste( - np.coords$NetworkID, - np.coords$SegmentID, - np.coords$DistanceUpstream, - np.coords$ratio, - np.coords$pid, - np.coords$locID, - sep = " " - ), - ")", - sep = "" - ), - sep = " " - ) + + sites <- create_netgeom(sites, type = "POINT", overwrite = TRUE) + ## sites$netgeom <- paste( + ## "SNETWORK", + ## paste( + ## "(", + ## paste( + ## np.coords$NetworkID, + ## np.coords$SegmentID, + ## np.coords$DistanceUpstream, + ## np.coords$ratio, + ## np.coords$pid, + ## np.coords$locID, + ## sep = " " + ## ), + ## ")", + ## sep = "" + ## ), + ## sep = " " + ## ) rm(np.coords) - ## Convert additive function values to text - if (!is.null(site_additive)) { - if (sum(site_additive %in% colnames(sites)) != length(site_additive)) { - warning(paste0( - "AFV column '", - site_additive[!site_additive %in% colnames(sites)], - "' not found in observed sites" - ), call. = FALSE) - } else { - for (j in seq_len(length(site_additive))) { - ## tmp<- 'st_geometry<-'(sites[,site_additive[j]], NULL) - tmp <- st_set_geometry(sites[, site_additive[j]], NULL) - ## sites[,site_additive[j]]<- as.character(tmp[,]) - sites[, site_additive[j]] <- formatC(tmp[, ], - digits = 254, format = "f", - drop0trailing = TRUE - ) - } - } - } + ## ## Convert additive function values to text + ## if (!is.null(site_additive)) { + ## if (sum(site_additive %in% colnames(sites)) != length(site_additive)) { + ## warning(paste0( + ## "AFV column '", + ## site_additive[!site_additive %in% colnames(sites)], + ## "' not found in observed sites" + ## ), call. 
= FALSE) + ## } else { + ## for (j in seq_len(length(site_additive))) { + ## ## tmp<- 'st_geometry<-'(sites[,site_additive[j]], NULL) + ## tmp <- st_set_geometry(sites[, site_additive[j]], NULL) + ## ## sites[,site_additive[j]]<- as.character(tmp[,]) + ## sites[, site_additive[j]] <- formatC(tmp[, ], + ## digits = 254, format = "f", + ## drop0trailing = TRUE + ## ) + ## } + ## } + ## } ## ------------------------------------------------------------- If @@ -187,46 +163,48 @@ SSN_to_SSN2 <- function(object, edge_additive = NULL, site_additive = NULL) { tmp.sf[, c("ratio", "locID")] ) np.coords$pid <- rownames(object@predpoints@SSNPoints[[i]]@network.point.coords) - tmp.sf$netgeom <- paste( - "SNETWORK", - paste( - "(", - paste( - np.coords$NetworkID, - np.coords$SegmentID, - np.coords$DistanceUpstream, - np.coords$ratio, - np.coords$pid, - np.coords$locID, - sep = " " - ), - ")", - sep = "" - ), - sep = " " - ) - - ## Convert additive function values to text - if (!is.null(site_additive)) { - if (sum(site_additive %in% colnames(tmp.sf)) != length(site_additive)) { - warning(paste0( - "AFV column '", - site_additive[!site_additive %in% colnames(tmp.sf)], - "' not found in prediction dataset ", pred.name - ), call. = FALSE) - } else { - for (j in seq_len(length(site_additive))) { - ## tmp<- 'st_geometry<-'(tmp.sf[,site_additive[j]], NULL) - tmp <- st_set_geometry(tmp.sf[, site_additive[j]], NULL) - ## tmp.sf[,site_additive[j]]<- as.character(tmp[,]) - tmp.sf[, site_additive[j]] <- formatC(tmp[, ], - digits = 254, format = "f", - drop0trailing = TRUE - ) - } - } - } + tmp.sf<- create_netgeom(tmp.sf, type = "POINT", overwrite = TRUE) + ## tmp.sf$netgeom <- paste( + ## "SNETWORK", + ## paste( + ## "(", + ## paste( + ## np.coords$NetworkID, + ## np.coords$SegmentID, + ## np.coords$DistanceUpstream, + ## np.coords$ratio, + ## np.coords$pid, + ## np.coords$locID, + ## sep = " " + ## ), + ## ")", + ## sep = "" + ## ), + ## sep = " " + ## ) + + + ## ## Convert additive function values to text + ## if (!is.null(site_additive)) { + ## if (sum(site_additive %in% colnames(tmp.sf)) != length(site_additive)) { + ## warning(paste0( + ## "AFV column '", + ## site_additive[!site_additive %in% colnames(tmp.sf)], + ## "' not found in prediction dataset ", pred.name + ## ), call. 
= FALSE) + ## } else { + ## for (j in seq_len(length(site_additive))) { + ## ## tmp<- 'st_geometry<-'(tmp.sf[,site_additive[j]], NULL) + ## tmp <- st_set_geometry(tmp.sf[, site_additive[j]], NULL) + ## ## tmp.sf[,site_additive[j]]<- as.character(tmp[,]) + ## tmp.sf[, site_additive[j]] <- formatC(tmp[, ], + ## digits = 254, format = "f", + ## drop0trailing = TRUE + ## ) + ## } + ## } + ## } pred.list[i] <- list(tmp.sf) names(pred.list)[i] <- pred.name @@ -237,6 +215,7 @@ SSN_to_SSN2 <- function(object, edge_additive = NULL, site_additive = NULL) { ## ----------------------------------------------- ## Construct SSN object ## ----------------------------------------------- + ssnlist <- list( edges = edges, obs = sites @@ -245,7 +224,7 @@ SSN_to_SSN2 <- function(object, edge_additive = NULL, site_additive = NULL) { if (exists("pred.list")) { ssnlist$preds <- pred.list } else { - ssnlist$preds <- NA + ssnlist$preds <- list() } ssnlist$path <- object@path @@ -265,7 +244,5 @@ SSN_to_SSN2 <- function(object, edge_additive = NULL, site_additive = NULL) { # library(SSN2) # # ## Convert S4 SpatialStreamNetworkObject to S3 SSN object -# new_SSN <- SSN_to_SSN2(old_SSN, -# edge_additive = "afvArea", -# site_additive = "afvArea" +# new_SSN <- SSN_to_SSN2(old_SSN) # ) diff --git a/R/Torgegram.R b/R/Torgegram.R index b27589e..cad515e 100644 --- a/R/Torgegram.R +++ b/R/Torgegram.R @@ -135,6 +135,14 @@ Torgegram <- function(formula, ssn.object, new_esv_list } +#' Get a single empirical semivariogram for flow-connected, flow-unconnected, or Euclidean distance +#' +#' @param dist_vector Distance vector +#' @param resid2_vector Residual squared vector +#' @param bins Distance bins +#' @param cutoff Distance cutoff +#' +#' @noRd get_esv <- function(dist_vector, resid2_vector, bins, cutoff) { if (is.null(cutoff)) { cutoff <- max(dist_vector) * 0.5 diff --git a/R/amongSitesDistMat.R b/R/amongSitesDistMat.R index 7ae834f..330f428 100644 --- a/R/amongSitesDistMat.R +++ b/R/amongSitesDistMat.R @@ -1,8 +1,13 @@ +#' Helper function to determining distance matrices among sites +#' +#' @param ssn An SSN object. +#' @param pids A list of pid values for prediction sites +#' @param name The network name (obs or prediction name) +#' @param bin.table A binaryID table for the network. +#' +#' @return A distance matrix +#' @noRd amongSitesDistMat <- function(ssn, pids, name = "obs", bin.table) { - ## ssn = SSN object - ## pids = list of pid values for prediction sites - ## bin.table = binaryID table for the network - site.no <- length(pids) among_distance_matrix <- matrix(NA, nrow = site.no, ncol = site.no) @@ -25,9 +30,9 @@ amongSitesDistMat <- function(ssn, pids, name = "obs", bin.table) { pid.data <- ssn_get_netgeom(ssn$obs[ind.pids, ], c( "pid", "SegmentID", "locID", "DistanceUpstream" - ), reformat = TRUE) + ), reformat = TRUE) - ##pid.data <- as.data.frame(sapply(pid.data, as.numeric)) + ## pid.data <- as.data.frame(sapply(pid.data, as.numeric)) colnames(pid.data) <- c("pid", "rid", "locID", "upDist") } diff --git a/R/anova.R b/R/anova.R index ce080f4..b9a8b6d 100644 --- a/R/anova.R +++ b/R/anova.R @@ -167,6 +167,14 @@ anova.ssn_lm <- function(object, ..., test = TRUE, Terms, L) { #' @export anova.ssn_glm <- anova.ssn_lm + +#' Get marginal (type III) chi-square statistic for ANOVA +#' +#' @param L Matrix of contrasts. +#' @param object Model object. 
+#' +#' @return A marginal chi-square statistic +#' @noRd get_marginal_Chi2 <- function(L, object) { # make matrix if a numeric vector if (!is.matrix(L)) { @@ -215,6 +223,8 @@ tidy.anova.ssn_glm <- tidy.anova.ssn_lm #' Get relevant L lists for anova #' +#' @description This function iterates through each call to get_L_vector (which is defined below). +#' #' @param assign_index A single assign value from the model matrix #' @param assign_indices The assign values from the model matrix #' @@ -227,6 +237,14 @@ get_L_list <- function(assign_index, assign_indices) { do.call(rbind, L_vectors) } +#' Get relevant L vectors for anova +#' +#' @param assign_index Relevant assign value from the model matrix +#' @param assign_indices The assign values from the model matrix +#' +#' @return L vectors for anova +#' +#' @noRd get_L_vector <- function(assign_val, assign_indices) { L_vector <- matrix(0, nrow = 1, ncol = length(assign_indices)) L_vector[, assign_val] <- 1 diff --git a/R/checks.R b/R/checks.R index 587031a..a7cfb2e 100644 --- a/R/checks.R +++ b/R/checks.R @@ -1,3 +1,11 @@ +#' Various model checks for ssn_lm +#' +#' @param initial_object Initial value object. +#' @param ssn.object SSN object. +#' @param additive Additive function value name. +#' @param estmethod Estimation method. +#' +#' @noRd check_ssn_lm <- function(initial_object, ssn.object, additive, estmethod) { if (is.null(additive)) { if (!grepl("none", class(initial_object$tailup))) { @@ -14,6 +22,14 @@ check_ssn_lm <- function(initial_object, ssn.object, additive, estmethod) { } } +#' Various model checks for ssn_glm +#' +#' @param initial_object Initial value object. +#' @param ssn.object SSN object. +#' @param additive Additive function value name. +#' @param estmethod Estimation method. +#' +#' @noRd check_ssn_glm <- function(initial_object, ssn.object, additive, estmethod) { if (is.null(additive)) { if (!grepl("none", class(initial_object$tailup_initial))) { @@ -30,6 +46,12 @@ check_ssn_glm <- function(initial_object, ssn.object, additive, estmethod) { } } + +#' Check for valid tailup type +#' +#' @param tailup_type The tailup covariance type. +#' +#' @noRd check_tailup_type <- function(tailup_type) { tailup_valid <- c("linear", "spherical", "exponential", "mariah", "epa", "none") @@ -38,6 +60,11 @@ check_tailup_type <- function(tailup_type) { } } +#' Check for valid taildown type +#' +#' @param taildown_type The taildown covariance type. +#' +#' @noRd check_taildown_type <- function(taildown_type) { taildown_valid <- c("linear", "spherical", "exponential", "mariah", "epa", "none") @@ -46,6 +73,11 @@ check_taildown_type <- function(taildown_type) { } } +#' Check for valid Euclidean type +#' +#' @param euclid_type The Euclidean covariance type. +#' +#' @noRd check_euclid_type <- function(euclid_type) { euclid_valid <- c( "spherical", "exponential", "gaussian", "cosine", @@ -58,6 +90,11 @@ check_euclid_type <- function(euclid_type) { } } +#' Check for valid nugget type +#' +#' @param nugget_type The nugget covariance type. +#' +#' @noRd check_nugget_type <- function(nugget_type) { nugget_valid <- c("nugget", "none") if (!(nugget_type %in% nugget_valid)) { @@ -65,6 +102,12 @@ check_nugget_type <- function(nugget_type) { } } +#' Check for valid ssn_glm family and dispersion parameter +#' +#' @param family The ssn_glm family. +#' @param dispersion The dispersion parameter. 
+#' +#' @noRd check_dispersion <- function(family, dispersion) { # family must be a character here family_valid <- c("binomial", "poisson", "nbinomial", "Gamma", "inverse.gaussian", "beta") @@ -78,6 +121,13 @@ check_dispersion <- function(family, dispersion) { } } +#' Various checks on the response variable in ssn_glm +#' +#' @param family The ssn_glm family. +#' @param y The response variable. +#' @param size The number of trials (if family is binomial) +#' +#' @noRd response_checks_glm <- function(family, y, size) { # checks on y if (family == "binomial") { @@ -121,7 +171,13 @@ response_checks_glm <- function(family, y, size) { } } -# check if whole number + +#' Check if value is a whole number. +#' +#' @param x A vector. +#' @param tol Tolerance to check whether x is a whole number. +#' +#' @noRd is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) { abs(x - round(x)) < tol } diff --git a/R/cov_betahat_adjust.R b/R/cov_betahat_adjust.R index d4ca315..5065cd2 100644 --- a/R/cov_betahat_adjust.R +++ b/R/cov_betahat_adjust.R @@ -1,3 +1,16 @@ +#' Specify spatial indexing covariance matrix adjustment. Not currently +#' relevant as spatial indexing has not yet been implemented. +#' +#' @param invcov_betahat_list Placeholder. +#' @param betahat_list Placeholder. +#' @param betahat Placeholder. +#' @param eigenprods_list Placeholder. +#' @param data_object Placeholder. +#' @param params_object Placeholder. +#' @param cov_betahat_noadjust Placeholder. +#' @param var_adjust Placeholder. +#' +#' @noRd cov_betahat_adjust <- function(invcov_betahat_list, betahat_list, betahat, eigenprods_list, data_object, params_object, cov_betahat_noadjust, var_adjust) { diff --git a/R/cov_initial_search.R b/R/cov_initial_search.R index b5579ff..7a45202 100644 --- a/R/cov_initial_search.R +++ b/R/cov_initial_search.R @@ -1,3 +1,12 @@ +#' Find initial values for optimization. +#' +#' @param initial_NA_object Initial object with relevant NAs to indicate which require estimation. +#' @param ssn.object SSN object. +#' @param data_object Data object. +#' @param estmethod Estimation method. +#' +#' @return Initial values +#' @noRd cov_initial_search <- function(initial_NA_object, ssn.object, data_object, estmethod) { # find the initial "new" sample variance s2 <- data_object$s2 @@ -104,6 +113,16 @@ cov_initial_search <- function(initial_NA_object, ssn.object, data_object, estme +#' Evaluate initial values +#' +#' @param cov_grid_split A set of initial values. +#' @param initial_NA_object Initial object with relevant NAs to indicate which require estimation. +#' @param ssn.object SSN object. +#' @param data_object Data object. +#' @param estmethod Estimation method. +#' +#' @return The minus twice log likelihood +#' @noRd eval_grid <- function(cov_grid_split, initial_NA_object, ssn.object, data_object, estmethod) { # turn list element into a vector cov_grid <- unlist(cov_grid_split) @@ -118,6 +137,14 @@ eval_grid <- function(cov_grid_split, initial_NA_object, ssn.object, data_object get_minustwologlik(gll_prods, data_object, estmethod) } +#' Replace initial values with known values. +#' +#' @param cov_grid A grid of initial values. +#' @param initial_object An initial object. +#' @param data_object Data object. +#' +#' @return Initial values with known values replaced. 
+#' @noRd cov_grid_replace <- function(cov_grid, initial_object, data_object) { # replace each relevant component with a known value if applicable diff --git a/R/cov_initial_search_glm.R b/R/cov_initial_search_glm.R index b17500b..e52f23d 100644 --- a/R/cov_initial_search_glm.R +++ b/R/cov_initial_search_glm.R @@ -1,3 +1,12 @@ +#' Find initial values for optimization. +#' +#' @param initial_NA_object Initial object with relevant NAs to indicate which require estimation. +#' @param ssn.object SSN object. +#' @param data_object Data object. +#' @param estmethod Estimation method. +#' +#' @return Initial values +#' @noRd cov_initial_search_glm <- function(initial_NA_object, ssn.object, data_object, estmethod) { # find the initial "new" sample variance s2 <- data_object$s2 @@ -111,6 +120,16 @@ cov_initial_search_glm <- function(initial_NA_object, ssn.object, data_object, e best_params <- list(initial_object = updated_NA_object) } +#' Evaluate initial values +#' +#' @param cov_grid_split A set of initial values. +#' @param initial_NA_object Initial object with relevant NAs to indicate which require estimation. +#' @param ssn.object SSN object. +#' @param data_object Data object. +#' @param estmethod Estimation method. +#' +#' @return The minus twice log likelihood +#' @noRd eval_grid_glm <- function(cov_grid_split, initial_NA_object, ssn.object, data_object, estmethod) { cov_grid <- unlist(cov_grid_split) # params object @@ -123,6 +142,14 @@ eval_grid_glm <- function(cov_grid_split, initial_NA_object, ssn.object, data_ob get_minustwolaploglik(lapll_prods, data_object, estmethod) } +#' Replace initial values with known values. +#' +#' @param cov_grid A grid of initial values. +#' @param initial_object An initial object. +#' @param data_object Data object. +#' +#' @return Initial values with known values replaced. +#' @noRd cov_grid_replace_glm <- function(cov_grid, initial_object, data_object) { if (!is.na(initial_object$tailup_initial$initial[["de"]])) { cov_grid[, "tailup_de"] <- initial_object$tailup_initial$initial[["de"]] diff --git a/R/cov_matrix.R b/R/cov_matrix.R index e675c1d..e6f64bb 100644 --- a/R/cov_matrix.R +++ b/R/cov_matrix.R @@ -2,6 +2,15 @@ # GENERIC SETUP ############################################################################### + +#' Create a covariance matrix +#' +#' @param params Parameter object. +#' @param dist_object Distance matrix object. +#' @param ... Additional arguments +#' +#' @return A covariance matrix +#' @noRd cov_matrix <- function(params, dist_object, ...) { UseMethod("cov_matrix", params) } diff --git a/R/cov_vector.R b/R/cov_vector.R index 6e25ef4..b2fc2e2 100644 --- a/R/cov_vector.R +++ b/R/cov_vector.R @@ -2,6 +2,14 @@ # GENERIC SETUP COVARIANCES ############################################################################### +#' Create a covariance vector +#' +#' @param params Parameter object. +#' @param dist_pred_object Prediction distance matrix object. +#' @param ... Additional arguments +#' +#' @return A covariance vector +#' @noRd cov_vector <- function(params, dist_pred_object, ...) { UseMethod("cov_vector", params) } diff --git a/R/createBinaryID.R b/R/createBinaryID.R index a0f451b..df5b623 100644 --- a/R/createBinaryID.R +++ b/R/createBinaryID.R @@ -1,3 +1,10 @@ +#' Create the binary ID database. +#' +#' @param ssn SSN object +#' @param overwrite Should binaryID.db be deleted if it already exists? 
+#'
+#' @return binary ID database
+#' @noRd
 createBinaryID <- function(ssn, overwrite) {
   ## If binaryID.db exists
   if (file.exists("binaryID.db") == TRUE) {
diff --git a/R/create_netgeom.R b/R/create_netgeom.R
new file mode 100644
index 0000000..b051341
--- /dev/null
+++ b/R/create_netgeom.R
@@ -0,0 +1,143 @@
+#' @title Create netgeom column in SSN object
+#'
+#' @description Create netgeom column for edges, observed sites,
+#'   and/or prediction sites in a Landscape Network (LSN).
+#'
+#' @param sf_data An \code{sf} object with LINESTRING or POINT geometry
+#'   created using \code{\link{lsn_to_ssn}} (see Details).
+#' @param type Character string defining geometry type of
+#'   \code{sf_data}. Default = \code{NULL}.
+#' @param overwrite Logical indicating whether existing data should be
+#'   overwritten if present. Default = \code{FALSE}.
+#'
+#' @details Most users will not need to run \code{create_netgeom}
+#'   themselves because it is called internally when \code{lsn_to_ssn}
+#'   is run or an \code{SSN} is imported using
+#'   \code{\link[SSN2]{ssn_import}} found in the \code{SSN2}
+#'   package. For users who do wish to run \code{create_netgeom}, the
+#'   \code{sf_data} object must represent edges, observed sites, or
+#'   prediction sites in a \code{SSN} object created using
+#'   \code{\link{lsn_to_ssn}}.
+#'
+#' The netgeom column contains information in character format used
+#' to describe the topology of the LSN. The format and content of
+#' the netgeom column differ depending on whether \code{sf_data}
+#' contains LINESTRING (edges) or POINT (observed or prediction
+#' sites) geometry. For edges, the netgeom format is:
+#' \itemize{
+#'   \item{\code{'ENETWORK (netID, rid, upDist)'}}
+#' }
+#'
+#' For observed or prediction sites, the netgeom format is:
+#' \itemize{
+#'   \item{\code{'SNETWORK (netID, rid, upDist, ratio, pid, locID)'}}
+#' }
+#'
+#' The rid, ratio, upDist, netID, pid, and locID columns must be
+#' present in \code{sf_data} before netgeom is added.
+#'
+#' If \code{overwrite = TRUE} and a column named netgeom is present in
+#' \code{sf_data}, the data will be overwritten. Default = FALSE.
+#'
+#' @return An \code{sf} object containing the original data from
+#'   \code{sf_data} and an additional column named netgeom.
+#'
+#' @export
+#' @examples
+#' ## Create local temporary copy of MiddleFork04.ssn found in
+#' ## the SSN2 package. Only necessary for this example.
+#' copy_lsn_to_temp()
+#'
+#' # Import the SSN object with prediction points, pred1km
+#' mf04 <- ssn_import(
+#'   paste0(tempdir(), "/MiddleFork04.ssn"),
+#'   predpts = c("pred1km"),
+#'   overwrite = TRUE
+#' )
+#'
+#' # Recalculate the netgeom column for the observed sites
+#' sf_obs <- create_netgeom(
+#'   mf04$obs,
+#'   type = "POINT",
+#'   overwrite = TRUE
+#' )
+#'
+#' # Recalculate the netgeom column for the edges
+#' sf_edges <- create_netgeom(
+#'   mf04$edges,
+#'   type = "LINESTRING",
+#'   overwrite = TRUE
+#' )
+create_netgeom <- function(sf_data, type = NULL, overwrite = FALSE) {
+  ## check sf object
+  if (!inherits(sf_data, "sf")) {
+    stop("sf_data must be an sf object.", call. = FALSE)
+  }
+
+  ## Can we overwrite netgeom if it exists?
+  if ("netgeom" %in% colnames(sf_data) && overwrite == FALSE) {
+    stop("netgeom column is present in sf_data and overwrite = FALSE")
+  }
+
+  ## Check type argument (guard against the NULL default, which would
+  ## otherwise yield a zero-length condition)
+  if (is.null(type) || !type %in% c("POINT", "LINESTRING")) {
+    stop("type argument must be set to POINT or LINESTRING")
+  }
+
+  if (type == "POINT") {
+    ## Check that columns exist
+    c.names <- c("netID", "rid", "upDist", "ratio", "pid", "locID")
+    ind <- !c.names %in% colnames(sf_data)
+    if (sum(ind) > 0) {
+      stop(paste0(
+        "columns ", paste(c.names[ind], collapse = ", "),
+        " not found in sf_data. Is sf_data part of an SSN object and does it have POINT geometry?"
+      ))
+    }
+    ## Create netgeom
+    sf_data[, "netgeom"] <- paste0("SNETWORK (", paste(
+      sf_data$netID, sf_data$rid, sf_data$upDist,
+      sf_data$ratio, sf_data$pid, sf_data$locID
+    ), ")")
+  }
+  if (type == "LINESTRING") {
+    ## Check that columns exist
+    c.names <- c("netID", "rid", "upDist")
+    ind <- !c.names %in% colnames(sf_data)
+    if (sum(ind) > 0) {
+      stop(paste0(
+        "columns ", paste(c.names[ind], collapse = ", "),
+        " not found in sf_data. Is sf_data part of an SSN object and does it have LINESTRING geometry?"
+      ))
+    }
+
+    ## Create netgeom
+    sf_data[, "netgeom"] <- paste0("ENETWORK (", paste(
+      sf_data$netID,
+      sf_data$rid,
+      sf_data$upDist
+    ), ")")
+  }
+  return(sf_data) ## Return sf data.frame with netgeom column added
+}
+
+
+
+# The rid, upDist and netID columns must already be present in edges
+# before netgeom is added. These columns are created using
+# \code{link{lines_to_lsn}}, \code{\link{updist_edges}}, and
+# \code{link{lsn_to_ssn}}, respectively.
+#
+# For observed or prediction sites, the netgeom format is:
+# \itemize{
+#   \item{\code{'SNETWORK (netID, rid, upDist, ratio, pid, locID)'}}
+# }
+#
+# The rid, ratio, upDist, netID, pid, and locID columns must be
+# present in \code{sf_data} and are created using \code{SSNbler} package functions
+# \code{sites_to_lsn}, \code{updist_sites}, and
+# \code{ssn_assemble}, respectively.
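The netgeom strings built above can be decomposed again with `ssn_get_netgeom()`, which this patch already uses internally. A brief sketch continuing the roxygen example (the string shown in the comment is illustrative, not actual package output):

    # One netgeom string per site, e.g. "SNETWORK (1 15 1035.5 0.22 4 4)"
    head(sf_obs$netgeom, n = 1)
    # Recover the component columns (netID, rid, upDist, ratio, pid, locID) as numeric
    head(ssn_get_netgeom(sf_obs, reformat = TRUE))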
+ if ("netgeom" %in% colnames(sf_data) & overwrite == FALSE) { + stop("netgeom column is present in sf_data and overwrite = FALSE") + } + + ## Check type argument + if (type != "POINT" & type != "LINESTRING") { + stop("type argument must be set to POINT or LINESTRING") + } + + if (type == "POINT") { + ## Check that columns exist + c.names <- c("netID", "rid", "upDist", "ratio", "pid", "locID") + ind <- !c.names %in% colnames(sf_data) + if (sum(ind) > 0) { + stop(paste0("columns ", paste(c.names[ind], collapse = ", "), + " not found in sf_data. Is sf_data part of an SSN object and does it have POINT geometry?")) + } + ## Create netgeom + sf_data[, "netgeom"] <- paste0("SNETWORK (", paste( + sf_data$netID, sf_data$rid, sf_data$upDist, + sf_data$ratio, sf_data$pid, sf_data$locID + ), ")", sep = "") + } + if (type == "LINESTRING") { + ## Check that columns exist + c.names <- c("netID", "rid", "upDist") + ind <- !c.names %in% colnames(sf_data) + if (sum(ind) > 0) { + stop(paste0("columns ", paste(c.names[ind], collapse = ", "), + " not found in sf_data. Is sf_data part of an SSN object and does it have LINESTRING geometry?")) + } + + ## Create netgeom + sf_data[, "netgeom"] <- paste0("ENETWORK (", paste( + sf_data$netID, + sf_data$rid, + sf_data$upDist + ), + ")", + sep = "" + ) + } + return(sf_data) ## Return sf data.frame with netgeom column added +} + + + + + + + +# The rid, upDist and netID columns must already be present in edges +# before netgeom is added. These columns are created using +# \code{link{lines_to_lsn}}, \code{\link{updist_edges}}, and +# \code{link{lsn_to_ssn}}, respectively. +# +# For observed or prediction sites, the netgeom format is: +# \itemize{ +# \item{\code{'SNETWORK (netID, rid, upDist, ratio, pid, locID)'}} +# } +# +# The rid, ratio, upDist, netID, pid, and locID columns must be +# present in \code{sf_data} and are created using \code{SSNbler} package functions +# \code{sites_to_lsn}, \code{updist_sites}, and +# \code{ssn_assemble}, respectively. diff --git a/R/create_netgeometry.R b/R/create_netgeometry.R deleted file mode 100644 index 3def30e..0000000 --- a/R/create_netgeometry.R +++ /dev/null @@ -1,19 +0,0 @@ -## Add netgeom column to sf data.frame sf_data -create_netgeom <- function(sf_data, type = NULL) { - if (type == "point") { - sf_data[, "netgeom"] <- paste0("SNETWORK (", paste( - sf_data$netID, sf_data$rid, sf_data$upDist, - sf_data$ratio, sf_data$pid, sf_data$locID - ), ")", sep = "") - } else { - sf_data[, "netgeom"] <- paste0("ENETWORK (", paste( - sf_data$netID, - sf_data$rid, - sf_data$upDist - ), - ")", - sep = "" - ) - } - return(sf_data) ## Return sf data.frame with netgeom column added -} diff --git a/R/data.R b/R/data.R index 69ac620..20af008 100644 --- a/R/data.R +++ b/R/data.R @@ -4,24 +4,22 @@ #' and topological information needed to construct an SSN object using #' the \code{SSN2} package. 
diff --git a/R/data.R b/R/data.R
index 69ac620..20af008 100644
--- a/R/data.R
+++ b/R/data.R
@@ -4,24 +4,22 @@
 #' and topological information needed to construct an SSN object using
 #' the \code{SSN2} package.
 #'
-#' The \code{MiddleFork04.ssn} folder contains five shapefiles:
+#' The \code{MiddleFork04.ssn} folder contains four geopackages:
 #'
 #' \itemize{
-#'   \item edges: polyline shapefile representing the stream network
-#'   \item sites: point shapefile representing the observed site locations
-#'   \item pred1km: point shapefile representing prediction site locations at
+#'   \item edges: geopackage with LINESTRING geometry representing the stream network
+#'   \item sites: geopackage with POINT geometry representing the observed site locations
+#'   \item pred1km: geopackage with POINT geometry representing prediction site locations at
 #'   approximately 1km intervals throughout the stream network
-#'   \item Knapp: point shapefile representing prediction site locations on the Knapp River
-#'   \item CapeHorn: point shapefile representing prediction site locations on the Cape Horn River
+#'   \item CapeHorn: geopackage with POINT geometry representing prediction site locations on the Cape Horn River
 #' }
 #'
-#' The \code{MiddleFork04.ssn} includes one text file, \code{netID1.txt}, which contains the
-#' topological information for the stream network in the Middle Fork 2004
+#' The \code{MiddleFork04.ssn} includes two text files, \code{netID1.txt} and \code{netID2.txt}, which contain the
+#' topological information for the two stream networks in the Middle Fork 2004
 #' dataset.
 #'
 #' The distance folder contains three folders that store the hydrologic
-#' distance matrices for each of the point shapefiles (\code{obs}, \code{CapeHorn},
-#' \code{Knapp}, and \code{pred1km}). See [ssn_create_distmat()] for a
+#' distance matrices for each of the point datasets (\code{obs}, \code{CapeHorn}, and \code{pred1km}). See [ssn_create_distmat()] for a
 #' detailed description of the distance matrix file structure.
 #'
 #' Attribute data is also stored within each of the spatial
@@ -29,16 +27,16 @@
 #'
 #' \code{edges}:
 #' \itemize{
+#'   \item rid: Reach identifier
 #'   \item COMID: Common identifier of an NHD feature or relationship
 #'   \item GNIS_Name: Feature name as found in the Geographic Names Information System
 #'   \item REACHCODE: Unique identifier for a reach. The first 8 digits contain the identifier for the HUC8 and the last 6 digits are a unique within-HUC8 identifier for the reach
 #'   \item FTYPE: three-digit integer used to classify hydrography features in the NHD and define subtypes
 #'   \item FCODE: Numeric code that contains the feature type and its attributes as found in the NHDFCode lookup table
-#'   \item CDRAINAG: Cumulative drainage area (km2) for the lowermost location on the edge
 #'   \item AREAWTMAP: Area weighted mean annual precipitation (mm) at the lowermost location on the edge
 #'   \item SLOPE: Slope of the edge (cm/cm)
-#'   \item h2oAreaKm2: Watershed area (km2) for the lowermost location on the line segment
-#'   \item rid: Reach identifier
+#'   \item rcaAreaKm2: Reach contributing area (km2) for each edge feature. The RCA represents the land area that drains directly to the edge feature
+#'   \item h2oAreaKm2: Watershed area (km2) for the lowermost location on the edge feature
 #'   \item areaPI: Segment proportional influence value, calculated using watershed area (h2oAreaKm2)
 #'   \item afvArea: Additive function value, calculated using areaPI
 #'   \item upDist: Distance from the stream outlet (most downstream location in the stream network) to the uppermost location on the line segment
@@ -47,9 +45,10 @@
 #' }
 #' \code{sites}:
 #' \itemize{
+#'   \item rid: Reach identifier of the edge the site resides on
+#'   \item pid: Point identifier
 #'   \item STREAMNAME: Stream name
 #'   \item COMID: Common identifier of an NHD feature or relationship
-#'   \item CDRAINAG: Cumulative drainage area (km2)
 #'   \item AREAWTMAP: Area weighted mean annual precipitation (mm) at lowermost
 #'   location on the line segment where the site resides
 #'   \item SLOPE: Slope of the line segment (cm/cm) where the site resides
@@ -69,34 +68,33 @@
 #'   from 1980-2009 across 10 COOP air stations within the domain.
 #'   MWMT = maximum 7-day moving average of the maximum daily temperature
 #'   (i.e. maximum of all the 7-day maximums)
-#'   \item NEAR_X: x coordinate
-#'   \item NEAR_Y: y coordinate
-#'   \item rid: Reach identifier of the edge the site resides on
-#'   \item ratio: Site ratio value; provides the proportional distance along the edge to the site location
+#'   \item rcaAreaKm2: Reach contributing area (km2) for each edge feature the site resides on. The RCA represents the land area that drains directly to the edge feature
+#'   \item h2oAreaKm2: Watershed area (km2) for the lowermost location on the edge feature the site resides on
+#'   \item ratio: Site ratio value; provides the proportional distance from the downstream node of the edge to the site location
 #'   \item upDist: Distance upstream from the stream outlet (m)
 #'   \item afvArea: Additive function value calculated using watershed area (h2oAreaKm2)
 #'   \item locID: Location identifier
 #'   \item netID: Stream network identifier
-#'   \item pid: Point identifier
+#'   \item snapdist: Distance (m) that site was moved when it was snapped to the edges
 #' }
 #'
-#' \code{pred1km}, \code{CapeHorn}, and \code{Knapp}:
+#' \code{pred1km} and \code{CapeHorn}:
 #' \itemize{
+#'   \item rid: Reach identifier of the edge the site resides on
+#'   \item pid: Point identifier
 #'   \item COMID: Common identifier of an NHD feature or relationship
 #'   \item GNIS_Name: Feature name of the edge the site resides on, as found in the Geographic Names Information System
-#'   \item CDRAINAG: Cumulative drainage area (km2)
 #'   \item AREAWTMAP: Area weighted mean annual precipitation (mm) at lowermost location on the line segment where the site resides
 #'   \item SLOPE: Slope of the line segment (cm/cm) where the site resides
 #'   \item ELEV_DEM: Elevation at the site based on a 30m DEM
-#'   \item NEAR_X: x coordinate
-#'   \item NEAR_Y: y coordinate
-#'   \item rid: Reach identifier of the edge the site resides on
+#'   \item rcaAreaKm2: Reach contributing area (km2) for each edge feature the site resides on.
The RCA represents the land area that drains directly to the edge feature +#' \item h2oAreaKm2: Watershed area (km2) for the lowermost location on the edge feature the site resides on #' \item ratio: Site ratio value; provides the proportional distance along the edge to the site location #' \item upDist: Distance upstream from the stream outlet (m) #' \item afvArea: Additive function value calculated using watershed area (h2oAreaKm2) #' \item locID: Location identifier #' \item netID: Stream network identifier -#' \item pid: Point identifier +#' \item snapdist: Distance (m) that site was moved when it was snapped to the edges #' \item FlowCMS: Average stream flow (cubic meters per sec) for August, by year, from 1950-2010 across 9 USGS gauges in the region #' \item AirMEANc: Average mean air temperature (C) from July 15 - August 31, from 1980-2009 across 10 COOP air stations within the domain #' \item AirMWMTc: Average maximum air temperature (C) from July 15 - August 31, from 1980-2009 across 10 COOP air stations within the domain. MWMT = maximum 7-day moving average of the maximum daily temperature(i.e. maximum of all the 7-day maximums) @@ -104,8 +102,8 @@ #' #' @source \code{edges} are a modified version of the United States #' National Hydrography Dataset -#' (http://nhd.usgs.gov/). \code{sites}, \code{pred1km}, \code{CapeHorn} -#' and \code{Knapp} are unpublished United States Forest Service data. +#' (http://nhd.usgs.gov/). \code{sites}, \code{pred1km} and \code{CapeHorn} +#' are unpublished United States Forest Service data. #' #' @docType data #' diff --git a/R/get.rid.fc.R b/R/get.rid.fc.R index 63fb4d7..a7741b4 100644 --- a/R/get.rid.fc.R +++ b/R/get.rid.fc.R @@ -1,3 +1,10 @@ +#' Get reach ids (in C) +#' +#' @param binIDs Binary ID values in the stream network +#' @param referenceBinID A reference set of Binary ID values in the stream network +#' +#' @return The reach ids are unique ids for each stream segment in the stream network. 
+#' @noRd get.rid.fc <- function(binIDs, referenceBinID) { ind.match <- .Call("test_fc", binIDs, referenceBinID) data.frame( diff --git a/R/getObsRelationshipsDF.R b/R/getObsRelationshipsDF.R index 9c04982..889fe35 100644 --- a/R/getObsRelationshipsDF.R +++ b/R/getObsRelationshipsDF.R @@ -1,3 +1,15 @@ +#' Get spatial relationships for observed points +#' +#' @param ssn SSN object +#' @param pid integer pid value of interest +#' @param junk data.frame with columns fc (logical) and binaryID +#' @param ind vector indicator saying whether it is a duplicate +#' @param ob data.frame with pid, rid, locID, and binaryID for sites on network ordered by pid +#' @param ob_by_locID data.frame with pid, rid, locID, and binaryID for sites on network +#' ordered by locID factor level (not true locID) +#' @param bin binaryID table +#' +#' @noRd getObsRelationshipsDF <- function(ssn, pid, junk, ind, ob, ob_by_locID, bin) { ## ssn = SpatialStreamNetwork object ## pid = integer pid value of interest diff --git a/R/getPredRelationshipsDF.R b/R/getPredRelationshipsDF.R index b55e8cb..2733bc2 100644 --- a/R/getPredRelationshipsDF.R +++ b/R/getPredRelationshipsDF.R @@ -1,3 +1,14 @@ +#' Get spatial relationships for prediction points +#' +#' @param ssn SSN object +#' @param predpts Index of predpts +#' @param ind Index for pred values on the network +#' @param bin binaryID table for the network +#' @param ob Data frame with pid, rid, locID, and binaryID for sites on the +#' network ordered by pid (ob.i) +#' @param j Row j of data.frame ob +#' +#' @noRd getPredRelationshipsDF <- function(ssn, predpts, ind, bin, ob, j) { ## ssn = SpatialStreamNetwork object ## num = index of predpts in SSN diff --git a/R/get_Torgegram_initial_object.R b/R/get_Torgegram_initial_object.R index 0b134d7..fc07c36 100644 --- a/R/get_Torgegram_initial_object.R +++ b/R/get_Torgegram_initial_object.R @@ -1,3 +1,8 @@ +#' Get initial object for use with Torgegram calculations +#' +#' @param type The Torgegram type (flow-connected, flow-unconnected, Euclidean) +#' +#' @noRd get_Torgegram_initial_object <- function(type) { if (any(!type %in% c("flowcon", "flowuncon", "euclid"))) { stop("All elements of type must be \"flowcon\", \"flowuncon\", or \"euclid\".", call. = FALSE) diff --git a/R/get_anisotropy_corrected.R b/R/get_anisotropy_corrected.R index bae082b..95309ba 100644 --- a/R/get_anisotropy_corrected.R +++ b/R/get_anisotropy_corrected.R @@ -1,3 +1,11 @@ +#' Correct anisotropy argument if initial values specified +#' +#' @param anisotropy Anisotropy argument +#' @param initial_object Initial object +#' +#' @return A corrected anisotropy argument. Values specified in the initial object +#' take precedence over the anisotropy argument. 
+#' @noRd get_anisotropy_corrected <- function(anisotropy, initial_object) { if (inherits(initial_object$euclid_initial, "euclid_none")) { anisotropy <- FALSE diff --git a/R/get_cholprods.R b/R/get_cholprods.R index d2681ac..bd405fd 100644 --- a/R/get_cholprods.R +++ b/R/get_cholprods.R @@ -15,6 +15,7 @@ get_cholprods <- function(cov_matrix, X, y) { list(Sig_lowchol = Sig_lowchol, SqrtSigInv_X = SqrtSigInv_X, SqrtSigInv_y = SqrtSigInv_y) } +# A vectorized version of get_cholprods get_cholprods_parallel <- function(cluster_list) { cov_matrix <- cluster_list$c X <- cluster_list$x @@ -22,7 +23,7 @@ get_cholprods_parallel <- function(cluster_list) { get_cholprods(cov_matrix, X, y) } -#' Find relevant Cholesky quantities (upper, lower, products, and inverse) +#' Find relevant Cholesky quantities (upper, lower, products, and inverse) for glms #' #' @param cov_matrix A covariance matrix #' @param X A model matrix @@ -45,6 +46,7 @@ get_cholprods_glm <- function(cov_matrix, X, y) { ) } +# A vectorized version of get_cholprods_glm get_cholprods_glm_parallel <- function(cluster_list) { cov_matrix <- cluster_list$c X <- cluster_list$x diff --git a/R/get_cov_matrix.R b/R/get_cov_matrix.R index 200d28c..9f64835 100644 --- a/R/get_cov_matrix.R +++ b/R/get_cov_matrix.R @@ -1,3 +1,16 @@ +#' A helper to get the overall covariance matrix. +#' +#' @param params_object Parameter object. +#' @param dist_object_oblist The distance matrices for the observed data. +#' @param randcov_list Random effect list. +#' @param partition_list Partition factor list. +#' @param anisotropy Whether there is anisotropy. +#' @param de_scale A scaling parameter on the de parameter that helps ensure +#' numeric stability of the covariance matrix. +#' @param diagtol A tolerance added to the diagonal of the covariance matrix +#' that helps ensure numeric stability of the covariance matrix. +#' +#' @noRd get_cov_matrix <- function(params_object, dist_object_oblist, randcov_list = NULL, partition_list = NULL, anisotropy, de_scale, diagtol) { @@ -24,6 +37,7 @@ get_cov_matrix <- function(params_object, dist_object_oblist, cov_matrix } +# A vectorized version of get_cov_matrix get_cov_matrix_list <- function(params_object, data_object) { # this is a list of elements diff --git a/R/get_cov_vector.R b/R/get_cov_vector.R index 786fee6..2e5e7ee 100644 --- a/R/get_cov_vector.R +++ b/R/get_cov_vector.R @@ -1,3 +1,13 @@ +#' A helper to get the overall covariance matrix. +#' +#' @param params_object Parameter object. +#' @param dist_pred_object The distance matrices between observed and prediction data. +#' @param data The data. +#' @param newdata The prediction data. +#' @param partition_factor The name of the partition factor. +#' @param anisotropy Whether there is anisotropy. +#' +#' @noRd get_cov_vector <- function(params_object, dist_pred_object, data, newdata, partition_factor = NULL, anisotropy) { tailup_none <- inherits(params_object$tailup, "tailup_none") taildown_none <- inherits(params_object$taildown, "taildown_none") @@ -20,6 +30,13 @@ get_cov_vector <- function(params_object, dist_pred_object, data, newdata, parti cov_vector } +#' Compute Euclidean covariance for prediction and a possible adjustment to Euclidean covariance for anisotropy +#' +#' @param params The Euclidean covariance parameters +#' @param dist_pred_object The distance matrices between observed and prediction data. +#' @param anisotropy Whether there is anisotropy. 
+#' +#' @noRd get_euclid_pred <- function(params, dist_pred_object, anisotropy) { if (anisotropy) { new_coords_observed <- transform_anis( diff --git a/R/get_data_object.R b/R/get_data_object.R index 1296ac3..08a356a 100644 --- a/R/get_data_object.R +++ b/R/get_data_object.R @@ -1,3 +1,19 @@ +#' Get the data object for use with many other functions. +#' +#' @param formula Model formula. +#' @param ssn.object SSN object. +#' @param additive Name of the additive function value column. +#' @param anisotropy Whether there is anisotropy. +#' @param initial_object The initial value object. +#' @param random Random effect formula. +#' @param randcov_initial The initial random effect object. +#' @param partition_factor Partition factor formula. +#' @param local Spatial indexing argument (not yet implemented) +#' @param ... Additional arguments +#' +#' @return The data object that contains various pieces of important information +#' required for modeling. +#' @noRd get_data_object <- function(formula, ssn.object, additive, anisotropy, initial_object, random, randcov_initial, partition_factor, local, ...) { sf_column_name <- attributes(ssn.object$obs)$sf_column @@ -37,9 +53,9 @@ get_data_object <- function(formula, ssn.object, additive, anisotropy, dots$contrasts <- attr(X, "contrasts") xlevels <- .getXlevels(terms_val, obdata_model_frame) # find p - p <- as.numeric(Matrix::rankMatrix(X)) + p <- as.numeric(Matrix::rankMatrix(X, method = "qr")) if (p < NCOL(X)) { - stop("Perfect collinearities detected in X. Remove redundant predictors.", call. = FALSE) + warning("There are perfect collinearities detected in X (the matrix of explanatory variables). This may make the model fit unreliable or may cause an error while model fitting. Consider removing redundant explanatory variables and refitting the model.", call. = FALSE) } # find sample size n <- NROW(X) diff --git a/R/get_data_object_glm.R b/R/get_data_object_glm.R index 2147ae8..ae082bd 100644 --- a/R/get_data_object_glm.R +++ b/R/get_data_object_glm.R @@ -1,3 +1,19 @@ +#' Get the glm data object for use with many other functions. +#' +#' @param formula Model formula. +#' @param ssn.object SSN object. +#' @param additive Name of the additive function value column. +#' @param anisotropy Whether there is anisotropy. +#' @param initial_object The initial value object. +#' @param random Random effect formula. +#' @param randcov_initial The initial random effect object. +#' @param partition_factor Partition factor formula. +#' @param local Spatial indexing argument (not yet implemented) +#' @param ... Additional arguments +#' +#' @return The glm data object that contains various pieces of important information +#' required for modeling. +#' @noRd get_data_object_glm <- function(formula, ssn.object, family, additive, anisotropy, initial_object, random, randcov_initial, partition_factor, local, ...) { sf_column_name <- attributes(ssn.object$obs)$sf_column @@ -37,9 +53,9 @@ get_data_object_glm <- function(formula, ssn.object, family, additive, anisotrop dots$contrasts <- attr(X, "contrasts") xlevels <- .getXlevels(terms_val, obdata_model_frame) # find p - p <- as.numeric(Matrix::rankMatrix(X)) + p <- as.numeric(Matrix::rankMatrix(X, method = "qr")) if (p < NCOL(X)) { - stop("Perfect collinearities detected in X. Remove redundant predictors.", call. = FALSE) + warning("There are perfect collinearities detected in X (the matrix of explanatory variables). This may make the model fit unreliable or may cause an error while model fitting. 
Consider removing redundant explanatory variables and refitting the model.", call. = FALSE)
   }
   # find sample size
   n <- NROW(X)
diff --git a/R/get_dist_object.R b/R/get_dist_object.R
index 8da4739..850885f 100644
--- a/R/get_dist_object.R
+++ b/R/get_dist_object.R
@@ -1,4 +1,12 @@
-# parent function to get the distance object
+#' Get the distance matrix object
+#'
+#' @param ssn.object SSN object.
+#' @param initial_object Initial value object.
+#' @param additive Name of the additive function value column.
+#' @param anisotropy Whether there is anisotropy.
+#'
+#' @return A distance matrix object that contains various distance matrices used in modeling.
+#' @noRd
 get_dist_object <- function(ssn.object, initial_object, additive, anisotropy) {
   # get netgeom
   netgeom <- ssn_get_netgeom(ssn.object$obs, reformat = TRUE)
@@ -60,7 +68,7 @@ get_dist_object <- function(ssn.object, initial_object, additive, anisotropy) {
   dist_object
 }

-# implement list structure for when local is implemented (big data)
+# a vectorized version of get_dist_object
 get_dist_object_oblist <- function(dist_object, observed_index, local_index) {
   # find names of the distance object
   dist_object_names <- names(dist_object)
@@ -87,6 +95,14 @@ get_dist_object_oblist <- function(dist_object, observed_index, local_index) {
   dist_object_oblist
 }

+#' Get list of full distance matrices
+#'
+#' @param ssn.object SSN object.
+#' @param initial_object Initial value object.
+#' @param additive Name of the additive function value column.
+#' @param order_list A list of orderings by pid and network.
+#'
+#' @noRd
 get_dist_matlist <- function(ssn.object, initial_object, additive,
                              order_list) {
   network_index <- order_list$network_index
@@ -171,8 +187,14 @@ get_dist_matlist <- function(ssn.object, initial_object, additive,
   dist_matlist
 }

-# auxiliary functions
-# auxiliary functions
+
+#' Get distance to the nearest junction matrix
+#'
+#' @param network_index Network index
+#' @param ssn.object SSN object
+#' @param newdata_name Name of the newdata matrix (if relevant)
+#'
+#' @noRd
 get_distjunc_matlist <- function(network_index, ssn.object, newdata_name = NULL) {
   if (is.null(newdata_name)) {
     ext <- "obs"
@@ -208,7 +230,13 @@ get_distjunc_matlist <- function(network_index, ssn.object, newdata_name = NULL)
 }

-# subset relevant objects in the distance object
+#' Subset relevant objects in the distance object
+#'
+#' @param dist_object_name Name of the relevant element in the distance object
+#' @param dist_object Distance object
+#' @param index Index of the distance matrix elements for ordering
+#'
+#' @noRd
 subset_dist_object <- function(dist_object_name, dist_object, index) {
   # store the distance object element
   dist_object_element <- dist_object[[dist_object_name]]
@@ -230,7 +258,12 @@ subset_dist_object <- function(dist_object_name, dist_object, index) {
   dist_sub
 }

-# find the mask matrix list for each network
+#' Find the mask matrix list for each network
+#'
+#' @param distjunc_list A list of distance-to-junction matrices, used to create the mask matrices (which zero out tailup
+#'   and taildown covariance for observations on separate networks).
+#'
+#' @noRd
 get_mask_matlist <- function(distjunc_list) {
   mask_list <- lapply(distjunc_list, function(x) {
     n_i <- dim(x)[[1]]
@@ -238,7 +271,11 @@ get_mask_matlist <- function(distjunc_list) {
   })
 }

-# find the a matrix list (largest stream distance) for each network
+#' Find the a matrix list for each network
+#'
+#' @param distjunc_list A list of distance-to-junction matrices, used to create the a matrices (largest stream distance) for each network.
+#' +#' @noRd get_a_matlist <- function(distjunc_list) { # find a matrix list a_matlist <- lapply(distjunc_list, function(x) { @@ -247,7 +284,11 @@ get_a_matlist <- function(distjunc_list) { }) } -# find the b matrix list (smallest stream distance) for each network +#' Find the b matrix list for each network +#' +#' @param distjunc_list Create list of a matrices (smallest stream distance) for each network. +#' +#' @noRd get_b_matlist <- function(distjunc_list) { # find a matrix list b_matlist <- lapply(distjunc_list, function(x) { @@ -256,13 +297,21 @@ get_b_matlist <- function(distjunc_list) { }) } -# find the hydrologic distance matrix list for each network +#' Find the hydrologic distance matrix list for each network +#' +#' @param distjunc_list Create list of matrices (hydrologic stream distance) for each network. +#' +#' @noRd get_hydro_matlist <- function(distjunc_list) { # find hydro matrix list hydro_matlist <- lapply(distjunc_list, function(x) x + t(x)) } -# find the additive matrix list for each network +#' Find the w matrix list for each network +#' +#' @param distjunc_list Create list of w matrices (additive function weight) for each network. +#' +#' @noRd get_w_matlist <- function(ssn.object, additive, network_index, dist_order, b_matlist, mask_matlist, newdata_name = NULL) { if (is.null(newdata_name)) { additive_val <- as.numeric(ssn.object$obs[[additive]]) # remember a character here diff --git a/R/get_dist_pred_object.R b/R/get_dist_pred_object.R index 581e88e..0d46a28 100644 --- a/R/get_dist_pred_object.R +++ b/R/get_dist_pred_object.R @@ -1,4 +1,10 @@ -# parent function to get the distance object +#' Get prediction distance object +#' +#' @param object Data object. +#' @param newdata_name Name of the prediction data set. +#' @param initial_object Initial value object. +#' +#' @noRd get_dist_pred_object <- function(object, newdata_name, initial_object) { # get netgeom netgeom <- ssn_get_netgeom(object$ssn.object$obs, reformat = TRUE) @@ -84,10 +90,9 @@ get_dist_pred_object <- function(object, newdata_name, initial_object) { dist_pred_object } -# get the list of prediction matrices +# vectorized version of get_dist_pred_object get_dist_pred_matlist <- function(ssn.object, newdata_name, initial_object, additive, order_list_pred) { - # store network indices and orders network_index <- order_list_pred$network_index dist_order <- order_list_pred$dist_order @@ -185,7 +190,13 @@ get_dist_pred_matlist <- function(ssn.object, newdata_name, initial_object, addi dist_pred_matlist } -# get list of distance junction matrices +#' Get list of prediction distance junction matrices +#' +#' @param ssn.object SSN object. +#' @param newdata_name Name of the prediction data set. +#' @param order_list_pred The order for observations in the prediction data set. 
+#' +#' @noRd get_distjunc_pred_matlist <- function(ssn.object, newdata_name, order_list_pred) { # check and make sure there is missing data to predict if (newdata_name %in% names(ssn.object$preds) && NROW(ssn.object$preds[[newdata_name]]) == 0) { @@ -303,12 +314,23 @@ get_distjunc_pred_matlist <- function(ssn.object, newdata_name, order_list_pred) { distjunc_pred_matlist <- list(distjunca = distjunca, distjuncb = distjuncb) } + +#' Get mask prediction distance matrices +#' +#' @param distjunc_pred_matlist List of prediction distance-to-junction matrices (distjunca and distjuncb). +#' +#' @noRd get_mask_pred_matlist <- function(distjunc_pred_matlist) { mask_pred_list <- lapply(distjunc_pred_matlist$distjunca, function(x) { Matrix::Matrix(1, nrow = dim(x)[1], ncol = dim(x)[2], sparse = TRUE) }) } +#' Get a prediction distance matrices +#' +#' @param distjunc_pred_matlist List of prediction distance-to-junction matrices (distjunca and distjuncb). +#' +#' @noRd get_a_pred_matlist <- function(distjunc_pred_matlist) { a_matrix_list <- mapply( a = distjunc_pred_matlist$distjunca, @@ -320,6 +342,11 @@ get_a_pred_matlist <- function(distjunc_pred_matlist) { ) } +#' Get b prediction distance matrices +#' +#' @param distjunc_pred_matlist List of prediction distance-to-junction matrices (distjunca and distjuncb). +#' +#' @noRd get_b_pred_matlist <- function(distjunc_pred_matlist) { a_matrix_list <- mapply( a = distjunc_pred_matlist$distjunca, @@ -331,6 +358,11 @@ get_b_pred_matlist <- function(distjunc_pred_matlist) { ) } +#' Get hydrologic prediction distance matrices +#' +#' @param distjunc_pred_matlist List of prediction distance-to-junction matrices (distjunca and distjuncb). +#' +#' @noRd get_hydro_pred_matlist <- function(distjunc_pred_matlist) { a_matrix_list <- mapply( a = distjunc_pred_matlist$distjunca, @@ -342,12 +374,17 @@ get_hydro_pred_matlist <- function(distjunc_pred_matlist) { ) } +#' Get w prediction distance matrices +#' +#' @param ssn.object SSN object. +#' @param newdata_name Name of the prediction data set. +#' @param order_list_pred The order for observations in the prediction data set. +#' @param additive Name of the additive function value column. +#' @param b_pred_matlist List of b prediction matrices. +#' @param mask_pred_matlist List of mask prediction matrices. +#' +#' @noRd get_w_pred_matlist <- function(ssn.object, newdata_name, order_list_pred, additive, b_pred_matlist, mask_pred_matlist) { # make list network_index_obs <- as.numeric(as.character(order_list_pred$network_index)) network_index_pred <- as.numeric(as.character(order_list_pred$network_index_pred)) network_index_vals <- sort(unique(c(network_index_obs, network_index_pred))) - # network_index_integer <- seq_along(network_index_vals) + # network_index_integer <- seq_along(network_index_vals) dist_order <- order_list_pred$dist_order diff --git a/R/get_dist_predbk_object.R b/R/get_dist_predbk_object.R index 9b18379..581cefa 100644 --- a/R/get_dist_predbk_object.R +++ b/R/get_dist_predbk_object.R @@ -1,4 +1,10 @@ -# parent function to get the distance object +#' Get prediction by prediction distance matrix for block Kriging +#' +#' @param object Model object. +#' @param newdata_name Name of prediction data set. +#' @param initial_object Initial value object. +#' +#' @noRd get_dist_predbk_object <- function(object, newdata_name, initial_object) { # get netgeom netgeom <- ssn_get_netgeom(object$ssn.object$preds[[newdata_name]], reformat = TRUE) @@ -62,6 +68,7 @@ get_dist_predbk_object <- function(object, newdata_name, initial_object) { dist_object } +# vectorized version of get_dist_predbk_object get_dist_predbk_matlist <- function(ssn.object, newdata_name, initial_object, additive, order_list) { network_index <- order_list$network_index diff --git a/R/get_eigenprods.R b/R/get_eigenprods.R index c61aa27..34b1f8e 100644 --- a/R/get_eigenprods.R +++ b/R/get_eigenprods.R @@ -1,3 +1,13 @@ +#' Get eigenproducts for relevant covariance computations +#' +#' @param cov_matrix A covariance matrix. +#' @param X Data model matrix. +#' @param y Data response value.
+#' @param ones A vector of ones with the same dimension as y. +#' +#' @return A list of eigenvector and eigenvalue products used to compute +#' products with square root matrices. +#' @noRd get_eigenprods <- function(cov_matrix, X, y, ones) { eig <- eigen(Matrix::forceSymmetric(cov_matrix)) SigInv_p1 <- t(t(eig$vectors) * 1 / eig$values) @@ -17,6 +27,7 @@ get_eigenprods <- function(cov_matrix, X, y, ones) { ) } +# a vectorized version of get_eigenprods get_eigenprods_parallel <- function(cluster_list) { cov_matrix <- cluster_list$c X <- cluster_list$x @@ -25,6 +36,16 @@ get_eigenprods_parallel <- function(cluster_list) { get_eigenprods(cov_matrix, X, y, o) } +#' Get eigenproducts for relevant covariance computations (for glms) +#' +#' @param cov_matrix A covariance matrix. +#' @param X Data model matrix. +#' @param y Data response value. +#' @param ones A vector of ones with the same dimension as y. +#' +#' @return A list of eigenvector and eigenvalue products used to compute +#' products with square root matrices. +#' @noRd get_eigenprods_glm <- function(cov_matrix, X, y, ones) { eig <- eigen(Matrix::forceSymmetric(cov_matrix)) SigInv_p1 <- t(t(eig$vectors) * 1 / eig$values) @@ -46,6 +67,7 @@ get_eigenprods_glm <- function(cov_matrix, X, y, ones) { ) } +# a vectorized version of get_eigenprods_glm get_eigenprods_glm_parallel <- function(cluster_list) { cov_matrix <- cluster_list$c X <- cluster_list$x diff --git a/R/get_model_stats.R b/R/get_model_stats.R index 5923d13..6783762 100644 --- a/R/get_model_stats.R +++ b/R/get_model_stats.R @@ -1,3 +1,10 @@ +#' Get relevant model fit statistics and diagnostics +#' +#' @param cov_est_object Covariance parameter estimation object. +#' @param data_object Data object. +#' @param estmethod Estimation method. +#' +#' @noRd get_model_stats <- function(cov_est_object, data_object, estmethod) { # store the covariance matrix list cov_matrix_list <- get_cov_matrix_list(cov_est_object$params_object, data_object) @@ -143,6 +150,10 @@ get_model_stats <- function(cov_est_object, data_object, estmethod) { ) } +############################################################################### +############### helpers to store relevant statistics as list objects +############################################################################### + get_coefficients <- function(betahat, params_object) { # store fixed effect and random coefficients as list list(fixed = betahat, params_object = params_object) } diff --git a/R/get_model_stats_glm.R b/R/get_model_stats_glm.R index 5a361ac..2073c44 100644 --- a/R/get_model_stats_glm.R +++ b/R/get_model_stats_glm.R @@ -1,3 +1,10 @@ +#' Get relevant model fit statistics and diagnostics for glms +#' +#' @param cov_est_object Covariance parameter estimation object. +#' @param data_object Data object. +#' @param estmethod Estimation method.
+#' +#' @noRd get_model_stats_glm <- function(cov_est_object, data_object, estmethod) { cov_matrix_list <- get_cov_matrix_list(cov_est_object$params_object, data_object) @@ -212,6 +219,10 @@ get_model_stats_glm <- function(cov_est_object, data_object, estmethod) { ) } +############################################################################### +############### helpers to store relevant statistics as list objects +############################################################################### + get_coefficients_glm <- function(betahat, params_object) { list(fixed = betahat, params_object = params_object) } diff --git a/R/get_sf_obj.R b/R/get_sf_obj.R new file mode 100644 index 0000000..9351d6b --- /dev/null +++ b/R/get_sf_obj.R @@ -0,0 +1,57 @@ +#' Read relevant sf objects from user shapefiles or geopackages +#' +#' @param fn Path to shapefile or geopackage +#' +#' @noRd +get_sf_obj <- function(fn) { + f.ext.shp <- substr(fn, nchar(fn) - 3, nchar(fn)) == ".shp" + f.ext.gpkg <- substr(fn, nchar(fn) - 4, nchar(fn)) == ".gpkg" + + ## Check if shapefile or geopackage exists---------- + ## If shapefile specified, check if it exists + if (f.ext.shp) { + shp.exists <- file.exists(fn) + gpkg.exists <- FALSE + + fn <- substr(fn, 1, nchar(fn) - 4) + } + ## If geopackage specified, check if it exists + if (f.ext.gpkg) { + shp.exists <- FALSE + gpkg.exists <- file.exists(fn) + + fn <- substr(fn, 1, nchar(fn) - 5) + } + + ## If no file extension specified, check for both + if (!f.ext.shp & !f.ext.gpkg) { + shp.exists <- file.exists(paste0(fn, ".shp")) + gpkg.exists <- file.exists(paste0(fn, ".gpkg")) + } + + ## If both exist without file extension, return error + if (gpkg.exists & shp.exists) { + ## sfobj<- st_read(paste0(fn, ".gpkg"), quiet = TRUE) + stop(paste0( + dirname(fn), " contains both ", basename(fn), ".shp and ", + basename(fn), ".gpkg." + )) + } + ## Read in shapefile + if (shp.exists & !gpkg.exists) { + sfobj <- st_read(paste0(fn, ".shp"), quiet = TRUE) + } + ## Read in geopackage + if (gpkg.exists & !shp.exists) { + sfobj <- st_read(paste0(fn, ".gpkg"), quiet = TRUE) + } + ## Neither exists + if (!gpkg.exists & !shp.exists) { + stop(paste0( + basename(fn), " not found in ", dirname(fn), ". ", + basename(fn), " must reside in path and be in .shp or .gpkg format." + )) + } + + return(sfobj) +} diff --git a/R/gloglik.R b/R/gloglik.R index b27a9f5..d7fa908 100644 --- a/R/gloglik.R +++ b/R/gloglik.R @@ -1,3 +1,11 @@ +#' Compute log likelihood +#' +#' @param par Optimization parameters +#' @param orig2optim_object An optimization object that performs necessary transformations +#' @param data_object Data object +#' @param estmethod Estimation method +#' +#' @noRd gloglik <- function(par, orig2optim_object, data_object, estmethod) { # find parameters on the original scale cov_orig_val <- optim2orig(orig2optim_object, par) diff --git a/R/gloglik_products.R b/R/gloglik_products.R index 1373a5e..7589428 100644 --- a/R/gloglik_products.R +++ b/R/gloglik_products.R @@ -1,3 +1,10 @@ +#' Get products needed to evaluate log likelihood +#' +#' @param params_object Object with covariance parameters.
+#' @param data_object Data object +#' @param estmethod Estimation method +#' +#' @noRd gloglik_products <- function(params_object, data_object, estmethod) { cov_matrix_list <- get_cov_matrix_list(params_object, data_object) # cholesky products (no local) @@ -60,6 +67,13 @@ gloglik_products <- function(params_object, data_object, estmethod) { } } +#' Get minus twice the log likelihood +#' +#' @param gloglik_products The relevant log likelihood products +#' @param data_object Data object +#' @param estmethod Estimation method +#' +#' @noRd get_minustwologlik <- function(gloglik_products, data_object, estmethod) { if (estmethod == "reml") { minustwologlik <- as.numeric(gloglik_products$l1 + gloglik_products$l2 + gloglik_products$l3 + (data_object$n - data_object$p) * log(2 * pi)) diff --git a/R/initial.R b/R/initial.R index afc97b0..cbcd6ee 100644 --- a/R/initial.R +++ b/R/initial.R @@ -220,6 +220,18 @@ nugget_initial <- function(nugget_type, nugget, known) { new_nugget_initial } +#' A helper to create an initial value object +#' +#' @param tailup_type Tailup covariance type +#' @param taildown_type Taildown covariance type +#' @param euclid_type Euclidean covariance type +#' @param nugget_type Nugget covariance type +#' @param tailup_initial Tailup initial object (if specified) +#' @param taildown_initial Taildown initial object (if specified) +#' @param euclid_initial Euclidean initial object (if specified) +#' @param nugget_initial Nugget initial object (if specified) +#' +#' @noRd get_initial_object <- function(tailup_type, taildown_type, euclid_type, nugget_type, tailup_initial, taildown_initial, euclid_initial, nugget_initial) { if (is.null(tailup_initial)) tailup_initial <- tailup_initial(tailup_type) @@ -241,6 +253,20 @@ get_initial_object <- function(tailup_type, taildown_type, euclid_type, nugget_t initial_object } +#' A helper to create an initial value glm object +#' +#' @param tailup_type Tailup covariance type +#' @param taildown_type Taildown covariance type +#' @param euclid_type Euclidean covariance type +#' @param nugget_type Nugget covariance type +#' @param tailup_initial Tailup initial object (if specified) +#' @param taildown_initial Taildown initial object (if specified) +#' @param euclid_initial Euclidean initial object (if specified) +#' @param nugget_initial Nugget initial object (if specified) +#' @param family The generalized linear model family +#' @param dispersion_initial Dispersion initial object +#' +#' @noRd get_initial_object_glm <- function(tailup_type, taildown_type, euclid_type, nugget_type, tailup_initial, taildown_initial, euclid_initial, nugget_initial, family, dispersion_initial) { @@ -261,6 +287,12 @@ get_initial_object_glm <- function(tailup_type, taildown_type, euclid_type, nugg initial_object } +#' Create a vector that specifies whether covariance parameters are to be assumed known +#' +#' @param params A covariance parameter vector +#' @param known Which covariance parameters are assumed known +#' +#' @noRd get_is_known <- function(params, known) { if (missing(known)) { is_known <- rep(FALSE, length(params)) diff --git a/R/initial_NA.R b/R/initial_NA.R index 426da34..95f1f1a 100644 --- a/R/initial_NA.R +++ b/R/initial_NA.R @@ -1,3 +1,9 @@ +#' An initial value object with NA values that indicate where estimation is required +#' +#' @param initial_object Initial value object +#' @param data_object Data object +#' +#' @noRd get_initial_NA_object <- function(initial_object, data_object) { # get each initial NA object tailup_initial_NA_val <-
tailup_initial_NA(initial_object$tailup_initial) @@ -19,7 +25,11 @@ get_initial_NA_object <- function(initial_object, data_object) { initial_NA_object } -# the tailup initial NA object +#' Tailup initial NA object +#' +#' @param initial An initial value specification +#' +#' @noRd tailup_initial_NA <- function(initial) { tailup_names <- c("de", "range") @@ -37,7 +47,11 @@ tailup_initial_NA <- function(initial) { new_initial } -# the taildown initial NA object +#' Taildown initial NA object +#' +#' @param initial An initial value specification +#' +#' @noRd taildown_initial_NA <- function(initial) { taildown_names <- c("de", "range") @@ -55,7 +69,11 @@ taildown_initial_NA <- function(initial) { new_initial } -# the euclid initial NA object +#' Euclidean initial NA object +#' +#' @param initial An initial value specification +#' @param data_object Data object +#' +#' @noRd euclid_initial_NA <- function(initial, data_object) { euclid_names <- c("de", "range", "rotate", "scale") @@ -79,6 +97,11 @@ euclid_initial_NA <- function(initial, data_object) { new_initial } +#' Nugget initial NA object +#' +#' @param initial An initial value specification +#' +#' @noRd nugget_initial_NA <- function(initial) { nugget_names <- c("nugget") @@ -96,6 +119,14 @@ nugget_initial_NA <- function(initial) { new_initial } +#' Insert NA values when covariance parameters are assumed unknown +#' +#' @param names The names to assume unknown +#' @param val_default The default parameter value (NA) +#' @param known_default The default value of whether parameters are known +#' @param initial An initial value specification +#' +#' @noRd insert_initial_NA <- function(names, val_default, known_default, initial) { # find names with known initial values names_replace <- setdiff(names, names(initial$initial)) @@ -110,6 +141,12 @@ insert_initial_NA <- function(names, val_default, known_default, initial) { initial } +#' Insert NA values when random effect parameters are assumed unknown +#' +#' @param randcov_initial Random effect initial object +#' @param data_object Data object +#' +#' @noRd randcov_initial_NA <- function(randcov_initial, data_object) { if (is.null(randcov_initial)) { randcov_initial <- NULL @@ -134,6 +171,12 @@ randcov_initial_NA <- function(randcov_initial, data_object) { randcov_initial } +#' An initial value object with NA values that indicate where estimation is required for glms +#' +#' @param initial_object Initial value object +#' @param data_object Data object +#' +#' @noRd get_initial_NA_object_glm <- function(initial_object, data_object) { tailup_initial_NA_val <- tailup_initial_NA(initial_object$tailup_initial) taildown_initial_NA_val <- taildown_initial_NA(initial_object$taildown_initial) @@ -154,6 +197,12 @@ get_initial_NA_object_glm <- function(initial_object, data_object) { initial_NA_object } +#' Insert NA values when dispersion parameters are assumed unknown +#' +#' @param initial Initial value object +#' @param data_object Data object +#' +#' @noRd dispersion_initial_NA <- function(initial, data_object) { dispersion_names <- c("dispersion") diff --git a/R/laploglik.R b/R/laploglik.R index ba40ad7..0080240 100644 --- a/R/laploglik.R +++ b/R/laploglik.R @@ -1,3 +1,11 @@ +#' Evaluate Laplace log likelihood +#' +#' @param par Current optimization parameter +#' @param orig2optim_object Optimization object that controls transformations +#' @param data_object Data object +#' @param estmethod Estimation method +#' +#' @noRd laploglik <- function(par, orig2optim_object, data_object, estmethod) { # find covariance parameters on original scale
cov_orig_val <- optim2orig_glm(orig2optim_object, par) diff --git a/R/laploglik_products.R b/R/laploglik_products.R index ca9396d..e91f337 100644 --- a/R/laploglik_products.R +++ b/R/laploglik_products.R @@ -1,3 +1,10 @@ +#' Get products involved in Laplace log likelihood (glms) +#' +#' @param params_object Covariance parameter object +#' @param data_object Data object +#' @param estmethod Estimation method +#' +#' @noRd laploglik_products <- function(params_object, data_object, estmethod) { cov_matrix_list <- get_cov_matrix_list(params_object, data_object) # cholesky products (no local) @@ -81,6 +88,13 @@ laploglik_products <- function(params_object, data_object, estmethod) { } } +#' Get minus twice the Laplace log likelihood +#' +#' @param laploglik_products Laplace log likelihood products +#' @param data_object Data object +#' @param estmethod Estimation method +#' +#' @noRd get_minustwolaploglik <- function(laploglik_products, data_object, estmethod) { if (estmethod == "reml") { minustwolaploglik <- as.numeric(laploglik_products$l00 + laploglik_products$l01 + laploglik_products$l1 + @@ -96,6 +110,18 @@ get_minustwolaploglik <- function(laploglik_products, data_object, estmethod) { } +#' Get the prediction and Hessian of latent effects +#' +#' @param data_object Data object +#' @param dispersion Dispersion parameter +#' @param SigInv_list List of inverse covariance matrices for each block +#' @param SigInv_X Product between inverse covariance matrix and model matrix +#' @param cov_betahat Covariance of fixed effects +#' @param cov_betahat_Inv Inverse covariance of fixed effects (precision) +#' @param estmethod Estimation method +#' @param ret_mHInv Should -(H)^-1 be retained for later use? +#' +#' @noRd get_w_and_H <- function(data_object, dispersion, SigInv_list, SigInv_X, cov_betahat, cov_betahat_Inv, estmethod, ret_mHInv = FALSE) { family <- data_object$family SigInv <- Matrix::bdiag(SigInv_list) @@ -230,7 +256,15 @@ get_w_and_H <- function(data_object, dispersion, SigInv_list, SigInv_X, cov_beta w_and_H_list } -# gradient of w (lowcase d) +#' Get the gradient of w (called d) +#' +#' @param family The glm family +#' @param w The latent effect (link mean) +#' @param y The response +#' @param size Number of trials (for binomial response) +#' @param dispersion Dispersion parameter +#' +#' @noRd get_d <- function(family, w, y, size, dispersion) { if (family == "poisson") { d <- -exp(w) + y @@ -251,7 +285,15 @@ get_d <- function(family, w, y, size, dispersion) { d } -# Hessian of w (cap D) +#' Get the Hessian of w (called D) +#' +#' @param family The glm family +#' @param w The latent effect (link mean) +#' @param y The response +#' @param size Number of trials (for binomial response) +#' @param dispersion Dispersion parameter +#' +#' @noRd get_D <- function(family, w, y, size, dispersion) { w <- as.vector(w) @@ -275,6 +317,13 @@ get_D <- function(family, w, y, size, dispersion) { D <- Diagonal(x = D_vec) } +#' Get initial values for w +#' +#' @param family The glm family +#' @param y The response +#' @param dispersion Dispersion parameter +#' +#' @noRd get_w_init <- function(family, y, dispersion) { if (family == "poisson") { w_init <- 0.5 * log(y + 1) @@ -292,6 +341,15 @@ get_w_init <- function(family, y, dispersion) { w_init } +#' Get glm distribution piece of Laplace log likelihood +#' +#' @param family The glm family +#' @param w The latent effect (link mean) +#' @param y The response +#' @param size Number of trials (for binomial response) +#' @param dispersion Dispersion
parameter +#' +#' @noRd get_l00 <- function(family, w, y, size, dispersion) { w <- as.vector(w) y <- as.vector(y) diff --git a/R/local.R b/R/local.R index 0702067..7abc85d 100644 --- a/R/local.R +++ b/R/local.R @@ -1,3 +1,12 @@ +#' Parameters that control spatial indexing (not yet implemented; +#' function calls to this just act as placeholders) +#' +#' @param local Placeholder +#' @param data Placeholder +#' @param n Placeholder +#' @param partition_factor Placeholder +#' +#' @noRd get_local_list_estimation <- function(local, data, n, partition_factor) { # size can be an integer and sets group size # alternatively, set the number of groups @@ -83,6 +92,14 @@ get_local_list_estimation <- function(local, data, n, partition_factor) { local } +#' A helper to get spatial indexes (not yet implemented; +#' function calls to this just act as placeholders) +#' +#' @param local Placeholder +#' @param data Placeholder +#' @param n Placeholder +#' +#' @noRd get_local_estimation_index <- function(local, data, n) { # if (local$method == "random") { # index <- sample(rep(seq_len(local$groups), times = local$size)[seq_len(n)]) @@ -97,6 +114,13 @@ get_local_estimation_index <- function(local, data, n) { # index } + +#' Parameters that control local neighborhood prediction (not yet implemented; +#' function calls to this just act as placeholders) +#' +#' @param local Placeholder +#' +#' @noRd get_local_list_prediction <- function(local) { # set local neighborhood size # method can be "all" (for all data), "distance" (for local distance neighborhoods) diff --git a/R/loocv.R b/R/loocv.R index 00f1769..a887cea 100644 --- a/R/loocv.R +++ b/R/loocv.R @@ -112,6 +112,15 @@ loocv.ssn_lm <- function(object, cv_predict = FALSE, se.fit = FALSE, ...) { return(loocv_out) } } + +#' A helper to get the leave one out predictions +#' +#' @param object Model object +#' @param cv_predict Whether cross validation predictions should be returned +#' @param se.fit Whether the standard error should be returned +#' @param ... Additional arguments +#' +#' @noRd get_loocv.ssn_lm <- function(object, cv_predict = FALSE, se.fit = FALSE, ...) { # local stuff (save for later) # if (missing(local)) local <- NULL @@ -206,6 +215,7 @@ get_loocv.ssn_lm <- function(object, cv_predict = FALSE, se.fit = FALSE, ...)
{ cv_output } +# not currently used (but will be later when local prediction implemented) loocv_local <- function(row, se.fit, cov_matrix_val, total_var, Xmat, y, betahat, cov_betahat, local) { # new_cov_matrix_val <- cov_matrix_val[-row, -row, drop = FALSE] # new_cov_vector_val <- cov_matrix_val[row, -row, drop = FALSE] @@ -241,6 +251,18 @@ loocv_local <- function(row, se.fit, cov_matrix_val, total_var, Xmat, y, betahat # pred_list } +#' Get leave one out cross validation value +#' +#' @param obs Index of observed data +#' @param Sig Covariance matrix +#' @param SigInv Inverse covariance matrix +#' @param Xmat Model matrix +#' @param y Response +#' @param yX Matrix containing model matrix and response +#' @param SigInv_yX Product of inverse covariance matrix with model matrix and response +#' @param se.fit Whether to return standard errors of leave one out predictions +#' +#' @noRd get_loocv <- function(obs, Sig, SigInv, Xmat, y, yX, SigInv_yX, se.fit) { SigInv_mm <- SigInv[obs, obs] # a constant SigInv_om <- SigInv[-obs, obs, drop = FALSE] diff --git a/R/loocv_glm.R b/R/loocv_glm.R index 569f9e3..afac62d 100644 --- a/R/loocv_glm.R +++ b/R/loocv_glm.R @@ -43,6 +43,14 @@ loocv.ssn_glm <- function(object, cv_predict = FALSE, se.fit = FALSE, ...) { } } +#' A helper to get the leave one out predictions for glms +#' +#' @param object Model object +#' @param cv_predict Whether cross validation predictions should be returned +#' @param se.fit Whether the standard error should be returned +#' @param ... Additional arguments +#' +#' @noRd get_loocv.ssn_glm <- function(object, cv_predict = FALSE, se.fit = FALSE, ...) { # turn local off for now local <- FALSE @@ -151,6 +159,14 @@ get_loocv.ssn_glm <- function(object, cv_predict = FALSE, se.fit = FALSE, ...) { cv_output } +#' Helper to get each row leave one out prediction +#' +#' @param row The row of interest +#' @param object Model object +#' @param se.fit Whether to return the standard error +#' @param local_list Local list (not yet functional) +#' +#' @noRd loocv_local_glm <- function(row, object, se.fit, local_list) { object$fitted$link <- object$fitted$link[-row] # w needs to be subset newdata <- object$obdata[row, , drop = FALSE] @@ -158,6 +174,19 @@ loocv_local_glm <- function(row, object, se.fit, local_list) { predict(object, newdata = newdata, se.fit = se.fit, local = local_list) } +#' Get leave one out cross validation value for glm +#' +#' @param obs Index of observed data +#' @param Sig Covariance matrix +#' @param SigInv Inverse covariance matrix +#' @param Xmat Model matrix +#' @param w The predicted latent effects +#' @param wX Matrix containing w and the model matrix +#' @param SigInv_wX Product of inverse covariance matrix with w and the model matrix +#' @param mHinv The negative inverse Hessian of w +#' @param se.fit Whether to return standard errors of leave one out predictions +#' +#' @noRd get_loocv_glm <- function(obs, Sig, SigInv, Xmat, w, wX, SigInv_wX, mHinv, se.fit) { SigInv_mm <- SigInv[obs, obs] # a constant SigInv_om <- SigInv[-obs, obs, drop = FALSE] diff --git a/R/names.SSN.R b/R/names.SSN.R deleted file mode 100644 index 3c35396..0000000 --- a/R/names.SSN.R +++ /dev/null @@ -1,31 +0,0 @@ -#' names SSN object -#' -#' @description Extract and print names from the SSN object -#' -#' @param x An SSN object. -#' @param ... Other arguments. Not used (needed for generic consistency). -#' -#' @return Print variable names to console -#' -#' @name names.SSN -#' @method names SSN -#' @export - - -names.SSN <- function(x, ...)
{ - d <- x$obs - no <- names(d) - np <- length(x$preds) - namesList <- vector("list", np + 1) - namesList[[1]] <- no - names4List <- "obs" - if (np > 0) { - for (i in seq_len(np)) { - names4List <- c(names4List, names(x$preds)[[i]]) - d <- x$preds[[i]] - namesList[[i + 1]] <- names(d) - } - } - names(namesList) <- names4List - return(namesList) -} diff --git a/R/optim.R b/R/optim.R index aedc390..07c3e64 100644 --- a/R/optim.R +++ b/R/optim.R @@ -97,6 +97,15 @@ check_optim_method <- function(optim_par, optim_dotlist) { optim_dotlist } +#' Get parameters to optimize over in optim (remove known parameters) for glms +#' +#' @param spcov_orig2optim A \code{spcov_orig2optim} object +#' @param dispersion_orig2optim A \code{dispersion_orig2optim} object +#' @param randcov_orig2optim A \code{randcov_orig2optim} object +#' +#' @return The parameters to optimize over in optim +#' +#' @noRd get_optim_par_glm <- function(spcov_orig2optim, dispersion_orig2optim, randcov_orig2optim = NULL) { if (is.null(randcov_orig2optim)) { par <- spcov_orig2optim$value[!spcov_orig2optim$is_known] diff --git a/R/optim2orig.R b/R/optim2orig.R index b297bf8..34786db 100644 --- a/R/optim2orig.R +++ b/R/optim2orig.R @@ -1,3 +1,9 @@ +#' Transform parameters from optim to original scale +#' +#' @param orig2optim_object An object that contains the parameters on the original scale +#' @param par Current parameter values +#' +#' @noRd optim2orig <- function(orig2optim_object, par) { # fill optim parameter vector with known parameters fill_optim_par_val <- fill_optim_par(orig2optim_object, par) @@ -39,6 +45,12 @@ optim2orig <- function(orig2optim_object, par) { list(orig_ssn = fill_orig_val_ssn, orig_randcov = fill_orig_val_randcov) } +#' Transform parameters from optim to original scale for glms +#' +#' @param orig2optim_object An object that contains the parameters on the original scale +#' @param par Current parameter values +#' +#' @noRd optim2orig_glm <- function(orig2optim_object, par) { fill_optim_par_val <- fill_optim_par(orig2optim_object, par) diff --git a/R/orig2optim.R b/R/orig2optim.R index 9cbb7f0..2050975 100644 --- a/R/orig2optim.R +++ b/R/orig2optim.R @@ -1,3 +1,8 @@ +#' Transform parameters from original to optim scale +#' +#' @param initial_object Initial value object +#' +#' @noRd orig2optim <- function(initial_object) { # find parameters on optim (transformed) scale @@ -109,7 +114,11 @@ orig2optim <- function(initial_object) { ) } - +#' Transform parameters from original to optim scale for glms +#' +#' @param initial_object Initial value object +#' +#' @noRd orig2optim_glm <- function(initial_object) { # tailup tailup_de <- initial_object$tailup_initial$initial[["de"]] diff --git a/R/params.R b/R/params.R index fd07414..8f84e23 100644 --- a/R/params.R +++ b/R/params.R @@ -162,6 +162,11 @@ get_params_object <- function(classes, cov_orig_val) { params_object } +#' Get the initial values that are known +#' +#' @param initial_object Initial value object +#' +#' @noRd get_params_object_known <- function(initial_object) { classes <- c( tailup = class(initial_object$tailup_initial), taildown = class(initial_object$taildown_initial), @@ -208,6 +213,12 @@ get_params_object_known <- function(initial_object) { params_object } +#' Get the parameter object for glms +#' +#' @param classes A named vector of covariance parameter classes +#' @param cov_orig_val The original covariance parameter values +#' +#' @noRd get_params_object_glm <- function(classes, cov_orig_val) { classes <- remove_covtype(classes) @@ -266,6 +277,11 @@
get_params_object_glm <- function(classes, cov_orig_val) { params_object } +#' Get known parameters for glms +#' +#' @param initial_object Initial value object +#' +#' @noRd get_params_object_glm_known <- function(initial_object) { classes <- c( tailup = class(initial_object$tailup_initial), taildown = class(initial_object$taildown_initial), @@ -319,6 +335,12 @@ get_params_object_glm_known <- function(initial_object) { params_object } +#' Get grid of possible initial values +#' +#' @param cov_grid_vector A configuration of initial covariance parameters +#' @param initial_NA_object The initial NA object (which has information on the known parameters) +#' +#' @noRd get_params_object_grid <- function(cov_grid_vector, initial_NA_object) { classes <- c( tailup = class(initial_NA_object$tailup_initial), taildown = class(initial_NA_object$taildown_initial), @@ -368,6 +390,12 @@ get_params_object_grid <- function(cov_grid_vector, initial_NA_object) { ) } +#' Get grid of possible initial values for glms +#' +#' @param cov_grid_vector A configuration of initial covariance parameters +#' @param initial_NA_object The initial NA object (which has information on the known parameters) +#' +#' @noRd get_params_object_grid_glm <- function(cov_grid_vector, initial_NA_object) { classes <- c( tailup = class(initial_NA_object$tailup_initial), taildown = class(initial_NA_object$taildown_initial), diff --git a/R/partition.R b/R/partition.R index 6518b3a..f4aab44 100644 --- a/R/partition.R +++ b/R/partition.R @@ -26,13 +26,24 @@ partition_matrix <- function(partition_factor = NULL, data) { } - -# could use old version for partition names + +#' Get names of partition factors +#' +#' @param partition_factor A partition factor +#' +#' @noRd get_partition_names <- function(partition_factor) { + # could use old version for partition names # get the partition formula and turn it into a named vector here labels_initial <- labels(terms(partition_factor)) new_labels <- unlist(lapply(labels_initial, get_partition_name)) } +#' Get adjusted name of a partition factor +#' +#' @param label A partition factor name +#' +#' @noRd get_partition_name <- function(label) { if (grepl("|", label, fixed = TRUE)) { new_label <- label diff --git a/R/predict.R b/R/predict.R index b90f406..0b14f08 100644 --- a/R/predict.R +++ b/R/predict.R @@ -4,7 +4,9 @@ #' #' @param object A fitted model object from [ssn_lm()] or [ssn_glm()]. #' @param newdata A character vector that indicates the name of the prediction data set -#' in the SSN object for which predictions are desired. If omitted, predictions +#' for which predictions are desired (accessible via \code{object$ssn.object$preds}). +#' Note that the prediction data must be in the original SSN object used to fit the model. +#' If \code{newdata} is omitted, predictions #' for all prediction data sets are returned. Note that the name \code{".missing"} #' indicates the prediction data set that contains the missing observations in the data used #' to fit the model. @@ -58,8 +60,6 @@ #' predict(ssn_mod, "pred1km") predict.ssn_lm <- function(object, newdata, se.fit = FALSE, interval = c("none", "confidence", "prediction"), level = 0.95, block = FALSE, ...)
{ - - # match interval argument so the three display interval <- match.arg(interval) @@ -284,6 +284,26 @@ predict.ssn_lm <- function(object, newdata, se.fit = FALSE, interval = c("none", } } +#' Get a prediction (and its standard error) +#' +#' @param newdata_list A row of prediction data +#' @param se.fit Whether standard errors should be returned +#' @param interval The interval type +#' @param formula Model formula +#' @param obdata Observed data +#' @param cov_matrix_val Covariance matrix +#' @param total_var Total variance in the process +#' @param cov_lowchol Lower triangular of Cholesky decomposition matrix +#' @param Xmat Model matrix +#' @param y Response variable +#' @param offset A possible offset +#' @param betahat Fixed effect estimates +#' @param cov_betahat Covariance of fixed effects +#' @param contrasts Possible contrasts +#' @param local Local neighborhood options (not yet implemented) +#' @param xlevels Levels of explanatory variables +#' +#' @noRd get_pred <- function(newdata_list, se.fit, interval, formula, obdata, cov_matrix_val, total_var, cov_lowchol, Xmat, y, offset, betahat, cov_betahat, contrasts, local, xlevels) { cov_vector_val <- newdata_list$c0 @@ -332,9 +352,6 @@ get_pred <- function(newdata_list, se.fit, interval, formula, obdata, cov_matrix predict_block <- function(object, newdata, se.fit, interval, level, ...) { - - - # deal with local (omitted for now) # if (missing(local)) local <- NULL # if (is.null(local)) local <- FALSE diff --git a/R/predict_glm.R b/R/predict_glm.R index 8a34284..8492141 100644 --- a/R/predict_glm.R +++ b/R/predict_glm.R @@ -298,6 +298,30 @@ predict.ssn_glm <- function(object, newdata, type = c("link", "response"), se.fi +#' Get a prediction (and its standard error) for glms +#' +#' @param newdata_list A row of prediction data +#' @param se.fit Whether standard errors should be returned +#' @param interval The interval type +#' @param formula Model formula +#' @param obdata Observed data +#' @param cov_matrix_val Covariance matrix +#' @param total_var Total variance in the process +#' @param cov_lowchol Lower triangular of Cholesky decomposition matrix +#' @param Xmat Model matrix +#' @param y Response variable +#' @param betahat Fixed effect estimates +#' @param cov_betahat Covariance of fixed effects +#' @param contrasts Possible contrasts +#' @param local Local neighborhood options (not yet implemented) +#' @param family glm family +#' @param w Latent effects +#' @param size Number of binomial trials +#' @param dispersion Dispersion parameter +#' @param predvar_adjust_ind Whether prediction variance should be adjusted for uncertainty in w +#' @param xlevels Levels of explanatory variables +#' +#' @noRd get_pred_glm <- function(newdata_list, se.fit, interval, formula, obdata, cov_matrix_val, total_var, cov_lowchol, Xmat, y, betahat, cov_betahat, contrasts, local, @@ -342,6 +366,19 @@ get_pred_glm <- function(newdata_list, se.fit, interval, pred_list } +#' Get weights by which to adjust variances involving w +#' +#' @param family glm family +#' @param Xmat Model matrix +#' @param y Response variable +#' @param w Latent effects +#' @param size Number of binomial trials +#' @param dispersion Dispersion parameter +#' @param cov_lowchol Lower triangular of Cholesky decomposition matrix +#' @param x0 Explanatory variable values for newdata +#' @param c0 Covariance between observed and newdata +#' +#' @noRd get_wts_varw <- function(family, Xmat, y, w, size, dispersion, cov_lowchol, x0, c0) { SigInv <- chol2inv(t(cov_lowchol)) # works on upchol SigInv_X <- SigInv %*% Xmat
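# [Illustrative sketch, not part of the patch.] A minimal usage example of the
# predict() behavior documented above, assuming the MiddleFork04 example data
# shipped with SSN2 and a fitted model as in the package's roxygen examples.
# copy_lsn_to_temp()
# mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"), predpts = "pred1km", overwrite = TRUE)
# ssn_create_distmat(mf04p, predpts = "pred1km", overwrite = TRUE)
# ssn_mod <- ssn_lm(Summer_mn ~ ELEV_DEM, mf04p, tailup_type = "exponential", additive = "afvArea")
# names(ssn_mod$ssn.object$preds)        # prediction data sets available by name
# predict(ssn_mod, newdata = "pred1km")  # predictions for one named prediction data set
# predict(ssn_mod)                       # newdata omitted: predictions for all data sets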
diff --git a/R/print.SSN.R b/R/print.SSN.R index 8f51c24..3bc6f6e 100644 --- a/R/print.SSN.R +++ b/R/print.SSN.R @@ -14,6 +14,10 @@ print.SSN <- function(x, ...) { cat("Object of class SSN\n\n") nobs <- dim(x$obs) + # no observed data + if (is.null(nobs)) { + nobs <- c(0, 0) + } nobs <- matrix(nobs, 1, ) np <- length(x$preds) if (np > 0) { @@ -26,12 +30,12 @@ print.SSN <- function(x, ...) { print(st_bbox(x$edges)) cat("\n") - if (nrow(nobs) == 2) { - cat("Object also includes", nrow(nobs) - 1, "set of prediction points with", sum(nobs[, 1]) - nobs[1, 1], "locations\n\n") - } else if (nrow(nobs) > 2) { - cat("Object also includes", nrow(nobs) - 1, "sets of prediction points with a total of", sum(nobs[, 1]) - nobs[1, 1], "locations\n\n") + if (np == 1) { + cat("Object also includes", 1, "set of prediction points with", sum(nobs[, 1]) - nobs[1, 1], "locations\n\n") + } else if (np > 1) { + cat("Object also includes", np, "sets of prediction points with a total of", sum(nobs[, 1]) - nobs[1, 1], "locations\n\n") } - cat("Variable names are (found using names(object)):\n") + cat("Variable names are (found using ssn_names(object)):\n") ## print(names(x$preds)) - print(names(x)) + ssn_names(x) } diff --git a/R/random.R b/R/random.R index 38fb05e..9810810 100644 --- a/R/random.R +++ b/R/random.R @@ -19,12 +19,29 @@ get_randcov_list <- function(index, randcov_Zs = NULL, randcov_names = NULL) { randcov_list } +#' Get list of random effects +#' +#' @param index_val A spatial index (not yet functional) to compare +#' @param index A spatial index (not yet functional) +#' @param randcov_Zs Random effect design matrices +#' @param randcov_names Random effect names +#' +#' @noRd get_randcov_index_list <- function(index_val, index, randcov_Zs, randcov_names) { Z_lists <- lapply(randcov_names, function(x) get_randcov_var_list(x, index_val, index, randcov_Zs)) names(Z_lists) <- randcov_names Z_lists } +#' Get list of random effect variances +#' +#' @param randcov_name Random effect name +#' @param index_val A spatial index (not yet functional) to compare +#' @param index A spatial index (not yet functional) +#' @param randcov_Zs Random effect design matrices +#' +#' @noRd get_randcov_var_list <- function(randcov_name, index_val, index, randcov_Zs) { Z_list <- randcov_Zs[[randcov_name]][["Z"]][index_val, , drop = FALSE] if (is.null(randcov_Zs[[randcov_name]][["ZZt"]])) { @@ -40,6 +57,15 @@ get_randcov_var_list <- function(randcov_name, index_val, index, randcov_Zs) { list(Z = Z_list, ZZt = ZZt_list, ZtZ = ZtZ_list) } +#' Get random effect design matrices +#' +#' @param data Data +#' @param randcov_names Random effect names +#' @param ZZt Whether to return the product of the random effect design matrix and its transpose +#' @param ZtZ Whether to return the cross-product of the random effect design matrix +#' @param xlev_list Levels of random effects +#' +#' @noRd get_randcov_Zs <- function(data, randcov_names = NULL, ZZt = TRUE, ZtZ = FALSE, xlev_list = NULL) { if (is.null(randcov_names)) { randcov_Zs <- NULL @@ -50,6 +76,15 @@ get_randcov_Zs <- function(data, randcov_names = NULL, ZZt = TRUE, ZtZ = FALSE, randcov_Zs } +#' Get a random effect design matrix +#' +#' @param randcov_name Random effect name +#' @param data Data +#' @param ZZt Whether to return the product of the random effect design matrix and its transpose +#' @param ZtZ Whether to return the cross-product of the random effect design matrix +#' @param xlev_list Levels of random effects +#' +#' @noRd get_randcov_Z <- function(randcov_name, data, ZZt = TRUE, ZtZ = FALSE, xlev_list = NULL) { bar_split <- unlist(strsplit(randcov_name, " | ", fixed = TRUE)) Z_reform <-
reformulate(bar_split[[2]], intercept = FALSE) @@ -112,6 +147,11 @@ get_randcov_names <- function(random = NULL) { new_labels } +#' Adjust a single name of a random effect +#' +#' @param label Name of a random effect +#' +#' @noRd get_randcov_name <- function(label) { if (grepl("|", label, fixed = TRUE)) { new_label <- label diff --git a/R/restruct_ssn_missing.R b/R/restruct_ssn_missing.R index 55babd7..0abe134 100644 --- a/R/restruct_ssn_missing.R +++ b/R/restruct_ssn_missing.R @@ -1,3 +1,13 @@ +#' Restructure a SSN object with missing response data +#' +#' @param ssn.object SSN object +#' @param observed_index Index of observed (non-NA) response values +#' @param missing_index Index of missing (NA) response values +#' +#' @return An SSN object with missing data stored as a prediction data set and +#' observed data adjusted accordingly +#' +#' @noRd restruct_ssn_missing <- function(ssn.object, observed_index, missing_index) { if (length(missing_index) > 0) { # only return .missing object if necessary diff --git a/R/ssn_create_distmat.R b/R/ssn_create_distmat.R index bebb86c..034f29e 100644 --- a/R/ssn_create_distmat.R +++ b/R/ssn_create_distmat.R @@ -114,7 +114,7 @@ #' copy_lsn_to_temp() #' ## Import SSN data #' mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"), -#' predpts = c("pred1km.shp", "Knapp"), +#' predpts = c("pred1km.gpkg", "CapeHorn"), #' overwrite = TRUE #' ) #' @@ -129,27 +129,32 @@ #' ## Distance matrices for observations and pred1km prediction sites are #' ## not recalculated. #' ssn_create_distmat(mf04p, -#' predpts = "Knapp", overwrite = TRUE, +#' predpts = "CapeHorn", overwrite = TRUE, #' among_predpts = TRUE, only_predpts = TRUE #' ) ssn_create_distmat <- function(ssn.object, predpts = NULL, overwrite = FALSE, among_predpts = FALSE, only_predpts = FALSE) { - # iterate if predpts is not null if (length(predpts) > 1) { if (only_predpts) { - x <- lapply(predpts, function(x) ssn_create_distmat(ssn.object, - predpts = x, - overwrite, - among_predpts, - only_predpts)) + x <- lapply(predpts, function(x) { + ssn_create_distmat(ssn.object, + predpts = x, + overwrite, + among_predpts, + only_predpts + ) + }) } else { x1 <- ssn_create_distmat(ssn.object, overwrite = overwrite) - x2 <- lapply(predpts, function(x) ssn_create_distmat(ssn.object, - predpts = x, - overwrite, - among_predpts, - only_predpts = TRUE)) + x2 <- lapply(predpts, function(x) { + ssn_create_distmat(ssn.object, + predpts = x, + overwrite, + among_predpts, + only_predpts = TRUE + ) + }) } return(invisible(NULL)) } @@ -220,10 +225,12 @@ ssn_create_distmat <- function(ssn.object, predpts = NULL, overwrite = FALSE, ## Get netID with observed or predicted sites if (!is.null(predpts)) { - site.nets<- unique(c(levels(ssn$obs$NetworkID), - levels(ssn$preds[[predpts]]$NetworkID))) + site.nets <- unique(c( + levels(ssn$obs$NetworkID), + levels(ssn$preds[[predpts]]$NetworkID) + )) } else { - site.nets<- unique(c(levels(ssn$obs$NetworkID))) + site.nets <- unique(c(levels(ssn$obs$NetworkID))) } net.count <- length(site.nets) @@ -258,7 +265,7 @@ ssn_create_distmat <- function(ssn.object, predpts = NULL, overwrite = FALSE, ## ---------------------------------------------------------------- for (i in seq_len(net.count)) { ## Set network number and name - ##net.num <- levels(ssn$edges$NetworkID)[i] + ## net.num <- levels(ssn$edges$NetworkID)[i] net.num <- site.nets[i] net.name <- paste("net", net.num, sep = "") @@ -369,8 +376,9 @@ ssn_create_distmat <- function(ssn.object, predpts = NULL, overwrite = FALSE, ## Create 
data.frame for obs with columns pid, rid, locID ob.i <- ssn_get_netgeom(ssn$obs[ind.obs, ], c("pid", "SegmentID", "locID"), - reformat = TRUE) - ##ob.i <- as.data.frame(sapply(ob.i, as.numeric)) + reformat = TRUE + ) + ## ob.i <- as.data.frame(sapply(ob.i, as.numeric)) colnames(ob.i) <- c("pid", "rid", "locID") ob.i$locID <- as.factor(ob.i$locID) diff --git a/R/ssn_get_netgeometry.R b/R/ssn_get_netgeom.R similarity index 100% rename from R/ssn_get_netgeometry.R rename to R/ssn_get_netgeom.R diff --git a/R/ssn_import.R b/R/ssn_import.R index 584063d..5825a65 100644 --- a/R/ssn_import.R +++ b/R/ssn_import.R @@ -5,59 +5,37 @@ #' @param path Filepath to the .ssn directory. See details. #' @param include_obs default = \code{TRUE}. Logical indicating #' whether observed sites should be included in the SSN object. -#' @param predpts Vector of shapefile basenames for prediction sites -#' found within the .ssn folder. -#' @param format_additive Logical indicating whether the columns containing -#' the addtive function values should be formated for -#' \code{SSN2}. Default = \code{FALSE}. -#' @param names_additive Character vector of column names in observed and -#' prediction site datasets containing additive function values. -#' Must be defined if \code{format_additive = TRUE}. Default = -#' \code{NULL}. +#' @param predpts Vector of prediction site dataset names found within +#' the .ssn folder. See details. #' @param overwrite default = \code{FALSE}. If \code{TRUE}, overwrite -#' existing binaryID.db files. -#' @details The \command{importSSN} function imports spatial data from a .ssn -#' folder to create an \code{SSN} object. The information contained in the -#' .ssn folder can be generated using a number of proprietary and -#' open source software tools: -#' \itemize{ -#' \item{The Spatial Tools for the Analysis of River Systems -#' (STARS) tools for ArcGIS Desktop versions 9.3x-10.8x (Peterson -#' and Ver Hoef 2014). This custom ArcGIS toolset is designed to -#' work with existing streams data in vector format.} -#' \item{The openSTARS package (Kattwinkel et al. 2020) extends -#' the functionality of the STARS toolset, which makes use of R -#' and GRASS GIS. It is open source and designed to derive streams -#' in raster format from a digital elevation model (DEM).} -#' \item{The SSNbler package (currently in development as of -#' September 2023) is an open source version of the STARS toolset, -#' which makes use of the functionality found in the sf package to -#' process streams data in vector format.} -#' } -#' When spatial data are processed using one of these software -#' tools, a .ssn directory is output which contains all of the -#' spatial, topological and attribute data needed to fit a spatial -#' statistical stream network model to streams data. This includes: -#' \itemize{ -#' \item{An edges shapefile of lines that represent the stream -#' network.} -#' \item{A sites shapefile of points where observed data were -#' collected on the stream network.} -#' \item{Prediction sites shapefile(s) of locations where -#' predictions will be made.} -#' \item{netID.dat files for each distinct network, which store -#' the topological relationships of the line segments in edges.} -#' } +#' existing binaryID.db files and netgeom column(s) if it exists in the +#' edges, observed sites (if \code{include_obs = TRUE}), and +#' prediction site datasets (if they exist). 
+#' +#' @details The \command{ssn_import} function imports spatial data (shapefile or GeoPackage format) +#' from a .ssn folder generated using the +#' \code{SSNbler} package function \code{SSNbler::lsn_to_ssn}. The .ssn folder contains all of the spatial, topological and +#' attribute data needed to fit a spatial statistical stream network +#' model to streams data. This includes: +#' \itemize{ +#' \item{An edges dataset with LINESTRING geometry representing the stream network.} +#' \item{A sites dataset with POINT geometry where observed data were collected on +#' the stream network.} +#' \item{Prediction sites dataset(s) representing +#' locations where predictions will be made.} +#' \item{netID.dat file(s) for each distinct network, which store the topological +#' relationships of the line features in edges.}} +#' #' A more detailed description of the .ssn directory and its #' contents is provided in Peterson and Ver Hoef (2014). #' -#' The \command{ssn_import} imports the edges, observed sites, and -#' prediction sites as \code{sf data.frame} objects. A new column named 'netgeom' -#' is created to store important data that represents +#' The \command{ssn_import} function imports the edges, observed sites (optional), and +#' prediction sites (optional) as \code{sf data.frame} objects. A new column named 'netgeom' +#' is created to store important data representing #' topological relationships in a spatial stream network #' model. These data are stored in character format, which is less #' likely to be inadvertently changed by users. See -#' \code{\link[SSN2]{ssn_get_netgeom}} for a more detailed description of +#' \code{\link[SSN2]{create_netgeom}} for a more detailed description of #' the format and contents of 'netgeom'. #' #' The information contained in the netID text files is imported @@ -71,20 +49,25 @@ #' file already exists within the .ssn directory, it will be #' overwritten when the \code{SSN} object is created. #' -#' At a minimum, an \code{SSN} object must always contain streams, which -#' are referred to as edges. The \code{SSN} object would also typically -#' contain a set of observed sites, where measurements have been -#' collected and only one observed dataset is permitted. When -#' \code{include_obs=FALSE}, an \code{SSN} object is created without -#' observations. This option provides flexibility for users who -#' would like to simulate data on a set of artifical sites on an -#' existing stream network. Note that observation sites must be -#' included in the \code{SSN} object in order to fit models using -#' \command{ssn_lm} or \command{ssn_glm}. The \code{SSN} object may contain -#' multiple sets of prediction points (or none), which are stored as -#' separate shapefiles in the .ssn directory. The -#' \code{\link[SSN2]{ssn_import_predpts}} function allows users to import additional -#' sets of prediction sites to a an existing \code{SSN} object. +#' At a minimum, an \code{SSN} object must always contain streams, +#' which are referred to as edges. The \code{SSN} object would also +#' typically contain a set of observed sites, where measurements +#' have been collected. Only one observed dataset is permitted in an +#' \code{SSN} object. When \code{include_obs=FALSE}, an \code{SSN} +#' object is created without observations. This option provides +#' flexibility for users who would like to simulate data on a set of +#' artificial sites on an existing stream network.
Note that +#' observation sites must be included in the \code{SSN} object in +#' order to fit models using \command{ssn_lm} or +#' \command{ssn_glm}. The \code{SSN} object may contain multiple +#' sets of prediction points (or none), which are stored as separate +#' datasets in the .ssn directory. If \code{predpts} is a named +#' vector, the names of the \code{preds} list in the \code{SSN} +#' object correspond to the vector names. Otherwise, they are set to +#' the basename of the prediction site dataset file(s) specified in +#' \code{predpts}. The \code{\link[SSN2]{ssn_import_predpts}} +#' function allows users to import additional sets of prediction +#' sites to an existing \code{SSN} object. #' #' @return \code{ssn_import} returns an object of class SSN, which is a list #' with four elements containing: @@ -95,18 +78,12 @@ #' with an additional 'netgeom' column. NA if \code{include_obs = #' FALSE}.} #' \item{\code{preds}: A list of sf data.frames containing prediction -#' site locations. The names of the preds list correspond to the -#' basenames of the prediction site shapefiles (without the .shp -#' extension) specified in \code{predpts}. Empty list if \code{predpts} is not provided.} +#' site locations. An empty list is returned if \code{predpts} is not provided.} -#' \item{path: The local file to the .ssn directory associated with the \code{SSN} +#' \item{path: The local filepath for the .ssn directory associated with the \code{SSN} #' object.} #' } -#' @seealso [ssn_get_netgeom] +#' #' @references -#' Kattwinkel, M., Szocs, E., Peterson, E., and Schafer, -#' R.B. (2020) Preparing GIS data for analysis of stream monitoring -#' data: The R package openSTARS. \emph{PLOS One} \bold{15(9)}, -#' e0239237. #' Peterson, E., and Ver Hoef, J.M. (2014) STARS: An #' ArcGIS toolset used to calculate the spatial information needed #' to fit spatial statistical stream network models to stream @@ -126,238 +103,156 @@ #' ## Import SSN object with 2 sets of prediction sites #' mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"), #' predpts = c( -#' "pred1km.shp", -#' "CapeHorn.shp", -#' "Knapp.shp" +#' "pred1km", +#' "CapeHorn" #' ), #' overwrite = TRUE #' ) #' -ssn_import <- function(path, include_obs = TRUE, predpts, - format_additive = FALSE, names_additive = NULL, +ssn_import <- function(path, include_obs = TRUE, predpts = NULL, overwrite = FALSE) { - ## Extract dirname - file <- path - ssn_folder <- file + if (!dir.exists(path)) stop("Cannot find the .ssn folder.") # Get wd old_wd <- getwd() on.exit(setwd(old_wd)) # if the function crashes or finishes, it will restore the initial working directory - ## Adjust if relative pathname is supplied and fix edges - if (substr(ssn_folder, start = 1, stop = 2) == "./") { - rel.path <- substr(ssn_folder, start = 2, stop = nchar(ssn_folder)) - ssn_folder <- paste0(old_wd, rel.path) - } + local_dir(path) - if (!dir.exists(ssn_folder)) stop("Cannot find the .ssn folder.") + ## Adjust if relative pathname is supplied in path + if (substr(path, start = 1, stop = 2) == "./") { + rel.path <- substr(path, start = 2, stop = nchar(path)) + path <- paste0(old_wd, rel.path) + } - # Set wd - setwd(ssn_folder) + ################################################################# + ## Check format of predpts + ################################################################ - ## Check inputs, generally - missingsites <- !include_obs - missingpreds <- missing(predpts) + ## If names are provided, use them + if (is.vector(predpts) & !is.null(names(predpts))) {
p.names <- names(predpts) + } - # Check observation sites exist and import - if (!missingsites) { - obs_sites <- "sites.shp" + ## If no names provided assign based on name of file without extension + if (is.vector(predpts) & is.null(names(predpts))) { + p.names <- NULL - # Check the file exists and then ingest, process - if (file.exists(obs_sites)) { - sfsites <- st_read(obs_sites, quiet = TRUE) + for (d in 1:length(predpts)) { + shp.ext <- substr(predpts[d], nchar(predpts[d]) - 3, nchar(predpts[d])) == ".shp" + gpkg.ext <- substr(predpts[d], nchar(predpts[d]) - 4, nchar(predpts[d])) == ".gpkg" - ## Check geometry type - if (sum(st_geometry_type(sfsites, by_geometry = TRUE) == "POINT") != nrow(sfsites)) { - stop("Observed sites do not have POINT geometry") - } - ## Add network geometry column - sfsites[, "netgeom"] <- paste0("SNETWORK (", paste( - sfsites$netID, sfsites$rid, sfsites$upDist, - sfsites$ratio, sfsites$pid, sfsites$locID - ), - ")", - sep = "" + p.names[d] <- ifelse( + shp.ext == TRUE, substr(predpts[d], 1, nchar(predpts[d]) - 4), + ifelse(gpkg.ext == TRUE, + substr(predpts[d], 1, nchar(predpts[d]) - 5), + ifelse(shp.ext == FALSE & gpkg.ext == FALSE, predpts[d]) + ) ) - - if (format_additive == TRUE) { - if (is.null(names_additive)) { - stop("names_additive is required when format_additive = TRUE") - } - for (q in seq_len(length(names_additive))) { - if (!names_additive[q] %in% colnames(sfsites)) { - warning(paste0(names_additive[q], " is not found in obs"), call. = FALSE) - } else { - ## Extract afv column - tmp.col <- st_drop_geometry(sfsites[, names_additive[q]]) - ## convert to character if column is numeric - if (!inherits(tmp.col[, 1], "numeric")) { - message(paste0(names_additive[q], " is not numeric. AFV values were not modified")) - } else { - tmp.col2 <- formatC(tmp.col[, 1], - digits = 254, format = "f", - drop0trailing = TRUE - ) - sfsites[, names_additive[q]] <- tmp.col2 - } - } - } - } - } else { - stop("This shapefile does not exist: ", obs_sites) } } - if (!missingpreds) { - # Append .shp if necessary - nofe <- !grepl(".shp$", predpts) - if (any(nofe)) predpts[nofe] <- paste0(predpts[nofe], ".shp") - - ## Check for relative pathnames - ## rel.name <- substr(predpts, start = 1, stop = 2)== "./" - ## if(any(rel.name)) predpts[rel.name]<- basename(predpts[rel.name]) - # Check the files exist and then import prediction points - if (all(file.exists(predpts))) { - sfpreds <- vector(mode = "list", length = length(predpts)) - - for (m in seq_len(length(predpts))) { - tmp.preds <- st_read(paste0(file, "/", predpts[m]), quiet = TRUE) - - ## Check geometry type - if (sum(st_geometry_type(tmp.preds, by_geometry = TRUE) == "POINT") != nrow(tmp.preds)) { - stop(paste0(predpts[m], " do not have POINT geometry")) - } + ## Remove path to predpts files, if included + if (!is.null(predpts)) { + predpts <- basename(predpts) + } - if (format_additive == TRUE) { - if (is.null(names_additive)) { - stop("names_additive is required when format_additive = TRUE") - } - for (q in seq_len(length(names_additive))) { - if (!names_additive[q] %in% colnames(tmp.preds)) { - warning(paste0(names_additive[q], " is not found in ", predpts[m]), call. = FALSE) - } else { - ## Extract afv column - tmp.col <- st_drop_geometry(tmp.preds[, names_additive[q]]) - ## convert to character if column is numeric - if (!inherits(tmp.col[, 1], "numeric")) { - message(paste0(names_additive[q], " is not numeric. 
AFV values were not modified")) - } else { - tmp.col2 <- formatC(tmp.col[, 1], - digits = 254, format = "f", - drop0trailing = TRUE - ) - tmp.preds[, names_additive[q]] <- tmp.col2 - } - } - } - } - ## Add network geometry column - tmp.preds[, "netgeom"] <- paste0("SNETWORK (", paste( - tmp.preds$netID, - tmp.preds$rid, - tmp.preds$upDist, - tmp.preds$ratio, - tmp.preds$pid, - tmp.preds$locID - ), - ")", - sep = "" - ) - sfpreds[[m]] <- tmp.preds - names(sfpreds)[m] <- substr(basename(predpts[m]), - start = 1, - stop = nchar(basename(predpts[m])) - 4 - ) - rm(tmp.preds) - } - } else { - stop("At least one of these shapefiles does not exist: ", predpts) - } - } + ## ---------------------------------------------------- ## Import edges - if (file.exists("edges.shp")) { - # Get the edges - sfedges <- st_read("edges.shp", quiet = TRUE) + ## ---------------------------------------------------- + sfedges <- get_sf_obj("edges") + if (exists("sfedges")) { ## Check geometry type - if (sum(st_geometry_type(sfedges, by_geometry = TRUE) == "LINESTRING") != nrow(sfedges)) { + if (sum(st_geometry_type(sfedges, by_geometry = TRUE) == + "LINESTRING") != nrow(sfedges)) { stop("Edges do not have LINESTRING geometry") } - if (format_additive == TRUE) { - if (is.null(names_additive)) { - stop("names_additive is required when format_additive = TRUE") - } - for (q in seq_len(length(names_additive))) { - if (!names_additive[q] %in% colnames(sfedges)) { - warning(paste0(names_additive[q], " is not found in obs"), call. = FALSE) - } else { - ## Extract afv column - tmp.col <- st_drop_geometry(sfedges[, names_additive[q]]) - ## convert to character if column is numeric - if (!inherits(tmp.col[, 1], "numeric")) { - message(paste0(names_additive[q], " is not numeric. AFV values were not modified")) - } else { - tmp.col2 <- formatC(tmp.col[, 1], - digits = 254, format = "f", - drop0trailing = TRUE - ) - sfedges[, names_additive[q]] <- tmp.col2 - } - } - } + ## Ensure geometry column is named geometry + if (!"geometry" %in% colnames(sfedges)) { + sf::st_geometry(sfedges) <- "geometry" } - ## Add network geometry column to edges - sfedges[, "netgeom"] <- paste0("ENETWORK (", paste( - sfedges$netID, - sfedges$rid, - sfedges$upDist - ), - ")", - sep = "" - ) - ssnlist <- list(edges = sfedges) + sfedges<- create_netgeom(sfedges, type = "LINESTRING", + overwrite = overwrite) + } else { - stop(paste0("Edges is missing from ", file)) + stop(paste0("Edges is missing from ", path)) } - ## } + ## ---------------------------------------------------- + ## Import obs + ## ---------------------------------------------------- - ## ) + ## Check observation sites exist and import + if (include_obs == TRUE) { + ## Check the file exists and then ingest, process + sfsites <- get_sf_obj("sites") + + ## Check geometry type + if (sum(st_geometry_type(sfsites, by_geometry = TRUE) == + "POINT") != nrow(sfsites)) { + stop("Observed sites do not have POINT geometry") + } + + ## Ensure geometry column is named geometry + if (!"geometry" %in% colnames(sfsites)) { + sf::st_geometry(sfsites) <- "geometry" + } - ## Add observation sites and prediction sites if present - if (missingsites) { - ssnlist$obs <- NA + ## ## Add network geometry column + sfsites<- create_netgeom(sfsites, type = "POINT", + overwrite = overwrite) + } else { - ssnlist$obs <- sfsites + sfsites <- NA } - if (!missingpreds) { - ssnlist$preds <- sfpreds + ## ---------------------------------------------------- + ## Import preds + ## 
----------------------------------------------------
+  if (!is.null(predpts)) {
+    sfpreds <- vector(mode = "list", length = length(predpts))
+
+    for (m in seq_len(length(predpts))) {
+      tmp.preds <- get_sf_obj(predpts[m])
+
+      ## Check geometry type
+      if (sum(st_geometry_type(tmp.preds, by_geometry = TRUE) == "POINT") != nrow(tmp.preds)) {
+        stop(paste0(predpts[m], " do not have POINT geometry"))
+      }
+
+      ## Add network geometry column
+      tmp.preds <- create_netgeom(tmp.preds, type = "POINT",
+                                  overwrite = overwrite)
+
+      sfpreds[[m]] <- tmp.preds
+      names(sfpreds)[m] <- p.names[m]
+      rm(tmp.preds)
+    }
+  } else {
-    ssnlist$preds <- list()
+    sfpreds <- list()
  }

-  # Add the path
-  ## ssnlist$path <- dirname(edges)
-  ssnlist$path <- ssn_folder
-
-  # Set custom class
+  ## ---------------------------------------------------------
+  ## Create SSN object and return
+  ## ---------------------------------------------------------
+  ssnlist <- list(edges = sfedges, obs = sfsites, preds = sfpreds, path = path)
  class(ssnlist) <- "SSN"

  ## Create Binary ID database
  createBinaryID(ssnlist, overwrite = overwrite)

-  ## Warning when observation sites are not included if(include_obs ==
-  if (include_obs == FALSE) {
-    warning("SSN does not include observed sites, which are needed to fit models. If this was a mistake, set include_obs == TRUE")
+  ## Warning when observation sites are not included in SSN
+  if (is.logical(ssnlist$obs)) {
+    warning("SSN does not include observed sites, which are needed to fit models. If this was a mistake, run ssn_import() with obs correctly defined")
  }
-
-  # Return
+  ## Return
  return(ssnlist)
}
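
The named-vector behavior documented above is easy to miss, so here is a minimal usage sketch of the new `ssn_import()`; the element names `main` and `cape` are illustrative, not part of the package:

```r
library(SSN2)

## Copy the MiddleFork04.ssn example data to a writable temporary directory
copy_lsn_to_temp()

## A named predpts vector sets the names of the preds list directly;
## an unnamed vector falls back to the file basenames instead
mf04p <- ssn_import(
  paste0(tempdir(), "/MiddleFork04.ssn"),
  predpts = c(main = "pred1km", cape = "CapeHorn"),
  overwrite = TRUE
)

names(mf04p$preds)
#> [1] "main" "cape"
```
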
diff --git a/R/ssn_import_predpts.R b/R/ssn_import_predpts.R
index a79e127..dedf27a 100644
--- a/R/ssn_import_predpts.R
+++ b/R/ssn_import_predpts.R
@@ -5,56 +5,41 @@
 #' \code{SSN}, \code{ssn_lm}, or \code{ssn_glm}.
 #' @param x An object of class\code{SSN}, \code{ssn_lm}, or
 #' \code{ssn_glm}.
-#' @param predpts Name of the prediction point shapefile to import in
-#' character format, without the .shp extension.
-#' @param format_additive Logical indicating whether the columns containing
-#' the addtive function values should be formated for
-#' \code{SSN2}. Default = \code{FALSE}.
-#' @param names_additive Character vector of column names in observed and
-#' prediction site datasets containing additive function
-#' values. Must be defined if \code{format_additive = TRUE}. Default =
-#' \code{NULL}.
+#' @param predpts Name of the prediction point dataset to import in
+#' character format. See details.
 #'
-#' @details \command{ssn_import_predpts} imports a shapefile of
-#' prediction points residing in the .ssn directory into an existing
+#' @details \command{ssn_import_predpts} imports one set of prediction
+#' points residing in the .ssn directory into an existing
 #' \code{SSN}, \code{ssn_lm}, or \code{ssn_glm} object. The
-#' prediction dataset must reside in the ssn.object$path
+#' prediction dataset must be in shapefile or geopackage format
+#' (.shp or .gpkg, respectively) and reside in the ssn.object$path
 #' directory. The path for an \code{SSN} object can be updated using
 #' \command{ssn_update_path()} prior to importing prediction
-#' datasets. Note that, the prediction dataset must contain the
+#' datasets. The argument \code{predpts} accepts the name of the
+#' prediction point dataset, with or without the file extension. If
+#' it is passed as a named vector (of length 1), then the name
+#' provided is used as the prediction dataset name in the \code{SSN}
+#' object prediction sites list
+#' (e.g. \code{names(ssn.obj$preds)}). Otherwise, the file basename
+#' is used in the names attribute. See
+#' \code{\link[SSN2]{ssn_import}} for a detailed description of the
+#' prediction dataset format within the \code{SSN} class object.
+#'
+#' When the prediction dataset is imported, a new column named
+#' \code{netgeom} is created. If this column already exists, it is
+#' overwritten. Please see \code{\link{create_netgeom}} for a
+#' detailed description of the \code{netgeom} column and the
+#' information it contains.
+#'
+#' The prediction dataset specified in \code{predpts} must contain the
 #' spatial, topological and attribute information needed to make
-#' predictions using an ssn_lm or ssn_glm object. This information
-#' can be generated using a number of proprietary and open source
-#' software tools: \itemize{ \item{The Spatial Tools for the
-#' Analysis of River Systems (STARS) tools for ArcGIS Desktop
-#' versions 9.3x-10.8x (Peterson and Ver Hoef 2014). This custom
-#' ArcGIS toolset is designed to work with existing streams data in
-#' vector format.} \item{The openSTARS package (Kattwinkel et
-#' al. 2020) extends the functionality of the STARS toolset, which
-#' makes use of R and GRASS GIS. It is open source and designed to
-#' derive streams in raster format from a digital elevation model
-#' (DEM).} \item{The SSNbler package (currently in development as
-#' of September 2023) is an open source version of the STARS
-#' toolset, which makes use of the functionality found in the sf
-#' package to process streams data in vector format.} }
+#' predictions using an ssn_lm or ssn_glm object. This information
+#' is generated using the \code{SSNbler} package, which makes use of
+#' the functionality found in the \code{sf} and \code{igraph}
+#' packages to process streams data in vector format.
 #' @return an object of class \code{SSN}, \code{ssn_lm}, or
-#' \code{ssn_glm} which contains the new prediction dataset. The
-#' name of the prediction dataset in the preds list corresponds to
-#' the basenames of the prediction site shapefiles (without the .shp
-#' extension) specified in \code{predpts}. See
-#' \code{\link[SSN2]{ssn_import}} for a detailed description of
-#' the prediction dataset format within the \code{SSN} class object.
+#' \code{ssn_glm} which contains the new prediction dataset.
 #'
-#' @references
-#' Kattwinkel, M., Szocs, E., Peterson, E., and Schafer,
-#' R.B. (2020) Preparing GIS data for analysis of stream monitoring
-#' data: The R package openSTARS. \emph{PLOS One} \bold{15(9)},
-#' e0239237.
-#' Peterson, E., and Ver Hoef, J.M. (2014) STARS: An
-#' ArcGIS toolset used to calculate the spatial information needed
-#' to fit spatial statistical stream network models to stream
-#' network data. \emph{Journal of Statistical Software}
-#' \bold{56(2)}, 1--17.
#' @export #' @examples #' ## Create local temporary copy of MiddleFork04.ssn found in @@ -66,40 +51,85 @@ #' overwrite = TRUE #' ) #' -#' ## Import pred1km prediction dataset into SSN object -#' mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn")) -#' mf04p <- ssn_import_predpts(mf04p, predpts = "pred1km") +#' ## Import pred1km prediction dataset into SSN object and assign the +#' ## name preds1 +#' mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"), +#' overwrite = TRUE) +#' mf04p <- ssn_import_predpts(mf04p, predpts = c(preds1 = "pred1km")) #' names(mf04p$preds) #' -#' ## Import pred1km prediction dataset into a ssn_glm object +#' ## Import CapeHorn prediction dataset into a ssn_glm object, using +#' ## the default file basename as the name #' ssn_gmod <- ssn_glm(Summer_mn ~ netID, mf04p, #' family = "Gamma", #' tailup_type = "exponential", additive = "afvArea" #' ) #' ssn_gmod <- ssn_import_predpts(ssn_gmod, predpts = "CapeHorn") #' names(ssn_gmod$ssn.object$preds) -ssn_import_predpts <- function(x, predpts, - format_additive = FALSE, names_additive = NULL) { +#' +ssn_import_predpts <- function(x, predpts) { obj.type <- class(x) old_wd <- getwd() on.exit(setwd(old_wd)) + ################################################################# + ## Check format of predpts + ################################################################ + + ## If names are provided, use them + if (is.vector(predpts) & !is.null(names(predpts))) { + p.names <- names(predpts) + } + + ## If no names provided assign based on name of file without extension + if (is.vector(predpts) & is.null(names(predpts))) { + p.names <- NULL + + for (d in 1:length(predpts)) { + shp.ext <- substr( + predpts[d], nchar(predpts[d]) - 3, + nchar(predpts[d]) + ) == ".shp" + gpkg.ext <- substr( + predpts[d], nchar(predpts[d]) - 4, + nchar(predpts[d]) + ) == ".gpkg" + + p.names[d] <- ifelse( + shp.ext == TRUE, substr(predpts[d], 1, nchar(predpts[d]) - 4), + ifelse(gpkg.ext == TRUE, + substr(predpts[d], 1, nchar(predpts[d]) - 5), + ifelse(shp.ext == FALSE & gpkg.ext == FALSE, + predpts[d] + ) + ) + ) + } + } + + ## Remove path to predpts files, if included + predpts <- basename(predpts) + + ################################################ + + ## For fitted model objects- check if predpts already exists if (obj.type %in% c("ssn_lm", "ssn_glm")) { setwd(x$ssn.object$path) - count <- 0 - - if (length(x$ssn.object$preds) > 0) { - for (m in seq_len(length(x$ssn.object$preds))) { - if (names(x$ssn.object$preds[m]) == predpts) { - pred.num <- m - count <- count + 1 - } - } - } + ## count <- 0 - if (count > 0) { + ## if (length(x$ssn.object$preds) > 0) { + ## for (m in seq_len(length(x$ssn.object$preds))) { + ## if (names(x$ssn.object$preds[m]) == p.names) { + ## pred.num <- m + ## count <- count + 1 + ## } + ## } + ## } + + ## if (count > 0) { + if(p.names %in% names(x$ssn.object$preds)) { stop("Fitted model object already contains predpoints named ", predpts) } } @@ -107,84 +137,46 @@ ssn_import_predpts <- function(x, predpts, ## For SSN objects - check if predpts already exits if (obj.type == "SSN") { setwd(x$path) - count <- 0 - - if (length(x$preds) > 0) { - for (m in seq_len(length(x$preds))) { - if (names(x$preds[m]) == predpts) { - pred.num <- m - count <- count + 1 - } - } - } + ## count <- 0 + + ## if (length(x$preds) > 0) { + ## for (m in seq_len(length(x$preds))) { + ## if (names(x$preds[m]) == p.names) { + ## pred.num <- m + ## count <- count + 1 + ## } + ## } + ## } - if (count > 0) { + ## if (count > 0) { + 
if (p.names %in% names(x$preds)) {
      stop("SSN already contains a prediction dataset named ", predpts)
    }
  }

-  if (file.exists(paste(predpts, ".shp", sep = "")) == 0) {
-    stop(paste(predpts, ".shp data is missing from ", old_wd,
-      " folder",
-      sep = ""
-    ))
-  }
-
-  ## Read in prediction point shapefile
-  predpoints <- st_read(paste0(predpts, ".shp"), quiet = TRUE)
+  ## Import predpts as sf object
+  predpoints <- get_sf_obj(predpts)

  ## Check geometry type
  if (sum(st_geometry_type(predpoints, by_geometry = TRUE) == "POINT") != nrow(predpoints)) {
    stop(paste0(predpts, " does not have POINT geometry"))
  }

-  if (format_additive == TRUE) {
-    if (is.null(names_additive)) {
-      stop("names_additive is required when format_additive = TRUE")
-    }
-    for (q in seq_len(length(names_additive))) {
-      if (!names_additive[q] %in% colnames(predpoints)) {
-        warning(paste0(names_additive[q], " is not found in ", predpts), call. = FALSE)
-      } else {
-        ## Extract afv column
-        tmp.col <- st_drop_geometry(predpoints[, names_additive[q]])
-        ## convert to character if column is numeric
-        if (!inherits(tmp.col[, 1], "numeric")) {
-          message(paste0(names_additive[q], " is not numeric. AFV values were not modified"))
-        } else {
-          tmp.col2 <- formatC(tmp.col[, 1],
-            digits = 254, format = "f",
-            drop0trailing = TRUE
-          )
-          predpoints[, names_additive[q]] <- tmp.col2
-        }
-      }
-    }
-  }
-
-  ## Add netgeom column
-  predpoints[, "netgeom"] <- paste0("SNETWORK (", paste(
-    predpoints$netID,
-    predpoints$rid,
-    predpoints$upDist,
-    predpoints$ratio,
-    predpoints$pid,
-    predpoints$locID
-  ),
-  ")",
-  sep = ""
-  )
+  ## Add network geometry column
+  predpoints <- create_netgeom(predpoints, type = "POINT",
+                               overwrite = TRUE)

  ## Put prediction points in SSN object
  if (obj.type == "SSN") {
    pred.num <- length(x$preds)
    x$preds[[pred.num + 1]] <- predpoints
-    names(x$preds)[[pred.num + 1]] <- predpts
+    names(x$preds)[[pred.num + 1]] <- p.names
  }

  ## Put prediction points in fitted model object
  if (obj.type %in% c("ssn_glm", "ssn_lm")) {
    pred.num <- length(x$ssn.object$preds)
    x$ssn.object$preds[[pred.num + 1]] <- predpoints
-    names(x$ssn.object$preds)[[pred.num + 1]] <- predpts
+    names(x$ssn.object$preds)[[pred.num + 1]] <- p.names
  }
  return(x)
}
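
A quick sketch of the naming logic and the corrected duplicate-name guard above; the `cape` name is illustrative, not part of the package:

```r
library(SSN2)
copy_lsn_to_temp()
mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"), overwrite = TRUE)

## A named length-1 vector controls the element name in the preds list;
## plain predpts = "CapeHorn" would fall back to the file basename
mf04p <- ssn_import_predpts(mf04p, predpts = c(cape = "CapeHorn"))
names(mf04p$preds)
#> [1] "cape"

## Importing another dataset under an existing name now errors, because
## the duplicate check compares p.names against names(x$preds):
## ssn_import_predpts(mf04p, predpts = c(cape = "pred1km"))
```
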
diff --git a/R/ssn_names.R b/R/ssn_names.R
new file mode 100644
index 0000000..dbaf0ed
--- /dev/null
+++ b/R/ssn_names.R
@@ -0,0 +1,44 @@
+#' Return names of data in an SSN object
+#'
+#' @description Extract and print column names from the \code{obs}
+#' and \code{preds} elements of an SSN object.
+#'
+#' @param ssn.object An \code{SSN} object.
+#'
+#' @return Variable names are printed to the console.
+#'
+#' @name ssn_names
+#' @export
+ssn_names <- function(ssn.object) {
+  if (identical(ssn.object$obs, NA)) {
+    no <- 0
+    nameso <- NULL
+  } else {
+    # obs is present
+    no <- 1
+    nameso <- names(ssn.object$obs)
+  }
+  np <- length(ssn.object$preds)
+  nt <- no + np
+  if (nt == 0) {
+    return(cat("There are no observed or prediction data and hence no variable names."))
+  }
+
+  namesList <- vector("list", nt)
+  if (no == 1) {
+    namesList[[1]] <- nameso
+    names4List <- "obs"
+  } else {
+    names4List <- NULL
+  }
+  if (np > 0) {
+    prednames <- names(ssn.object$preds)
+    for (i in seq_len(np)) {
+      namespi <- names(ssn.object$preds[[i]])
+      names4List <- c(names4List, prednames[i])
+      namesList[[i + no]] <- namespi
+    }
+  }
+  names(namesList) <- names4List
+  print(namesList)
+}
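
A short sketch of the new helper in use; the output is a named list holding one character vector of column names for `obs` and one per prediction dataset:

```r
library(SSN2)
copy_lsn_to_temp()
mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"),
  predpts = "pred1km", overwrite = TRUE
)

## Prints (and invisibly returns) the list of column names
ssn_names(mf04p)
```
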
diff --git a/R/ssn_split_predpts.R b/R/ssn_split_predpts.R
index cb52699..e3f753c 100644
--- a/R/ssn_split_predpts.R
+++ b/R/ssn_split_predpts.R
@@ -9,7 +9,7 @@
 #'
 #' @param ssn An \code{SSN} object.
 #' @param predpts A character string representing the name of the
-#' prediction dataset
+#' prediction dataset.
 #' @param size_predpts numeric value representing the size of the new
 #' prediction sets. The existing prediction set is split equally to
 #' produce multiple prediction sets of this size
@@ -28,20 +28,20 @@
 #' levels should be dropped in the \code{by} column when the new
 #' prediction dataset(s) are created. Default is \code{FALSE}
 #' @param overwrite logical indicating whether the new prediction
-#' dataset shapefile should be deleted in the .ssn directory if it
+#' dataset geopackage should be deleted in the .ssn directory if it
 #' already exists. Default = \code{FALSE}
 #'
 #' @details Three methods have been provided to split prediction sets:
-#' \code{size_predpts}, \code{by}, and \code{subset}. The
-#' \code{size_predpts} method is used to split the existing prediction
-#' set into multiple equally-sized prediction sets. Note that the
+#' 'size', 'by', and 'subset'. The 'size' method splits the existing
+#' prediction set into multiple equally-sized prediction sets using
+#' the \code{size_predpts} argument. Note that the
 #' final prediction set may be smaller in size than the others if
 #' the total number of predictions is not evenly divisible by
-#' \code{size_predpts}. The \code{by} method is used if the prediction
+#' \code{size_predpts}. The 'by' method is used if the prediction
 #' set is to be split into multiple new prediction sets based on an
 #' existing column of type factor, integer, character, or
-#' logical. The \code{subset} method is used to create one new
-#' prediction set based on a logical expression.
+#' logical, specified using the \code{by} argument. The 'subset'
+#' method is used to create one new prediction set based on a
+#' logical expression defined in \code{subset}.
 #'
 #' When more than one prediction dataset is created the prediction
 #' dataset names will be appended with a hyphen and prediction
@@ -61,7 +61,7 @@
 #' predictions can be made.
 #'
 #' @return returns the \code{SSN} specified in \code{ssn}, with one or more new prediction
-#' sets. Shapefiles of the new prediction sets are written to the
+#' sets. Geopackages of the new prediction sets are written to the
 #' .ssn directory designated in ssn$path.
 #'
 #' @name ssn_split_predpts
 #' @export
 #' @examples
 #' ## Import SSN object
 #' copy_lsn_to_temp() ## Only needed for this example
 #' ssn <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"),
-#'   predpts = c("pred1km.shp", "Knapp", "CapeHorn"),
+#'   predpts = c("pred1km", "CapeHorn"),
 #'   overwrite = TRUE
 #' )
 #'
-#' ## Split predictions into size_predpts 200
+#' ## Split predictions based on 'size' method
 #' ssn1 <- ssn_split_predpts(ssn, "CapeHorn",
 #'   size_predpts = 200,
 #'   keep = FALSE, overwrite = TRUE
@@ -82,7 +82,7 @@
 #' names(ssn1$preds)
 #' nrow(ssn1$preds[["CapeHorn-1"]])
 #'
-#' ## Split predictions using by method
+#' ## Split predictions using 'by' method
 #' ssn$preds$pred1km$net.fac <- as.factor(ssn$preds$pred1km$netID)
 #' ssn2 <- ssn_split_predpts(ssn, "pred1km",
 #'   by = "net.fac",
@@ -90,7 +90,7 @@
 #' )
 #' names(ssn2$preds)
 #'
-#' ## Split predictions using subset method
+#' ## Split predictions using 'subset' method
 #' ssn3 <- ssn_split_predpts(ssn, "pred1km",
 #'   subset = ratio > 0.5,
 #'   id_predpts = "RATIO_05", overwrite = TRUE
@@ -178,7 +178,7 @@ ssn_split_predpts <- function(ssn, predpts, size_predpts, by,
    ## Recreate netgeom
    ind <- colnames(tmp) == "netgeom"
    tmp <- tmp[, !ind]
-    tmp <- create_netgeom(tmp, "point")
+    tmp <- create_netgeom(tmp, "POINT", overwrite = TRUE)

    ## Replace predpts with updated data.frame
    ssn$preds[[predpts]] <- tmp
@@ -215,18 +215,18 @@

      ## write subset of prediction points to file
-      fe.old <- file.exists(paste0(ssn$path, "/", id_predpts2, ".shp"))
+      fe.old <- file.exists(paste0(ssn$path, "/", id_predpts2, ".gpkg"))
      if (overwrite == TRUE & fe.old == TRUE) {
-        st_delete(paste0(ssn$path, "/", id_predpts2, ".shp"),
+        st_delete(paste0(ssn$path, "/", id_predpts2, ".gpkg"),
          quiet = TRUE
        )
      }
      if (overwrite == FALSE & fe.old == TRUE) {
-        stop(paste0("overwrite = FALSE and ", id_predpts2, ".shp already exists"))
+        stop(paste0("overwrite = FALSE and ", id_predpts2, ".gpkg already exists"))
      }

-      ## Write shapefile
-      st_write(sub.data, paste0(id_predpts2, ".shp"), quiet = TRUE)
+      ## Write geopackage
+      st_write(sub.data, paste0(id_predpts2, ".gpkg"), quiet = TRUE)

      ## Add to existing ssn
      new.index <- length(ssn$preds) + 1
@@ -253,18 +253,18 @@
      sub.data <- ssn$preds[[predpts]][values, ]

      ## write subset of prediction points to file
-      fe.old <- file.exists(paste0(ssn$path, "/", id_predpts, ".shp"))
+      fe.old <- file.exists(paste0(ssn$path, "/", id_predpts, ".gpkg"))
      if (overwrite == TRUE & fe.old == TRUE) {
-        st_delete(paste0(ssn$path, "/", id_predpts, ".shp"),
+        st_delete(paste0(ssn$path, "/", id_predpts, ".gpkg"),
          quiet = TRUE
        )
      }
      if (overwrite == FALSE & fe.old == TRUE) {
-        stop(paste0("overwrite = FALSE and ", id_predpts, ".shp already exists"))
+        stop(paste0("overwrite = FALSE and ", id_predpts, ".gpkg already exists"))
      }

      ## write subset of prediction points to file
-      st_write(sub.data, paste0(id_predpts, ".shp"), quiet = TRUE)
+      st_write(sub.data, paste0(id_predpts, ".gpkg"), quiet = TRUE)

      ## Add to existing ssn
      new.index <- length(ssn$preds) + 1
@@ -306,18 +306,18 @@
    }

    ## write subset of prediction points to file
-    fe.old <- file.exists(paste0(ssn$path, "/", id_predpts, ".shp"))
+    fe.old <- file.exists(paste0(ssn$path, "/", id_predpts, ".gpkg"))
    if (overwrite == TRUE & fe.old == TRUE) {
-      st_delete(paste0(ssn$path, "/", id_predpts, ".shp"),
+      st_delete(paste0(ssn$path, "/",
id_predpts, ".gpkg"), quiet = TRUE ) } if (overwrite == FALSE & fe.old == TRUE) { - stop(paste0("overwrite = FALSE and ", id_predpts, ".shp already exists")) + stop(paste0("overwrite = FALSE and ", id_predpts, ".gpkg already exists")) } ## write subset of prediction points to file - st_write(sub.data, paste0(id_predpts, ".shp"), quiet = TRUE) + st_write(sub.data, paste0(id_predpts, ".gpkg"), quiet = TRUE) ## Add to existing ssn new.index <- length(ssn$preds) + 1 diff --git a/R/ssn_subset.R b/R/ssn_subset.R index 4e23b8e..5a7b4aa 100644 --- a/R/ssn_subset.R +++ b/R/ssn_subset.R @@ -45,7 +45,7 @@ #' ## Import SSN object #' copy_lsn_to_temp() ## Only needed for this example #' mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"), -#' predpts = c("pred1km.shp", "Knapp"), +#' predpts = "pred1km", #' overwrite = TRUE #' ) #' @@ -63,6 +63,12 @@ #' overwrite = TRUE #' ) ssn_subset <- function(ssn, path, subset, clip = FALSE, overwrite = FALSE) { + ## Add .ssn extension if necessary + if (substr(path, nchar(path) - 3, nchar(path)) != ".ssn") { + print(paste0("path must include .ssn extension. ", path, " updated to ", paste0(path, ".ssn"))) + path <- paste0(path, ".ssn") + } + file <- path suppressWarnings({ @@ -140,8 +146,8 @@ ssn_subset <- function(ssn, path, subset, clip = FALSE, overwrite = FALSE) { rm(ind, ind2) } - ## Write sites shapefile - st_write(ssn.tmp$obs, paste0(file, "/sites.shp"), quiet = TRUE) + ## Write sites to file + st_write(ssn.tmp$obs, paste0(file, "/sites.gpkg"), quiet = TRUE) if (clip == FALSE) { @@ -172,7 +178,6 @@ ssn_subset <- function(ssn, path, subset, clip = FALSE, overwrite = FALSE) { ## Subset edges ind.edges <- eval(substitute(subset), ssn.tmp$edges) - ## ind.edges<- eval(substitute(netID == 2), ssn.tmp$edges) ind.na <- is.na(ind.edges) ind.edges[ind.na] <- FALSE rm(ind.na) @@ -201,7 +206,7 @@ ssn_subset <- function(ssn, path, subset, clip = FALSE, overwrite = FALSE) { edges.sub <- ssn.tmp$edges[ind.edges, ] ## Save subset of edges - st_write(edges.sub, paste0(ssn.tmp$path, "/edges.shp"), quiet = TRUE) + st_write(edges.sub, paste0(ssn.tmp$path, "/edges.gpkg"), quiet = TRUE) ## Subset prediction points @@ -220,7 +225,6 @@ ssn_subset <- function(ssn, path, subset, clip = FALSE, overwrite = FALSE) { } ind.preds <- eval(substitute(subset), ssn.tmp$preds[[pred.name]]) - ## ind.preds <- eval(substitute(netID == 2), ssn.tmp$preds[[pred.name]]) ind.na <- is.na(ind.preds) ind.preds[ind.na] <- FALSE rm(ind.na) @@ -244,7 +248,7 @@ ssn_subset <- function(ssn, path, subset, clip = FALSE, overwrite = FALSE) { } ## Subset predictions preds.sub <- ssn.tmp$preds[[pred.name]][ind.preds, ] - st_write(preds.sub, paste0(ssn.tmp$path, "/", pred.name, ".shp"), + st_write(preds.sub, paste0(ssn.tmp$path, "/", pred.name, ".gpkg"), quiet = TRUE ) rm(preds.sub) @@ -260,7 +264,7 @@ ssn_subset <- function(ssn, path, subset, clip = FALSE, overwrite = FALSE) { ind.dup <- !duplicated(edges.sub$netID) netID.list <- edges.sub$netID[ind.dup] - # copy netID files + ## copy netID files for (i in seq_len(length(netID.list))) { fn.old <- file.path(ssn$path, paste("netID", netID.list[i], ".dat", sep = "")) fn.new <- file.path(ssn.tmp$path, paste("netID", netID.list[i], ".dat", sep = "")) diff --git a/R/ssn_write.R b/R/ssn_write.R index fbf9f58..e0c2d25 100644 --- a/R/ssn_write.R +++ b/R/ssn_write.R @@ -14,7 +14,9 @@ #' #' @return{ssn_write} creates an .ssn directory that contains the #' spatial, topological, and attribute information stored in the -#' original \code{SSN} object. 
When \code{import = TRUE}, the
+#' original \code{SSN} object. Spatial datasets found in the
+#' \code{SSN} object (e.g. edges, obs, and prediction sites) are
+#' saved in GeoPackage format. When \code{import = TRUE}, the
 #' \code{SSN} object is imported and returned.
 #'
 #' @export
@@ -24,12 +26,15 @@
 #' copy_lsn_to_temp()
 #' ## Import SSN object with prediction sites
 #' mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"),
-#'   predpts = c("pred1km.shp"),
+#'   predpts = "pred1km",
 #'   overwrite = TRUE
 #' )
 #'
 #' ## Write SSN to new .ssn directory
-#' ssn_write(mf04p, path = paste0(tempdir(), "/tempSSN.ssn"))
+#' ssn_write(mf04p,
+#'   path = paste0(tempdir(), "/tempSSN.ssn"),
+#'   overwrite = TRUE
+#' )
 #'
 #' ## Write SSN to .ssn directory and return SSN object
 #' tempSSN <- ssn_write(mf04p, path = paste0(
@@ -38,33 +43,51 @@
 #' ), overwrite = TRUE, import = TRUE)
 ssn_write <- function(ssn, path, overwrite = FALSE,
                      copy_dist = FALSE, import = FALSE) {
-  file <- path
+  ## Add .ssn extension if necessary
+  if (substr(path, nchar(path) - 3, nchar(path)) != ".ssn") {
+    print(paste0(
+      "path must include .ssn extension. ",
+      path, " updated to ", paste0(path, ".ssn")
+    ))
+    path <- paste0(path, ".ssn")
+  }

  suppressWarnings({
-    if (!file.exists(file)) {
-      dir.create(file)
+    if (!file.exists(path)) {
+      dir.create(path)
    } else {
-      if (overwrite == FALSE) stop("file exists and overwrite = FALSE")
+      if (overwrite == FALSE) stop(paste0(path, " exists and overwrite = FALSE"))
      if (overwrite == TRUE) {
-        unlink(file, recursive = TRUE)
-        dir.create(file)
+        unlink(path, recursive = TRUE)
+        dir.create(path)
      }
    }

    old_wd <- getwd()
    on.exit(setwd(old_wd))
-    setwd(file)
+    setwd(path)

-    ## Get list of files NOT associated with shapefiles
+    #######################################################################
+    ## Get vector of filenames not associated with shapefiles or geopackages
    ssn.tmp <- ssn
    pred.len <- length(ssn.tmp$preds)
    ssn.tmp$path <- getwd()

    ssn.files <- list.files(ssn$path)
+
+    ## Remove binaryID.db from the vector because it must be recreated
+    ## on import anyway
+    if (import == TRUE) {
+      ind.bid <- ssn.files == "binaryID.db"
+      ssn.files <- ssn.files[-c(which(ind.bid))]
+    }
+
+    ind.gpkg <- substr(ssn.files, nchar(ssn.files) - 4, nchar(ssn.files)) == ".gpkg"
    ind.shp <- substr(ssn.files, nchar(ssn.files) - 3, nchar(ssn.files)) == ".shp"

-    sub.list <- substr(ssn.files[ind.shp], 1, nchar(ssn.files[ind.shp]) - 4)
+    sub.list <- substr(ssn.files[ind.gpkg], 1, nchar(ssn.files[ind.gpkg]) - 5)
+    sub.list <- c(sub.list, substr(ssn.files[ind.shp], 1, nchar(ssn.files[ind.shp]) - 4))

    ind.list <- vector(mode = "logical", length = length(ssn.files))

@@ -72,7 +95,8 @@
      tmp <- unlist(strsplit(ssn.files[j], "[.]"))
      if ((tmp[1] %in% sub.list) == TRUE) ind.list[j] <- TRUE
    }
-    ssn.files <- ssn.files[!ind.list]
+
+    ssn.files <- ssn.files[!ind.list]

    ## Copy files to new .ssn directory
    for (i in seq_len(length(ssn.files))) {
@@ -91,11 +115,11 @@

    ## Copy observed sites if they exist
    if (class(ssn.tmp$obs)[1] == c("sf")) {
-      st_write(ssn$obs, paste0(ssn.tmp$path, "/sites.shp"), quiet = TRUE)
+      st_write(ssn$obs, paste0(ssn.tmp$path, "/sites.gpkg"), quiet = TRUE)
    }

    ## Copy edges
-    st_write(ssn$edges, paste0(ssn.tmp$path, "/edges.shp"), quiet = TRUE)
+    st_write(ssn$edges, paste0(ssn.tmp$path, "/edges.gpkg"), quiet = TRUE)

    ## Copy prediction sites
    if (pred.len > 0) {
@@ -108,7 +132,7 @@ ssn_write <- function(ssn, path, overwrite = FALSE,
      pred.name <- pred.name.vec[i]
      st_write(ssn$preds[[pred.name]], paste0(
        ssn.tmp$path, "/",
-        pred.name, ".shp"
+        pred.name, ".gpkg"
      ),
      quiet = TRUE
      )
@@ -118,12 +142,12 @@

    ## Import SSN without prediction sites
    if (import == TRUE & pred.len == 0) {
-      ssn.tmp2 <- ssn_import(ssn.tmp$path, overwrite = FALSE)
+      ssn.tmp2 <- ssn_import(ssn.tmp$path, overwrite = TRUE)
      return(ssn.tmp2)
    }
    ## Import SSN with all prediction sites
    if (import == TRUE & pred.len > 0) {
-      ssn.tmp2 <- ssn_import(ssn.tmp$path, overwrite = FALSE)
+      ssn.tmp2 <- ssn_import(ssn.tmp$path, overwrite = TRUE)
      for (j in seq_len(pred.len)) {
        ssn.tmp2 <- ssn_import_predpts(ssn.tmp2, pred.name.vec[j])
      }
diff --git a/R/tidy.R b/R/tidy.R
index f6b2744..1460542 100644
--- a/R/tidy.R
+++ b/R/tidy.R
@@ -42,7 +42,6 @@
 #' tidy(ssn_mod)
 tidy.ssn_lm <- function(x, conf.int = FALSE,
                        conf.level = 0.95, effects = "fixed", ...) {
-
  if (conf.int && (conf.level < 0 || conf.level > 1)) {
    stop("conf.level must be between 0 and 1.", call. = FALSE)
  }
@@ -165,7 +164,6 @@
 #' @export
 tidy.ssn_glm <- function(x, conf.int = FALSE,
                         conf.level = 0.95, effects = "fixed", ...) {
-
  if (conf.int && (conf.level < 0 || conf.level > 1)) {
    stop("conf.level must be between 0 and 1.", call. = FALSE)
  }
diff --git a/R/transform_anis.R b/R/transform_anis.R
index cbc7fd6..7aa6b29 100644
--- a/R/transform_anis.R
+++ b/R/transform_anis.R
@@ -1,3 +1,11 @@
+#' Perform anisotropy transformation for Euclidean distance
+#'
+#' @param xcoord_val x-coordinate
+#' @param ycoord_val y-coordinate
+#' @param rotate Anisotropy rotation parameter
+#' @param scale Anisotropy scale parameter
+#'
+#' @noRd
 transform_anis <- function(xcoord_val, ycoord_val, rotate, scale) {
  rotate_clockwise <- matrix(c(cos(rotate), sin(rotate), -sin(rotate), cos(rotate)), nrow = 2, ncol = 2, byrow = TRUE)
  scale_yaxis <- matrix(c(1, 0, 0, 1 / scale), nrow = 2, ncol = 2, byrow = TRUE)
@@ -6,6 +14,14 @@
  list(xcoord_val = new_coords[1, ], ycoord_val = new_coords[2, ])
 }

+#' Perform inverse anisotropy transformation for Euclidean distance
+#'
+#' @param xcoord_val x-coordinate
+#' @param ycoord_val y-coordinate
+#' @param rotate Anisotropy rotation parameter
+#' @param scale Anisotropy scale parameter
+#'
+#' @noRd
 transform_anis_inv <- function(xcoord_val, ycoord_val, rotate, scale) {
  rotate_clockwise_inv <- matrix(c(cos(rotate), -sin(rotate), sin(rotate), cos(rotate)), nrow = 2, ncol = 2, byrow = TRUE)
  scale_yaxis_inv <- matrix(c(1, 0, 0, scale), nrow = 2, ncol = 2, byrow = TRUE)
diff --git a/R/use_gloglik.R b/R/use_gloglik.R
index ccba71f..5dde5bd 100644
--- a/R/use_gloglik.R
+++ b/R/use_gloglik.R
@@ -1,3 +1,11 @@
+#' Get covariance parameter output using log likelihood
+#'
+#' @param initial_object Initial value object
+#' @param data_object Data object
+#' @param estmethod Estimation method
+#' @param optim_dotlist Additional optim arguments
+#'
+#' @noRd
 use_gloglik <- function(initial_object, data_object, estmethod, optim_dotlist) {
  # take original parameter values and put them on optim scale
  orig2optim_object <- orig2optim(initial_object)
@@ -73,6 +81,13 @@ use_gloglik <- function(initial_object, data_object, estmethod, optim_dotlist) {
+#' Get covariance parameter output from log likelihood when all parameters are known
+#'
+#' @param initial_object Initial value object
+#' @param data_object Data object
+#' @param estmethod Estimation
method +#' +#' @noRd use_gloglik_known <- function(initial_object, data_object, estmethod) { # all parameters known diff --git a/R/use_laploglik.R b/R/use_laploglik.R index 82d06bb..f71061f 100644 --- a/R/use_laploglik.R +++ b/R/use_laploglik.R @@ -1,3 +1,11 @@ +#' Get covariance parameter output using Laplace log likelihood (for glms) +#' +#' @param initial_object Initial value object +#' @param data_object Data object +#' @param estmethod Estimation method +#' @param optim_dotlist Additional optim arguments +#' +#' @noRd use_laploglik <- function(initial_object, data_object, estmethod, optim_dotlist) { orig2optim_object <- orig2optim_glm(initial_object) @@ -67,7 +75,13 @@ use_laploglik <- function(initial_object, data_object, estmethod, optim_dotlist) - +#' Get covariance parameter output from Laplace log likelihood (for glms) when all parameters are known +#' +#' @param initial_object Initial value object +#' @param data_object Data object +#' @param estmethod Estimation method +#' +#' @noRd use_laploglik_known <- function(initial_object, data_object, estmethod) { params_object <- get_params_object_glm_known(initial_object) diff --git a/R/utils.R b/R/utils.R index e78bb5e..30af5aa 100644 --- a/R/utils.R +++ b/R/utils.R @@ -40,7 +40,11 @@ spmodel::randcov_params #' @export spmodel::varcomp -# logit function +#' Logit function +#' +#' @param x Vector to take logit of +#' +#' @noRd logit <- function(x) { if (x < 0 | x > 1) { stop("logit argument must be between zero and one", call. = FALSE) @@ -48,17 +52,28 @@ logit <- function(x) { log(x / (1 - x)) } -# expit function +#' Inverse logit (expit) function +#' +#' @param x Vector to take inverse logit of +#' +#' @noRd expit <- function(x) { 1 / (1 + exp(-x)) } -# remove type class +#' Helper to remove type class prefix from certain class strings +#' +#' @param class_string +#' +#' @noRd remove_covtype <- function(class_string) { sub("^[^_]*_", "", class_string) } -# CRAN release questions + +#' CRAN release questions +#' +#' @noRd release_questions <- function() { c( "Have you changed version numbers in DESCRIPTION, CITATION, and README?", diff --git a/README.md b/README.md index f515ffc..3c68258 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![cran checks](https://badges.cranchecks.info/worst/SSN2.svg)](https://cran.r-project.org/web/checks/check_results_SSN2.html) [![R-CMD-check](https://github.com/USEPA/SSN2/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/USEPA/SSN2/actions/workflows/R-CMD-check.yaml) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) +[![status](https://joss.theoj.org/papers/66fd932526762f8ccd8bd9c3954e0e3d/status.svg)](https://joss.theoj.org/papers/66fd932526762f8ccd8bd9c3954e0e3d) # SSN2: Spatial Modeling on Stream Networks @@ -24,7 +25,7 @@ citation(package = "SSN2") #> #> To cite SSN2 in publications use: #> -#> Dumelle M, Peterson, E, Ver Hoef JM, Pearse A, Isaak D (2023). SSN2: Spatial Modeling on Stream Networks in R. R package version 0.1.0 +#> Dumelle M, Peterson, E, Ver Hoef JM, Pearse A, Isaak D (2023). SSN2: Spatial Modeling on Stream Networks in R. R package version 0.2.0 #> #> A BibTeX entry for LaTeX users is #> @@ -32,13 +33,13 @@ citation(package = "SSN2") #> title = {{SSN2}: Spatial Modeling on Stream Networks in {R}}, #> author = {Michael Dumelle and Erin Peterson and Jay M. 
{Ver Hoef} and Alan Pearse and Dan Isaak}, #> year = {2023}, -#> note = {{R} package version 0.1.0}, +#> note = {{R} package version 0.2.0}, #> } ``` ## Statement of Need -Streams provide vital aquatic services that sustain wildlife, provide drinking and irrigation water, and support recreational and cultural activities. Data are often collected at various locations on a stream network and used to characterize some scientific phenomenon in the stream. Spatial stream network (SSN) models use a spatial statistical modeling framework to describe unique and complex dependencies on a stream network resulting from a branching network structure, directional water flow, and differences in flow volume. SSN models relate a response variable to one or more explanatory variables, a spatially independent error term (i.e., nugget), and up to three spatially dependent error terms: tail-down errors, tail-up errors, and Euclidean errors. Tail-down errors restrict spatial dependence to flow-connected sites (i.e., water flows from an upstream to a downstream site) and incorporate spatial weights (i.e., additive function) to describe the branching network between them. Tail-up errors describe spatial dependence between both flow-connected and flow-unconnected (i.e., sites that share a common downstream junction but not flow) sites, but spatial weights are not required. Euclidean errors describe spatial dependence between sites based on Euclidean distance and are governed by factors not confined to the stream network like regional geology. +Streams provide vital aquatic services that sustain wildlife, provide drinking and irrigation water, and support recreational and cultural activities. Data are often collected at various locations on a stream network and used to characterize some scientific phenomenon in the stream. Spatial stream network (SSN) models use a spatial statistical modeling framework to describe unique and complex dependencies on a stream network resulting from a branching network structure, directional water flow, and differences in flow volume. SSN models relate a response variable to one or more explanatory variables, a spatially independent error term (i.e., nugget), and up to three spatially dependent error terms: tail-down errors, tail-up errors, and Euclidean errors. Tail-down errors restrict spatial dependence to flow-connected sites (i.e., water flows from an upstream to a downstream site) and incorporate spatial weights (i.e., additive function) to describe the branching network between them. Tail-up errors describe spatial dependence between both flow-connected and flow-unconnected (i.e., sites that share a common downstream junction but not flow) sites, but spatial weights are not required. Euclidean errors describe spatial dependence between sites based on Euclidean distance and are governed by factors not confined to the stream network like regional geology. The `SSN2` **R** package is designed to help users fit SSN models to their stream network data. ## Installation Instructions @@ -73,6 +74,18 @@ remotes::install_github("USEPA/SSN2", ref = "develop") library(SSN2) ``` +## Contributing to `SSN2` + +We encourage users to report bugs and/or contribute to `SSN2`. For more detail on how to do this, please see our contributing guide (`CONTRIBUTING.md`). + +## Getting Help + +There are several ways to get help with `SSN2`: + +1. Open a GitHub issue [link here](https://github.com/USEPA/SSN2/issues). +2. 
Email the SSN support team (support@spatialstreamnetworks.com or Dumelle.Michael@epa.gov).
+3. Post on a support website like Stack Overflow or Cross Validated.
+
 ## Example Usage

 Below we provide a brief example showing how to use `SSN2`. For a thorough introduction to the software, see our introductory vignette [linked here](https://usepa.github.io/SSN2/articles/introduction.html). For a list of all functions available in `SSN2`, see our function reference [linked here](https://usepa.github.io/SSN2/reference/index.html).

@@ -199,14 +212,37 @@ head(preds)
 #> 6 15.12783 14.76358 15.49208
 ```

-## Contributing to `SSN2`
+## Imported Packages

-We encourage users to propose changes to `SSN2`. Please see our contributing guide (`CONTRIBUTING.md`) for more detail.
+`SSN2` imports the following **R** packages:

-## EPA Disclaimer
+* generics: For exporting generic functions.
+* graphics: For visualizations (e.g., `plot()`).
+* Matrix: For efficient matrix manipulations.
+* RSQLite: For various functions that read and write (e.g., `ssn_create_distmat()`).
+* sf: For handling spatial data.
+* spmodel: For various modeling functions (e.g., `randcov_initial()`) and generic functions (e.g., `loocv()`).
+* stats: For various modeling functions (e.g., `confint()`).
+* tibble: For creating tibbles as output for various functions (e.g., `tidy()`).
+* utils: For various utility functions.
+* withr: For path handling while reading and writing.

-The United States Environmental Protection Agency (EPA) GitHub project code is provided on an "as is" basis and the user assumes responsibility for its use. EPA has relinquished control of the information and no longer has responsibility to protect the integrity , confidentiality, or availability of the information. Any reference to specific commercial products, processes, or services by service mark, trademark, manufacturer, or otherwise, does not constitute or imply their endorsement, recommendation or favoring by EPA. The EPA seal and logo shall not be used in any manner to imply endorsement of any commercial product or activity by EPA or the United States Government.
+
+## Suggested Packages
+
+`SSN2` suggests the following **R** packages:
+
+* ggplot2: For vignette visualizations.
+* knitr: For vignette building.
+* rmarkdown: For vignette building.
+* sp: For making `SSN` objects from the `SSN` **R** package compatible with `SSN2`.
+* statmod: For modeling and simulation of inverse Gaussian data.
+* testthat: For unit testing.

 ## License

 This project is licensed under the GNU General Public License, [GPL-3](https://cran.r-project.org/web/licenses/GPL-3).
+
+## EPA Disclaimer
+
+The United States Environmental Protection Agency (EPA) GitHub project code is provided on an "as is" basis and the user assumes responsibility for its use. EPA has relinquished control of the information and no longer has responsibility to protect the integrity, confidentiality, or availability of the information. Any reference to specific commercial products, processes, or services by service mark, trademark, manufacturer, or otherwise, does not constitute or imply their endorsement, recommendation or favoring by EPA. The EPA seal and logo shall not be used in any manner to imply endorsement of any commercial product or activity by EPA or the United States Government.
+
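
Since this release moves all spatial output from shapefiles to GeoPackages, a minimal round-trip sketch of the new behavior may be useful; the paths and the `tempSSN.ssn` name mirror the package examples above:

```r
library(SSN2)

copy_lsn_to_temp()
mf04p <- ssn_import(paste0(tempdir(), "/MiddleFork04.ssn"),
  predpts = "pred1km", overwrite = TRUE
)

## ssn_write() now writes edges, sites, and prediction datasets as .gpkg;
## import = TRUE reads the new .ssn directory back in and returns it
tempSSN <- ssn_write(mf04p,
  path = paste0(tempdir(), "/tempSSN.ssn"),
  overwrite = TRUE, import = TRUE
)
list.files(tempSSN$path, pattern = "\\.gpkg$")
#> e.g. "edges.gpkg" "pred1km.gpkg" "sites.gpkg"
```
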
diff --git a/data/mf04p.rda b/data/mf04p.rda
index 5b6a052..288748b 100644
Binary files a/data/mf04p.rda and b/data/mf04p.rda differ
diff --git a/docs/404.html b/docs/404.html
index ae0e8d9..dbe33f7 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -7,66 +7,53 @@