Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable users to pass functions to readSource #222

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export(metadataGFZ)
export(pucAggregate)
export(putMadratMessage)
export(readSource)
export(readSourceAlternative)
export(redirectSource)
export(regionscode)
export(resetMadratMessages)
Expand Down
254 changes: 254 additions & 0 deletions R/readSourceAlternative.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
#' readSourceAlternative
#'
#' Read in a source file and convert it to a MAgPIE object. The function is a
#' wrapper for specific functions designed for the different possible source
#' types.
#'
#' @param read A function to read the data. Mandatory. Function name must start with 'read'. The
#' rest of the function name determines the 'type'.
#' @param correct A function to correct the data. Optional. Function name must start with 'correct'
#' and end with the type, same as for read.
#' @param covert A function to convert the data (putting it at country level). Optional. Function
#' name must start with 'convert' and end with the type, same as for read.
#' @param subtype A character string. For some sources there are subtypes of the source, for these
#' sources the subtype can be specified with this argument. If a source does not
#' have subtypes, subtypes should not be set.
#' @param subset A character string. Similar to \code{subtype} a source can also have \code{subsets}.
#' A \code{subsets} can be used to only read part of the data. This can in particular make sense
#' for huge data sets where reading in the whole data set might be impractical or even infeasible.
#' @export
readSourceAlternative <- function(read, correct = NULL, convert = NULL, subtype = NULL, subset = NULL) {

#' check passed functions follow the naming conventions
.checkNamingConventions <- function(read, correct, convert) {
if (!grepl("read", deparse(substitute(read)))) {
stop(read "does not following the naming convention: it must start with 'read'.")
SallyDa marked this conversation as resolved.
Show resolved Hide resolved
}
type <- .getType()

if (!is.null(correct)) {
if (!grepl("correct", deparse(substitute(correct)))) {
stop(correct "does not follow the naming convention: it must start with 'correct'.")
}
if (sub(".*correct", "", deparse(subsitute(correct))) != type) {
stop(correct "does not follow the naming convention: it must match" type)
}
}

if (!is.null(convert)) {
if (!grepl("convert", deparse(substitute(convert)))) {
stop(convert "does not follow the naming convention: it must start with 'convert'.")
}
if (sub(".*convert", "", deparse(subsitute(convert))) != type) {
stop(convert "does not follow the naming convention: it must match" type)
}
}
}

#' get string from function for caching consistency with readSource
.getType <- function(read) {
type <- sub(".*read", "", deparse(substitute(read)))
SallyDa marked this conversation as resolved.
Show resolved Hide resolved
return(type)
}

#' get string/bool from function(s) for caching consistency with readSource
.getPrefix <- function(convert, correct) {
if (!is.null(convert)) {prefix <- "convert"}
else if (!is.null(correct)) {prefix <- "correct"}
else {prefix <- "read"}
return(prefix)
}

#' get other arguments used for caching
.getArgs <- function(subtype, suset) {
args <- NULL
if (!is.null(subtype)) {args <- append(args, list(subtype = subtype))}
if (!is.null(subset)) {args <- append(args, list(subset = subset))}
return(args)
}

#' try to get from cache and check
.getFromCache <- function(prefix, type, args, subtype, subset) {
xList <- cacheGet(prefix = prefix, type = type, args = args)
if (!is.null(xList)) {
if (!is.list(xList)) {
xList <- list(x = xList, class = "magpie")
}
if (prefix == "convert" && magclass::is.magpie(xList$x)) {
fname <- paste0(prefix, type, "_", subtype, "_", subset)
err <- try(.testISO(magclass::getItems(xList$x, dim = 1.1), functionname = fname), silent = TRUE)
if ("try-error" %in% class(err)) {
vcat(2, " - cache file corrupt for ", fname, show_prefix = FALSE)
xList <- NULL
}
}
}
return(xList)
}

.tryNewDownload <- function() {
sourcefolder <- getSourceFolder(type, subtype = NULL)
# if any DOWNLOAD.yml exists use these files as reference,
# otherwise just check whether the sourcefolder exists
df <- dir(sourcefolder, recursive = TRUE, pattern = "DOWNLOAD.yml")
if (length(df) == 0) {
sourceAvailable <- dir.exists(sourcefolder)
} else {
sourcefile <- file.path(sourcefolder, "DOWNLOAD.yml")
sourcesubfile <- file.path(sourcefolder, make.names(subtype), "DOWNLOAD.yml")
sourceAvailable <- isTRUE(file.exists(sourcefile)) || isTRUE(file.exists(sourcesubfile))
}

if (!sourceAvailable) {
# does a routine exist to download the source data?
if (type %in% getSources(type = "download")) {
downloadCall <- prepFunctionName(type = type, prefix = "download")
downloadFunctionName <- sub("\\(.*$", "", downloadCall)
downloadFormals <- formals(eval(parse(text = downloadFunctionName)))
downloadHasSubtypeArg <- "subtype" %in% names(downloadFormals)

downloadSource(type = type, subtype = if (downloadHasSubtypeArg) subtype else NULL)
} else {
typesubtype <- paste0(paste(c(paste0('type = "', type), subtype), collapse = '" subtype = "'), '"')
stop("Sourcefolder does not contain data for the requested source ", typesubtype,
" and there is no download script which could provide the missing data. Please check your settings!")
}
}
}

.clean <- function(result) {
if (magclass::is.magpie(result$x)) {
result$x <- clean_magpie(result$x)
}
return(if (supplementary) result else result$x)
}

#' run the actual read/correct/convert function
.runWithLoggingandCaching <- function(x, callable, subtype, subset) {
sourcefolder <- getSourceFolder(type, subtype)

# find which arguments we have here can be used by our callable, if any
func_args <- names(formals(callable))
available_args <- c("subtype", "subset")[c(!is.null(subtype), !is.null(subset))]
selected_args <- intersect(func_args, available_args)
if (callable %in% c(correct, convert)) {
selected_args <- append(selected_args, "x")
}

# no arguments available/required
if (is.null(selected_args)) {
setWrapperActive("wrapperChecks")
withr::with_dir(sourcefolder, {
x <- withMadratLogging(callable())
})
setWrapperInactive("wrapperChecks")
cachePut(xList, prefix = prefix, type = type, args = args)
return(x)
}
# use available/required arguments
selected_values <- mget(selected_args)
setWrapperActive("wrapperChecks")
withr::with_dir(sourcefolder, {
x <- withMadratLogging(do.call(callable, selected_values))
})
setWrapperInactive("wrapperChecks")
cachePut(xList, prefix = prefix, type = type, args = args)
return(x)
}

.testISO <- function(x, ) {
isoCountry <- read.csv2(system.file("extdata", "iso_country.csv", package = "madrat"), row.names = NULL)
isoCountry1 <- as.vector(isoCountry[, "x"])
names(isoCountry1) <- isoCountry[, "X"]
isocountries <- robustSort(isoCountry1)
datacountries <- robustSort(x)
if (length(isocountries) != length(datacountries)) {
stop("Wrong number of countries returned by ", convert, "!")
}
if (any(isocountries != datacountries)) {
stop("Countries returned by ", convert, " do not agree with iso country list!")
}
}

.ensureMagpie <- function(x, callable) {
# ensure we are always working with a list with entries "x" and "class"
xList <- if (is.list(x)) x else list(x = x, class = "magpie")

# set class to "magpie" if not set
if (is.null(xList$class)) xList$class <- "magpie"

# assert return list has the expected entries
if (!all(c("class", "x") %in% names(xList))) {
stop('Output of "', callable,
'" must be a MAgPIE object or a list with the entries "x" and "class"!')
}

if (!inherits(xList$x, xList$class)) {
stop('Output of "', callable, '" should have class "', xList$class, '" but it does not.')
}

if !is.null(convert) {
if (magclass::is.magpie(xList$x)) {
.testISO(magclass::getItems(xList$x, dim = 1.1))
} else {
vcat(2, "Non-magpie objects are not checked for ISO country level.")
}
}
}


.checkNamingConventions(read=read, correct=correct, convert=convert)

# get default subtype argument if available, otherwise NULL
if (is.null(subtype)) {subtype <- formals(eval(read))[["subtype"]]}

# conversions for cache search
type <- .getType(read=read)
prefix <- .getPrefix(convert=convert, correct=correct)
args <- .getArgs(subtype=subtype, subset=subset)

# check forcecache before checking source dir
forcecacheActive <- all(!is.null(getConfig("forcecache")),
any(isTRUE(getConfig("forcecache")),
type %in% getConfig("forcecache"),
paste0(prefix, type) %in% getConfig("forcecache")))
if (forcecacheActive) {
xList <- .getFromCache(prefix, type, args, subtype, subset)
if (!is.null(xList)) {
return(if (supplementary) xList else xList$x)
}
}

.tryNewDownload()

# try for a perfect cache
xList <- .getFromCache(prefix, type, args, subtype, subset)
if (!is.null(xList)) {
return(.clean(xList))
}

# try a cache of an earlier step in the preprocessing routine and run remaining steps
if (!is.null(convert)) {
xList <- .getFromCache(type, subtype, subset, args, "correct")
if !is.null(xList) {
xList <- .runWithLoggingandCaching(xList, callable=convert, subtype=subtype, subset=subset)
}
return(.clean(xList))
}

# try last cache option
xList <- .getFromCache(type, subtype, subset, args, "read")
# if that didn't work, run the read function
if (!is.null(xList)) {
xList <- .runWithLoggingandCaching(xList, callable=read, subtype=subtype, subset=subset)
}
# and then run the correct and/or convert functions if desired
if (!is.null(correct)) {
xList <- .runWithLoggingandCaching(xList, callable=correct, subtype=subtype, subset=subset)
}
if (!is.null(convert)) {
xList <- .runWithLoggingandCaching(xList, callable=convert, subtype=subtype, subset=subset)
}

return(.clean(xList))
}
Loading