diff --git a/apis/r/DESCRIPTION b/apis/r/DESCRIPTION index 59476a0f10..3cd94b2166 100644 --- a/apis/r/DESCRIPTION +++ b/apis/r/DESCRIPTION @@ -45,14 +45,16 @@ Imports: spdl, rlang, tools, - tibble + tibble, + itertools, + iterators LinkingTo: Rcpp, RcppSpdlog, RcppInt64 Additional_repositories: https://ghrr.github.io/drat Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Suggests: rmarkdown, knitr, diff --git a/apis/r/NAMESPACE b/apis/r/NAMESPACE index 2607cb7deb..d1f7d97944 100644 --- a/apis/r/NAMESPACE +++ b/apis/r/NAMESPACE @@ -2,9 +2,12 @@ S3method("[[",MappingBase) S3method("[[<-",MappingBase) +S3method(as.list,CoordsStrider) S3method(as.list,MappingBase) +S3method(hasNext,CoordsStrider) S3method(length,MappingBase) S3method(names,MappingBase) +S3method(nextElem,CoordsStrider) S3method(write_soma,Assay) S3method(write_soma,DataFrame) S3method(write_soma,DimReduc) @@ -51,9 +54,11 @@ export(SOMAMeasurementOpen) export(SOMANDArrayBase) export(SOMAOpen) export(SOMASparseNDArray) +export(SOMASparseNDArrayBlockwiseRead) export(SOMASparseNDArrayCreate) export(SOMASparseNDArrayOpen) export(SOMASparseNDArrayRead) +export(SOMASparseNDArrayReadBase) export(SOMATileDBContext) export(ScalarMap) export(SparseReadIter) @@ -63,9 +68,11 @@ export(TileDBCreateOptions) export(TileDBGroup) export(TileDBObject) export(extract_dataset) +export(hasNext) export(list_datasets) export(load_dataset) export(matrixZeroBasedView) +export(nextElem) export(set_log_level) export(show_package_versions) export(tiledbsoma_stats_disable) @@ -88,6 +95,8 @@ importFrom(data.table,address) importFrom(fs,path_has_parent) importFrom(fs,path_rel) importFrom(glue,glue_collapse) +importFrom(iterators,nextElem) +importFrom(itertools,hasNext) importFrom(methods,as) importFrom(methods,getClassDef) importFrom(methods,new) diff --git a/apis/r/R/CoordsStrider.R b/apis/r/R/CoordsStrider.R new file mode 100644 index 0000000000..6e75f0860b --- /dev/null +++ b/apis/r/R/CoordsStrider.R @@ -0,0 +1,129 @@ +CoordsStrider <- R6::R6Class( + classname = "CoordsStrider", + cloneable = FALSE, + public = list( + initialize = function(coords, ..., stride = NULL, start = NULL, end = NULL) { + if (missing(coords)) { + stopifnot( + rlang::is_integerish(start, 1L, TRUE) || + (inherits(start, "integer64") && length(start) == 1L && is.finite(start)), + rlang::is_integerish(end, 1L, TRUE) || + (inherits(end, "integer64") && length(end) == 1L && is.finite(end)), + start <= end + ) + private$.start <- start + private$.end <- end + stride <- stride %||% abs(end - start + 1L) + private$.index <- 0L + } else { + stopifnot(inherits(coords, c("integer64", "numeric", "integer"))) + private$.coords <- coords + stride <- stride %||% length(coords) + stopifnot(stride <= .Machine$integer.max) + private$.index <- 1L + } + stopifnot(rlang::is_integerish(stride, 1L, TRUE) && stride > 0L) + private$.stride <- stride + }, + print = function() { + cat("<", class(self)[1L], ">\n", sep = "") + if (is.null(self$coords)) { + cat(" start:", self$start, "\n") + cat(" end:", self$end, "\n") + } else { + cat(" length(coords):", length(self$coords), "\n") + } + cat(" stride:", self$stride, "\n") + return(invisible(self)) + }, + hasNext = function() { + if (is.null(self$coords)) { + return(private$.index <= abs(self$end - self$start)) + } + return(private$.index <= length(self$coords)) + }, + nextElem = function() { + if (!self$hasNext()) { + private$.stopIteration() + } + if (is.null(self$coords)) { + start <- min(self$start + private$.index, self$end) + end <- min(start + self$stride - 1L, self$end) + private$.index <- private$.index + self$stride + if (start == end) { + return(bit64::as.integer64(start)) + } + by <- ifelse(start <= end, 1L, -1L) + return(bit64::seq.integer64(from = start, to = end, by = by)) + } + start <- private$.index + end <- min(private$.index + self$stride - 1L, length(self$coords)) + private$.index <- end + 1 + return(self$coords[start:end]) + } + ), + active = list( + coords = function() private$.coords, + start = function() ifelse(is.null(self$coords), private$.start, min(self$coords)), + end = function() ifelse(is.null(self$coords), private$.end, max(self$coords)), + stride = function() private$.stride + ), + private = list( + .coords = NULL, + .start = NULL, + .end = NULL, + .stride = NULL, + .index = NULL, + .stopIteration = function() stop(errorCondition( + "StopIteration", + class = "stopIteration" + )) + ) +) + +#' @method as.list CoordsStrider +#' @export +#' +as.list.CoordsStrider <- function(x, ...) { + f <- get('as.list.iter', envir = asNamespace('iterators')) + return(f(x, ...)) +} + +#' @importFrom iterators nextElem +#' @export +#' +iterators::nextElem + +#' @method nextElem CoordsStrider +#' @export +#' +nextElem.CoordsStrider <- function(obj, ...) { + return(obj$nextElem()) +} + +#' @importFrom itertools hasNext +#' @export +#' +itertools::hasNext + +#' @method hasNext CoordsStrider +#' @export +#' +hasNext.CoordsStrider <- function(obj, ...) { + return(obj$hasNext()) +} + +unlist64 <- function(x) { + stopifnot( + is.list(x), + all(vapply_lgl(x, inherits, what = 'integer64')) + ) + res <- bit64::integer64(sum(vapply_int(x, length))) + idx <- 1L + for (i in seq_along(x)) { + end <- idx + length(x[[i]]) + res[idx:(end - 1L)] <- x[[i]] + idx <- end + } + return(res) +} diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index f6dd6e85f1..13d94e26ac 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -54,7 +54,7 @@ SOMASparseNDArray <- R6::R6Class( timestamp_end = private$tiledb_timestamp, loglevel = log_level) private$ctx_ptr <- rl$ctx - SOMASparseNDArrayRead$new(rl$sr, shape = self$shape()) + SOMASparseNDArrayRead$new(rl$sr, self, coords) }, #' @description Write matrix-like data to the array. (lifecycle: experimental) diff --git a/apis/r/R/SOMASparseNDArrayRead.R b/apis/r/R/SOMASparseNDArrayRead.R index b2a9ad1ac1..7eb9c78658 100644 --- a/apis/r/R/SOMASparseNDArrayRead.R +++ b/apis/r/R/SOMASparseNDArrayRead.R @@ -1,3 +1,75 @@ +#' SOMA Sparse ND-Array Reader Base +#' +#' @description Base class for SOMA sparse ND-array reads +#' +#' @keywords internal +#' +#' @export +#' +SOMASparseNDArrayReadBase <- R6::R6Class( + classname = "SOMASparseNDArrayReadBase", + cloneable = FALSE, + public = list( + #' @description Create + #' + #' @param sr SOMA read pointer + #' @param array \code{\link{SOMASparseNDArray}} + #' @param coords ... + # @param shape Shape of the full matrix + #' + initialize = function(sr, array, coords) { + stopifnot( + "'array' must be a SOMASparseNDArray" = inherits(array, "SOMASparseNDArray") + ) + if (is.null(coords)) { + private$.striders <- vector(mode = "list", length = array$ndim()) + shape <- array$shape() + for (i in seq_along(private$.striders)) { + private$.striders[[i]] <- CoordsStrider$new( + start = 0L, + end = shape[i], + stride = .Machine$integer.max + ) + } + names(private$.striders) <- array$dimnames() + # shape <- array$shape() + # coords <- vector(mode = "list", length = array$ndim()) + # for (i in seq_along(coords)) { + # coords[[i]] <- bit64::seq.integer64(0L, shape[i] - 1L) + # } + # names(coords) <- array$dimnames() + } else { + stopifnot( + "'coords' must be a list of integer64 values" = is.list(coords) && + all(vapply_lgl(coords, inherits, what = c('integer64', 'numeric'))), + "'coords' must be named with the dimnames of 'array'" = is_named(coords, FALSE) && + all(names(coords) %in% array$dimnames()) + ) + private$.coords <- coords + } + private$.sr <- sr + private$.array <- array + # private$.shape <- shape + } + ), + active = list( + #' @field sr The SOMA read pointer + sr = function() return(private$.sr), + #' @field array The underlying \code{\link{SOMASparseNDArray}} + array = function() return(private$.array), + #' @field coords The coordinates for the read + coords = function() return(private$.coords), + #' @field shape The shape of the underlying array + shape = function() return(self$array$shape()) + ), + private = list( + .sr = NULL, + .array = NULL, + .coords = NULL, + .striders = NULL + ) +) + #' SOMASparseNDArrayRead #' #' @description @@ -7,16 +79,17 @@ SOMASparseNDArrayRead <- R6::R6Class( classname = "SOMASparseNDArrayRead", - + inherit = SOMASparseNDArrayReadBase, + cloneable = FALSE, public = list( - #' @description Create (lifecycle: experimental) - #' @param sr soma read pointer - #' @param shape Shape of the full matrix - initialize = function(sr, shape) { - private$sr <- sr - private$shape <- shape - }, + # @description Create (lifecycle: experimental) + # @param sr soma read pointer + # @param shape Shape of the full matrix + # initialize = function(sr, shape) { + # private$sr <- sr + # private$shape <- shape + # }, #' @description Read as a sparse matrix (lifecycle: experimental). Returns #' an iterator of Matrix::\link[Matrix]{dgTMatrix-class} or \link{matrixZeroBasedView} of it. @@ -26,8 +99,9 @@ SOMASparseNDArrayRead <- R6::R6Class( sparse_matrix = function(zero_based=FALSE) { #TODO implement zero_based argument, currently doesn't do anything - - if (any(private$shape > .Machine$integer.max)) { + shape <- self$shape + # if (any(private$shape > .Machine$integer.max)) { + if (any(shape > .Machine$integer.max)) { warning( "Array's shape exceeds '.Machine$integer.max'.\n", " - Result will only include coordinates within [0, 2^31 - 1).\n", @@ -35,23 +109,124 @@ SOMASparseNDArrayRead <- R6::R6Class( call. = FALSE, immediate. = TRUE ) - private$shape <- pmin(private$shape, .Machine$integer.max) + # private$shape <- pmin(private$shape, .Machine$integer.max) + shape <- pmin(shape, .Machine$integer.max) } - SparseReadIter$new(private$sr, private$shape, zero_based = zero_based) + SparseReadIter$new(self$sr, shape, zero_based = zero_based) }, #' @description Read as a arrow::\link[arrow]{Table} (lifecycle: experimental). #' Returns an iterator of arrow::\link[arrow]{Table}. #' @return \link{TableReadIter} tables = function() { - TableReadIter$new(private$sr) + TableReadIter$new(self$sr) + }, + #' @description ... + #' + #' @param axis ... + #' @param ... Ignored + #' @param size ... + #' @param reindex_disable_on_axis ... + #' @param eager ... + #' + #' @return A \code{\link{SOMASparseNDArrayBlockwiseRead}} iterated reader + #' + blockwise = function( + axis, + ..., + size = NULL, + reindex_disable_on_axis = NULL, + eager = TRUE + ) { + return(SOMASparseNDArrayBlockwiseRead$new( + self$sr, + self$array, + self$coords, + axis, + size = size, + reindex_disable_on_axis = reindex_disable_on_axis, + eager = eager + )) } - ), + ) +) +#' Blockwise Sparse ND-Array Reader +#' +#' @description Blockwise reader for \code{\link{SOMASparseNDArray}} +#' +#' @keywords internal +#' +#' @export +#' +SOMASparseNDArrayBlockwiseRead <- R6::R6Class( + classname = "SOMASparseNDArrayBlockwiseRead", + inherit = SOMASparseNDArrayReadBase, + cloneable = FALSE, + public = list( + #' @description Create + #' + #' @param sr SOMA read pointer + #' @param array \code{\link{SOMASparseNDArray}} + #' @param coords ... + #' @param axis ... + #' @param ... Ignored + #' @param size ... + #' @param reindex_disable_on_axis ... + #' @param eager ... + #' + initialize = function( + sr, + array, + coords, + axis, + ..., + size, + reindex_disable_on_axis, + eager = TRUE + ) { + super$initialize(sr, array, coords) + stopifnot() + private$.axis <- axis + private$.size <- size + private$.reindex_disable_on_axis <- reindex_disable_on_axis + private$.eager <- eager + }, + #' @description ... + #' + #' @return ... + #' + tables = function() { + .NotYetImplemented() + }, + #' @description ... + #' + #' @param compress ... + #' + #' @return ... + #' + sparse_matrix = function(compress = TRUE) { + stopifnot( + "'compress' must be TRUE or FALSE" = isTRUE(compress) || isFALSE(compress) + ) + .NotYetImplemented() + } + ), + active = list( + #' @field axis ... + axis = function() return(private$.axis), + #' @field size ... + size = function() return(private$.size), + #' @field reindex_disable_on_axis ... + reindex_disable_on_axis = function() return(private$.reindex_disable_on_axis), + #' @field eager ... + eager = function() return(private$eager) + ), private = list( - sr=NULL, - shape=NULL + .axis = NULL, + .size = NULL, + .reindex_disable_on_axis = NULL, + .eager = NULL ) - ) diff --git a/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd b/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd new file mode 100644 index 0000000000..949f8ccd7f --- /dev/null +++ b/apis/r/man/SOMASparseNDArrayBlockwiseRead.Rd @@ -0,0 +1,107 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SOMASparseNDArrayRead.R +\name{SOMASparseNDArrayBlockwiseRead} +\alias{SOMASparseNDArrayBlockwiseRead} +\title{Blockwise Sparse ND-Array Reader} +\description{ +Blockwise reader for \code{\link{SOMASparseNDArray}} +} +\keyword{internal} +\section{Super class}{ +\code{\link[tiledbsoma:SOMASparseNDArrayReadBase]{tiledbsoma::SOMASparseNDArrayReadBase}} -> \code{SOMASparseNDArrayBlockwiseRead} +} +\section{Active bindings}{ +\if{html}{\out{