Skip to content

Commit

Permalink
[r] [WIP] Blockwise Reader
Browse files Browse the repository at this point in the history
Initial support for blockwise reader/iteration
New classes:
- `CoordsStrider`: new class to iterate through coordinate similar to Python's `_coords_strider`
- `SOMASparseNDArrayReadBase`: base class for sparse array reads
- `SOMASparseNDArrayBlockwiseRead`: new reader class for blockwise iterated reads

New SOMA methods:
- `SOMASparseNDArrayRead$blockwse()`: perform a blockwise read

addresses #1853
  • Loading branch information
mojaveazure committed Feb 19, 2024
1 parent db548eb commit bc481b1
Show file tree
Hide file tree
Showing 9 changed files with 545 additions and 47 deletions.
6 changes: 4 additions & 2 deletions apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,16 @@ Imports:
spdl,
rlang,
tools,
tibble
tibble,
itertools,
iterators
LinkingTo:
Rcpp,
RcppSpdlog,
RcppInt64
Additional_repositories: https://ghrr.github.io/drat
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
rmarkdown,
knitr,
Expand Down
9 changes: 9 additions & 0 deletions apis/r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

S3method("[[",MappingBase)
S3method("[[<-",MappingBase)
S3method(as.list,CoordsStrider)
S3method(as.list,MappingBase)
S3method(hasNext,CoordsStrider)
S3method(length,MappingBase)
S3method(names,MappingBase)
S3method(nextElem,CoordsStrider)
S3method(write_soma,Assay)
S3method(write_soma,DataFrame)
S3method(write_soma,DimReduc)
Expand Down Expand Up @@ -51,9 +54,11 @@ export(SOMAMeasurementOpen)
export(SOMANDArrayBase)
export(SOMAOpen)
export(SOMASparseNDArray)
export(SOMASparseNDArrayBlockwiseRead)
export(SOMASparseNDArrayCreate)
export(SOMASparseNDArrayOpen)
export(SOMASparseNDArrayRead)
export(SOMASparseNDArrayReadBase)
export(SOMATileDBContext)
export(ScalarMap)
export(SparseReadIter)
Expand All @@ -63,9 +68,11 @@ export(TileDBCreateOptions)
export(TileDBGroup)
export(TileDBObject)
export(extract_dataset)
export(hasNext)
export(list_datasets)
export(load_dataset)
export(matrixZeroBasedView)
export(nextElem)
export(set_log_level)
export(show_package_versions)
export(tiledbsoma_stats_disable)
Expand All @@ -88,6 +95,8 @@ importFrom(data.table,address)
importFrom(fs,path_has_parent)
importFrom(fs,path_rel)
importFrom(glue,glue_collapse)
importFrom(iterators,nextElem)
importFrom(itertools,hasNext)
importFrom(methods,as)
importFrom(methods,getClassDef)
importFrom(methods,new)
Expand Down
129 changes: 129 additions & 0 deletions apis/r/R/CoordsStrider.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
CoordsStrider <- R6::R6Class(
classname = "CoordsStrider",
cloneable = FALSE,
public = list(
initialize = function(coords, ..., stride = NULL, start = NULL, end = NULL) {
if (missing(coords)) {
stopifnot(
rlang::is_integerish(start, 1L, TRUE) ||
(inherits(start, "integer64") && length(start) == 1L && is.finite(start)),
rlang::is_integerish(end, 1L, TRUE) ||
(inherits(end, "integer64") && length(end) == 1L && is.finite(end)),
start <= end
)
private$.start <- start
private$.end <- end
stride <- stride %||% abs(end - start + 1L)
private$.index <- 0L
} else {
stopifnot(inherits(coords, c("integer64", "numeric", "integer")))
private$.coords <- coords
stride <- stride %||% length(coords)
stopifnot(stride <= .Machine$integer.max)
private$.index <- 1L
}
stopifnot(rlang::is_integerish(stride, 1L, TRUE) && stride > 0L)
private$.stride <- stride
},
print = function() {
cat("<", class(self)[1L], ">\n", sep = "")
if (is.null(self$coords)) {
cat(" start:", self$start, "\n")
cat(" end:", self$end, "\n")
} else {
cat(" length(coords):", length(self$coords), "\n")
}
cat(" stride:", self$stride, "\n")
return(invisible(self))
},
hasNext = function() {
if (is.null(self$coords)) {
return(private$.index <= abs(self$end - self$start))
}
return(private$.index <= length(self$coords))
},
nextElem = function() {
if (!self$hasNext()) {
private$.stopIteration()
}
if (is.null(self$coords)) {
start <- min(self$start + private$.index, self$end)
end <- min(start + self$stride - 1L, self$end)
private$.index <- private$.index + self$stride
if (start == end) {
return(bit64::as.integer64(start))
}
by <- ifelse(start <= end, 1L, -1L)
return(bit64::seq.integer64(from = start, to = end, by = by))
}
start <- private$.index
end <- min(private$.index + self$stride - 1L, length(self$coords))
private$.index <- end + 1
return(self$coords[start:end])
}
),
active = list(
coords = function() private$.coords,
start = function() ifelse(is.null(self$coords), private$.start, min(self$coords)),
end = function() ifelse(is.null(self$coords), private$.end, max(self$coords)),
stride = function() private$.stride
),
private = list(
.coords = NULL,
.start = NULL,
.end = NULL,
.stride = NULL,
.index = NULL,
.stopIteration = function() stop(errorCondition(
"StopIteration",
class = "stopIteration"
))
)
)

#' @method as.list CoordsStrider
#' @export
#'
as.list.CoordsStrider <- function(x, ...) {
f <- get('as.list.iter', envir = asNamespace('iterators'))
return(f(x, ...))
}

#' @importFrom iterators nextElem
#' @export
#'
iterators::nextElem

#' @method nextElem CoordsStrider
#' @export
#'
nextElem.CoordsStrider <- function(obj, ...) {
return(obj$nextElem())
}

#' @importFrom itertools hasNext
#' @export
#'
itertools::hasNext

#' @method hasNext CoordsStrider
#' @export
#'
hasNext.CoordsStrider <- function(obj, ...) {
return(obj$hasNext())
}

unlist64 <- function(x) {
stopifnot(
is.list(x),
all(vapply_lgl(x, inherits, what = 'integer64'))
)
res <- bit64::integer64(sum(vapply_int(x, length)))
idx <- 1L
for (i in seq_along(x)) {
end <- idx + length(x[[i]])
res[idx:(end - 1L)] <- x[[i]]
idx <- end
}
return(res)
}
2 changes: 1 addition & 1 deletion apis/r/R/SOMASparseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ SOMASparseNDArray <- R6::R6Class(
timestamp_end = private$tiledb_timestamp,
loglevel = log_level)
private$ctx_ptr <- rl$ctx
SOMASparseNDArrayRead$new(rl$sr, shape = self$shape())
SOMASparseNDArrayRead$new(rl$sr, self, coords)
},

#' @description Write matrix-like data to the array. (lifecycle: experimental)
Expand Down
Loading

0 comments on commit bc481b1

Please sign in to comment.