Skip to content

Commit

Permalink
[r] Adjust blockwise + re-indexer to return condensed matrix, not ful…
Browse files Browse the repository at this point in the history
…l domain (#3395)

The blockwise iterator + re-indexer in the R API currently return a matrix with the full domain of the array, not the re-indexed shape. This PR adjusts the behavior to return the matrix with the re-indexed shape

The new behavior mimics the Python API demonstrated [here](https://gist.github.com/mojaveazure/beb4ecda9d37dc67140ba494e4ec43a5#file-soma-blockwise-reindexer-py-rmd)

Modified SOMA methods:
 - `BlockwiseSparseReadIter$private$soma_reader_transform()`: if re-indexing is enabled, adjust the shape for each re-indexed axis to the re-indexed shape

#3385 [SC-59743](https://app.shortcut.com/tiledb-inc/story/59743)

* Start fixing blockwise iterator

* Complete updated re-indexer

* Remove outdated comments

* Apply suggestions from code review

Co-authored-by: John Kerl <[email protected]>

* Update apis/r/R/BlockwiseIter.R

Co-authored-by: Aaron Wolen <[email protected]>

---------

Co-authored-by: John Kerl <[email protected]>
Co-authored-by: Aaron Wolen <[email protected]>
  • Loading branch information
3 people authored Dec 4, 2024
1 parent b4cf341 commit 6b6edae
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 7 deletions.
18 changes: 17 additions & 1 deletion apis/r/R/BlockwiseIter.R
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,26 @@ BlockwiseSparseReadIter <- R6::R6Class(
.zero_based = FALSE,
soma_reader_transform = function(x) {
tbl <- private$reindex_arrow_table(soma_array_to_arrow_table(x))
shape <- private$.shape
axis <- as.integer(self$axis)
axname <- sprintf("soma_dim_%i", axis)
stride <- self$coords[[axname]]$stride
# For re-indexed blockwise iterators, shape should reflect the re-indexed
# axes; this can generally be the stride of the iterator, but for the end
# of each iterator this needs to be the remainder (see ?`%%`).
# Note: this only applies to the major axis, minor axes always return
# the full domain.
if (self$reindexable && shape[axis + 1L] > stride) {
shape[axis + 1L] <- if (self$coords[[axname]]$has_next()) {
as.numeric(stride)
} else {
as.numeric(self$coords[[axname]]$end %% stride) + 1L
}
}
mat <- arrow_table_to_sparse(
tbl,
repr = self$repr,
shape = private$.shape,
shape = shape,
zero_based = private$.zero_based
)
attr(mat, "coords") <- attr(tbl, "coords", exact = TRUE)
Expand Down
31 changes: 25 additions & 6 deletions apis/r/tests/testthat/test-00-Blockwise.R
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ test_that("Blockwise iterator for sparse matrices", {
expect_error(bi$sparse_matrix("R"))

expect_s3_class(it <- bi$sparse_matrix(), "BlockwiseSparseReadIter")
expect_false(it$reindexable)
expect_false(it$read_complete())

for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) {
Expand Down Expand Up @@ -200,6 +201,7 @@ test_that("Sparse matrix blockwise iterator: re-indexed", {
query <- exp$axis_query("RNA")
xrqry <- query$X("data")

# Reindex only on major axis
expect_s3_class(
bi <- xrqry$blockwise(axis = ax, size = sz, reindex_disable_on_axis = NA),
"SOMASparseNDArrayBlockwiseRead"
Expand All @@ -216,7 +218,11 @@ test_that("Sparse matrix blockwise iterator: re-indexed", {
for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) {
mat <- it$read_next()
expect_s4_class(mat, "TsparseMatrix")
expect_identical(dim(mat), rev(dim(obj)))
dims <- c(
ifelse(it$read_complete(), yes = ncol(obj) %% sz, no = sz),
nrow(obj)
)
expect_identical(dim(mat), dims)
expect_true(min(mat@i) >= 0L)
expect_true(max(mat@i) <= sz)
strider <- attr(mat, "coords")$soma_dim_0
Expand All @@ -225,6 +231,7 @@ test_that("Sparse matrix blockwise iterator: re-indexed", {
expect_true(strider$end < sz * i)
}

# Reindex on all axes
expect_s3_class(
bi <- suppressWarnings(xrqry$blockwise(
axis = ax,
Expand All @@ -242,7 +249,11 @@ test_that("Sparse matrix blockwise iterator: re-indexed", {
for (i in seq.int(1L, ceiling(it$coords_axis$length() / it$coords_axis$stride))) {
mat <- it$read_next()
expect_s4_class(mat, "TsparseMatrix")
expect_identical(dim(mat), rev(dim(obj)))
dims <- c(
ifelse(it$read_complete(), yes = ncol(obj) %% sz, no = sz),
nrow(obj)
)
expect_identical(dim(mat), dims)
expect_true(min(mat@i) >= 0L)
expect_true(max(mat@i) <= sz)
}
Expand Down Expand Up @@ -270,14 +281,19 @@ test_that("Blockwise iterate through full array", {
obs_stride <- n_obs %/% n_chunks
it <- exp$ms$get("RNA")$X$get(X_layer)$read()$blockwise(axis = 0L, size = obs_stride)$sparse_matrix()
expect_false(it$read_complete())
i <- 1L
while (!it$read_complete()) {
i <- i + 1L
mat <- it$read_next()
expect_s4_class(mat, "dgTMatrix")
expect_identical(ncol(mat), n_var)
expect_identical(nrow(mat), n_obs)
nobs <- ifelse(it$read_complete(), yes = n_obs %% obs_stride, no = obs_stride)
expect_identical(nrow(mat), nobs, expected.label = nobs)
ncoords <- ifelse(it$read_complete(), yes = n_obs %% n_chunks, no = obs_stride)
expect_identical(
length(attr(mat, "coords")$soma_dim_0),
ifelse(it$read_complete(), yes = n_obs %% n_chunks, no = obs_stride)
ncoords,
expected.label = ncoords
)
}
expect_true(it$read_complete())
Expand All @@ -289,11 +305,14 @@ test_that("Blockwise iterate through full array", {
while (!it$read_complete()) {
mat <- it$read_next()
expect_s4_class(mat, "dgTMatrix")
expect_identical(ncol(mat), n_var)
nvar <- ifelse(it$read_complete(), yes = n_var %% var_stride, no = var_stride)
expect_identical(ncol(mat), nvar, expected.label = nvar)
expect_identical(nrow(mat), n_obs)
ncoords <- ifelse(it$read_complete(), yes = n_var %% n_chunks, no = var_stride)
expect_identical(
length(attr(mat, "coords")$soma_dim_1),
ifelse(it$read_complete(), yes = n_var %% n_chunks, no = var_stride)
ncoords,
expected.label = ncoords
)
}
expect_true(it$read_complete())
Expand Down

0 comments on commit 6b6edae

Please sign in to comment.