Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[r] Handle numeric coords better in SOMASparseNDArray$read() #3145

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 45 additions & 16 deletions apis/r/R/SOMANDArrayBase.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,22 +111,51 @@ SOMANDArrayBase <- R6::R6Class(
# format acceptable for sr_setup and soma_array_reader
.convert_coords = function(coords) {

## ensure coords is a named list, use to select dim points
stopifnot("'coords' must be a list" = is.list(coords),
"'coords' must be a list of vectors or integer64" =
all(vapply_lgl(coords, is_vector_or_int64)),
"'coords' if unnamed must have length of dim names, else if named names must match dim names" =
(is.null(names(coords)) && length(coords) == length(self$dimnames())) ||
(!is.null(names(coords)) && all(names(coords) %in% self$dimnames()))
)

## if unnamed (and test for length has passed in previous statement) set names
if (is.null(names(coords))) names(coords) <- self$dimnames()

## convert integer to integer64 to match dimension type
coords <- lapply(coords, function(x) if (inherits(x, "integer")) bit64::as.integer64(x) else x)

coords
# Ensure coords is a named list, use to select dim points
stopifnot(
"'coords' must be a list" = is.list(coords) && length(coords),
"'coords' must be a list integerish vectors" =
all(vapply(
X = coords,
FUN = function(x) {
if (is.null(x)) {
return(TRUE)
}
return(
(is.null(dim(x)) && !is.factor(x)) &&
(rlang::is_integerish(x, finite = TRUE) || (bit64::is.integer64(x) && all(is.finite(x)))) &&
length(x) &&
all(x >= 0L)
)
},
FUN.VALUE = logical(length = 1L),
USE.NAMES = FALSE
)),
"'coords' if unnamed must have length of dim names, else if named names must match dim names" = ifelse(
test = is.null(names(coords)),
yes = length(coords) == length(self$dimnames()),
no = all(names(coords) %in% self$dimnames())
)
)

# Remove NULL-entries from coords
coords <- Filter(Negate(is.null), coords)
if (!length(coords)) {
return(NULL)
}

# If unnamed, set names
if (is.null(names(coords))) {
names(coords) <- self$dimnames()
}

# Convert to integer64 to match dimension type
return(sapply(
coords,
FUN = bit64::as.integer64,
simplify = FALSE,
USE.NAMES = TRUE
))
},

# @description Converts a vector of ints into a vector of int64 in a format
Expand Down
17 changes: 10 additions & 7 deletions apis/r/R/SOMASparseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,16 @@ SOMASparseNDArray <- R6::R6Class(
coords <- private$.convert_coords(coords)
}

sr <- sr_setup(uri = self$uri,
private$.soma_context,
dim_points = coords,
result_order = result_order,
timestamprange = self$.tiledb_timestamp_range,
loglevel = log_level)
SOMASparseNDArrayRead$new(sr, self, coords)
sr <- sr_setup(
uri = self$uri,
private$.soma_context,
dim_points = coords,
result_order = result_order,
timestamprange = self$.tiledb_timestamp_range,
loglevel = log_level
)

return(SOMASparseNDArrayRead$new(sr, self, coords))
},

#' @description Write matrix-like data to the array. (lifecycle: maturing)
Expand Down
4 changes: 2 additions & 2 deletions apis/r/tests/testthat/test-SOMADenseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ test_that("SOMADenseNDArray creation", {

# Subset the array on both dimensions
tbl <- ndarray$read_arrow_table(
coords = list(soma_dim_0=0:3, soma_dim_1=0:2),
coords = list(soma_dim_0 = 0:3, soma_dim_1 = 0:2),
result_order = "COL_MAJOR"
)
expect_identical(
Expand Down Expand Up @@ -69,7 +69,7 @@ test_that("SOMADenseNDArray creation", {
# Validating coords format
expect_error(
ndarray$read_arrow_table(coords = list(cbind(0, 1))),
"must be a list of vectors"
regexp = "'coords' must be a list integerish vectors"
)

# Validate TileDB array schema
Expand Down
110 changes: 110 additions & 0 deletions apis/r/tests/testthat/test-SOMASparseNDArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,116 @@ test_that("SOMASparseNDArray read_sparse_matrix_zero_based", {
ndarray$close()
})

test_that("SOMASparseNDArray read coordinates", {
skip_if(!extended_tests())
uri <- tempfile(pattern = "sparse-ndarray")
nrows <- 100L
ncols <- 20L

ndarray <- create_and_populate_sparse_nd_array(
uri = uri,
mode = "READ",
nrows = nrows,
ncols = ncols,
seed = 42L
)
on.exit(ndarray$close(), add = TRUE, after = FALSE)

expect_identical(as.integer(ndarray$shape()), c(nrows, ncols))
expect_s4_class(mat <- ndarray$read()$sparse_matrix()$concat(), "dgTMatrix")
expect_identical(dim(mat), c(nrows, ncols))

# Note: slices `:` yield integers, not numerics
# Note: #L is integer, # on its own is numeric
cases <- list(
# Test one dim NULL
"dim0 null, dim1 slice" = list(soma_dim_0 = NULL, soma_dim_1 = 0:9),
"dim0 null, dim1 slice" = list(soma_dim_0 = 35:45, soma_dim_1 = NULL),
"dim0 null, dim1 coords" = list(
soma_dim_0 = NULL,
soma_dim_1 = c(0L, 5L, 10L)
),
"dim0 coords, dim1 null" = list(soma_dim_0 = c(72, 83), soma_dim_1 = NULL),
# Test both dims null
"dim0 null, dim1 null" = list(soma_dim_0 = NULL, soma_dim_1 = NULL),
# Test both dims provided
"dim0 coords, dim1 coords" = list(
soma_dim_0 = c(72, 83),
soma_dim_1 = c(0L, 5L, 10L)
),
"dim0 slice, dim1 slice" = list(soma_dim_0 = 35:45, soma_dim_1 = 0:9),
"dim0 coords, dim1 slice" = list(soma_dim_0 = c(72, 83), soma_dim_1 = 0:9),
"dim0 slice, dim0 coords" = list(
soma_dim_0 = 35:45,
soma_dim_1 = c(0L, 5L, 10L)
),
# Test one dim missing
"dim0 missing, dim1 slice" = list(soma_dim_1 = 0:9),
"dim0 missing, dim1 coords" = list(soma_dim_1 = c(0L, 5L, 10L)),
"dim0 missing, dim1 null" = list(soma_dim_1 = NULL),
"dim0 slice, dim1 missing" = list(soma_dim_0 = 35:45),
"dim0 coords, dim1 missing" = list(soma_dim_0 = c(72, 83)),
"dim0 coords, dim1 null" = list(soma_dim_0 = NULL),
# Test zero-pull
"zero-pull" = list(soma_dim_0 = c(0, 3), soma_dim_1 = c(0L, 9L))
)
for (i in seq_along(cases)) {
coords <- cases[[i]]
label <- names(cases)[i]
expect_s3_class(tbl <- ndarray$read(coords)$tables()$concat(), "Table")
ii <- if (is.null(coords$soma_dim_0)) {
TRUE
} else {
mat@i %in% coords$soma_dim_0
}
jj <- if (is.null(coords$soma_dim_1)) {
TRUE
} else {
mat@j %in% coords$soma_dim_1
}
nr <- ifelse(isTRUE(ii) && isTRUE(jj), yes = length(mat@x), no = sum(ii & jj))
expect_identical(nrow(tbl), nr, label = label)
}

# Test assertions
list_cases <- list(TRUE, "tomato", 1L, 1.1, bit64::as.integer64(1L), list())
for (coords in list_cases) {
expect_error(ndarray$read(coords), regexp = "'coords' must be a list")
}

intgerish_cases <- list(
list(TRUE),
list("tomato"),
list(1.1),
list(NA_integer_),
list(NA_real_),
list(bit64::NA_integer64_),
list(Inf),
list(-4),
list(factor(letters[1:10])),
list(matrix(1:10, ncol = 1:10)),
list(array(1:10))
)
for (coords in intgerish_cases) {
expect_error(
ndarray$read(coords),
regexp = "'coords' must be a list integerish vectors"
)
}

names_cases <- list(
list(1:3, 1:5, 1:10),
list(tomato = 1:10),
list(soma_dim_0 = 1:10, tomato = 1:10)
)
for (coords in names_cases) {
expect_error(
ndarray$read(coords),
regexp = "'coords' if unnamed must have length of dim names, else if named names must match dim names"
)
}
})

test_that("SOMASparseNDArray creation with duplicates", {
skip_if(!extended_tests())
uri <- tempfile(pattern="sparse-ndarray")
Expand Down
Loading