Skip to content

Commit

Permalink
[r] Update SOMADataFrame$create() to automatically mark ordered() (
Browse files Browse the repository at this point in the history
…#1720)

* [r] Update `SOMADataFrame$create()` to automatically mark `ordered()`
Add an check in `SOMADataFrame$create()` to detect if a column in the schema is ordered. If so, automatically mark the enumerations as such. Note: this only happens if:
- there are enumerations for the column
- the column in the arrow schema is marked as ordered
- there is no attribute `'ordered'` on the enumerations for the column

* name unusual argument

* Bump develop version
Update changelog
  • Loading branch information
mojaveazure authored Sep 26, 2023
1 parent 503ddd1 commit 97456c3
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 1 deletion.
2 changes: 1 addition & 1 deletion apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices,
like those commonly used for single cell data analysis. It is documented at
<https://github.com/single-cell-data>; a formal specification available is at
<https://github.com/single-cell-data/SOMA/blob/main/abstract_specification.md>.
Version: 1.4.3.1
Version: 1.4.3.2
Authors@R: c(
person(given = "Aaron", family = "Wolen",
role = c("cre", "aut"), email = "[email protected]",
Expand Down
1 change: 1 addition & 0 deletions apis/r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

* Add support for writing `SummarizedExperiment` and `SingleCellExperiment` object to SOMAs
* Add support for bounding boxes for sparse arrays
* Add support for creating `SOMADataFrames` with `ordered()` columns


# tiledbsoma 1.4.0
Expand Down
5 changes: 5 additions & 0 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ SOMADataFrame <- R6::R6Class(
field <- schema$GetFieldByName(field_name)
field_type <- tiledb_type_from_arrow_type(field$type)

# Check if the field is ordered and mark it as such
if (!is.null(x = levels[[field_name]]) && isTRUE(field$type$ordered)) {
attr(levels[[field_name]], 'ordered') <- attr(levels[[field_name]], 'ordered', exact = TRUE) %||% TRUE
}

tdb_attrs[[field_name]] <- tiledb::tiledb_attr(
name = field_name,
type = field_type,
Expand Down
70 changes: 70 additions & 0 deletions apis/r/tests/testthat/test-SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,76 @@ test_that("int64 values are stored correctly", {
gc()
})

test_that("creation with ordered factors", {
skip_if_not_installed("tiledb", "0.21.0")
skip_if(!extended_tests())
uri <- withr::local_tempdir("soma-dataframe-ordered")
n <- 10L
df <- data.frame(
soma_joinid = bit64::as.integer64(seq_len(length.out = n) - 1L),
int = seq_len(length.out = n),
bool = rep_len(c(TRUE, FALSE), length.out = n),
ord = ordered(rep_len(c("g1", "g2", "g3"), length.out = n))
)
tbl <- arrow::as_arrow_table(df)
expect_true(tbl$schema$GetFieldByName("ord")$type$ordered)
expect_no_condition(sdf <- SOMADataFrameCreate(
uri = uri,
schema = tbl$schema,
levels = sapply(
X = df[, setdiff(names(df), "soma_joinid")],
FUN = levels,
simplify = FALSE,
USE.NAMES = TRUE
)
))
expect_no_condition(sdf$write(values = tbl))
expect_s3_class(sdf <- SOMADataFrameOpen(uri), "SOMADataFrame")
expect_true(sdf$schema()$GetFieldByName("ord")$type$ordered)
expect_s3_class(ord <- sdf$object[]$ord, c("ordered", "factor"), exact = TRUE)
expect_length(ord, n)
expect_identical(levels(ord), levels(df$ord))
})

test_that("explicit casting of ordered factors to regular factors", {
skip_if_not_installed("tiledb", "0.21.0")
skip_if(!extended_tests())
uri <- withr::local_tempdir("soma-dataframe-unordered")
n <- 10L
df <- data.frame(
soma_joinid = bit64::as.integer64(seq_len(length.out = n) - 1L),
int = seq_len(length.out = n),
bool = rep_len(c(TRUE, FALSE), length.out = n),
ord = ordered(rep_len(c("g1", "g2", "g3"), length.out = n))
)
tbl <- arrow::as_arrow_table(df)
expect_true(tbl$schema$GetFieldByName("ord")$type$ordered)
lvls <- sapply(
X = df[, setdiff(names(df), "soma_joinid")],
FUN = levels,
simplify = FALSE,
USE.NAMES = TRUE
)
for (col in names(lvls)) {
if (!is.null(lvls[[col]])) {
attr(lvls[[col]], 'ordered') <- FALSE
}
}
expect_no_condition(sdf <- SOMADataFrameCreate(
uri = uri,
schema = tbl$schema,
levels = lvls
))
expect_no_condition(sdf$write(values = tbl))
expect_s3_class(sdf <- SOMADataFrameOpen(uri), "SOMADataFrame")
expect_false(sdf$schema()$GetFieldByName("ord")$type$ordered)
expect_s3_class(ord <- sdf$object[]$ord, "factor", exact = TRUE)
expect_false(is.ordered(ord))
expect_length(ord, n)
expect_identical(levels(ord), levels(df$ord))
})


test_that("SOMADataFrame read", {
skip_if(!extended_tests())
uri <- extract_dataset("soma-dataframe-pbmc3k-processed-obs")
Expand Down

0 comments on commit 97456c3

Please sign in to comment.