Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[r] Add enumeration support #1559

Merged
merged 4 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/r-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ on:
branches:
- main
- 'release-*'

workflow_dispatch:

env:
COVERAGE_FLAGS: "r"
COVERAGE_TOKEN: ${{ secrets.CODECOV_TOKEN }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/r-python-interop-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ on:
branches:
- main
- 'release-*'
workflow_dispatch:

jobs:
ci:
Expand Down
10 changes: 5 additions & 5 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ py::object to_table(SOMAArray& sr, std::shared_ptr<ArrayBuffers> array_buffers)
arrays.append(to_array(column));
}else{
arrays.append(pa_dict_from_arrays(
to_array(column),
get_enum(sr, name),
py::none(),
to_array(column),
get_enum(sr, name),
py::none(),
get_enum_is_ordered(sr, name)));
}
}
Expand Down Expand Up @@ -640,11 +640,11 @@ PYBIND11_MODULE(pytiledbsoma, m) {
.def("nnz", &SOMAArray::nnz, py::call_guard<py::gil_scoped_release>())

.def_property_readonly("shape", &SOMAArray::shape)

.def("get_enum", get_enum)

.def("get_enum_is_ordered", get_enum_is_ordered)

.def("get_enum_label_on_attr", &SOMAArray::get_enum_label_on_attr);
}
} // namespace tiledbsoma
1 change: 1 addition & 0 deletions apis/r/.Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ tiledbsoma.tar.gz
# subdirectories of soft-linked libtiledbsoma
src/libtiledbsoma/build
src/libtiledbsoma/test
src/libtiledbsoma/docs

# vscode
^\.vscode$
Expand Down
1 change: 1 addition & 0 deletions apis/r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ export(extract_dataset)
export(list_datasets)
export(load_dataset)
export(matrixZeroBasedView)
export(set_log_level)
export(show_package_versions)
export(tiledbsoma_stats_disable)
export(tiledbsoma_stats_dump)
Expand Down
4 changes: 3 additions & 1 deletion apis/r/R/Factory.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
#' @param uri URI for the TileDB object
#' @param schema Arrow schema argument passed on to DataFrame$create()
#' @param index_column_names Index column names passed on to DataFrame$create()
#' @param levels Optional list of enumeration (aka factor) levels
#' @param platform_config Optional platform configuration
#' @param tiledbsoma_ctx Optional SOMATileDBContext
#' @param tiledb_timestamp Optional Datetime (POSIXct) for TileDB timestamp
#' @export
SOMADataFrameCreate <- function(uri, schema, index_column_names = c("soma_joinid"),
levels = NULL,
platform_config = NULL, tiledbsoma_ctx = NULL, tiledb_timestamp = NULL) {
sdf <- SOMADataFrame$new(uri, platform_config, tiledbsoma_ctx, tiledb_timestamp, internal_use_only = "allowed_use")
sdf$create(schema, index_column_names=index_column_names, platform_config=platform_config, internal_use_only = "allowed_use")
sdf$create(schema, index_column_names=index_column_names, levels=levels, platform_config=platform_config, internal_use_only = "allowed_use")

sdf
}
Expand Down
7 changes: 6 additions & 1 deletion apis/r/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@ soma_array_reader_impl <- function(uri, colnames = NULL, qc = NULL, dim_points =
.Call(`_tiledbsoma_soma_array_reader`, uri, colnames, qc, dim_points, dim_ranges, batch_size, result_order, loglevel, config)
}

#' @noRd
#' Set the logging level for the R package and underlying C++ library
#'
#' @param level A character value with logging level understood by \sQuote{spdlog}
#' such as \dQuote{trace}, \dQuote{debug}, \dQuote{info}, or \dQuote{warn}.
#' @return Nothing is returned as the function is invoked for the side-effect.
#' @export
set_log_level <- function(level) {
  # Pass the requested level to the native routine. The call is made for
  # its side effect, so whatever it returns is handed back invisibly.
  status <- .Call(`_tiledbsoma_set_log_level`, level)
  invisible(status)
}
Expand Down
1 change: 0 additions & 1 deletion apis/r/R/ReadIter.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ ReadIter <- R6::R6Class(

# Internal 'external pointer' object used for iterated reads
soma_reader_pointer = NULL,
#ctx_pointer = NULL,

# to be refined in derived classes
soma_reader_transform = function(x) {
Expand Down
25 changes: 14 additions & 11 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ SOMADataFrame <- R6::R6Class(
#' @param index_column_names A vector of column names to use as user-defined
#' index columns. All named columns must exist in the schema, and at least
#' one index column name is required.
#' @param levels Optional list of enumeration (aka factor) levels
#' @template param-platform-config
#' @param internal_use_only Character value to signal this is a 'permitted' call,
#' as `create()` is considered internal and should not be called directly.
create = function(schema, index_column_names = c("soma_joinid"), platform_config = NULL, internal_use_only = NULL) {
create = function(schema, index_column_names = c("soma_joinid"), levels = NULL, platform_config = NULL, internal_use_only = NULL) {
if (is.null(internal_use_only) || internal_use_only != "allowed_use") {
stop(paste("Use of the create() method is for internal use only. Consider using a",
"factory method as e.g. 'SOMADataFrameCreate()'."), call. = FALSE)
Expand Down Expand Up @@ -94,9 +95,15 @@ SOMADataFrame <- R6::R6Class(

for (field_name in attr_column_names) {
field <- schema$GetFieldByName(field_name)
tdb_attrs[[field_name]] <- tiledb_attr_from_arrow_field(
schema$GetFieldByName(field_name),
tiledb_create_options = tiledb_create_options
field_type <- tiledb_type_from_arrow_type(field$type)

tdb_attrs[[field_name]] <- tiledb::tiledb_attr(
name = field_name,
type = field_type,
nullable = field$nullable,
ncells = if (field_type == "ASCII") NA_integer_ else 1L,
filter_list = tiledb::tiledb_filter_list(tiledb_create_options$attr_filters(field_name)),
enumeration = levels[[field_name]]
)
}

Expand All @@ -110,13 +117,10 @@ SOMADataFrame <- R6::R6Class(
tile_order = cell_tile_orders["tile_order"],
capacity = tiledb_create_options$capacity(),
allows_dups = tiledb_create_options$allows_duplicates(),
offsets_filter_list = tiledb::tiledb_filter_list(
tiledb_create_options$offsets_filters()
),
validity_filter_list = tiledb::tiledb_filter_list(
tiledb_create_options$validity_filters()
offsets_filter_list = tiledb::tiledb_filter_list(tiledb_create_options$offsets_filters()),
validity_filter_list = tiledb::tiledb_filter_list(tiledb_create_options$validity_filters()),
enumerations = if (any(!sapply(levels, is.null))) levels else NULL
)
)

# create array
tiledb::tiledb_array_create(uri = self$uri, schema = tdb_schema)
Expand Down Expand Up @@ -242,7 +246,6 @@ SOMADataFrame <- R6::R6Class(
#' names will be extracted and added as a new column to the `data.frame`
#' prior to performing the update. The name of this new column will be set
#' to the value specified by `row_index_name`.

update = function(values, row_index_name = NULL) {
private$check_open_for_write()
stopifnot(
Expand Down
25 changes: 13 additions & 12 deletions apis/r/R/TableReadIter.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' SOMA Read Iterator over Arrow Table
#'
#' @description
#' `TableReadIter` is a class that allows for iteration over
#' a reads on \link{SOMASparseNDArray} and \link{SOMADataFrame}.
#' `TableReadIter` is a class that allows for iteration over
#' reads on \link{SOMASparseNDArray} and \link{SOMADataFrame}.
#' Iteration chunks are retrieved as arrow::\link[arrow]{Table}
#' @export

Expand All @@ -11,32 +11,33 @@ TableReadIter <- R6::R6Class(
inherit = ReadIter,

public = list(

#' @description Concatenate remainder of iterator.
#' @return arrow::\link[arrow]{Table}
concat = function(){
  # Nothing left to read: warn and return NULL.
  if (self$read_complete()) {
    warning("Iteration complete, returning NULL")
    return(NULL)
  }

  # Seed the result with the next chunk, then keep appending chunks until
  # the iterator reports that the read is complete.
  combined <- self$read_next()
  repeat {
    if (self$read_complete()) {
      break
    }
    combined <- arrow::concat_tables(combined, self$read_next())
  }

  combined
}),

private = list(

## refined from base class
soma_reader_transform = function(x) {
soma_array_to_arrow_table(x)
at <- soma_array_to_arrow_table(x)
at
}

)
)
63 changes: 50 additions & 13 deletions apis/r/R/utils-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ tiledb_type_from_arrow_type <- function(x) {
# fixed_size_list = "fixed_size_list",
# map_of = "map",
# duration = "duration",
stop("Unsupported data type: ", x$name, call. = FALSE)
dictionary = "INT32", # for a dictionary the 'values' are ints, levels are character
stop("Unsupported Arrow data type: ", x$name, call. = FALSE)
)
}

Expand Down Expand Up @@ -161,15 +162,33 @@ arrow_field_from_tiledb_dim <- function(x) {
)
}

## With a nod to Kevin Ushey
#' @noRd
yoink <- function(package, symbol) {
  # Look the symbol up directly in the package namespace (exported or not),
  # without searching enclosing environments.
  get(symbol, envir = asNamespace(package), inherits = FALSE)
}


#' Create an Arrow field from a TileDB attribute
#' @noRd
arrow_field_from_tiledb_attr <- function(x) {
stopifnot(inherits(x, "tiledb_attr"))
arrow::field(
name = tiledb::name(x),
type = arrow_type_from_tiledb_type(tiledb::datatype(x)),
nullable = tiledb::tiledb_attribute_get_nullable(x)
)
arrow_field_from_tiledb_attr <- function(x, arrptr = NULL) {
  stopifnot(inherits(x, "tiledb_attr"))

  # A dictionary field is only built when the attribute has an enumeration
  # AND an array pointer was supplied; otherwise return a plain field of the
  # attribute's own type.
  use_dictionary <- tiledb::tiledb_attribute_has_enumeration(x) && !is.null(arrptr)
  if (!use_dictionary) {
    return(arrow::field(
      name = tiledb::name(x),
      type = arrow_type_from_tiledb_type(tiledb::datatype(x)),
      nullable = tiledb::tiledb_attribute_get_nullable(x)
    ))
  }

  # Open the array for reading if it is not open already. The helper
  # functions are fetched from the tiledb namespace via yoink().
  array_is_open <- yoink("tiledb", "libtiledb_array_is_open")
  if (!array_is_open(arrptr)) {
    open_with_ptr <- yoink("tiledb", "libtiledb_array_open_with_ptr")
    arrptr <- open_with_ptr(arrptr, "READ")
  }

  # The attribute's datatype becomes the dictionary index type; the ordered
  # flag is read from the enumeration through the array pointer.
  is_ordered <- tiledb::tiledb_attribute_is_ordered_enumeration_ptr(x, arrptr)
  index_type <- arrow_type_from_tiledb_type(tiledb::datatype(x))
  arrow::field(
    name = tiledb::name(x),
    type = arrow::dictionary(index_type = index_type, ordered = is_ordered),
    nullable = tiledb::tiledb_attribute_get_nullable(x)
  )
}

#' Create a TileDB attribute from an Arrow field
Expand Down Expand Up @@ -206,11 +225,13 @@ tiledb_attr_from_arrow_field <- function(field, tiledb_create_options) {
#' @noRd
arrow_schema_from_tiledb_schema <- function(x) {
stopifnot(inherits(x, "tiledb_array_schema"))
fields <- c(
lapply(tiledb::dimensions(x), arrow_field_from_tiledb_dim),
lapply(tiledb::attrs(x), arrow_field_from_tiledb_attr)
)
arrow::schema(fields)
dimfields <- lapply(tiledb::dimensions(x), arrow_field_from_tiledb_dim)
if (!is.null(x@arrptr)) {
attfields <- lapply(tiledb::attrs(x), arrow_field_from_tiledb_attr, x@arrptr)
} else {
attfields <- lapply(tiledb::attrs(x), arrow_field_from_tiledb_attr)
}
arrow::schema(c(dimfields, attfields))
}

#' Validate external pointer to ArrowArray
Expand Down Expand Up @@ -293,3 +314,19 @@ check_arrow_schema_data_types <- function(from, to) {
}
return(TRUE)
}

#' Extract levels from dictionaries
#' @noRd
extract_levels <- function(arrtbl) {
  stopifnot("Argument must be an Arrow Table object" = is_arrow_table(arrtbl))

  # One slot per column, keyed by column name; slots stay NULL for columns
  # that are not dictionary-typed.
  col_names <- names(arrtbl)
  level_list <- vector(mode = "list", length = length(col_names))
  names(level_list) <- col_names

  for (col in col_names) {
    column <- arrtbl[[col]]
    if (!inherits(arrow::infer_type(column), "DictionaryType")) {
      next
    }
    # Converting the column to an R vector and calling levels() on it gives
    # the enumeration levels.
    level_list[[col]] <- levels(column$as_vector())
  }

  level_list
}
3 changes: 3 additions & 0 deletions apis/r/man/SOMADataFrame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions apis/r/man/SOMADataFrameCreate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions apis/r/man/set_log_level.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading