Skip to content

Commit

Permalink
Additional data.frame support (#119)
Browse files Browse the repository at this point in the history
* Generalize assignment method to also recognise char columns

* Simple example as on web page

* Generalize to empty columns

* Ensure buffers are kept around long enough

* Ensure buffers are kept around long enough

* Improved temp directory handling in tests

* Additions to NEWS

* Generalization to more types
  • Loading branch information
eddelbuettel authored May 14, 2020
1 parent 11b4515 commit 34117f9
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 77 deletions.
28 changes: 16 additions & 12 deletions R/DenseArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,8 @@ setMethod("[<-", "tiledb_dense",
qry <- libtiledb_query_set_subarray(qry, as.double(subarray))
}
attr_names <- names(value)
## we need to hold on to the allocated buffers til the query fires
buflst <- vector(mode="list", length=length(attr_names))
for (idx in seq_along(value)) {
aname <- attr_names[[idx]]
val <- value[[idx]]
Expand All @@ -456,24 +458,26 @@ setMethod("[<-", "tiledb_dense",
## offsets starts: cumulative sum of all word lengths as provided by nchar
## but starting at 0 and then omitting the last
offs <- cumsum(c(0, head(sapply(val[1:n], nchar, USE.NAMES=FALSE), -1)))
bufptr <- libtiledb_query_buffer_var_char_create(offs, string)
qry <- libtiledb_query_set_buffer_var_char(qry, aname, bufptr)
buflst[[idx]] <- libtiledb_query_buffer_var_char_create(offs, string)
qry <- libtiledb_query_set_buffer_var_char(qry, aname, buflst[[idx]])
} else if (inherits(val, "Date")) {
bufptr <- libtiledb_query_buffer_alloc_ptr(x@ptr, "DATETIME_DAY", length(val))
bufptr <- libtiledb_query_buffer_assign_ptr(bufptr, "DATETIME_DAY", val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, bufptr)
buflst[[idx]] <- libtiledb_query_buffer_alloc_ptr(x@ptr, "DATETIME_DAY", length(val))
buflst[[idx]] <- libtiledb_query_buffer_assign_ptr(buflst[[idx]], "DATETIME_DAY", val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, buflst[[idx]])
} else if (inherits(val, "POSIXt")) {
#cat("*** POSIXt case\n")
# could also use DATETIME_SEC here but _MS dominates it with higher resolution
bufptr <- libtiledb_query_buffer_alloc_ptr(x@ptr, attrtype, length(val))
bufptr <- libtiledb_query_buffer_assign_ptr(bufptr, attrtype, val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, bufptr)
buflst[[idx]] <- libtiledb_query_buffer_alloc_ptr(x@ptr, attrtype, length(val))
buflst[[idx]] <- libtiledb_query_buffer_assign_ptr(buflst[[idx]], attrtype, val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, buflst[[idx]])
} else if (inherits(val, "nanotime")) {
bufptr <- libtiledb_query_buffer_alloc_ptr(x@ptr, "DATETIME_NS", length(val))
bufptr <- libtiledb_query_buffer_assign_ptr(bufptr, "DATETIME_NS", val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, bufptr)
buflst[[idx]] <- libtiledb_query_buffer_alloc_ptr(x@ptr, "DATETIME_NS", length(val))
buflst[[idx]] <- libtiledb_query_buffer_assign_ptr(buflst[[idx]], "DATETIME_NS", val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, buflst[[idx]])
} else {
qry <- libtiledb_query_set_buffer(qry, aname, val)
buflst[[idx]] <- libtiledb_query_buffer_alloc_ptr(x@ptr, attrtype, length(val))
buflst[[idx]] <- libtiledb_query_buffer_assign_ptr(buflst[[idx]], attrtype, val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, buflst[[idx]])
}
}
qry <- libtiledb_query_submit(qry)
Expand Down
24 changes: 14 additions & 10 deletions R/SparseArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -364,33 +364,37 @@ setMethod("[<-", "tiledb_sparse",
qry <- libtiledb_query_set_coordinates(qry, zip_coords, domaintype[1])
## set attribute buffers
attr_names <- names(value)
## we need to hold on to the allocated buffers til the query fires
buflst <- vector(mode="list", length=length(attr_names))
for (idx in seq_along(value)) {
aname <- attr_names[[idx]]
val <- value[[idx]]
attribute <- libtiledb_array_schema_get_attribute_from_name(schema@ptr, aname)
attrtype <- libtiledb_attribute_get_type(attribute)

if (inherits(val, "POSIXt") || inherits(val, "nanotime")) {
bufptr <- libtiledb_query_buffer_alloc_ptr(x@ptr, attrtype, length(val))
bufptr <- libtiledb_query_buffer_assign_ptr(bufptr, attrtype, val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, bufptr)
buflst[[idx]] <- libtiledb_query_buffer_alloc_ptr(x@ptr, attrtype, length(val))
buflst[[idx]] <- libtiledb_query_buffer_assign_ptr(buflst[[idx]], attrtype, val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, buflst[[idx]])
} else if (inherits(val, "Date")) {
bufptr <- libtiledb_query_buffer_alloc_ptr(x@ptr, "DATETIME_DAY", length(val))
bufptr <- libtiledb_query_buffer_assign_ptr(bufptr, "DATETIME_DAY", val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, bufptr)
buflst[[idx]] <- libtiledb_query_buffer_alloc_ptr(x@ptr, "DATETIME_DAY", length(val))
buflst[[idx]] <- libtiledb_query_buffer_assign_ptr(buflst[[idx]], "DATETIME_DAY", val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, buflst[[idx]])
} else if (inherits(val, "character")) {
n <- ifelse(is.vector(val), length(val), prod(dim(val)))
string <- paste(val[1:n], collapse="")
## offsets starts: cumulative sum of all word lengths as provided by nchar
## but starting at 0 and then omitting the last
offs <- cumsum(c(0, head(sapply(val[1:n], nchar, USE.NAMES=FALSE), -1)))
bufptr <- libtiledb_query_buffer_var_char_create(offs, string)
qry <- libtiledb_query_set_buffer_var_char(qry, aname, bufptr)
buflst[[idx]] <- libtiledb_query_buffer_var_char_create(offs, string)
qry <- libtiledb_query_set_buffer_var_char(qry, aname, buflst[[idx]])
} else {
qry <- libtiledb_query_set_buffer(qry, attr_names[[idx]], value[[idx]])
#qry <- libtiledb_query_set_buffer(qry, aname, val)
buflst[[idx]] <- libtiledb_query_buffer_alloc_ptr(x@ptr, attrtype, length(val))
buflst[[idx]] <- libtiledb_query_buffer_assign_ptr(buflst[[idx]], attrtype, val)
qry <- libtiledb_query_set_buffer_ptr(qry, aname, buflst[[idx]])
}
}
#cat("About to submit\n")
qry <- libtiledb_query_submit(qry)
if (libtiledb_query_status(qry) != "COMPLETE") {
stop("error in incomplete sparse write query")
Expand Down
21 changes: 15 additions & 6 deletions R/tiledb_array.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ setMethod("[", "tiledb_array",
}
nonemptydom <- mapply(getDomain, dimnames, dimtypes, SIMPLIFY=FALSE)


## open query
qryptr <- libtiledb_query(ctx@ptr, arrptr, "READ")

Expand All @@ -181,8 +180,10 @@ setMethod("[", "tiledb_array",

## set range(s) on second dimension
if (is.null(js)) {
qryptr <- libtiledb_query_add_range_with_type(qryptr, 1, dimtypes[2],
nonemptydom[[2]][1], nonemptydom[[2]][2])
if (length(nonemptydom) == 2) {
qryptr <- libtiledb_query_add_range_with_type(qryptr, 1, dimtypes[2],
nonemptydom[[2]][1], nonemptydom[[2]][2])
}
} else {
if (!identical(eval(js[[1]]),list)) stop("The col argument must be a list.")
if (length(js) == 1) stop("No content to parse in col argument.")
Expand Down Expand Up @@ -356,9 +357,17 @@ setMethod("[<-", "tiledb_array",

buflist <- vector(mode="list", length=nc)
for (i in 1:nc) {
buflist[[i]] <- libtiledb_query_buffer_alloc_ptr(arrptr, alltypes[i], nr)
buflist[[i]] <- libtiledb_query_buffer_assign_ptr(buflist[[i]], alltypes[i], value[[i]])
qryptr <- libtiledb_query_set_buffer_ptr(qryptr, allnames[i], buflist[[i]])
if (alltypes[i] %in% c("CHAR", "ASCII")) { # variable length
txtvec <- as.character(value[[i]])
offsets <- c(0L, cumsum(nchar(txtvec[-length(txtvec)])))
data <- paste(txtvec, collapse="")
buflist[[i]] <- libtiledb_query_buffer_var_char_create(offsets, data)
qryptr <- libtiledb_query_set_buffer_var_char(qryptr, allnames[i], buflist[[i]])
} else {
buflist[[i]] <- libtiledb_query_buffer_alloc_ptr(arrptr, alltypes[i], nr)
buflist[[i]] <- libtiledb_query_buffer_assign_ptr(buflist[[i]], alltypes[i], value[[i]])
qryptr <- libtiledb_query_set_buffer_ptr(qryptr, allnames[i], buflist[[i]])
}
}

qryptr <- libtiledb_query_submit(qryptr)
Expand Down
4 changes: 3 additions & 1 deletion inst/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

## Improvements

- All S4 classes are now consistently documented or aliased
- If needed, the build system now builds TileDB and its required component (#118)

- All S4 classes are now consistently documented or aliased (#117)

# 0.6.0

Expand Down
6 changes: 6 additions & 0 deletions inst/examples/ex_1.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,15 @@ open_read_change_read <- function(uri) {
show(data)
}

simple_ex <- function(uri) {
arr <- tiledb_dense(uri, as.data.frame = TRUE)
show(arr[7:9, 2:3])
}

create_array(uri)
write_array(uri)
read_array(uri)
read_as_df(uri)
read_array_subset(uri)
open_read_change_read(uri)
simple_ex(uri)
34 changes: 28 additions & 6 deletions src/libtiledb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ tiledb_datatype_t _string_to_tiledb_datatype(std::string typestr) {
return TILEDB_DATETIME_FS;
} else if (typestr == "DATETIME_AS") {
return TILEDB_DATETIME_AS;
} else if (typestr == "UTF8") {
return TILEDB_STRING_UTF8;
} else {
Rcpp::stop("Unknown TileDB type '%s'", typestr.c_str());
}
Expand Down Expand Up @@ -1515,6 +1517,10 @@ NumericVector libtiledb_array_get_non_empty_domain_from_name(XPtr<tiledb::Array>
auto p = array->non_empty_domain<int64_t>(name);
std::vector<int64_t> v{p.first, p.second};
return makeInteger64(v);
} else if (typestr == "UINT64") {
auto p = array->non_empty_domain<uint64_t>(name);
std::vector<int64_t> v{ static_cast<int64_t>(p.first), static_cast<int64_t>(p.second) };
return makeInteger64(v);
} else if (typestr == "INT32") {
auto p = array->non_empty_domain<int32_t>(name);
return NumericVector::create(p.first, p.second);
Expand Down Expand Up @@ -2335,15 +2341,31 @@ XPtr<tiledb::Query> libtiledb_query_add_range_with_type(XPtr<tiledb::Query> quer
double stride = as<double>(strides);
query->add_range(uidx, start, end, stride);
}
} else if (typestr == "INT64" ||
typestr == "UINT64" ||
typestr == "UINT32") {
int64_t start = as<int64_t>(starts);
int64_t end = as<int64_t>(ends);
} else if (typestr == "INT64") {
int64_t start = makeScalarInteger64(as<double>(starts));
int64_t end = makeScalarInteger64(as<double>(ends));
if (strides == R_NilValue) {
query->add_range(uidx, start, end);
} else {
int64_t stride = as<int64_t>(strides);
int64_t stride = makeScalarInteger64(as<double>(strides));
query->add_range(uidx, start, end, stride);
}
} else if (typestr == "UINT64") {
uint64_t start = static_cast<uint64_t>(makeScalarInteger64(as<double>(starts)));
uint64_t end = static_cast<uint64_t>(makeScalarInteger64(as<double>(ends)));
if (strides == R_NilValue) {
query->add_range(uidx, start, end);
} else {
uint64_t stride = static_cast<uint64_t>(makeScalarInteger64(as<double>(strides)));
query->add_range(uidx, start, end, stride);
}
} else if (typestr == "UINT32") {
uint32_t start = as<uint32_t>(starts);
uint32_t end = as<uint32_t>(ends);
if (strides == R_NilValue) {
query->add_range(uidx, start, end);
} else {
uint32_t stride = as<int32_t>(strides);
query->add_range(uidx, start, end, stride);
}
} else if (typestr == "DATETIME_DAY" ||
Expand Down
4 changes: 3 additions & 1 deletion tests/testthat/test_ArraySchema.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ test_that("tiledb_array_schema full constructor argument values are correct", {


test_that("tiledb_array_schema created with encryption", {
uri <- tempfile()
dir.create(uri <- tempfile())
key <- "0123456789abcdeF0123456789abcdeF"

dom <- tiledb_domain(dims = c(tiledb_dim("rows", c(1L, 4L), 4L, "INT32"),
Expand All @@ -112,4 +112,6 @@ test_that("tiledb_array_schema created with encryption", {
expect_true(is(A, "tiledb_dense"))
##expect_true(is(schema(A), "tiledb_dense"))
## can't yet read / write as scheme getter not generalized for encryption

unlink(uri, recursive=TRUE)
})
45 changes: 4 additions & 41 deletions tests/testthat/test_SparseArray.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
library(tiledb)
context("tiledb::SparseArray")

unlink_and_create <- function(tmp) {
if (dir.exists(tmp)) {
unlink(tmp, recursive = TRUE)
dir.create(tmp)
} else {
dir.create(tmp)
}
return(tmp)
}
#
# test_that("Can read / write simple 1D sparse vector", {
# tmp <- tempfile()
Expand All @@ -35,11 +26,7 @@ unlink_and_create <- function(tmp) {
# })

test_that("test tiledb_subarray read for sparse array", {
#tmp <- tempfile()
#setup({
# unlink_and_create(tmp)
#})
tmp <- tempdir()
dir.create(tmp <- tempfile())

d1 <- tiledb_dim("d1", domain = c(1L, 5L))
d2 <- tiledb_dim("d2", domain = c(1L, 5L))
Expand All @@ -64,18 +51,11 @@ test_that("test tiledb_subarray read for sparse array", {
# vector range syntax
expect_equal(tiledb_subarray(arr, list(1,3,1,3))$val, unlist(as.list(dat[1:3, 1:3])))

#teardown({
# unlink(tmp, recursive = TRUE)
#})
unlink(tmp, recursive = TRUE)
})

test_that("test tiledb_subarray read for sparse array with attribute list", {
#tmp <- tempfile()
#setup({
# unlink_and_create(tmp)
#})
tmp <- tempdir()
dir.create(tmp <- tempfile())

d1 <- tiledb_dim("d1", domain = c(1L, 5L))
d2 <- tiledb_dim("d2", domain = c(1L, 5L))
Expand Down Expand Up @@ -104,18 +84,11 @@ test_that("test tiledb_subarray read for sparse array with attribute list", {
# vector range syntax
expect_equal(tiledb_subarray(arr, list(1,3,1,3), attrs=c("val2"))$val2, unlist(as.list(dat2[1:3, 1:3])))

#teardown({
# unlink(tmp, recursive = TRUE)
#})
unlink(tmp, recursive = TRUE)
})

test_that("test tiledb_subarray read for sparse array as dataframe", {
#tmp <- tempfile()
#setup({
# unlink_and_create(tmp)
#})
tmp <- tempdir()
dir.create(tmp <- tempfile())

d1 <- tiledb_dim("d1", domain = c(1L, 5L))
d2 <- tiledb_dim("d2", domain = c(1L, 5L))
Expand Down Expand Up @@ -145,19 +118,12 @@ test_that("test tiledb_subarray read for sparse array as dataframe", {
# vector range syntax
expect_equal(tiledb_subarray(arr, list(1,3,1,3), attrs=c("val2"))$val2, unlist(as.list(dat2[1:3, 1:3])))

#teardown({
# unlink(tmp, recursive = TRUE)
#})
unlink(tmp, recursive = TRUE)
})


test_that("test tiledb_subarray read/write for sparse array with list of coordinates", {
#tmp <- tempfile()
#setup({
# unlink_and_create(tmp)
#})
tmp <- tempdir()
dir.create(tmp <- tempfile())

d1 <- tiledb_dim("d1", domain = c(1L, 5L))
d2 <- tiledb_dim("d2", domain = c(1L, 5L))
Expand All @@ -182,8 +148,5 @@ test_that("test tiledb_subarray read/write for sparse array with list of coordin
# vector range syntax
expect_equal(arr[list(c(1:3), c(1:3))]$val, unlist(as.list(dat[1:3, 1:3])))

#teardown({
# unlink(tmp, recursive = TRUE)
#})
unlink(tmp, recursive = TRUE)
})

0 comments on commit 34117f9

Please sign in to comment.