Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add array schema evolution support for enumerations #590

Merged
merged 4 commits into from
Sep 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tiledb
Type: Package
Version: 0.21.0
Version: 0.21.0.1
Title: Universal Storage Engine for Sparse and Dense Multidimensional Arrays
Authors@R: c(person("TileDB, Inc.", role = c("aut", "cph")),
person("Dirk", "Eddelbuettel", email = "[email protected]", role = "cre"))
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,10 @@ export(tiledb_array_schema)
export(tiledb_array_schema_check)
export(tiledb_array_schema_evolution)
export(tiledb_array_schema_evolution_add_attribute)
export(tiledb_array_schema_evolution_add_enumeration)
export(tiledb_array_schema_evolution_array_evolve)
export(tiledb_array_schema_evolution_drop_attribute)
export(tiledb_array_schema_evolution_drop_enumeration)
export(tiledb_array_schema_get_allows_dups)
export(tiledb_array_schema_get_capacity)
export(tiledb_array_schema_set_allows_dups)
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# tiledb ongoing development

* This release of the R package builds against [TileDB 2.17.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.17.0), and has also been tested against earlier releases as well as the development version (#583, #587)

## Improvements

* Array schema evolution has been extended to support enumerations (#590)


# tiledb 0.21.0

* This release of the R package builds against [TileDB 2.17.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.17.0), and has also been tested against earlier releases as well as the development version (#583, #587)
Expand Down
39 changes: 39 additions & 0 deletions R/ArraySchemaEvolution.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,42 @@ tiledb_array_schema_evolution_array_evolve <- function(object, uri) {
object@ptr <- libtiledb_array_schema_evolution_array_evolve(object@ptr, uri)
invisible(object)
}

#' Add an Enumeration to a TileDB Array Schema Evolution object
#'
#' Creates an enumeration called \code{name} from the character values in
#' \code{enums} and adds it to the supplied schema evolution object.
#'
#' @param object A TileDB 'array_schema_evolution' object
#' @param name A character value with the name for the Enumeration
#' @param enums A character vector with the values of the Enumeration
#' @param ordered (optional) A boolean switch whether the enumeration is ordered,
#' defaults to \code{FALSE}
#' @param ctx (optional) A TileDB Ctx object; if not supplied the default
#' context object is retrieved
#' @return The modified 'array_schema_evolution' object, invisibly
#' @export
tiledb_array_schema_evolution_add_enumeration <- function(object, name, enums, ordered=FALSE,
                                                          ctx = tiledb_get_context()) {
    ## stopifnot() checks the conditions in order and reports the name of the
    ## first one that fails, so each message must name the actual argument
    stopifnot("The first argument must be an Array Schema Evolution object" =
                  is(object, "tiledb_array_schema_evolution"),
              "The 'name' argument must be a scalar character object" =
                  is.character(name) && length(name) == 1,
              "The 'enums' argument must be a character object" = is.character(enums),
              "This function needs TileDB 2.17.0 or later" = tiledb_version(TRUE) >= "2.17.0",
              "The 'ctx' argument must be a Context object" = is(ctx, "tiledb_ctx"))
    ## the fifth argument is the wrapper's 'nullable' flag, always passed as FALSE here
    object@ptr <- libtiledb_array_schema_evolution_add_enumeration(ctx@ptr, object@ptr, name,
                                                                   enums, FALSE, ordered)
    invisible(object)
}

#' Drop an Enumeration given by name from a TileDB Array Schema Evolution object
#'
#' @param object A TileDB 'array_schema_evolution' object
#' @param attrname A character value with the name of the enumeration to drop
#' @return The modified 'array_schema_evolution' object, invisibly
#' @export
tiledb_array_schema_evolution_drop_enumeration <- function(object, attrname) {
    ## the name is handed to the compiled layer as a single string, so require
    ## a scalar here (same check as 'name' in the add_enumeration function)
    stopifnot("The first argument must be an Array Schema Evolution object" =
                  is(object, "tiledb_array_schema_evolution"),
              "The 'attrname' argument must be a scalar character object" =
                  is.character(attrname) && length(attrname) == 1,
              "This function needs TileDB 2.17.0 or later" = tiledb_version(TRUE) >= "2.17.0")
    object@ptr <- libtiledb_array_schema_evolution_drop_enumeration(object@ptr, attrname)
    invisible(object)
}
8 changes: 8 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,14 @@ libtiledb_array_schema_evolution_array_evolve <- function(ase, uri) {
.Call(`_tiledb_libtiledb_array_schema_evolution_array_evolve`, ase, uri)
}

# Forwards all six arguments, in order, to the registered native routine
# `_tiledb_libtiledb_array_schema_evolution_add_enumeration` via .Call().
libtiledb_array_schema_evolution_add_enumeration <- function(ctx, ase, enum_name, values, nullable = FALSE, ordered = FALSE) {
    .Call(
        `_tiledb_libtiledb_array_schema_evolution_add_enumeration`,
        ctx, ase, enum_name, values, nullable, ordered
    )
}

# Forwards both arguments, in order, to the registered native routine
# `_tiledb_libtiledb_array_schema_evolution_drop_enumeration` via .Call().
libtiledb_array_schema_evolution_drop_enumeration <- function(ase, attrname) {
    .Call(
        `_tiledb_libtiledb_array_schema_evolution_drop_enumeration`,
        ase, attrname
    )
}

libtiledb_array_create <- function(uri, schema) {
.Call(`_tiledb_libtiledb_array_create`, uri, schema)
}
Expand Down
9 changes: 6 additions & 3 deletions R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -988,10 +988,13 @@ setMethod("[", "tiledb_array",
if (!is.null(dictionaries[[name]])) { # if there is a dictionary
dct <- dictionaries[[name]] # access it from utility
ord <- ordered_dict[[name]]
## the following expands out to a char vector first; we can do better
## col <- factor(dct[col+1], levels=dct)
## so we do it "by hand"

col <- col + 1L # adjust for zero-index C/C++ layer

## special case from schema evolution could have added twice so correct
if (min(col, na.rm=TRUE) == 2 && max(col, na.rm=TRUE) == length(dct) + 1)
col <- col - 1L

attr(col, "levels") <- dct
attr(col, "class") <- if (ord) c("ordered", "factor") else "factor"
}
Expand Down
33 changes: 33 additions & 0 deletions inst/tinytest/test_arrayschemaevolution.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,36 @@ arr <- tiledb_array(uri, return_as="data.frame", extended=FALSE)
res <- arr[]
expect_equal(dim(res), c(10,2))
expect_equal(colnames(res), c("key", "foo"))


## Everything below exercises enumeration support in schema evolution,
## so leave the file early when the linked TileDB is too old
if (tiledb_version(TRUE) < "2.17.0") exit_file("Needs TileDB 2.17.* or later")

## Fixture: ten rows whose integer 'val' column will later index the enumeration
enum_levels <- c("red", "blue", "green", "orange", "pink")
expected_codes <- c(1:5, 5:1)
df <- data.frame(key = letters[1:10], val = expected_codes)
uri <- tempfile()
arr <- fromDataFrame(df, uri)

## Keep a copy of the 'val' attribute before it is removed from the schema
val_attr <- attrs(schema(uri))[["val"]]

## Step 1: drop the existing attribute and apply that evolution
drop_ase <- tiledb_array_schema_evolution_drop_attribute(tiledb_array_schema_evolution(), "val")
tiledb_array_schema_evolution_array_evolve(drop_ase, uri)

## Step 2: start a fresh evolution and register the enumeration under a name
add_ase <- tiledb_array_schema_evolution_add_enumeration(tiledb_array_schema_evolution(),
                                                         "frobo", enum_levels)

## Step 3: tie the saved attribute to the enumeration, add it back, and evolve
val_attr <- tiledb_attribute_set_enumeration_name(val_attr, "frobo")
add_ase <- tiledb_array_schema_evolution_add_attribute(add_ase, val_attr)
tiledb_array_schema_evolution_array_evolve(add_ase, uri)

## Verify: 'val' now reads back as a factor with the enumeration as its levels
arr <- tiledb_array(uri, return_as="data.table")
res <- arr[]
expect_true(is.factor(res$val))
expect_equal(levels(res$val), enum_levels)
expect_equal(as.integer(res$val), expected_codes)
32 changes: 32 additions & 0 deletions man/tiledb_array_schema_evolution_add_enumeration.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/tiledb_array_schema_evolution_drop_enumeration.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1310,6 +1310,34 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// libtiledb_array_schema_evolution_add_enumeration
// NOTE(review): this file appears to be generated Rcpp glue; hand-written
// comments here may be lost if it is regenerated -- confirm before relying on them.
// Declaration of the implementation function that the wrapper below calls.
XPtr<tiledb::ArraySchemaEvolution> libtiledb_array_schema_evolution_add_enumeration(XPtr<tiledb::Context> ctx, XPtr<tiledb::ArraySchemaEvolution> ase, const std::string& enum_name, std::vector<std::string> values, bool nullable, bool ordered);
// C-callable entry point: receives six SEXP arguments, converts each to its
// C++ type, calls the implementation, and returns the wrapped result as SEXP.
RcppExport SEXP _tiledb_libtiledb_array_schema_evolution_add_enumeration(SEXP ctxSEXP, SEXP aseSEXP, SEXP enum_nameSEXP, SEXP valuesSEXP, SEXP nullableSEXP, SEXP orderedSEXP) {
// BEGIN_RCPP / END_RCPP bracket the body; presumably they translate C++
// exceptions into R errors -- see the Rcpp macro definitions to confirm.
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
// One input_parameter conversion per argument, in signature order.
Rcpp::traits::input_parameter< XPtr<tiledb::Context> >::type ctx(ctxSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::ArraySchemaEvolution> >::type ase(aseSEXP);
Rcpp::traits::input_parameter< const std::string& >::type enum_name(enum_nameSEXP);
Rcpp::traits::input_parameter< std::vector<std::string> >::type values(valuesSEXP);
Rcpp::traits::input_parameter< bool >::type nullable(nullableSEXP);
Rcpp::traits::input_parameter< bool >::type ordered(orderedSEXP);
// Call the implementation and wrap its XPtr result for return to R.
rcpp_result_gen = Rcpp::wrap(libtiledb_array_schema_evolution_add_enumeration(ctx, ase, enum_name, values, nullable, ordered));
return rcpp_result_gen;
END_RCPP
}
// libtiledb_array_schema_evolution_drop_enumeration
// NOTE(review): this file appears to be generated Rcpp glue; hand-written
// comments here may be lost if it is regenerated -- confirm before relying on them.
// Declaration of the implementation function that the wrapper below calls.
XPtr<tiledb::ArraySchemaEvolution> libtiledb_array_schema_evolution_drop_enumeration(XPtr<tiledb::ArraySchemaEvolution> ase, const std::string& attrname);
// C-callable entry point: receives two SEXP arguments, converts each to its
// C++ type, calls the implementation, and returns the wrapped result as SEXP.
RcppExport SEXP _tiledb_libtiledb_array_schema_evolution_drop_enumeration(SEXP aseSEXP, SEXP attrnameSEXP) {
// BEGIN_RCPP / END_RCPP bracket the body; presumably they translate C++
// exceptions into R errors -- see the Rcpp macro definitions to confirm.
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
// One input_parameter conversion per argument, in signature order.
Rcpp::traits::input_parameter< XPtr<tiledb::ArraySchemaEvolution> >::type ase(aseSEXP);
Rcpp::traits::input_parameter< const std::string& >::type attrname(attrnameSEXP);
// Call the implementation and wrap its XPtr result for return to R.
rcpp_result_gen = Rcpp::wrap(libtiledb_array_schema_evolution_drop_enumeration(ase, attrname));
return rcpp_result_gen;
END_RCPP
}
// libtiledb_array_create
std::string libtiledb_array_create(std::string uri, XPtr<tiledb::ArraySchema> schema);
RcppExport SEXP _tiledb_libtiledb_array_create(SEXP uriSEXP, SEXP schemaSEXP) {
Expand Down Expand Up @@ -3527,6 +3555,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledb_libtiledb_array_schema_evolution_add_attribute", (DL_FUNC) &_tiledb_libtiledb_array_schema_evolution_add_attribute, 2},
{"_tiledb_libtiledb_array_schema_evolution_drop_attribute", (DL_FUNC) &_tiledb_libtiledb_array_schema_evolution_drop_attribute, 2},
{"_tiledb_libtiledb_array_schema_evolution_array_evolve", (DL_FUNC) &_tiledb_libtiledb_array_schema_evolution_array_evolve, 2},
{"_tiledb_libtiledb_array_schema_evolution_add_enumeration", (DL_FUNC) &_tiledb_libtiledb_array_schema_evolution_add_enumeration, 6},
{"_tiledb_libtiledb_array_schema_evolution_drop_enumeration", (DL_FUNC) &_tiledb_libtiledb_array_schema_evolution_drop_enumeration, 2},
{"_tiledb_libtiledb_array_create", (DL_FUNC) &_tiledb_libtiledb_array_create, 2},
{"_tiledb_libtiledb_array_create_with_key", (DL_FUNC) &_tiledb_libtiledb_array_create_with_key, 3},
{"_tiledb_libtiledb_array_open", (DL_FUNC) &_tiledb_libtiledb_array_open, 3},
Expand Down
31 changes: 31 additions & 0 deletions src/libtiledb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2038,6 +2038,37 @@ libtiledb_array_schema_evolution_array_evolve(XPtr<tiledb::ArraySchemaEvolution>
return make_xptr<tiledb::ArraySchemaEvolution>(ptr);
}

// Builds an enumeration named `enum_name` from `values` (optionally ordered)
// and adds it to the schema evolution object `ase`, returning a new external
// pointer holding a copy of the result. The `nullable` flag is accepted but
// not used by this function. When compiled against TileDB older than 2.17.0
// the input pointer is returned unchanged.
//[[Rcpp::export]]
XPtr<tiledb::ArraySchemaEvolution>
libtiledb_array_schema_evolution_add_enumeration(XPtr<tiledb::Context> ctx,
                                                 XPtr<tiledb::ArraySchemaEvolution> ase,
                                                 const std::string & enum_name,
                                                 std::vector<std::string> values,
                                                 bool nullable = false,
                                                 bool ordered = false) {
    // Validate both external pointers before dereferencing either of them
    check_xptr_tag<tiledb::Context>(ctx);
    check_xptr_tag<tiledb::ArraySchemaEvolution>(ase);
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
    const tiledb::Context& context = *ctx.get();
    tiledb::Enumeration enumeration =
        tiledb::Enumeration::create(context, enum_name, values, ordered);
    tiledb::ArraySchemaEvolution evolved = ase->add_enumeration(enumeration);
    // Hand a heap-allocated copy to a fresh external pointer
    return make_xptr<tiledb::ArraySchemaEvolution>(new tiledb::ArraySchemaEvolution(evolved));
#else
    return ase;
#endif
}

// Registers the removal of the enumeration named `attrname` on the schema
// evolution object `ase` and returns a new external pointer holding a copy of
// the result. When compiled against TileDB older than 2.17.0 the input
// pointer is returned unchanged.
//[[Rcpp::export]]
XPtr<tiledb::ArraySchemaEvolution>
libtiledb_array_schema_evolution_drop_enumeration(XPtr<tiledb::ArraySchemaEvolution> ase,
                                                  const std::string & attrname) {
    check_xptr_tag<tiledb::ArraySchemaEvolution>(ase);
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
    // Must call drop_enumeration(): drop_attribute() would instead stage the
    // removal of an attribute with that name.
    tiledb::ArraySchemaEvolution res = ase->drop_enumeration(attrname);
    auto ptr = new tiledb::ArraySchemaEvolution(res);
    return make_xptr<tiledb::ArraySchemaEvolution>(ptr);
#endif
    return ase;
}


/**
Expand Down