Skip to content

Commit

Permalink
ARROW-11589: [R] Add methods for modifying Schemas
Browse files Browse the repository at this point in the history
Closes apache#9969 from nealrichardson/modify-schema

Authored-by: Neal Richardson <[email protected]>
Signed-off-by: Neal Richardson <[email protected]>
  • Loading branch information
nealrichardson authored and pull[bot] committed Dec 14, 2021
1 parent 7000172 commit 090b923
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 1 deletion.
2 changes: 2 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ S3method("$",Schema)
S3method("$",StructArray)
S3method("$",SubTreeFileSystem)
S3method("$<-",ArrowTabular)
S3method("$<-",Schema)
S3method("==",ArrowObject)
S3method("[",ArrowDatum)
S3method("[",ArrowTabular)
Expand All @@ -16,6 +17,7 @@ S3method("[[",ArrowTabular)
S3method("[[",Schema)
S3method("[[",StructArray)
S3method("[[<-",ArrowTabular)
S3method("[[<-",Schema)
S3method("names<-",ArrowTabular)
S3method(Ops,ArrowDatum)
S3method(Ops,Expression)
Expand Down
12 changes: 12 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 50 additions & 0 deletions r/R/schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ Schema <- R6Class("Schema",
},
field = function(i) Schema__field(self, i),
GetFieldByName = function(x) Schema__GetFieldByName(self, x),
AddField = function(i, field) {
assert_is(field, "Field")
Schema__AddField(self, i, field)
},
SetField = function(i, field) {
assert_is(field, "Field")
Schema__SetField(self, i, field)
},
RemoveField = function(i) Schema__RemoveField(self, i),
serialize = function() Schema__serialize(self),
WithMetadata = function(metadata = NULL) {
metadata <- prepare_key_value_metadata(metadata)
Expand Down Expand Up @@ -173,6 +182,47 @@ length.Schema <- function(x) x$num_fields
}
}

#' @export
`[[<-.Schema` <- function(x, i, value) {
assert_that(length(i) == 1)
if (is.character(i)) {
field_names <- names(x)
if (anyDuplicated(field_names)) {
stop("Cannot update field by name with duplicates", call. = FALSE)
}

# If i is character, it's the field name
if (!is.null(value) && !inherits(value, "Field")) {
value <- field(i, as_type(value, "value"))
}

# No match means we're adding to the end
i <- match(i, field_names, nomatch = length(field_names) + 1L)
} else {
assert_that(is.numeric(i), !is.na(i), i > 0)
# If i is numeric and we have a type,
# we need to grab the existing field name for the new one
if (!is.null(value) && !inherits(value, "Field")) {
value <- field(names(x)[i], as_type(value, "value"))
}
}

i <- as.integer(i - 1L)
if (i >= length(x)) {
if (!is.null(value)) {
x <- x$AddField(i, value)
}
} else if (is.null(value)) {
x <- x$RemoveField(i)
} else {
x <- x$SetField(i, value)
}
x
}

#' @export
`$<-.Schema` <- `$<-.ArrowTabular`

#' @export
`[.Schema` <- function(x, i, ...) {
if (is.logical(i)) {
Expand Down
53 changes: 53 additions & 0 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions r/src/schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,26 @@ std::shared_ptr<arrow::Field> Schema__field(const std::shared_ptr<arrow::Schema>
return s->field(i);
}

// [[arrow::export]]
std::shared_ptr<arrow::Schema> Schema__AddField(
const std::shared_ptr<arrow::Schema>& s, int i,
const std::shared_ptr<arrow::Field>& field) {
return ValueOrStop(s->AddField(i, field));
}

// [[arrow::export]]
std::shared_ptr<arrow::Schema> Schema__SetField(
const std::shared_ptr<arrow::Schema>& s, int i,
const std::shared_ptr<arrow::Field>& field) {
return ValueOrStop(s->SetField(i, field));
}

// [[arrow::export]]
std::shared_ptr<arrow::Schema> Schema__RemoveField(
const std::shared_ptr<arrow::Schema>& s, int i) {
return ValueOrStop(s->RemoveField(i));
}

// [[arrow::export]]
std::shared_ptr<arrow::Field> Schema__GetFieldByName(
const std::shared_ptr<arrow::Schema>& s, std::string x) {
Expand Down
50 changes: 49 additions & 1 deletion r/tests/testthat/test-schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# specific language governing permissions and limitations
# under the License.

context("Schema")

test_that("Alternate type names are supported", {
expect_equal(
Expand Down Expand Up @@ -48,6 +47,11 @@ test_that("Schema $GetFieldByName", {
})

test_that("Schema extract (returns Field)", {
# TODO: should this return a Field or the Type?
# I think of Schema like list(name = type, name = type, ...)
# but in practice it is more like list(list(name, type), list(name, type), ...)
# -> Field names in a Schema may be duplicated
# -> Fields may have metadata (though we don't really handle that in R)
schm <- schema(b = double(), c = string())
expect_equal(schm$b, field("b", double()))
expect_equal(schm[["b"]], field("b", double()))
Expand All @@ -65,7 +69,51 @@ test_that("Schema slicing", {
expect_equal(schm[c(FALSE, TRUE, TRUE)], schema(c = string(), d = int8()))
expect_error(schm[c("c", "ZZZ")], 'Invalid field name: "ZZZ"')
expect_error(schm[c("XXX", "c", "ZZZ")], 'Invalid field names: "XXX" and "ZZZ"')
})

test_that("Schema modification", {
schm <- schema(b = double(), c = string(), d = int8())
schm$c <- boolean()
expect_equal(schm, schema(b = double(), c = boolean(), d = int8()))
schm[["d"]] <- int16()
expect_equal(schm, schema(b = double(), c = boolean(), d = int16()))
schm$b <- NULL
expect_equal(schm, schema(c = boolean(), d = int16()))
# NULL assigning something that doesn't exist doesn't modify
schm$zzzz <- NULL
expect_equal(schm, schema(c = boolean(), d = int16()))
# Adding a field
schm$fff <- int32()
expect_equal(schm, schema(c = boolean(), d = int16(), fff = int32()))

# By index
schm <- schema(b = double(), c = string(), d = int8())
schm[[2]] <- int32()
expect_equal(schm, schema(b = double(), c = int32(), d = int8()))

# Adding actual Fields
# If assigning by name, note that this can modify the resulting name
schm <- schema(b = double(), c = string(), d = int8())
schm$c <- field("x", int32())
expect_equal(schm, schema(b = double(), x = int32(), d = int8()))
schm[[2]] <- field("y", int64())
expect_equal(schm, schema(b = double(), y = int64(), d = int8()))

# Error handling
expect_error(schm$c <- 4, "value must be a DataType")
expect_error(schm[[-3]] <- int32(), "i not greater than 0")
expect_error(schm[[0]] <- int32(), "i not greater than 0")
expect_error(schm[[NA_integer_]] <- int32(), "!is.na(i) is not TRUE", fixed = TRUE)
expect_error(schm[[TRUE]] <- int32(), "i is not a numeric or integer vector")
expect_error(schm[[c(2, 4)]] <- int32(), "length(i) not equal to 1", fixed = TRUE)
})

test_that("Metadata is preserved when modifying Schema", {
schm <- schema(b = double(), c = string(), d = int8())
schm$metadata$foo <- "bar"
expect_identical(schm$metadata, list(foo = "bar"))
schm$c <- field("x", int32())
expect_identical(schm$metadata, list(foo = "bar"))
})

test_that("reading schema from Buffer", {
Expand Down

0 comments on commit 090b923

Please sign in to comment.