Skip to content

Commit

Permalink
refactor: Use nanoarrow_vctr in forthcoming nanoarrow release (#36)
Browse files Browse the repository at this point in the history
GeoArrow was the wrong place for this class; it is a much better fit in
nanoarrow. See apache/arrow-nanoarrow#461 for the implementation.
  • Loading branch information
paleolimbot authored May 27, 2024
1 parent 7065a18 commit 4eaf0b0
Show file tree
Hide file tree
Showing 10 changed files with 19 additions and 429 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Imports:
nanoarrow (>= 0.3.0),
nanoarrow (>= 0.5.0),
wk (>= 0.6.0)
LinkingTo:
wk
Expand Down
7 changes: 0 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# Generated by roxygen2: do not edit by hand

S3method("[",geoarrow_vctr)
S3method("[<-",geoarrow_vctr)
S3method("[[<-",geoarrow_vctr)
S3method(as.character,geoarrow_vctr)
S3method(as_geoarrow_array,character)
S3method(as_geoarrow_array,default)
Expand All @@ -18,8 +15,6 @@ S3method(as_geoarrow_array_stream,geoarrow_vctr)
S3method(as_geoarrow_array_stream,nanoarrow_array_stream)
S3method(as_nanoarrow_array,sfc)
S3method(as_nanoarrow_array_extension,geoarrow_extension_spec)
S3method(as_nanoarrow_array_stream,geoarrow_vctr)
S3method(as_nanoarrow_schema,geoarrow_vctr)
S3method(convert_array,geoarrow_vctr)
S3method(convert_array,sfc)
S3method(convert_array,wk_wkb)
Expand All @@ -31,7 +26,6 @@ S3method(infer_geoarrow_schema,default)
S3method(infer_geoarrow_schema,nanoarrow_array)
S3method(infer_geoarrow_schema,nanoarrow_array_stream)
S3method(infer_nanoarrow_ptype_extension,geoarrow_extension_spec)
S3method(infer_nanoarrow_schema,geoarrow_vctr)
S3method(infer_nanoarrow_schema,sfc)
S3method(infer_nanoarrow_schema,wk_wkb)
S3method(infer_nanoarrow_schema,wk_wkt)
Expand All @@ -53,7 +47,6 @@ export(na_extension_wkb)
export(na_extension_wkt)
importFrom(nanoarrow,as_nanoarrow_array)
importFrom(nanoarrow,as_nanoarrow_array_extension)
importFrom(nanoarrow,as_nanoarrow_array_stream)
importFrom(nanoarrow,as_nanoarrow_schema)
importFrom(nanoarrow,convert_array)
importFrom(nanoarrow,convert_array_extension)
Expand Down
3 changes: 2 additions & 1 deletion R/pkg-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ as_arrow_array.geoarrow_vctr <- function(x, ..., type = NULL) {
}
}

#' @importFrom nanoarrow as_nanoarrow_schema
as_chunked_array.geoarrow_vctr <- function(x, ..., type = NULL) {
if (is.null(type)) {
schema <- NULL
Expand All @@ -18,7 +19,7 @@ as_chunked_array.geoarrow_vctr <- function(x, ..., type = NULL) {
}

# as_geoarrow_array_stream() delegates to nanoarrow::as_nanoarrow_array_stream(),
# which applies the indices if the vctr is sliced
stream <- as_nanoarrow_array_stream(x, schema = schema)
stream <- as_geoarrow_array_stream(x, schema = schema)
chunks <- nanoarrow::collect_array_stream(stream, validate = FALSE)
type <- arrow::as_data_type(type)

Expand Down
2 changes: 1 addition & 1 deletion R/pkg-nanoarrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ register_geoarrow_extension <- function() {
#' @importFrom nanoarrow infer_nanoarrow_ptype_extension
#' @export
infer_nanoarrow_ptype_extension.geoarrow_extension_spec <- function(extension_spec, x, ...) {
new_geoarrow_vctr(list(), x, integer())
nanoarrow::nanoarrow_vctr(schema = x, subclass = "geoarrow_vctr")
}

#' @importFrom nanoarrow convert_array_extension
Expand Down
168 changes: 3 additions & 165 deletions R/vctr.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,48 +18,7 @@ as_geoarrow_vctr <- function(x, ..., schema = NULL) {
}

stream <- as_geoarrow_array_stream(x, ..., schema = schema)
chunks <- nanoarrow::collect_array_stream(stream, validate = FALSE)
new_geoarrow_vctr(chunks, stream$get_schema())
}

new_geoarrow_vctr <- function(chunks, schema, indices = NULL) {
offsets <- .Call(geoarrow_c_vctr_chunk_offsets, chunks)
if (is.null(indices)) {
indices <- seq_len(offsets[length(offsets)])
}

structure(
indices,
schema = schema,
chunks = chunks,
offsets = offsets,
class = c("geoarrow_vctr", "wk_vctr")
)
}

#' @export
`[.geoarrow_vctr` <- function(x, i) {
attrs <- attributes(x)
x <- NextMethod()

if (is.null(vctr_as_slice(x))) {
stop(
"Can't subset geoarrow_vctr with non-slice (e.g., only i:j indexing is supported)"
)
}

attributes(x) <- attrs
x
}

#' @export
`[<-.geoarrow_vctr` <- function(x, i, value) {
stop("subset assignment for geoarrow_vctr is not supported")
}

#' @export
`[[<-.geoarrow_vctr` <- function(x, i, value) {
stop("subset assignment for geoarrow_vctr is not supported")
nanoarrow::as_nanoarrow_vctr(stream, subclass = "geoarrow_vctr")
}

#' @export
Expand Down Expand Up @@ -102,129 +61,8 @@ as.character.geoarrow_vctr <- function(x, ...) {
format(x, ...)
}

#' @export
infer_nanoarrow_schema.geoarrow_vctr <- function(x, ...) {
attr(x, "schema", exact = TRUE)
}

# Because zero-length vctrs are R's way of communicating "type", implement
# as_nanoarrow_schema() here so that it works in places that expect a type
#' @importFrom nanoarrow as_nanoarrow_schema
#' @export
as_nanoarrow_schema.geoarrow_vctr <- function(x, ...) {
attr(x, "schema", exact = TRUE)
}

#' @export
as_geoarrow_array_stream.geoarrow_vctr <- function(x, ..., schema = NULL) {
as_nanoarrow_array_stream.geoarrow_vctr(x, ..., schema = schema)
}

#' @importFrom nanoarrow as_nanoarrow_array_stream
#' @export
as_nanoarrow_array_stream.geoarrow_vctr <- function(x, ..., schema = NULL) {
if (!is.null(schema)) {
stream <- as_nanoarrow_array_stream(x, schema = NULL)
return(as_geoarrow_array_stream(stream, schema = schema))
}

slice <- vctr_as_slice(x)
if (is.null(slice)) {
stop("Can't resolve non-slice geoarrow_vctr to nanoarrow_array_stream")
}

x_schema <- attr(x, "schema", exact = TRUE)

# Zero-size slice can be an array stream with zero batches
if (slice[2] == 0) {
return(nanoarrow::basic_array_stream(list(), schema = x_schema))
}

# Full slice doesn't need slicing logic
offsets <- attr(x, "offsets", exact = TRUE)
batches <- attr(x, "chunks", exact = TRUE)
if (slice[1] == 1 && slice[2] == max(offsets)) {
return(
nanoarrow::basic_array_stream(
batches,
schema = x_schema,
validate = FALSE
)
)
}

# Calculate first and last slice information
first_index <- slice[1] - 1L
end_index <- first_index + slice[2]
last_index <- end_index - 1L
first_chunk_index <- vctr_resolve_chunk(first_index, offsets)
last_chunk_index <- vctr_resolve_chunk(last_index, offsets)

first_chunk_offset <- first_index - offsets[first_chunk_index + 1L]
first_chunk_length <- offsets[first_chunk_index + 2L] - first_index
last_chunk_offset <- 0L
last_chunk_length <- end_index - offsets[last_chunk_index + 1L]

# Calculate first and last slices
if (first_chunk_index == last_chunk_index) {
batch <- vctr_array_slice(
batches[[first_chunk_index + 1L]],
first_chunk_offset,
last_chunk_length - first_chunk_offset
)

return(
nanoarrow::basic_array_stream(
list(batch),
schema = x_schema,
validate = FALSE
)
)
}

batch1 <- vctr_array_slice(
batches[[first_chunk_index + 1L]],
first_chunk_offset,
first_chunk_length
)

batchn <- vctr_array_slice(
batches[[last_chunk_index + 1L]],
last_chunk_offset,
last_chunk_length
)

seq_mid <- seq_len(last_chunk_index - first_chunk_index - 1)
batch_mid <- batches[first_chunk_index + seq_mid]

nanoarrow::basic_array_stream(
c(
list(batch1),
batch_mid,
list(batchn)
),
schema = x_schema,
validate = FALSE
)
}


# Utilities for vctr methods

vctr_resolve_chunk <- function(x, offsets) {
.Call(geoarrow_c_vctr_chunk_resolve, x, offsets)
}

vctr_as_slice <- function(x) {
.Call(geoarrow_c_vctr_as_slice, x)
}

vctr_array_slice <- function(x, offset, length) {
new_offset <- x$offset + offset
new_length <- length
nanoarrow::nanoarrow_array_modify(
x,
list(offset = new_offset, length = new_length),
validate = FALSE
)
stream <- nanoarrow::as_nanoarrow_array_stream(x)
as_geoarrow_array_stream(stream, schema = schema)
}
6 changes: 0 additions & 6 deletions src/r-init.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ extern SEXP geoarrow_c_make_type(SEXP geometry_type_sexp, SEXP dimensions_sexp,
SEXP coord_type_sexp);
extern SEXP geoarrow_c_schema_init_extension(SEXP schema_xptr, SEXP type_sexp);
extern SEXP geoarrow_c_schema_parse(SEXP schema_xptr, SEXP extension_name_sexp);
extern SEXP geoarrow_c_vctr_chunk_offsets(SEXP array_list);
extern SEXP geoarrow_c_vctr_chunk_resolve(SEXP indices_sexp, SEXP offsets_sexp);
extern SEXP geoarrow_c_vctr_as_slice(SEXP indices_sexp);
extern SEXP geoarrow_c_handle_stream(SEXP data, SEXP handler_xptr);
extern SEXP geoarrow_c_writer_new(SEXP schema_xptr, SEXP array_out_xptr);

Expand All @@ -28,9 +25,6 @@ static const R_CallMethodDef CallEntries[] = {
{"geoarrow_c_make_type", (DL_FUNC)&geoarrow_c_make_type, 3},
{"geoarrow_c_schema_init_extension", (DL_FUNC)&geoarrow_c_schema_init_extension, 2},
{"geoarrow_c_schema_parse", (DL_FUNC)&geoarrow_c_schema_parse, 2},
{"geoarrow_c_vctr_chunk_offsets", (DL_FUNC)&geoarrow_c_vctr_chunk_offsets, 1},
{"geoarrow_c_vctr_chunk_resolve", (DL_FUNC)&geoarrow_c_vctr_chunk_resolve, 2},
{"geoarrow_c_vctr_as_slice", (DL_FUNC)&geoarrow_c_vctr_as_slice, 1},
{"geoarrow_c_handle_stream", (DL_FUNC)&geoarrow_c_handle_stream, 2},
{"geoarrow_c_writer_new", (DL_FUNC)&geoarrow_c_writer_new, 2},
{NULL, NULL, 0}};
Expand Down
115 changes: 0 additions & 115 deletions src/r-vctr.c

This file was deleted.

Loading

0 comments on commit 4eaf0b0

Please sign in to comment.