Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Use nanoarrow_vctr in forthcoming nanoarrow release #36

Merged
merged 2 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Imports:
nanoarrow (>= 0.3.0),
nanoarrow (>= 0.5.0),
wk (>= 0.6.0)
LinkingTo:
wk
Expand Down
7 changes: 0 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# Generated by roxygen2: do not edit by hand

S3method("[",geoarrow_vctr)
S3method("[<-",geoarrow_vctr)
S3method("[[<-",geoarrow_vctr)
S3method(as.character,geoarrow_vctr)
S3method(as_geoarrow_array,character)
S3method(as_geoarrow_array,default)
Expand All @@ -18,8 +15,6 @@ S3method(as_geoarrow_array_stream,geoarrow_vctr)
S3method(as_geoarrow_array_stream,nanoarrow_array_stream)
S3method(as_nanoarrow_array,sfc)
S3method(as_nanoarrow_array_extension,geoarrow_extension_spec)
S3method(as_nanoarrow_array_stream,geoarrow_vctr)
S3method(as_nanoarrow_schema,geoarrow_vctr)
S3method(convert_array,geoarrow_vctr)
S3method(convert_array,sfc)
S3method(convert_array,wk_wkb)
Expand All @@ -31,7 +26,6 @@ S3method(infer_geoarrow_schema,default)
S3method(infer_geoarrow_schema,nanoarrow_array)
S3method(infer_geoarrow_schema,nanoarrow_array_stream)
S3method(infer_nanoarrow_ptype_extension,geoarrow_extension_spec)
S3method(infer_nanoarrow_schema,geoarrow_vctr)
S3method(infer_nanoarrow_schema,sfc)
S3method(infer_nanoarrow_schema,wk_wkb)
S3method(infer_nanoarrow_schema,wk_wkt)
Expand All @@ -53,7 +47,6 @@ export(na_extension_wkb)
export(na_extension_wkt)
importFrom(nanoarrow,as_nanoarrow_array)
importFrom(nanoarrow,as_nanoarrow_array_extension)
importFrom(nanoarrow,as_nanoarrow_array_stream)
importFrom(nanoarrow,as_nanoarrow_schema)
importFrom(nanoarrow,convert_array)
importFrom(nanoarrow,convert_array_extension)
Expand Down
3 changes: 2 additions & 1 deletion R/pkg-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ as_arrow_array.geoarrow_vctr <- function(x, ..., type = NULL) {
}
}

#' @importFrom nanoarrow as_nanoarrow_schema
as_chunked_array.geoarrow_vctr <- function(x, ..., type = NULL) {
if (is.null(type)) {
schema <- NULL
Expand All @@ -18,7 +19,7 @@ as_chunked_array.geoarrow_vctr <- function(x, ..., type = NULL) {
}

# as_nanoarrow_array_stream() applies the indices if vctr is sliced
stream <- as_nanoarrow_array_stream(x, schema = schema)
stream <- as_geoarrow_array_stream(x, schema = schema)
chunks <- nanoarrow::collect_array_stream(stream, validate = FALSE)
type <- arrow::as_data_type(type)

Expand Down
2 changes: 1 addition & 1 deletion R/pkg-nanoarrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ register_geoarrow_extension <- function() {
# S3 method of nanoarrow's infer_nanoarrow_ptype_extension() for the
# geoarrow_extension_spec class: returns the prototype produced by the last
# expression in the body, built from `x`.
#' @importFrom nanoarrow infer_nanoarrow_ptype_extension
#' @export
infer_nanoarrow_ptype_extension.geoarrow_extension_spec <- function(extension_spec, x, ...) {
# NOTE(review): the value of this first call is discarded; this line and the
# next look like the old and new form of the same statement from a diff --
# confirm which one is meant to remain.
new_geoarrow_vctr(list(), x, integer())
# Returned value: a nanoarrow_vctr created with `x` as its schema and
# "geoarrow_vctr" as subclass.
nanoarrow::nanoarrow_vctr(schema = x, subclass = "geoarrow_vctr")
}

#' @importFrom nanoarrow convert_array_extension
Expand Down
168 changes: 3 additions & 165 deletions R/vctr.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,48 +18,7 @@ as_geoarrow_vctr <- function(x, ..., schema = NULL) {
}

stream <- as_geoarrow_array_stream(x, ..., schema = schema)
chunks <- nanoarrow::collect_array_stream(stream, validate = FALSE)
new_geoarrow_vctr(chunks, stream$get_schema())
}

# Low-level constructor for a geoarrow_vctr.
#
# The object is a vector of indices carrying the chunk list, the schema and the
# chunk offsets as attributes.
#
# chunks:  list of arrays passed to the native offsets routine
# schema:  stored unchanged in the "schema" attribute
# indices: optional index vector; when NULL, indices 1..n are used, where n
#          is the last element of the computed offsets
new_geoarrow_vctr <- function(chunks, schema, indices = NULL) {
  chunk_offsets <- .Call(geoarrow_c_vctr_chunk_offsets, chunks)

  # Default to addressing every element across all chunks
  if (is.null(indices)) {
    total_length <- chunk_offsets[length(chunk_offsets)]
    indices <- seq_len(total_length)
  }

  vctr_class <- c("geoarrow_vctr", "wk_vctr")
  structure(
    indices,
    schema = schema,
    chunks = chunks,
    offsets = chunk_offsets,
    class = vctr_class
  )
}

# Subset a geoarrow_vctr. The default method subsets the underlying indices;
# the result is accepted only when vctr_as_slice() can resolve it (i.e. does
# not return NULL), after which the original attributes are restored.
#' @export
`[.geoarrow_vctr` <- function(x, i) {
  saved_attributes <- attributes(x)
  subset_indices <- NextMethod()

  is_slice <- !is.null(vctr_as_slice(subset_indices))
  if (!is_slice) {
    stop(
      "Can't subset geoarrow_vctr with non-slice (e.g., only i:j indexing is supported)"
    )
  }

  # Re-attach schema, chunks, offsets and class to the subsetted indices
  attributes(subset_indices) <- saved_attributes
  subset_indices
}

# Subset assignment is deliberately unsupported for geoarrow_vctr: always
# signals an error.
#' @export
`[<-.geoarrow_vctr` <- function(x, i, value) {
  unsupported_message <- "subset assignment for geoarrow_vctr is not supported"
  stop(unsupported_message)
}

# Element assignment is unsupported for geoarrow_vctr: the first statement
# always signals an error.
#' @export
`[[<-.geoarrow_vctr` <- function(x, i, value) {
stop("subset assignment for geoarrow_vctr is not supported")
# NOTE(review): unreachable after stop(), and `stream` is not defined in this
# function -- this line looks like it belongs to a different function in the
# diff (presumably as_geoarrow_vctr); confirm.
nanoarrow::as_nanoarrow_vctr(stream, subclass = "geoarrow_vctr")
}

#' @export
Expand Down Expand Up @@ -102,129 +61,8 @@ as.character.geoarrow_vctr <- function(x, ...) {
format(x, ...)
}

# The schema of a geoarrow_vctr is whatever is stored in its "schema"
# attribute (exact name match); NULL when the attribute is absent.
#' @export
infer_nanoarrow_schema.geoarrow_vctr <- function(x, ...) {
  vctr_schema <- attr(x, "schema", exact = TRUE)
  vctr_schema
}

# Zero-length vctrs are how R communicates "type", so as_nanoarrow_schema()
# is implemented for geoarrow_vctr to make it usable wherever a type is
# expected. Returns the "schema" attribute (exact name match).
#' @importFrom nanoarrow as_nanoarrow_schema
#' @export
as_nanoarrow_schema.geoarrow_vctr <- function(x, ...) {
  vctr_schema <- attr(x, "schema", exact = TRUE)
  vctr_schema
}

# Thin delegate: a geoarrow_vctr is converted to a geoarrow array stream by
# the as_nanoarrow_array_stream() method for the same class, forwarding `...`
# and `schema` unchanged.
#' @export
as_geoarrow_array_stream.geoarrow_vctr <- function(x, ..., schema = NULL) {
  stream <- as_nanoarrow_array_stream.geoarrow_vctr(x, ..., schema = schema)
  stream
}

# Resolve a (possibly sliced) geoarrow_vctr to a nanoarrow_array_stream.
#
# x:      a geoarrow_vctr whose indices must be resolvable by vctr_as_slice()
# schema: optional target schema; when supplied, the vctr is first streamed
#         with its own schema and the result is handed to
#         as_geoarrow_array_stream() together with `schema`
#
# Returns an array stream over the chunks covered by the slice. Chunks that are
# only partially covered are re-sliced via vctr_array_slice(); fully covered
# chunks are passed through as-is. Signals an error when the indices cannot be
# expressed as a slice.
#' @importFrom nanoarrow as_nanoarrow_array_stream
#' @export
as_nanoarrow_array_stream.geoarrow_vctr <- function(x, ..., schema = NULL) {
  if (!is.null(schema)) {
    stream <- as_nanoarrow_array_stream(x, schema = NULL)
    return(as_geoarrow_array_stream(stream, schema = schema))
  }

  # slice[1] is used below as the one-based start and slice[2] as the length
  slice <- vctr_as_slice(x)
  if (is.null(slice)) {
    stop("Can't resolve non-slice geoarrow_vctr to nanoarrow_array_stream")
  }

  x_schema <- attr(x, "schema", exact = TRUE)

  # Zero-size slice can be an array stream with zero batches
  if (slice[2] == 0) {
    return(nanoarrow::basic_array_stream(list(), schema = x_schema))
  }

  # Full slice doesn't need slicing logic
  offsets <- attr(x, "offsets", exact = TRUE)
  batches <- attr(x, "chunks", exact = TRUE)
  if (slice[1] == 1 && slice[2] == max(offsets)) {
    return(
      nanoarrow::basic_array_stream(
        batches,
        schema = x_schema,
        validate = FALSE
      )
    )
  }

  # Zero-based positions of the first element, one-past-the-end, and the last
  # element covered by the slice
  first_index <- slice[1] - 1L
  end_index <- first_index + slice[2]
  last_index <- end_index - 1L

  # Chunk indices are treated as zero-based throughout: the one-based position
  # of chunk k in `batches` is k + 1L and its start offset is offsets[k + 1L]
  first_chunk_index <- vctr_resolve_chunk(first_index, offsets)
  last_chunk_index <- vctr_resolve_chunk(last_index, offsets)

  first_chunk_offset <- first_index - offsets[first_chunk_index + 1L]
  first_chunk_length <- offsets[first_chunk_index + 2L] - first_index
  last_chunk_offset <- 0L
  last_chunk_length <- end_index - offsets[last_chunk_index + 1L]

  # Slice contained in a single chunk: one re-sliced batch
  if (first_chunk_index == last_chunk_index) {
    batch <- vctr_array_slice(
      batches[[first_chunk_index + 1L]],
      first_chunk_offset,
      last_chunk_length - first_chunk_offset
    )

    return(
      nanoarrow::basic_array_stream(
        list(batch),
        schema = x_schema,
        validate = FALSE
      )
    )
  }

  # Slice spans several chunks: trim the first and last chunk
  batch1 <- vctr_array_slice(
    batches[[first_chunk_index + 1L]],
    first_chunk_offset,
    first_chunk_length
  )

  batchn <- vctr_array_slice(
    batches[[last_chunk_index + 1L]],
    last_chunk_offset,
    last_chunk_length
  )

  # Chunks strictly between the first and last chunk are passed through whole.
  # The first chunk sits at one-based position first_chunk_index + 1L, so the
  # middle chunks start one position after it. (Previously the `+ 1L` was
  # missing, which re-emitted the untrimmed first chunk and dropped the final
  # middle chunk whenever the slice covered three or more chunks.)
  seq_mid <- seq_len(last_chunk_index - first_chunk_index - 1L)
  batch_mid <- batches[first_chunk_index + 1L + seq_mid]

  nanoarrow::basic_array_stream(
    c(
      list(batch1),
      batch_mid,
      list(batchn)
    ),
    schema = x_schema,
    validate = FALSE
  )
}


# Utilities for vctr methods

# Forward `x` and `offsets` to the native geoarrow_c_vctr_chunk_resolve
# routine and return its result.
vctr_resolve_chunk <- function(x, offsets) {
  chunk_index <- .Call(geoarrow_c_vctr_chunk_resolve, x, offsets)
  chunk_index
}

# Forward `x` to the native geoarrow_c_vctr_as_slice routine and return its
# result. Callers in this file treat a NULL result as "not a slice".
vctr_as_slice <- function(x) {
  slice <- .Call(geoarrow_c_vctr_as_slice, x)
  slice
}

# Build a modified copy of array `x` whose offset is advanced by `offset`
# (relative to the array's current offset) and whose length is `length`.
vctr_array_slice <- function(x, offset, length) {
# The requested offset is added to the offset the array already has
new_offset <- x$offset + offset
new_length <- length
nanoarrow::nanoarrow_array_modify(
x,
list(offset = new_offset, length = new_length),
validate = FALSE
)
# NOTE(review): the two statements below reference `schema`, which is not a
# parameter or local of this function, and they make the modified array above
# an unused intermediate value. They look like lines from a different function
# in the diff (presumably as_geoarrow_array_stream.geoarrow_vctr) -- confirm.
stream <- nanoarrow::as_nanoarrow_array_stream(x)
as_geoarrow_array_stream(stream, schema = schema)
}
6 changes: 0 additions & 6 deletions src/r-init.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ extern SEXP geoarrow_c_make_type(SEXP geometry_type_sexp, SEXP dimensions_sexp,
SEXP coord_type_sexp);
extern SEXP geoarrow_c_schema_init_extension(SEXP schema_xptr, SEXP type_sexp);
extern SEXP geoarrow_c_schema_parse(SEXP schema_xptr, SEXP extension_name_sexp);
extern SEXP geoarrow_c_vctr_chunk_offsets(SEXP array_list);
extern SEXP geoarrow_c_vctr_chunk_resolve(SEXP indices_sexp, SEXP offsets_sexp);
extern SEXP geoarrow_c_vctr_as_slice(SEXP indices_sexp);
extern SEXP geoarrow_c_handle_stream(SEXP data, SEXP handler_xptr);
extern SEXP geoarrow_c_writer_new(SEXP schema_xptr, SEXP array_out_xptr);

Expand All @@ -28,9 +25,6 @@ static const R_CallMethodDef CallEntries[] = {
{"geoarrow_c_make_type", (DL_FUNC)&geoarrow_c_make_type, 3},
{"geoarrow_c_schema_init_extension", (DL_FUNC)&geoarrow_c_schema_init_extension, 2},
{"geoarrow_c_schema_parse", (DL_FUNC)&geoarrow_c_schema_parse, 2},
{"geoarrow_c_vctr_chunk_offsets", (DL_FUNC)&geoarrow_c_vctr_chunk_offsets, 1},
{"geoarrow_c_vctr_chunk_resolve", (DL_FUNC)&geoarrow_c_vctr_chunk_resolve, 2},
{"geoarrow_c_vctr_as_slice", (DL_FUNC)&geoarrow_c_vctr_as_slice, 1},
{"geoarrow_c_handle_stream", (DL_FUNC)&geoarrow_c_handle_stream, 2},
{"geoarrow_c_writer_new", (DL_FUNC)&geoarrow_c_writer_new, 2},
{NULL, NULL, 0}};
Expand Down
115 changes: 0 additions & 115 deletions src/r-vctr.c

This file was deleted.

Loading
Loading