Skip to content

Commit

Permalink
Merge pull request #20 from paleolimbot/sams-suggestions
Browse files Browse the repository at this point in the history
Better sf conversion for Arrow table-like things
  • Loading branch information
paleolimbot authored May 20, 2022
2 parents 5b7346c + 54764c2 commit 45033ff
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 30 deletions.
59 changes: 59 additions & 0 deletions R/pkg-sf.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,65 @@ as_arrow_table.sf <- function(x, ..., schema = NULL) {
as_geoarrow_table(x, geoparquet_metadata = TRUE)
}

st_as_sf.ArrowTabular <- function(x, ...) {
geoarrow_collect_sf(x, ...)
}

st_geometry.ArrowTabular <- function(x, ...) {
schema <- x$.data$schema %||% x$schema
for (i in seq_len(schema$num_fields)) {
if (inherits(schema$field(i - 1)$type, "GeoArrowType")) {
name <- schema$field(i - 1)$name
return(geoarrow_collect_sf(dplyr::select(x, !! name))[[name]])
}
}

stop("No geometry column present")
}

st_crs.ArrowTabular <- function(x, ...) {
schema <- x$.data$schema %||% x$schema
for (i in seq_len(schema$num_fields)) {
if (inherits(schema$field(i - 1)$type, "GeoArrowType")) {
return(sf::st_crs(schema$field(i - 1)$type$crs))
}
}

stop("No geometry column present")
}

st_bbox.ArrowTabular <- function(x) {
schema <- x$.data$schema %||% x$schema
for (i in seq_len(schema$num_fields)) {
if (inherits(schema$field(i - 1)$type, "GeoArrowType")) {
name <- schema$field(i - 1)$name
x_geom <- dplyr::select(x, !! name)
rbr <- arrow::as_record_batch_reader(x_geom)
geom_stream <- narrow::narrow_array_stream_function(
rbr$schema[[1]]$type,
function() {
batch <- rbr$read_next_batch()
if (is.null(batch)) {
NULL
} else {
batch[[1]]
}
}
)

bbox <- wk::wk_handle(geom_stream, wk::wk_bbox_handler())
wk::wk_crs(bbox) <- st_crs.ArrowTabular(x)
return(sf::st_bbox(bbox))
}
}

stop("No geometry column present")
}

st_as_sf.geoarrow_vctr <- function(x, ...) {
sf::st_as_sf(new_data_frame(list(geometry = sf::st_as_sfc(x, ...))))
}

st_as_sfc.geoarrow_vctr <- function(x, ...) {
sf::st_set_crs(
wk::wk_handle(x, wk::sfc_writer()),
Expand Down
2 changes: 1 addition & 1 deletion R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' in the file.
#' @param trans A function to be applied to each chunk after it has been
#' collected into a data frame.
#' @inheritDotParams arrow::write_parquet
#' @inheritDotParams arrow::read_parquet
#'
#' @return The result of [arrow::read_parquet()], with geometry
#' columns processed according to `handler`.
Expand Down
8 changes: 8 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
s3_register("arrow::as_arrow_array", cls, as_arrow_array_handleable)
s3_register("arrow::infer_type", cls, infer_type_handleable)
}

table_like_things <- c("arrow_dplyr_query", "RecordBatch", "Table", "Dataset")
for (cls in table_like_things) {
s3_register("sf::st_as_sf", cls, st_as_sf.ArrowTabular)
s3_register("sf::st_geometry", cls, st_geometry.ArrowTabular)
s3_register("sf::st_bbox", cls, st_bbox.ArrowTabular)
s3_register("sf::st_crs", cls, st_crs.ArrowTabular)
}
}
}

Expand Down
34 changes: 6 additions & 28 deletions man/read_geoparquet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

135 changes: 134 additions & 1 deletion tests/testthat/test-pkg-sf.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,140 @@ test_that("st_* methods work for geoarrow_vctr", {
)
})

test_that("st_geometry() methods work for Arrow table-like things", {
skip_if_not_installed("sf")
skip_if_not(has_arrow_with_extension_type())

vctr <- geoarrow(wk::wkt("POINT (0 1)", crs = "OGC:CRS84"))
table <- arrow::arrow_table(geom = vctr)
batch <- arrow::record_batch(geom = vctr)
dataset <- arrow::InMemoryDataset$create(table)
query <- dplyr::filter(table, arrow::Expression$scalar(TRUE))

expect_identical(
sf::st_geometry(table),
sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84"))
)

expect_identical(
sf::st_geometry(batch),
sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84"))
)

expect_identical(
sf::st_geometry(dataset),
sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84"))
)

expect_identical(
sf::st_geometry(query),
sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84"))
)
})

test_that("st_crs() methods work for Arrow table-like things", {
skip_if_not_installed("sf")
skip_if_not(has_arrow_with_extension_type())

vctr <- geoarrow(wk::wkt("POINT (0 1)", crs = "OGC:CRS84"))
table <- arrow::arrow_table(geom = vctr)
batch <- arrow::record_batch(geom = vctr)
dataset <- arrow::InMemoryDataset$create(table)
query <- dplyr::filter(table, arrow::Expression$scalar(TRUE))

expect_identical(
sf::st_crs(table),
sf::st_crs("OGC:CRS84")
)

expect_identical(
sf::st_crs(batch),
sf::st_crs("OGC:CRS84")
)

expect_identical(
sf::st_crs(dataset),
sf::st_crs("OGC:CRS84")
)

expect_identical(
sf::st_crs(query),
sf::st_crs("OGC:CRS84")
)
})


test_that("st_crs() methods work for Arrow table-like things", {
skip_if_not_installed("sf")
skip_if_not(has_arrow_with_extension_type())

vctr <- geoarrow(wk::wkt("POINT (0 1)", crs = "OGC:CRS84"))
table <- arrow::arrow_table(geom = vctr)
batch <- arrow::record_batch(geom = vctr)
dataset <- arrow::InMemoryDataset$create(table)
query <- dplyr::filter(table, arrow::Expression$scalar(TRUE))

bbox <- sf::st_bbox(wk::rct(0, 1, 0, 1, crs = "OGC:CRS84"))

expect_identical(
sf::st_bbox(table),
bbox
)

expect_identical(
sf::st_bbox(batch),
bbox
)

expect_identical(
sf::st_bbox(dataset),
bbox
)

expect_identical(
sf::st_bbox(query),
bbox
)
})

test_that("st_as_sf() methods work for Arrow table-like things", {
skip_if_not_installed("sf")
skip_if_not(has_arrow_with_extension_type())

vctr <- geoarrow(wk::wkt("POINT (0 1)", crs = "OGC:CRS84"))
table <- arrow::arrow_table(geometry = vctr)
batch <- arrow::record_batch(geometry = vctr)
dataset <- arrow::InMemoryDataset$create(table)
query <- dplyr::filter(table, arrow::Expression$scalar(TRUE))

sf_tbl <- sf::st_as_sf(
data.frame(
geom = sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84"))
)
)

expect_identical(
sf::st_as_sf(table),
sf_tbl
)

expect_identical(
sf::st_as_sf(batch),
sf_tbl
)

expect_equal(
sf::st_as_sf(dataset),
sf_tbl,
ignore_attr = TRUE
)

expect_identical(
sf::st_as_sf(query),
sf_tbl
)
})

test_that("geoarrow_collect_sf() works on a data.frame", {
skip_if_not_installed("sf")

Expand Down Expand Up @@ -41,7 +175,6 @@ test_that("as_arrow_table() works for sf objects", {
expect_identical(metadata$columns$geometry$encoding, "geoarrow.point")
})


test_that("read_geoparquet_sf() works", {
skip_if_not(has_arrow_extension_type())
skip_if_not_installed("sf")
Expand Down

0 comments on commit 45033ff

Please sign in to comment.