From 54764c25feb4317734827598ad12f0449b52b1e7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 20 May 2022 16:15:33 -0300 Subject: [PATCH] sf methods for arrow Table/Dataset/RecordBatch/arrow_dplyr_query --- R/pkg-sf.R | 59 +++++++++++++++ R/zzz.R | 8 +++ tests/testthat/test-pkg-sf.R | 135 ++++++++++++++++++++++++++++++++++- 3 files changed, 201 insertions(+), 1 deletion(-) diff --git a/R/pkg-sf.R b/R/pkg-sf.R index d7c23ed..bede15b 100644 --- a/R/pkg-sf.R +++ b/R/pkg-sf.R @@ -27,6 +27,65 @@ as_arrow_table.sf <- function(x, ..., schema = NULL) { as_geoarrow_table(x, geoparquet_metadata = TRUE) } +st_as_sf.ArrowTabular <- function(x, ...) { + geoarrow_collect_sf(x, ...) +} + +st_geometry.ArrowTabular <- function(x, ...) { + schema <- x$.data$schema %||% x$schema + for (i in seq_len(schema$num_fields)) { + if (inherits(schema$field(i - 1)$type, "GeoArrowType")) { + name <- schema$field(i - 1)$name + return(geoarrow_collect_sf(dplyr::select(x, !! name))[[name]]) + } + } + + stop("No geometry column present") +} + +st_crs.ArrowTabular <- function(x, ...) { + schema <- x$.data$schema %||% x$schema + for (i in seq_len(schema$num_fields)) { + if (inherits(schema$field(i - 1)$type, "GeoArrowType")) { + return(sf::st_crs(schema$field(i - 1)$type$crs)) + } + } + + stop("No geometry column present") +} + +st_bbox.ArrowTabular <- function(x) { + schema <- x$.data$schema %||% x$schema + for (i in seq_len(schema$num_fields)) { + if (inherits(schema$field(i - 1)$type, "GeoArrowType")) { + name <- schema$field(i - 1)$name + x_geom <- dplyr::select(x, !! name) + rbr <- arrow::as_record_batch_reader(x_geom) + geom_stream <- narrow::narrow_array_stream_function( + rbr$schema[[1]]$type, + function() { + batch <- rbr$read_next_batch() + if (is.null(batch)) { + NULL + } else { + batch[[1]] + } + } + ) + + bbox <- wk::wk_handle(geom_stream, wk::wk_bbox_handler()) + wk::wk_crs(bbox) <- st_crs.ArrowTabular(x) + return(sf::st_bbox(bbox)) + } + } + + stop("No geometry column present") +} + +st_as_sf.geoarrow_vctr <- function(x, ...) { + sf::st_as_sf(new_data_frame(list(geometry = sf::st_as_sfc(x, ...)))) +} + st_as_sfc.geoarrow_vctr <- function(x, ...) { sf::st_set_crs( wk::wk_handle(x, wk::sfc_writer()), diff --git a/R/zzz.R b/R/zzz.R index ae70971..c4d7218 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -19,6 +19,14 @@ s3_register("arrow::as_arrow_array", cls, as_arrow_array_handleable) s3_register("arrow::infer_type", cls, infer_type_handleable) } + + table_like_things <- c("arrow_dplyr_query", "RecordBatch", "Table", "Dataset") + for (cls in table_like_things) { + s3_register("sf::st_as_sf", cls, st_as_sf.ArrowTabular) + s3_register("sf::st_geometry", cls, st_geometry.ArrowTabular) + s3_register("sf::st_bbox", cls, st_bbox.ArrowTabular) + s3_register("sf::st_crs", cls, st_crs.ArrowTabular) + } } } diff --git a/tests/testthat/test-pkg-sf.R b/tests/testthat/test-pkg-sf.R index fd31e7b..0ac28cf 100644 --- a/tests/testthat/test-pkg-sf.R +++ b/tests/testthat/test-pkg-sf.R @@ -12,6 +12,140 @@ test_that("st_* methods work for geoarrow_vctr", { ) }) +test_that("st_geometry() methods work for Arrow table-like things", { + skip_if_not_installed("sf") + skip_if_not(has_arrow_with_extension_type()) + + vctr <- geoarrow(wk::wkt("POINT (0 1)", crs = "OGC:CRS84")) + table <- arrow::arrow_table(geom = vctr) + batch <- arrow::record_batch(geom = vctr) + dataset <- arrow::InMemoryDataset$create(table) + query <- dplyr::filter(table, arrow::Expression$scalar(TRUE)) + + expect_identical( + sf::st_geometry(table), + sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84")) + ) + + expect_identical( + sf::st_geometry(batch), + sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84")) + ) + + expect_identical( + sf::st_geometry(dataset), + sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84")) + ) + + expect_identical( + sf::st_geometry(query), + sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84")) + ) +}) + +test_that("st_crs() methods work for Arrow table-like things", { + skip_if_not_installed("sf") + skip_if_not(has_arrow_with_extension_type()) + + vctr <- geoarrow(wk::wkt("POINT (0 1)", crs = "OGC:CRS84")) + table <- arrow::arrow_table(geom = vctr) + batch <- arrow::record_batch(geom = vctr) + dataset <- arrow::InMemoryDataset$create(table) + query <- dplyr::filter(table, arrow::Expression$scalar(TRUE)) + + expect_identical( + sf::st_crs(table), + sf::st_crs("OGC:CRS84") + ) + + expect_identical( + sf::st_crs(batch), + sf::st_crs("OGC:CRS84") + ) + + expect_identical( + sf::st_crs(dataset), + sf::st_crs("OGC:CRS84") + ) + + expect_identical( + sf::st_crs(query), + sf::st_crs("OGC:CRS84") + ) +}) + + +test_that("st_crs() methods work for Arrow table-like things", { + skip_if_not_installed("sf") + skip_if_not(has_arrow_with_extension_type()) + + vctr <- geoarrow(wk::wkt("POINT (0 1)", crs = "OGC:CRS84")) + table <- arrow::arrow_table(geom = vctr) + batch <- arrow::record_batch(geom = vctr) + dataset <- arrow::InMemoryDataset$create(table) + query <- dplyr::filter(table, arrow::Expression$scalar(TRUE)) + + bbox <- sf::st_bbox(wk::rct(0, 1, 0, 1, crs = "OGC:CRS84")) + + expect_identical( + sf::st_bbox(table), + bbox + ) + + expect_identical( + sf::st_bbox(batch), + bbox + ) + + expect_identical( + sf::st_bbox(dataset), + bbox + ) + + expect_identical( + sf::st_bbox(query), + bbox + ) +}) + +test_that("st_as_sf() methods work for Arrow table-like things", { + skip_if_not_installed("sf") + skip_if_not(has_arrow_with_extension_type()) + + vctr <- geoarrow(wk::wkt("POINT (0 1)", crs = "OGC:CRS84")) + table <- arrow::arrow_table(geometry = vctr) + batch <- arrow::record_batch(geometry = vctr) + dataset <- arrow::InMemoryDataset$create(table) + query <- dplyr::filter(table, arrow::Expression$scalar(TRUE)) + + sf_tbl <- sf::st_as_sf( + data.frame( + geom = sf::st_as_sfc("POINT (0 1)", crs = sf::st_crs("OGC:CRS84")) + ) + ) + + expect_identical( + sf::st_as_sf(table), + sf_tbl + ) + + expect_identical( + sf::st_as_sf(batch), + sf_tbl + ) + + expect_equal( + sf::st_as_sf(dataset), + sf_tbl, + ignore_attr = TRUE + ) + + expect_identical( + sf::st_as_sf(query), + sf_tbl + ) +}) + test_that("geoarrow_collect_sf() works on a data.frame", { skip_if_not_installed("sf") @@ -41,7 +175,6 @@ test_that("as_arrow_table() works for sf objects", { expect_identical(metadata$columns$geometry$encoding, "geoarrow.point") }) - test_that("read_geoparquet_sf() works", { skip_if_not(has_arrow_extension_type()) skip_if_not_installed("sf")