From eb48ffc4f950e33c3c75cbc7eaf446d5bfbceb41 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 2 Apr 2024 14:00:49 -0500 Subject: [PATCH 01/33] WIP: Use bigrquerystorage for downloads --- DESCRIPTION | 3 +- NEWS.md | 5 +++ R/bq-download.R | 42 ++++++++++++++++++++++++- R/dbi-result.R | 1 + R/dplyr.R | 37 ++++++++++++++++------ R/utils.R | 4 +++ man/bq_table_download.Rd | 15 ++++++++- man/src_bigquery.Rd | 2 +- tests/testthat/_snaps/dbi-connection.md | 2 +- tests/testthat/test-bq-download.R | 13 ++++---- tests/testthat/test-bq-parse.R | 4 +-- tests/testthat/test-bq-table.R | 16 +++++----- 12 files changed, 113 insertions(+), 31 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 340a2216..0b5e4b6a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,8 @@ Suggests: sodium, testthat (>= 3.1.5), wk (>= 0.3.2), - withr + withr, + bigrquerystorage LinkingTo: cli, cpp11, diff --git a/NEWS.md b/NEWS.md index 3e526903..28c7c4ec 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # bigrquery (development version) +* If the bigrquerystorage package is installed, `bq_table_download()` (and + hence basically every function that downloads data from BigQuery) will + use it. This will considerably improve the speed of downloading large + datasets. + # bigrquery 1.5.1 * Forward compatibility with upcoming dbplyr release (#601). diff --git a/R/bq-download.R b/R/bq-download.R index 126d1dcf..e1259fc7 100644 --- a/R/bq-download.R +++ b/R/bq-download.R @@ -1,5 +1,6 @@ #' Download table data #' +#' This function provides two ways to download #' This retrieves rows in chunks of `page_size`. It is most suitable for results #' of smaller queries (<100 MB, say). For larger queries, it is better to #' export the results to a CSV file stored on google cloud and use the @@ -43,19 +44,27 @@ #' @param start_index Starting row index (zero-based). #' @param max_connections Number of maximum simultaneous connections to #' BigQuery servers. +#' @param api Which API to use? The `"json"` API works where ever bigrquery +#' does, but is slow and can require fiddling with the `page_size` parameter. +#' The `"arrow"` API is faster and more reliable, but only works if you +#' have also installed the bigrquerystorage package. +#' +#' Because the `"arrow"` API is so much faster, it will be used automatically +#' if the bigrquerystorage package is installed. #' @inheritParams api-job #' @param bigint The R type that BigQuery's 64-bit integer types should be #' mapped to. The default is `"integer"`, which returns R's `integer` type, #' but results in `NA` for values above/below +/- 2147483647. `"integer64"` #' returns a [bit64::integer64], which allows the full range of 64 bit #' integers. +#' @param billing Identifier of project to bill. #' @param max_results `r lifecycle::badge("deprecated")` Deprecated. Please use #' `n_max` instead. 
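#'
#' As a sketch (assuming the bigrquerystorage package is installed and that
#' you can bill `bq_test_project()`), either API can also be requested
#' explicitly:
#'
#' ```
#' tb <- as_bq_table("bigquery-public-data.moon_phases.moon_phases")
#' # JSON API: no extra dependencies; paging controlled by `page_size`
#' bq_table_download(tb, n_max = 100, page_size = 50, api = "json")
#' # Arrow API: needs bigrquerystorage; bill a project you control
#' bq_table_download(tb, n_max = 100, api = "arrow", billing = bq_test_project())
#' ```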
#' @section Google BigQuery API documentation: #' * [list](https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list) #' @export #' @examplesIf bq_testable() -#' df <- bq_table_download("publicdata.samples.natality", n_max = 35000) +#' df <- bq_table_download("publicdata.samples.natality", n_max = 35000, billing = bq_test_project()) bq_table_download <- function(x, n_max = Inf, @@ -64,6 +73,8 @@ bq_table_download <- max_connections = 6L, quiet = NA, bigint = c("integer", "integer64", "numeric", "character"), + api = c("json", "arrow"), + billing = x$project, max_results = deprecated()) { x <- as_bq_table(x) check_number_whole(n_max, min = 0, allow_infinite = TRUE) @@ -71,6 +82,13 @@ bq_table_download <- check_number_whole(max_connections, min = 1) quiet <- check_quiet(quiet) bigint <- arg_match(bigint) + + if (missing(api)) { + api <- if (has_bigrquerystorage()) "arrow" else "json" + } else { + api <- arg_match(api) + } + if (lifecycle::is_present(max_results)) { lifecycle::deprecate_warn( "1.4.0", "bq_table_download(max_results)", "bq_table_download(n_max)" @@ -78,6 +96,28 @@ bq_table_download <- n_max <- max_results } + if (api == "arrow") { + check_installed("bigrquerystorage", "required to download using arrow API") + if (!missing(page_size)) { + cli::cli_warn('{.arg page_size} is ignored when {.code api == "arrow"}') + } + if (!missing(start_index)) { + cli::cli_warn('{.arg start_index} is ignored when {.code api == "arrow"}') + } + if (!missing(max_connections)) { + cli::cli_warn('{.arg max_connections} is ignored when {.code api == "arrow"}') + } + + return(bigrquerystorage::bqs_table_download( + x = toString(x), + parent = billing, + n_max = n_max, + quiet = quiet, + bigint = bigint, + as_tibble = TRUE + )) + } + params <- set_row_params( nrow = bq_table_nrow(x), n_max = n_max, diff --git a/R/dbi-result.R b/R/dbi-result.R index e5248933..7ba63b4d 100644 --- a/R/dbi-result.R +++ b/R/dbi-result.R @@ -105,6 +105,7 @@ setMethod( n <- res@cursor$left() } + # TODO: figure out what how to ignore pagination here data <- bq_table_download(res@bq_table, n_max = n, start_index = res@cursor$cur(), diff --git a/R/dplyr.R b/R/dplyr.R index 6044a441..aae8411e 100644 --- a/R/dplyr.R +++ b/R/dplyr.R @@ -20,14 +20,18 @@ #' # set up for billing #' con <- DBI::dbConnect(bigquery(), project = bq_test_project()) #' -#' shakespeare <- con %>% tbl("publicdata.samples.shakespeare") +#' shakespeare <- con %>% tbl(I("publicdata.samples.shakespeare")) #' shakespeare #' shakespeare %>% #' group_by(word) %>% #' summarise(n = sum(word_count, na.rm = TRUE)) %>% #' arrange(desc(n)) #' } -src_bigquery <- function(project, dataset, billing = project, max_pages = 10) { +src_bigquery <- function(project, + dataset, + billing = project, + api = c("json", "arrow"), + max_pages = 10) { check_installed("dbplyr") con <- DBI::dbConnect( @@ -127,6 +131,7 @@ collect.tbl_BigQueryConnection <- function(x, ..., check_bool(warn_incomplete) con <- dbplyr::remote_con(x) + billing <- con@billing if (op_can_download(x)) { lq <- x$lazy_query @@ -136,7 +141,6 @@ collect.tbl_BigQueryConnection <- function(x, ..., } else { sql <- dbplyr::db_sql_render(con, x) - billing <- con@billing if (is.null(con@dataset)) { tb <- bq_project_query(billing, sql, quiet = con@quiet, ...) 
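      # No default dataset on the connection: the query above is run (and
      # billed) at the project level; the branch below instead scopes the
      # query to the connection's dataset via bq_dataset_query().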
} else { @@ -147,13 +151,26 @@ collect.tbl_BigQueryConnection <- function(x, ..., quiet <- if (n < 100) TRUE else con@quiet bigint <- con@bigint %||% "integer" - out <- bq_table_download(tb, - n_max = n, - page_size = page_size, - quiet = quiet, - max_connections = max_connections, - bigint = bigint - ) + + if (has_bigrquerystorage()) { + out <- bq_table_download(tb, + n_max = n, + quiet = quiet, + bigint = bigint, + billing = billing, + api = "arrow" + ) + } else { + out <- bq_table_download(tb, + n_max = n, + page_size = page_size, + quiet = quiet, + max_connections = max_connections, + bigint = bigint, + api = "json" + ) + } + dplyr::grouped_df(out, intersect(dbplyr::op_grps(x), names(out))) } diff --git a/R/utils.R b/R/utils.R index 98e872fd..1e84f9ff 100644 --- a/R/utils.R +++ b/R/utils.R @@ -71,3 +71,7 @@ as_query <- function(x, error_arg = caller_arg(x), error_call = caller_env()) { check_string(x, arg = error_arg, call = error_call) x } + +has_bigrquerystorage <- function() { + is_installed("bigrquerystorage") +} diff --git a/man/bq_table_download.Rd b/man/bq_table_download.Rd index 90970863..928627ef 100644 --- a/man/bq_table_download.Rd +++ b/man/bq_table_download.Rd @@ -12,6 +12,8 @@ bq_table_download( max_connections = 6L, quiet = NA, bigint = c("integer", "integer64", "numeric", "character"), + api = c("json", "arrow"), + billing = x$project, max_results = deprecated() ) } @@ -44,6 +46,16 @@ but results in \code{NA} for values above/below +/- 2147483647. \code{"integer64 returns a \link[bit64:bit64-package]{bit64::integer64}, which allows the full range of 64 bit integers.} +\item{api}{Which API to use? The \code{"json"} API works where ever bigrquery +does, but is slow and can require fiddling with the \code{page_size} parameter. +The \code{"arrow"} API is faster and more reliable, but only works if you +have also installed the bigrquerystorage package. + +Because the \code{"arrow"} API is so much faster, it will be used automatically +if the bigrquerystorage package is installed.} + +\item{billing}{Identifier of project to bill.} + \item{max_results}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Deprecated. Please use \code{n_max} instead.} } @@ -54,6 +66,7 @@ a tibble. If you need a \code{data.frame}, coerce the results with \code{\link[=as.data.frame]{as.data.frame()}}. } \description{ +This function provides two ways to download This retrieves rows in chunks of \code{page_size}. It is most suitable for results of smaller queries (<100 MB, say). For larger queries, it is better to export the results to a CSV file stored on google cloud and use the @@ -94,6 +107,6 @@ nested/repeated values, are not well supported in R. 
\examples{ \dontshow{if (bq_testable()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -df <- bq_table_download("publicdata.samples.natality", n_max = 35000) +df <- bq_table_download("publicdata.samples.natality", n_max = 35000, billing = bq_test_project()) \dontshow{\}) # examplesIf} } diff --git a/man/src_bigquery.Rd b/man/src_bigquery.Rd index 39a2b1bf..36ef88b6 100644 --- a/man/src_bigquery.Rd +++ b/man/src_bigquery.Rd @@ -31,7 +31,7 @@ library(dplyr) # set up for billing con <- DBI::dbConnect(bigquery(), project = bq_test_project()) -shakespeare <- con \%>\% tbl("publicdata.samples.shakespeare") +shakespeare <- con \%>\% tbl(I("publicdata.samples.shakespeare")) shakespeare shakespeare \%>\% group_by(word) \%>\% diff --git a/tests/testthat/_snaps/dbi-connection.md b/tests/testthat/_snaps/dbi-connection.md index 724fa1cb..67c560a7 100644 --- a/tests/testthat/_snaps/dbi-connection.md +++ b/tests/testthat/_snaps/dbi-connection.md @@ -48,7 +48,7 @@ Code DBI::dbReadTable(con, "natality", n_max = 10) Condition - Error in `as_bq_table()`: + Error in `bigrquery::as_bq_table()`: ! `name` ("natality") must have 2 or 3 components if the connection doesn't have a dataset. # can create bq_table from connection + name diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index e8f64576..04ad531d 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -3,8 +3,8 @@ test_that("same results regardless of page size", { tb <- as_bq_table("bigquery-public-data.moon_phases.moon_phases") - df3 <- bq_table_download(tb, n_max = 30, page_size = 10) - df1 <- bq_table_download(tb, n_max = 30, page_size = 30) + df3 <- bq_table_download(tb, n_max = 30, page_size = 10, api = "json") + df1 <- bq_table_download(tb, n_max = 30, page_size = 30, api = "json") expect_equal(nrow(df1), 30) expect_equal(df1, df3) }) @@ -13,7 +13,7 @@ test_that("can retrieve fraction of page size", { skip_if_no_auth() tb <- as_bq_table("bigquery-public-data.moon_phases.moon_phases") - df <- bq_table_download(tb, n_max = 15, page_size = 10) + df <- bq_table_download(tb, n_max = 15, page_size = 10, api = "json") expect_equal(nrow(df), 15) }) @@ -21,7 +21,7 @@ test_that("can retrieve zero rows", { skip_if_no_auth() tb <- as_bq_table("bigquery-public-data.moon_phases.moon_phases") - df <- bq_table_download(tb, n_max = 0) + df <- bq_table_download(tb, n_max = 0, api = "json") expect_equal(nrow(df), 0) expect_named(df, c("phase", "phase_emoji", "peak_datetime")) }) @@ -34,7 +34,7 @@ test_that("can specify large integers in page params", { withr::local_options(list(scipen = -4)) tb <- as_bq_table("bigquery-public-data.moon_phases.moon_phases") - df <- bq_table_download(tb, n_max = 100, page_size = 20) + df <- bq_table_download(tb, n_max = 100, page_size = 20, api = "json") expect_equal(nrow(df), 100) }) @@ -49,7 +49,8 @@ test_that("errors when table is known to be incomplete", { tb, n_max = 35000, page_size = 35000, - bigint = "integer64" + bigint = "integer64", + api = "json" ), transform = function(x) { gsub("[0-9,]+ rows were received", "{n} rows were received", x, perl = TRUE) diff --git a/tests/testthat/test-bq-parse.R b/tests/testthat/test-bq-parse.R index 93328768..70ac23b4 100644 --- a/tests/testthat/test-bq-parse.R +++ b/tests/testthat/test-bq-parse.R @@ -128,11 +128,11 @@ test_that("can parse nested structures", { test_that("can parse empty arrays", { tb <- bq_project_query(bq_test_project(), "SELECT ARRAY[] as x") - df <- bq_table_download(tb) + df <- 
bq_table_download(tb, api = "json") expect_equal(df$x, list(integer(length = 0))) tb <- bq_project_query(bq_test_project(), "SELECT ARRAY>[] as x") - df <- bq_table_download(tb) + df <- bq_table_download(tb, api = "json") expect_equal(df$x, list(tibble::tibble(a = integer(length = 0), b = character()))) }) diff --git a/tests/testthat/test-bq-table.R b/tests/testthat/test-bq-table.R index 2400ee6e..76b30396 100644 --- a/tests/testthat/test-bq-table.R +++ b/tests/testthat/test-bq-table.R @@ -38,7 +38,7 @@ test_that("can round trip to non-default location", { bq_df <- bq_table(dallas, "df") bq_table_upload(bq_df, df1) - df2 <- bq_table_download(bq_df) + df2 <- bq_table_download(bq_df, api = "json") df2 <- df2[order(df2$x), names(df1)] # BQ doesn't guarantee order rownames(df2) <- NULL @@ -54,7 +54,7 @@ test_that("can roundtrip via save + load", { defer(gs_object_delete(gs)) bq_table_load(tb2, gs) - df <- bq_table_download(tb2) + df <- bq_table_download(tb2, api = "json") expect_equal(dim(df), c(32, 11)) }) @@ -79,7 +79,7 @@ test_that("can round trip atomic vectors", { bq_df <- bq_test_table() bq_table_upload(bq_df, df1) - df2 <- bq_table_download(bq_df, bigint = "integer") + df2 <- bq_table_download(bq_df, bigint = "integer", api = "json") df2 <- df2[order(df2[[1]]), names(df1)] # BQ doesn't gaurantee order rownames(df2) <- NULL @@ -94,7 +94,7 @@ test_that("can round-trip POSIXt to either TIMESTAMP or DATETIME", { bq_fields(list(bq_field("datetime", "TIMESTAMP"))) ) bq_table_upload(tb1, df) - df1 <- bq_table_download(tb1) + df1 <- bq_table_download(tb1, api = "json") expect_equal(df1, df) tb2 <- bq_table_create( @@ -102,7 +102,7 @@ test_that("can round-trip POSIXt to either TIMESTAMP or DATETIME", { bq_fields(list(bq_field("datetime", "DATETIME"))) ) bq_table_upload(tb2, df) - df2 <- bq_table_download(tb2) + df2 <- bq_table_download(tb2, api = "json") expect_equal(df2, df) }) @@ -117,7 +117,7 @@ test_that("can round trip data frame with list-cols", { ) bq_table_upload(tb, df1) - df2 <- bq_table_download(tb, bigint = "integer") + df2 <- bq_table_download(tb, bigint = "integer", api = "json") # restore column order df2 <- df2[names(df1)] df2$struct[[1]] <- df2$struct[[1]][c("x", "y", "z")] @@ -164,7 +164,7 @@ test_that("can round-trip GEOGRAPHY", { tb1 <- bq_table_create(bq_test_table(), as_bq_fields(df)) bq_table_upload(tb1, df) - df1 <- bq_table_download(tb1) + df1 <- bq_table_download(tb1, api = "json") expect_equal(df1, df) }) @@ -173,6 +173,6 @@ test_that("can round-trip BYTES", { tb1 <- bq_table_create(bq_test_table(), as_bq_fields(df)) bq_table_upload(tb1, df) - df1 <- bq_table_download(tb1) + df1 <- bq_table_download(tb1, api = "json") expect_equal(df1, df) }) From 1b95bf3a0c0af8f7e218c542701f8e07152f44ee Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Wed, 10 Apr 2024 13:46:40 -0500 Subject: [PATCH 02/33] Pass bq-download tests --- tests/testthat/_snaps/bq-download.md | 3 ++- tests/testthat/test-bq-download.R | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/testthat/_snaps/bq-download.md b/tests/testthat/_snaps/bq-download.md index bc9eed7a..b9496982 100644 --- a/tests/testthat/_snaps/bq-download.md +++ b/tests/testthat/_snaps/bq-download.md @@ -1,7 +1,8 @@ # errors when table is known to be incomplete Code - bq_table_download(tb, n_max = 35000, page_size = 35000, bigint = "integer64") + bq_table_download(tb, n_max = 35000, page_size = 35000, bigint = "integer64", + api = "json") Message Downloading first chunk of data. 
Condition diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index 04ad531d..b604081b 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -174,7 +174,7 @@ test_that("can convert date time types", { " tb <- bq_project_query(bq_test_project(), sql, quiet = TRUE) - df <- bq_table_download(tb) + df <- bq_table_download(tb, api = "json") base <- ISOdatetime(2000, 1, 2, 3, 4, 5.67, tz = "UTC") @@ -198,7 +198,7 @@ test_that("can parse fractional seconds", { test_that("correctly parse logical values" ,{ query <- "SELECT TRUE as x" tb <- bq_project_query(bq_test_project(), query) - df <- bq_table_download(tb) + df <- bq_table_download(tb, api = "json") expect_true(df$x) }) @@ -209,18 +209,18 @@ test_that("the return type of integer columns is set by the bigint argument", { qry <- bq_project_query(bq_test_project(), sql) expect_warning( - out_int <- bq_table_download(qry, bigint = "integer")$x, + out_int <- bq_table_download(qry, bigint = "integer", api = "json")$x, "integer overflow" ) expect_identical(out_int, suppressWarnings(as.integer(x))) - out_int64 <- bq_table_download(qry, bigint = "integer64")$x + out_int64 <- bq_table_download(qry, bigint = "integer64", api = "json")$x expect_identical(out_int64, bit64::as.integer64(x)) - out_dbl <- bq_table_download(qry, bigint = "numeric")$x + out_dbl <- bq_table_download(qry, bigint = "numeric", api = "json")$x expect_identical(out_dbl, as.double(x)) - out_chr <- bq_table_download(qry, bigint = "character")$x + out_chr <- bq_table_download(qry, bigint = "character", api = "json")$x expect_identical(out_chr, x) }) @@ -228,7 +228,7 @@ test_that("can convert geography type", { skip_if_not_installed("wk") sql <- "SELECT ST_GEOGFROMTEXT('POINT (30 10)') as geography" tb <- bq_project_query(bq_test_project(), sql, quiet = TRUE) - df <- bq_table_download(tb) + df <- bq_table_download(tb, api = "json") expect_identical(df$geography, wk::wkt("POINT(30 10)")) }) @@ -236,7 +236,7 @@ test_that("can convert geography type", { test_that("can convert bytes type", { sql <- "SELECT ST_ASBINARY(ST_GEOGFROMTEXT('POINT (30 10)')) as bytes" tb <- bq_project_query(bq_test_project(), sql, quiet = TRUE) - df <- bq_table_download(tb) + df <- bq_table_download(tb, api = "json") expect_identical( df$bytes, From d90ac6ffbd04ba25179374c01877ae1e69c6b03d Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Wed, 10 Apr 2024 13:49:33 -0500 Subject: [PATCH 03/33] Fix doc buglet; re-document --- R/bq-perform.R | 2 +- man/api-perform.Rd | 2 +- man/api-table.Rd | 2 +- man/src_bigquery.Rd | 8 +++++++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/bq-perform.R b/R/bq-perform.R index 8085b5dc..ec005098 100644 --- a/R/bq-perform.R +++ b/R/bq-perform.R @@ -192,7 +192,7 @@ export_json <- function(values) { #' Google Cloud. #' #' For Google Cloud Storage URIs: Each URI can contain one -#' `'*'`` wildcard character and it must come after the 'bucket' name. +#' `'*'` wildcard character and it must come after the 'bucket' name. #' Size limits related to load jobs apply to external data sources. #' #' For Google Cloud Bigtable URIs: Exactly one URI can be specified and diff --git a/man/api-perform.Rd b/man/api-perform.Rd index ea6f34df..3ee02a7b 100644 --- a/man/api-perform.Rd +++ b/man/api-perform.Rd @@ -129,7 +129,7 @@ to the table. Google Cloud. For Google Cloud Storage URIs: Each URI can contain one -`'*'`` wildcard character and it must come after the 'bucket' name. 
+\code{'*'} wildcard character and it must come after the 'bucket' name. Size limits related to load jobs apply to external data sources. For Google Cloud Bigtable URIs: Exactly one URI can be specified and diff --git a/man/api-table.Rd b/man/api-table.Rd index 6509d18d..70c8031a 100644 --- a/man/api-table.Rd +++ b/man/api-table.Rd @@ -66,7 +66,7 @@ number of files.} Google Cloud. For Google Cloud Storage URIs: Each URI can contain one -`'*'`` wildcard character and it must come after the 'bucket' name. +\code{'*'} wildcard character and it must come after the 'bucket' name. Size limits related to load jobs apply to external data sources. For Google Cloud Bigtable URIs: Exactly one URI can be specified and diff --git a/man/src_bigquery.Rd b/man/src_bigquery.Rd index 36ef88b6..2cc6c52d 100644 --- a/man/src_bigquery.Rd +++ b/man/src_bigquery.Rd @@ -4,7 +4,13 @@ \alias{src_bigquery} \title{A BigQuery data source for dplyr.} \usage{ -src_bigquery(project, dataset, billing = project, max_pages = 10) +src_bigquery( + project, + dataset, + billing = project, + api = c("json", "arrow"), + max_pages = 10 +) } \arguments{ \item{project}{project id or name} From 6f6783e823c2191e0d324ced629c74c4ce2032db Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Wed, 10 Apr 2024 13:54:27 -0500 Subject: [PATCH 04/33] Final api = 'json' args --- R/dbi-connection.R | 2 +- R/dbi-result.R | 5 +++-- tests/testthat/_snaps/dbi-connection.md | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/dbi-connection.R b/R/dbi-connection.R index a790ba24..18cb4b35 100644 --- a/R/dbi-connection.R +++ b/R/dbi-connection.R @@ -318,7 +318,7 @@ setMethod("dbCreateTable", "BigQueryConnection", dbCreateTable_bq) dbReadTable_bq <- function(conn, name, ...) { tb <- as_bq_table(conn, name) - bq_table_download(tb, ...) + bq_table_download(tb, ..., api = "json") } #' @rdname DBI diff --git a/R/dbi-result.R b/R/dbi-result.R index 7ba63b4d..f9554ef8 100644 --- a/R/dbi-result.R +++ b/R/dbi-result.R @@ -105,13 +105,14 @@ setMethod( n <- res@cursor$left() } - # TODO: figure out what how to ignore pagination here + # TODO: figure out how to ignore pagination here data <- bq_table_download(res@bq_table, n_max = n, start_index = res@cursor$cur(), page_size = res@page_size, bigint = res@bigint, - quiet = res@quiet + quiet = res@quiet, + api = "json" ) res@cursor$adv(nrow(data)) diff --git a/tests/testthat/_snaps/dbi-connection.md b/tests/testthat/_snaps/dbi-connection.md index 67c560a7..724fa1cb 100644 --- a/tests/testthat/_snaps/dbi-connection.md +++ b/tests/testthat/_snaps/dbi-connection.md @@ -48,7 +48,7 @@ Code DBI::dbReadTable(con, "natality", n_max = 10) Condition - Error in `bigrquery::as_bq_table()`: + Error in `as_bq_table()`: ! `name` ("natality") must have 2 or 3 components if the connection doesn't have a dataset. # can create bq_table from connection + name From 36ed1799a076b12e78021889ee4e5b5c24046a31 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 11 Apr 2024 08:03:24 -0500 Subject: [PATCH 05/33] When possible, Use arrow in dbFetch() --- R/dbi-result.R | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/R/dbi-result.R b/R/dbi-result.R index f9554ef8..dd1fee7a 100644 --- a/R/dbi-result.R +++ b/R/dbi-result.R @@ -100,20 +100,28 @@ setMethod( "dbFetch", "BigQueryResult", function(res, n = -1, ...) 
{ check_number_whole(n, min = -1, allow_infinite = TRUE) - - if (n == -1 || n == Inf) { - n <- res@cursor$left() + if (n == -1) n <- Inf + + if (has_bigrquerystorage() && n == Inf && res@cursor$cur() == 0) { + # If possible, download complete dataset using arrow + data <- bq_table_download(res@bq_table, + bigint = res@bigint, + quiet = res@quiet, + n_max = res@cursor$left(), + api = "arrow" + ) + } else { + # Otherwise, fall back to slower JSON API + data <- bq_table_download(res@bq_table, + n_max = n, + start_index = res@cursor$cur(), + page_size = res@page_size, + bigint = res@bigint, + quiet = res@quiet, + api = "json" + ) } - - # TODO: figure out how to ignore pagination here - data <- bq_table_download(res@bq_table, - n_max = n, - start_index = res@cursor$cur(), - page_size = res@page_size, - bigint = res@bigint, - quiet = res@quiet, - api = "json" - ) + res@cursor$adv(nrow(data)) data From 793a251346ac0338884ff9d1469fcf05ea719b3f Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 11 Apr 2024 08:25:46 -0500 Subject: [PATCH 06/33] Add API argument to collect() --- R/bq-download.R | 15 +++++--- R/dplyr.R | 27 +++++++++++--- _pkgdown.yml | 1 + man/collect.tbl_BigQueryConnection.Rd | 54 +++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 11 deletions(-) create mode 100644 man/collect.tbl_BigQueryConnection.Rd diff --git a/R/bq-download.R b/R/bq-download.R index e1259fc7..cb33f6d9 100644 --- a/R/bq-download.R +++ b/R/bq-download.R @@ -82,12 +82,7 @@ bq_table_download <- check_number_whole(max_connections, min = 1) quiet <- check_quiet(quiet) bigint <- arg_match(bigint) - - if (missing(api)) { - api <- if (has_bigrquerystorage()) "arrow" else "json" - } else { - api <- arg_match(api) - } + api <- check_api(api) if (lifecycle::is_present(max_results)) { lifecycle::deprecate_warn( @@ -242,6 +237,14 @@ bq_table_download <- parse_postprocess(table_data, bigint = bigint) } +check_api <- function(api = c("json", "arrow"), error_call = caller_env()) { + if (identical(api, c("json", "arrow"))) { + if (has_bigrquerystorage()) "arrow" else "json" + } else { + arg_match(api, error_call = error_call) + } +} + # This function is a modified version of # https://github.com/r-dbi/RPostgres/blob/master/R/PqResult.R parse_postprocess <- function(df, bigint) { diff --git a/R/dplyr.R b/R/dplyr.R index aae8411e..c75875d1 100644 --- a/R/dplyr.R +++ b/R/dplyr.R @@ -120,15 +120,32 @@ db_copy_to.BigQueryConnection <- function(con, # Efficient downloads ----------------------------------------------- # registered onLoad + +#' Collect a BigQuery table +#' +#' This collect method is specialised for BigQuery tables, generating the +#' SQL from your dplyr commands, then calling [bq_project_query()] +#' or [bq_dataset_query()] to run the query, then [bq_download_table()] +#' to download the results. Thus the arguments are a combination of the +#' arguments to [dplyr::collect()], `bq_project_query()`/`bq_dataset_query()`, +#' and `bq_download_table()`. +#' +#' @inheritParams dplyr::collect +#' @inheritParams bq_table_download +#' @param n Maximum number of results to retrieve. +#' The default, `Inf`, will retrieve all rows. +#' @param ... 
Other arguments passed on to +#' `bq_project_query()`/`bq_project_query()` collect.tbl_BigQueryConnection <- function(x, ..., - page_size = NULL, - max_connections = 6L, n = Inf, - warn_incomplete = TRUE) { + api = c("json", "arrow"), + page_size = NULL, + max_connections = 6L + ) { + api <- check_api(api) check_number_whole(n, min = 0, allow_infinite = TRUE) check_number_whole(max_connections, min = 1) - check_bool(warn_incomplete) con <- dbplyr::remote_con(x) billing <- con@billing @@ -152,7 +169,7 @@ collect.tbl_BigQueryConnection <- function(x, ..., quiet <- if (n < 100) TRUE else con@quiet bigint <- con@bigint %||% "integer" - if (has_bigrquerystorage()) { + if (api == "arrow") { out <- bq_table_download(tb, n_max = n, quiet = quiet, diff --git a/_pkgdown.yml b/_pkgdown.yml index beadd04a..d4bda334 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -16,6 +16,7 @@ reference: contents: - src_bigquery - bigquery + - collect.tbl_BigQueryConnection - title: Low-level API contents: diff --git a/man/collect.tbl_BigQueryConnection.Rd b/man/collect.tbl_BigQueryConnection.Rd new file mode 100644 index 00000000..4402deb1 --- /dev/null +++ b/man/collect.tbl_BigQueryConnection.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr.R +\name{collect.tbl_BigQueryConnection} +\alias{collect.tbl_BigQueryConnection} +\title{Collect a BigQuery table} +\usage{ +collect.tbl_BigQueryConnection( + x, + ..., + n = Inf, + api = c("json", "arrow"), + page_size = NULL, + max_connections = 6L +) +} +\arguments{ +\item{x}{A data frame, data frame extension (e.g. a tibble), or a lazy +data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more +details.} + +\item{...}{Other arguments passed on to +\code{bq_project_query()}/\code{bq_project_query()}} + +\item{n}{Maximum number of results to retrieve. +The default, \code{Inf}, will retrieve all rows.} + +\item{api}{Which API to use? The \code{"json"} API works where ever bigrquery +does, but is slow and can require fiddling with the \code{page_size} parameter. +The \code{"arrow"} API is faster and more reliable, but only works if you +have also installed the bigrquerystorage package. + +Because the \code{"arrow"} API is so much faster, it will be used automatically +if the bigrquerystorage package is installed.} + +\item{page_size}{The number of rows requested per chunk. It is recommended to +leave this unspecified until you have evidence that the \code{page_size} +selected automatically by \code{bq_table_download()} is problematic. + +When \code{page_size = NULL} bigrquery determines a conservative, natural chunk +size empirically. If you specify the \code{page_size}, it is important that each +chunk fits on one page, i.e. that the requested row limit is low enough to +prevent the API from paginating based on response size.} + +\item{max_connections}{Number of maximum simultaneous connections to +BigQuery servers.} +} +\description{ +This collect method is specialised for BigQuery tables, generating the +SQL from your dplyr commands, then calling \code{\link[=bq_project_query]{bq_project_query()}} +or \code{\link[=bq_dataset_query]{bq_dataset_query()}} to run the query, then \code{\link[=bq_download_table]{bq_download_table()}} +to download the results. Thus the arguments are a combination of the +arguments to \code{\link[dplyr:compute]{dplyr::collect()}}, \code{bq_project_query()}/\code{bq_dataset_query()}, +and \code{bq_download_table()}. 
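For example (a sketch assuming the \code{bq_test_project()} test helper and a
readable public dataset):

\preformatted{
library(dplyr)
con <- DBI::dbConnect(bigquery(), project = bq_test_project())
shakespeare <- tbl(con, I("publicdata.samples.shakespeare"))
collect(shakespeare, n = 100, api = "json")
}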
+} From c68a49deac6eb8020a5bd477ad335fbc4d52246f Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 11 Apr 2024 08:32:49 -0500 Subject: [PATCH 07/33] Clarify bug workaround --- R/dbi-result.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/dbi-result.R b/R/dbi-result.R index dd1fee7a..f4543717 100644 --- a/R/dbi-result.R +++ b/R/dbi-result.R @@ -103,11 +103,14 @@ setMethod( if (n == -1) n <- Inf if (has_bigrquerystorage() && n == Inf && res@cursor$cur() == 0) { + # https://github.com/meztez/bigrquerystorage/issues/48 + n <- res@cursor$left() + # If possible, download complete dataset using arrow data <- bq_table_download(res@bq_table, + n_max = n, bigint = res@bigint, quiet = res@quiet, - n_max = res@cursor$left(), api = "arrow" ) } else { From 9d1f36d4be96b70906abf87edfb88428a88ec59b Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 11 Apr 2024 08:34:03 -0500 Subject: [PATCH 08/33] R CMD check fixes --- R/dplyr.R | 5 ++--- man/collect.tbl_BigQueryConnection.Rd | 4 ++-- man/src_bigquery.Rd | 8 +------- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/R/dplyr.R b/R/dplyr.R index c75875d1..47a7b38e 100644 --- a/R/dplyr.R +++ b/R/dplyr.R @@ -30,7 +30,6 @@ src_bigquery <- function(project, dataset, billing = project, - api = c("json", "arrow"), max_pages = 10) { check_installed("dbplyr") @@ -125,10 +124,10 @@ db_copy_to.BigQueryConnection <- function(con, #' #' This collect method is specialised for BigQuery tables, generating the #' SQL from your dplyr commands, then calling [bq_project_query()] -#' or [bq_dataset_query()] to run the query, then [bq_download_table()] +#' or [bq_dataset_query()] to run the query, then [bq_table_download()] #' to download the results. Thus the arguments are a combination of the #' arguments to [dplyr::collect()], `bq_project_query()`/`bq_dataset_query()`, -#' and `bq_download_table()`. +#' and `bq_table_download()`. #' #' @inheritParams dplyr::collect #' @inheritParams bq_table_download diff --git a/man/collect.tbl_BigQueryConnection.Rd b/man/collect.tbl_BigQueryConnection.Rd index 4402deb1..4f68161a 100644 --- a/man/collect.tbl_BigQueryConnection.Rd +++ b/man/collect.tbl_BigQueryConnection.Rd @@ -47,8 +47,8 @@ BigQuery servers.} \description{ This collect method is specialised for BigQuery tables, generating the SQL from your dplyr commands, then calling \code{\link[=bq_project_query]{bq_project_query()}} -or \code{\link[=bq_dataset_query]{bq_dataset_query()}} to run the query, then \code{\link[=bq_download_table]{bq_download_table()}} +or \code{\link[=bq_dataset_query]{bq_dataset_query()}} to run the query, then \code{\link[=bq_table_download]{bq_table_download()}} to download the results. Thus the arguments are a combination of the arguments to \code{\link[dplyr:compute]{dplyr::collect()}}, \code{bq_project_query()}/\code{bq_dataset_query()}, -and \code{bq_download_table()}. +and \code{bq_table_download()}. 
} diff --git a/man/src_bigquery.Rd b/man/src_bigquery.Rd index 2cc6c52d..36ef88b6 100644 --- a/man/src_bigquery.Rd +++ b/man/src_bigquery.Rd @@ -4,13 +4,7 @@ \alias{src_bigquery} \title{A BigQuery data source for dplyr.} \usage{ -src_bigquery( - project, - dataset, - billing = project, - api = c("json", "arrow"), - max_pages = 10 -) +src_bigquery(project, dataset, billing = project, max_pages = 10) } \arguments{ \item{project}{project id or name} From c28f1801ef4591544974a144e3cb1f3b16c97e5b Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 11 Apr 2024 08:34:34 -0500 Subject: [PATCH 09/33] use_tidy_description() --- DESCRIPTION | 4 +- tests/testthat/_snaps/dbi-connection.new.md | 77 +++++++++++++++++++++ tests/testthat/_snaps/dbi-result.new.md | 23 ++++++ 3 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/_snaps/dbi-connection.new.md create mode 100644 tests/testthat/_snaps/dbi-result.new.md diff --git a/DESCRIPTION b/DESCRIPTION index 0b5e4b6a..d43737bc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,6 +30,7 @@ Imports: rlang (>= 1.1.0), tibble Suggests: + bigrquerystorage, blob, covr, dbplyr (>= 2.4.0), @@ -38,9 +39,8 @@ Suggests: readr, sodium, testthat (>= 3.1.5), - wk (>= 0.3.2), withr, - bigrquerystorage + wk (>= 0.3.2) LinkingTo: cli, cpp11, diff --git a/tests/testthat/_snaps/dbi-connection.new.md b/tests/testthat/_snaps/dbi-connection.new.md new file mode 100644 index 00000000..67c560a7 --- /dev/null +++ b/tests/testthat/_snaps/dbi-connection.new.md @@ -0,0 +1,77 @@ +# useful print with and without dataset + + Code + # With dataset + con1 + Output + + Dataset: p.x + Billing: b + Code + # Without dataset + con2 + Output + + Billing: p + +# dbQuoteIdentifier validates inputs + + Code + DBI::dbQuoteIdentifier(con, c("x", NA)) + Condition + Error in `DBI::dbQuoteIdentifier()`: + ! `x` must not contain missing values. + +# dbWriteTable errors on unsupported arguments + + Code + DBI::dbWriteTable(con, "x", df, field.types = list()) + Condition + Error in `DBI::dbWriteTable()`: + ! `field.types` not supported by bigrquery. + Code + DBI::dbWriteTable(con, "x", df, temporary = TRUE) + Condition + Error in `DBI::dbWriteTable()`: + ! `temporary = FALSE` not supported by bigrquery. + +# dataset is optional + + Code + DBI::dbListTables(con) + Condition + Error in `DBI::dbListTables()`: + ! Can't list tables without a connection `dataset`. + +--- + + Code + DBI::dbReadTable(con, "natality", n_max = 10) + Condition + Error in `bigrquery::as_bq_table()`: + ! `name` ("natality") must have 2 or 3 components if the connection doesn't have a dataset. + +# can create bq_table from connection + name + + Code + as_bq_table(con1, "x") + Condition + Error in `as_bq_table()`: + ! `name` ("x") must have 2 or 3 components if the connection doesn't have a dataset. + +--- + + Code + as_bq_table(con1, "a.b.c.d") + Condition + Error in `as_bq_table()`: + ! `name` ("a.b.c.d") must have 1-3 components. + +# as_bq_table checks its input types + + Code + as_bq_table(con1, letters) + Condition + Error in `as_bq_table()`: + ! `name` must be a string or a dbplyr_table_ident. + diff --git a/tests/testthat/_snaps/dbi-result.new.md b/tests/testthat/_snaps/dbi-result.new.md new file mode 100644 index 00000000..4ada7d56 --- /dev/null +++ b/tests/testthat/_snaps/dbi-result.new.md @@ -0,0 +1,23 @@ +# can retrieve query in pieces and that quiet is respected + + Code + DBI::dbFetch(res, NA) + Condition + Error in `DBI::dbFetch()`: + ! 
is.numeric(n) is not TRUE + +--- + + Code + df <- DBI::dbFetch(res, 10) + +# can get metadata + + Code + res + Output + + Query: SELECT cyl, mpg FROM mtcars + Has completed: FALSE + Rows fetched: 0 + From 4a18fa241c5b29e125042cecdd0d428c38d28a28 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 11 Apr 2024 08:39:30 -0500 Subject: [PATCH 10/33] Start tests for arrow api --- tests/testthat/test-bq-download.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index b604081b..1921ebcd 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -59,6 +59,20 @@ test_that("errors when table is known to be incomplete", { ) }) +# api = "arrow" ---------------------------------------------------------------- + +test_that("check_api respects inputs", { + expect_equal(check_api("arrow"), "arrow") + expect_equal(check_api("json"), "json") +}) + +test_that("uses arrow api if bigrquerystorage installed", { + expect_equal(check_api(), "arrow") + + local_mocked_bindings(is_installed = function(...) FALSE) + expect_equal(check_api(), "json") +}) + # helpers around row and chunk params ------------------------------------------ test_that("set_row_params() works ", { From 5590b6d2600f5846d539bbe566e0db880ec8ac2a Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 11 Apr 2024 08:40:57 -0500 Subject: [PATCH 11/33] Polish news --- NEWS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 28c7c4ec..fc5d66d5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,9 @@ # bigrquery (development version) * If the bigrquerystorage package is installed, `bq_table_download()` (and - hence basically every function that downloads data from BigQuery) will - use it. This will considerably improve the speed of downloading large - datasets. + hence `collect()`, `dbGetQuery()` and `dbFetch()` will use it. This will + drastically improve the speed of downloading large datasets. A big thanks + to @meztez for creating the bigrquerystorage package! # bigrquery 1.5.1 From 5c257186cf0b53a403065d7f52713dda6a38f1a5 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 11 Apr 2024 08:49:46 -0500 Subject: [PATCH 12/33] Polish docs --- R/bq-download.R | 51 ++++++++++++------------- man/bq_table_download.Rd | 55 ++++++++++++--------------- man/collect.tbl_BigQueryConnection.Rd | 10 ++--- 3 files changed, 54 insertions(+), 62 deletions(-) diff --git a/R/bq-download.R b/R/bq-download.R index cb33f6d9..6d067470 100644 --- a/R/bq-download.R +++ b/R/bq-download.R @@ -1,31 +1,28 @@ #' Download table data #' -#' This function provides two ways to download -#' This retrieves rows in chunks of `page_size`. It is most suitable for results -#' of smaller queries (<100 MB, say). For larger queries, it is better to -#' export the results to a CSV file stored on google cloud and use the -#' bq command line tool to download locally. +#' @description +#' This function provides two ways to download data from BigQuery, transfering +#' data using either JSON or arrow, depending on the `api` argument. JSON is +#' much slower but requires no additional dependencies, and is what bigrquery +#' used prior to version 1.6.0. The arrow method is much much faster, but +#' requires the bigrquerystorage, which in turn requires the arrow package. +#' These dependencies are fairly heavy, and can be tricky to compile on Linux, +#' but in our opinion the massive speedup is worth the effort. 
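#'
#' As a rough sketch (assuming bigrquerystorage can be installed from its
#' GitHub repository, meztez/bigrquerystorage), opting in is a one-off step:
#'
#' ```
#' # once: remotes::install_github("meztez/bigrquerystorage")
#' # afterwards bq_table_download() picks api = "arrow" automatically:
#' df <- bq_table_download(x, n_max = Inf)
#' ```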
+#' +#' ## JSON API +#' +#' The JSON API retrieves rows in chunks of `page_size`. It is most suitable +#' for results of smaller queries (<100 MB, say). Unfortunately due to +#' limitations in the BigQuery API, you may need to vary this parameter +#' depending on the complexity of the underlying data. #' -#' @section Complex data: -#' bigrquery will retrieve nested and repeated columns in to list-columns +#' The JSON API will convert nested and repeated columns in to list-columns #' as follows: #' #' * Repeated values (arrays) will become a list-column of vectors. #' * Records will become list-columns of named lists. #' * Repeated records will become list-columns of data frames. #' -#' @section Larger datasets: -#' In my timings, this code takes around 1 minute per 100 MB of data. -#' If you need to download considerably more than this, I recommend: -#' -#' * Export a `.csv` file to Cloud Storage using [bq_table_save()]. -#' * Use the `gsutil` command line utility to download it. -#' * Read the csv file into R with `readr::read_csv()` or `data.table::fread()`. -#' -#' Unfortunately you can not export nested or repeated formats into CSV, and -#' the formats that BigQuery supports (arvn and ndjson) that allow for -#' nested/repeated values, are not well supported in R. -#' #' @return Because data retrieval may generate list-columns and the `data.frame` #' print method can have problems with list-columns, this method returns #' a tibble. If you need a `data.frame`, coerce the results with @@ -33,17 +30,17 @@ #' @param x A [bq_table] #' @param n_max Maximum number of results to retrieve. Use `Inf` to retrieve all #' rows. -#' @param page_size The number of rows requested per chunk. It is recommended to -#' leave this unspecified until you have evidence that the `page_size` -#' selected automatically by `bq_table_download()` is problematic. +#' @param page_size (JSON only) The number of rows requested per chunk. It is +#' recommended to leave this unspecified until you have evidence that the +#' `page_size` selected automatically by `bq_table_download()` is problematic. #' #' When `page_size = NULL` bigrquery determines a conservative, natural chunk #' size empirically. If you specify the `page_size`, it is important that each #' chunk fits on one page, i.e. that the requested row limit is low enough to #' prevent the API from paginating based on response size. -#' @param start_index Starting row index (zero-based). -#' @param max_connections Number of maximum simultaneous connections to -#' BigQuery servers. +#' @param start_index (JSON only) Starting row index (zero-based). +#' @param max_connections (JSON only) Number of maximum simultaneous +#' connections to BigQuery servers. #' @param api Which API to use? The `"json"` API works where ever bigrquery #' does, but is slow and can require fiddling with the `page_size` parameter. #' The `"arrow"` API is faster and more reliable, but only works if you @@ -57,7 +54,9 @@ #' but results in `NA` for values above/below +/- 2147483647. `"integer64"` #' returns a [bit64::integer64], which allows the full range of 64 bit #' integers. -#' @param billing Identifier of project to bill. +#' @param billing (Arrow only) Project to bill; defaults to the project of `x`, +#' and typically only needs to be specified if you're working with public +#' datasets. #' @param max_results `r lifecycle::badge("deprecated")` Deprecated. Please use #' `n_max` instead. 
#' @section Google BigQuery API documentation: diff --git a/man/bq_table_download.Rd b/man/bq_table_download.Rd index 928627ef..0d6eb2af 100644 --- a/man/bq_table_download.Rd +++ b/man/bq_table_download.Rd @@ -23,19 +23,19 @@ bq_table_download( \item{n_max}{Maximum number of results to retrieve. Use \code{Inf} to retrieve all rows.} -\item{page_size}{The number of rows requested per chunk. It is recommended to -leave this unspecified until you have evidence that the \code{page_size} -selected automatically by \code{bq_table_download()} is problematic. +\item{page_size}{(JSON only) The number of rows requested per chunk. It is +recommended to leave this unspecified until you have evidence that the +\code{page_size} selected automatically by \code{bq_table_download()} is problematic. When \code{page_size = NULL} bigrquery determines a conservative, natural chunk size empirically. If you specify the \code{page_size}, it is important that each chunk fits on one page, i.e. that the requested row limit is low enough to prevent the API from paginating based on response size.} -\item{start_index}{Starting row index (zero-based).} +\item{start_index}{(JSON only) Starting row index (zero-based).} -\item{max_connections}{Number of maximum simultaneous connections to -BigQuery servers.} +\item{max_connections}{(JSON only) Number of maximum simultaneous +connections to BigQuery servers.} \item{quiet}{If \code{FALSE}, displays progress bar; if \code{TRUE} is silent; if \code{NA} picks based on whether or not you're in an interactive context.} @@ -54,7 +54,9 @@ have also installed the bigrquerystorage package. Because the \code{"arrow"} API is so much faster, it will be used automatically if the bigrquerystorage package is installed.} -\item{billing}{Identifier of project to bill.} +\item{billing}{(Arrow only) Project to bill; defaults to the project of \code{x}, +and typically only needs to be specified if you're working with public +datasets.} \item{max_results}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Deprecated. Please use \code{n_max} instead.} @@ -66,15 +68,21 @@ a tibble. If you need a \code{data.frame}, coerce the results with \code{\link[=as.data.frame]{as.data.frame()}}. } \description{ -This function provides two ways to download -This retrieves rows in chunks of \code{page_size}. It is most suitable for results -of smaller queries (<100 MB, say). For larger queries, it is better to -export the results to a CSV file stored on google cloud and use the -bq command line tool to download locally. -} -\section{Complex data}{ - -bigrquery will retrieve nested and repeated columns in to list-columns +This function provides two ways to download data from BigQuery, transfering +data using either JSON or arrow, depending on the \code{api} argument. JSON is +much slower but requires no additional dependencies, and is what bigrquery +used prior to version 1.6.0. The arrow method is much much faster, but +requires the bigrquerystorage, which in turn requires the arrow package. +These dependencies are fairly heavy, and can be tricky to compile on Linux, +but in our opinion the massive speedup is worth the effort. +\subsection{JSON API}{ + +The JSON API retrieves rows in chunks of \code{page_size}. It is most suitable +for results of smaller queries (<100 MB, say). 
Unfortunately due to +limitations in the BigQuery API, you may need to vary this parameter +depending on the complexity of the underlying data. + +The JSON API will convert nested and repeated columns in to list-columns as follows: \itemize{ \item Repeated values (arrays) will become a list-column of vectors. @@ -82,22 +90,7 @@ as follows: \item Repeated records will become list-columns of data frames. } } - -\section{Larger datasets}{ - -In my timings, this code takes around 1 minute per 100 MB of data. -If you need to download considerably more than this, I recommend: -\itemize{ -\item Export a \code{.csv} file to Cloud Storage using \code{\link[=bq_table_save]{bq_table_save()}}. -\item Use the \code{gsutil} command line utility to download it. -\item Read the csv file into R with \code{readr::read_csv()} or \code{data.table::fread()}. -} - -Unfortunately you can not export nested or repeated formats into CSV, and -the formats that BigQuery supports (arvn and ndjson) that allow for -nested/repeated values, are not well supported in R. } - \section{Google BigQuery API documentation}{ \itemize{ diff --git a/man/collect.tbl_BigQueryConnection.Rd b/man/collect.tbl_BigQueryConnection.Rd index 4f68161a..ac401ae2 100644 --- a/man/collect.tbl_BigQueryConnection.Rd +++ b/man/collect.tbl_BigQueryConnection.Rd @@ -32,17 +32,17 @@ have also installed the bigrquerystorage package. Because the \code{"arrow"} API is so much faster, it will be used automatically if the bigrquerystorage package is installed.} -\item{page_size}{The number of rows requested per chunk. It is recommended to -leave this unspecified until you have evidence that the \code{page_size} -selected automatically by \code{bq_table_download()} is problematic. +\item{page_size}{(JSON only) The number of rows requested per chunk. It is +recommended to leave this unspecified until you have evidence that the +\code{page_size} selected automatically by \code{bq_table_download()} is problematic. When \code{page_size = NULL} bigrquery determines a conservative, natural chunk size empirically. If you specify the \code{page_size}, it is important that each chunk fits on one page, i.e. 
that the requested row limit is low enough to prevent the API from paginating based on response size.} -\item{max_connections}{Number of maximum simultaneous connections to -BigQuery servers.} +\item{max_connections}{(JSON only) Number of maximum simultaneous +connections to BigQuery servers.} } \description{ This collect method is specialised for BigQuery tables, generating the From b8f44a8b50aa67825cc740afee0a4bd6bb56048b Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 18 Apr 2024 15:13:01 -0500 Subject: [PATCH 13/33] Use bigrquerystorage without DBI methods --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d43737bc..2ca64657 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,6 +15,7 @@ BugReports: https://github.com/r-dbi/bigrquery/issues Depends: R (>= 3.6) Imports: + bigrquerystorage (>= 1.1.0), bit64, brio, cli, @@ -30,7 +31,6 @@ Imports: rlang (>= 1.1.0), tibble Suggests: - bigrquerystorage, blob, covr, dbplyr (>= 2.4.0), @@ -82,3 +82,5 @@ Collate: 'import-standalone-types-check.R' 'utils.R' 'zzz.R' +Remotes: + meztez/bigrquerystorage#52 From 7265975f1666882ea7275ff59043ae0c0f0b9f44 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 18 Apr 2024 15:13:47 -0500 Subject: [PATCH 14/33] Wrap DB references in I() --- tests/testthat/test-dplyr.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-dplyr.R b/tests/testthat/test-dplyr.R index b9ffd8ff..2bf86187 100644 --- a/tests/testthat/test-dplyr.R +++ b/tests/testthat/test-dplyr.R @@ -156,8 +156,8 @@ test_that("all BigQuery tbls share the same src", { billing = bq_test_project() ) - tbl1 <- dplyr::tbl(con1, "basedata.mtcars", vars = "x") - tbl2 <- dplyr::tbl(con2, "publicdata.samples.natality", vars = "x") + tbl1 <- dplyr::tbl(con1, I("basedata.mtcars"), vars = "x") + tbl2 <- dplyr::tbl(con2, I("publicdata.samples.natality"), vars = "x") expect_true(dplyr::same_src(tbl1, tbl2)) expect_false(dplyr::same_src(tbl1, mtcars)) }) From 9b52bf39daf27d58818c3f8cc782896a7ef4a490 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 18 Apr 2024 15:15:26 -0500 Subject: [PATCH 15/33] Don't commit snapshots --- tests/testthat/_snaps/dbi-connection.new.md | 77 --------------------- tests/testthat/_snaps/dbi-result.new.md | 23 ------ 2 files changed, 100 deletions(-) delete mode 100644 tests/testthat/_snaps/dbi-connection.new.md delete mode 100644 tests/testthat/_snaps/dbi-result.new.md diff --git a/tests/testthat/_snaps/dbi-connection.new.md b/tests/testthat/_snaps/dbi-connection.new.md deleted file mode 100644 index 67c560a7..00000000 --- a/tests/testthat/_snaps/dbi-connection.new.md +++ /dev/null @@ -1,77 +0,0 @@ -# useful print with and without dataset - - Code - # With dataset - con1 - Output - - Dataset: p.x - Billing: b - Code - # Without dataset - con2 - Output - - Billing: p - -# dbQuoteIdentifier validates inputs - - Code - DBI::dbQuoteIdentifier(con, c("x", NA)) - Condition - Error in `DBI::dbQuoteIdentifier()`: - ! `x` must not contain missing values. - -# dbWriteTable errors on unsupported arguments - - Code - DBI::dbWriteTable(con, "x", df, field.types = list()) - Condition - Error in `DBI::dbWriteTable()`: - ! `field.types` not supported by bigrquery. - Code - DBI::dbWriteTable(con, "x", df, temporary = TRUE) - Condition - Error in `DBI::dbWriteTable()`: - ! `temporary = FALSE` not supported by bigrquery. - -# dataset is optional - - Code - DBI::dbListTables(con) - Condition - Error in `DBI::dbListTables()`: - ! 
Can't list tables without a connection `dataset`. - ---- - - Code - DBI::dbReadTable(con, "natality", n_max = 10) - Condition - Error in `bigrquery::as_bq_table()`: - ! `name` ("natality") must have 2 or 3 components if the connection doesn't have a dataset. - -# can create bq_table from connection + name - - Code - as_bq_table(con1, "x") - Condition - Error in `as_bq_table()`: - ! `name` ("x") must have 2 or 3 components if the connection doesn't have a dataset. - ---- - - Code - as_bq_table(con1, "a.b.c.d") - Condition - Error in `as_bq_table()`: - ! `name` ("a.b.c.d") must have 1-3 components. - -# as_bq_table checks its input types - - Code - as_bq_table(con1, letters) - Condition - Error in `as_bq_table()`: - ! `name` must be a string or a dbplyr_table_ident. - diff --git a/tests/testthat/_snaps/dbi-result.new.md b/tests/testthat/_snaps/dbi-result.new.md deleted file mode 100644 index 4ada7d56..00000000 --- a/tests/testthat/_snaps/dbi-result.new.md +++ /dev/null @@ -1,23 +0,0 @@ -# can retrieve query in pieces and that quiet is respected - - Code - DBI::dbFetch(res, NA) - Condition - Error in `DBI::dbFetch()`: - ! is.numeric(n) is not TRUE - ---- - - Code - df <- DBI::dbFetch(res, 10) - -# can get metadata - - Code - res - Output - - Query: SELECT cyl, mpg FROM mtcars - Has completed: FALSE - Rows fetched: 0 - From fea1b2f78a4227cfcb3ceed79b18dd7ea1c172ae Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 18 Apr 2024 16:20:10 -0500 Subject: [PATCH 16/33] Restore bigrquerystorage to correct place --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2ca64657..57f5f6f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,6 @@ BugReports: https://github.com/r-dbi/bigrquery/issues Depends: R (>= 3.6) Imports: - bigrquerystorage (>= 1.1.0), bit64, brio, cli, @@ -31,6 +30,7 @@ Imports: rlang (>= 1.1.0), tibble Suggests: + bigrquerystorage (>= 1.1.0), blob, covr, dbplyr (>= 2.4.0), From bb4ff9be1b44c060ea56028ba09d0e1a7e6d0d2b Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 18 Apr 2024 16:20:44 -0500 Subject: [PATCH 17/33] Implement `bq_perform_query_schema()` --- NAMESPACE | 1 + R/bq-perform.R | 73 ++++++++++++++++++++++++++------ man/api-perform.Rd | 9 ++++ tests/testthat/test-bq-perform.R | 12 +++++- 4 files changed, 81 insertions(+), 14 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5d04491b..dff217d8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -88,6 +88,7 @@ export(bq_perform_extract) export(bq_perform_load) export(bq_perform_query) export(bq_perform_query_dry_run) +export(bq_perform_query_schema) export(bq_perform_upload) export(bq_project_datasets) export(bq_project_jobs) diff --git a/R/bq-perform.R b/R/bq-perform.R index ec005098..b9f1301b 100644 --- a/R/bq-perform.R +++ b/R/bq-perform.R @@ -340,21 +340,14 @@ bq_perform_query_dry_run <- function(query, billing, parameters = NULL, use_legacy_sql = FALSE) { - check_string(query) - check_string(billing) - check_bool(use_legacy_sql) - query <- list( - query = unbox(query), - useLegacySql = unbox(use_legacy_sql) + query <- bq_perform_query_data( + query = query, + billing = billing, + default_dataset = default_dataset, + parameters = parameters, + use_legacy_sql = use_legacy_sql ) - if (!is.null(parameters)) { - parameters <- as_bq_params(parameters) - query$queryParameters <- as_json(parameters) - } - if (!is.null(default_dataset)) { - query$defaultDataset <- datasetReference(default_dataset) - } url <- bq_path(billing, jobs = "") body <- 
list(configuration = list(query = query, dryRun = unbox(TRUE))) @@ -368,6 +361,60 @@ bq_perform_query_dry_run <- function(query, billing, structure(bytes, class = "bq_bytes") } +#' @export +#' @rdname api-perform +bq_perform_query_schema <- function(query, billing, + ..., + default_dataset = NULL, + parameters = NULL) { + + query <- bq_perform_query_data( + query = query, + billing = billing, + default_dataset = default_dataset, + parameters = parameters, + use_legacy_sql = FALSE + ) + + url <- bq_path(billing, jobs = "") + body <- list(configuration = list(query = query, dryRun = unbox(TRUE))) + + res <- bq_post( + url, + body = bq_body(body, ...), + query = list(fields = "statistics") + ) + # https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema + res$statistics$query$schema$fields +} + +bq_perform_query_data <- function(query, billing, + ..., + default_dataset = NULL, + parameters = NULL, + use_legacy_sql = FALSE, + call = caller_env()) { + check_string(query, error_call = call) + check_string(billing, error_call = call) + check_bool(use_legacy_sql, error_call = call) + + query <- list( + query = unbox(query), + useLegacySql = unbox(use_legacy_sql) + ) + if (!is.null(parameters)) { + parameters <- as_bq_params(parameters) + query$queryParameters <- as_json(parameters) + } + if (!is.null(default_dataset)) { + query$defaultDataset <- datasetReference(default_dataset) + } + + query +} + + + #' @export #' @rdname api-perform bq_perform_copy <- function(src, dest, diff --git a/man/api-perform.Rd b/man/api-perform.Rd index 3ee02a7b..821f101a 100644 --- a/man/api-perform.Rd +++ b/man/api-perform.Rd @@ -7,6 +7,7 @@ \alias{bq_perform_load} \alias{bq_perform_query} \alias{bq_perform_query_dry_run} +\alias{bq_perform_query_schema} \alias{bq_perform_copy} \title{BigQuery jobs: perform a job} \usage{ @@ -64,6 +65,14 @@ bq_perform_query_dry_run( use_legacy_sql = FALSE ) +bq_perform_query_schema( + query, + billing, + ..., + default_dataset = NULL, + parameters = NULL +) + bq_perform_copy( src, dest, diff --git a/tests/testthat/test-bq-perform.R b/tests/testthat/test-bq-perform.R index 3b804321..9ad0a06d 100644 --- a/tests/testthat/test-bq-perform.R +++ b/tests/testthat/test-bq-perform.R @@ -95,10 +95,20 @@ test_that("can supply array parameters", { expect_setequal(df$values, c("a", "b")) }) -test_that("can estimate cost", { +test_that("can estimate cost and get schema", { cost <- bq_perform_query_dry_run( "SELECT count(*) FROM bigquery-public-data.moon_phases.moon_phases", billing = bq_test_project() ) expect_equal(cost, structure(0, class = "bq_bytes")) + + schema <- bq_perform_query_schema( + "SELECT * FROM bigquery-public-data.moon_phases.moon_phases", + billing = bq_test_project() + ) + names <- vapply(schema, function(x) x$name, character(1)) + expect_equal(names, c("phase", "phase_emoji", "peak_datetime")) + + types <- vapply(schema, function(x) x$type, character(1)) + expect_equal(types, c("STRING", "STRING", "DATETIME")) }) From 17c0d3a02337552d0c0e7bc2bc99017146c6b621 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 18 Apr 2024 16:37:54 -0500 Subject: [PATCH 18/33] Use `bq_perform_query_schema()` to get vars --- NEWS.md | 5 +++++ R/dplyr.R | 12 ++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index fc5d66d5..914549b9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # bigrquery (development version) +* `tbl()` uses a more efficient method to determine variable names. 
+ +* New `bq_perform_query_schema()` to determine the schema of a query + without executing it. + * If the bigrquerystorage package is installed, `bq_table_download()` (and hence `collect()`, `dbGetQuery()` and `dbFetch()` will use it. This will drastically improve the speed of downloading large datasets. A big thanks diff --git a/R/dplyr.R b/R/dplyr.R index 47a7b38e..de9cbcaa 100644 --- a/R/dplyr.R +++ b/R/dplyr.R @@ -48,10 +48,18 @@ src_bigquery <- function(project, tbl.BigQueryConnection <- function(src, from, ...) { src <- dbplyr::src_dbi(src, auto_disconnect = FALSE) + sql <- dbplyr::sql_query_fields(src$con, from) + dataset <- if (!is.null(src$con@dataset)) as_bq_dataset(src$con) + schema <- bq_perform_query_schema(sql, + billing = src$con@billing, + default_dataset = dataset + ) + vars <- map_chr(schema, "[[", "name") + if (utils::packageVersion("dbplyr") >= "2.4.0.9000") { - tbl <- dplyr::tbl(src, from = from) + tbl <- dplyr::tbl(src, from = from, vars = vars) } else { - tbl <- dplyr::tbl(src, from = from, check_from = FALSE) + tbl <- dplyr::tbl(src, from = from, vars = vars, check_from = FALSE) } # This is ugly, but I don't see a better way of doing this From f81a82b7b8fb893a133323e7fe463a7b21092773 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 19 Apr 2024 08:03:28 -0500 Subject: [PATCH 19/33] Add some basic type tests --- tests/testthat/test-bq-download.R | 48 +++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index 1921ebcd..2d98443b 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -73,6 +73,54 @@ test_that("uses arrow api if bigrquerystorage installed", { expect_equal(check_api(), "json") }) +test_that("can convert date time types", { + sql <- "SELECT + '\U0001f603' as unicode, + datetime, + TRUE as logicaltrue, + FALSE as logicalfalse, + CAST ('Hi' as BYTES) as bytes, + CAST (datetime as DATE) as date, + CAST (datetime as TIME) as time, + CAST (datetime as TIMESTAMP) as timestamp, + ST_GEOGFROMTEXT('POINT (30 10)') as geography + FROM (SELECT DATETIME '2000-01-02 03:04:05.67' as datetime) + " + + tb <- bq_project_query(bq_test_project(), sql, quiet = TRUE) + df <- bq_table_download(tb, api = "arrow") + + base <- ISOdatetime(2000, 1, 2, 3, 4, 5.67, tz = "UTC") + expect_identical(df$unicode, "\U0001f603", ignore_encoding = FALSE) + + expect_equal(df$logicaltrue, TRUE) + expect_equal(df$logicalfalse, FALSE) + + expect_equal(unclass(df$bytes), list(as.raw(c(0x48, 0x69)))) + + expect_equal(df$date, as.Date(base)) + expect_equal(df$timestamp, base) + # expect_equal(df$datetime, base) + expect_equal(df$time, hms::hms(hours = 3, minutes = 4, seconds = 5.67)) + + # expect_identical(df$geography, wk::wkt("POINT(30 10)")) +}) + +test_that("the return type of integer columns is set by the bigint argument", { + x <- c("-2147483648", "-2147483647", "-1", "0", "1", "2147483647", "2147483648") + sql <- paste0("SELECT * FROM UNNEST ([", paste0(x, collapse = ","), "]) AS x"); + qry <- bq_project_query(bq_test_project(), sql) + + out_int64 <- bq_table_download(qry, bigint = "integer64", api = "arrow")$x + expect_identical(out_int64, bit64::as.integer64(x)) + + out_dbl <- bq_table_download(qry, bigint = "numeric", api = "arrow")$x + expect_identical(out_dbl, as.double(x)) + + out_chr <- bq_table_download(qry, bigint = "character", api = "arrow")$x + expect_identical(out_chr, x) +}) + # helpers around row and chunk params 
------------------------------------------ test_that("set_row_params() works ", { From 6043c82cfadd361a7bf971897c9680ff76763a4b Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 19 Apr 2024 08:07:58 -0500 Subject: [PATCH 20/33] Polish docs some more --- R/bq-download.R | 20 ++++++++++++++------ man/bq_table_download.Rd | 21 +++++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/R/bq-download.R b/R/bq-download.R index 6d067470..effd39ef 100644 --- a/R/bq-download.R +++ b/R/bq-download.R @@ -2,12 +2,20 @@ #' #' @description #' This function provides two ways to download data from BigQuery, transfering -#' data using either JSON or arrow, depending on the `api` argument. JSON is -#' much slower but requires no additional dependencies, and is what bigrquery -#' used prior to version 1.6.0. The arrow method is much much faster, but -#' requires the bigrquerystorage, which in turn requires the arrow package. -#' These dependencies are fairly heavy, and can be tricky to compile on Linux, -#' but in our opinion the massive speedup is worth the effort. +#' data using either JSON or arrow, depending on the `api` argument. +#' `api = "json"` is much slower but requires no additional dependencies, +#' and is what bigrquery always used prior to v1.6.0. `api = "arrow"` is +#' much much faster, but requires the bigrquerystorage package. +#' +#' ## Arrow API +#' +#' The arrow API is much faster, but has heavier dependencies: bigrquerystorage +#' requires the arrow package, which can be tricky to compile on Linux (but in +#' general you can get a binary from +#' [Posit Public Package Manager](https://posit.co/products/cloud/public-package-manager/). +#' +#' Currently the only know limitation of `api = "arrow"` is that geographic +#' data is returned as a string; you'll need to parse yourself using `wkt::wkt()`. #' #' ## JSON API #' diff --git a/man/bq_table_download.Rd b/man/bq_table_download.Rd index 0d6eb2af..aeac37e0 100644 --- a/man/bq_table_download.Rd +++ b/man/bq_table_download.Rd @@ -69,12 +69,21 @@ a tibble. If you need a \code{data.frame}, coerce the results with } \description{ This function provides two ways to download data from BigQuery, transfering -data using either JSON or arrow, depending on the \code{api} argument. JSON is -much slower but requires no additional dependencies, and is what bigrquery -used prior to version 1.6.0. The arrow method is much much faster, but -requires the bigrquerystorage, which in turn requires the arrow package. -These dependencies are fairly heavy, and can be tricky to compile on Linux, -but in our opinion the massive speedup is worth the effort. +data using either JSON or arrow, depending on the \code{api} argument. +\code{api = "json"} is much slower but requires no additional dependencies, +and is what bigrquery always used prior to v1.6.0. \code{api = "arrow"} is +much much faster, but requires the bigrquerystorage package. +\subsection{Arrow API}{ + +The arrow API is much faster, but has heavier dependencies: bigrquerystorage +requires the arrow package, which can be tricky to compile on Linux (but in +general you can get a binary from +\href{https://posit.co/products/cloud/public-package-manager/}{Posit Public Package Manager}. + +Currently the only know limitation of \code{api = "arrow"} is that geographic +data is returned as a string; you'll need to parse yourself using \code{wkt::wkt()}. +} + \subsection{JSON API}{ The JSON API retrieves rows in chunks of \code{page_size}. 
It is most suitable From 55694bbc5ce9167102486856016180a738225e7e Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 19 Apr 2024 08:22:07 -0500 Subject: [PATCH 21/33] Polishing tests --- R/bq-download.R | 11 +++++++---- man/bq_table_download.Rd | 8 ++++++-- tests/testthat/test-bq-download.R | 22 ++++++++++++++++++++-- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/R/bq-download.R b/R/bq-download.R index effd39ef..a0428618 100644 --- a/R/bq-download.R +++ b/R/bq-download.R @@ -10,12 +10,15 @@ #' ## Arrow API #' #' The arrow API is much faster, but has heavier dependencies: bigrquerystorage -#' requires the arrow package, which can be tricky to compile on Linux (but in -#' general you can get a binary from +#' requires the arrow package, which can be tricky to compile on Linux (but you +#' usually should be able to get a binary from #' [Posit Public Package Manager](https://posit.co/products/cloud/public-package-manager/). #' -#' Currently the only know limitation of `api = "arrow"` is that geographic -#' data is returned as a string; you'll need to parse yourself using `wkt::wkt()`. +#' There are two known limitations of `api = "arrow"`: +#' +#' * Geographic data is returned as a string; you'll need to parse yourself +#' using `wkt::wkt()`. +#' * When querying public data, you'll now need to provide a `billing` project. #' #' ## JSON API #' diff --git a/man/bq_table_download.Rd b/man/bq_table_download.Rd index aeac37e0..201ced71 100644 --- a/man/bq_table_download.Rd +++ b/man/bq_table_download.Rd @@ -80,8 +80,12 @@ requires the arrow package, which can be tricky to compile on Linux (but in general you can get a binary from \href{https://posit.co/products/cloud/public-package-manager/}{Posit Public Package Manager}. -Currently the only know limitation of \code{api = "arrow"} is that geographic -data is returned as a string; you'll need to parse yourself using \code{wkt::wkt()}. +There are two known limitations of \code{api = "arrow"}: +\itemize{ +\item Geographic data is returned as a string; you'll need to parse yourself +using \code{wkt::wkt()}. +\item When querying public data, you'll now need to provide a \code{billing} project. 
+} } \subsection{JSON API}{ diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index 2d98443b..7b58fcff 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -73,7 +73,7 @@ test_that("uses arrow api if bigrquerystorage installed", { expect_equal(check_api(), "json") }) -test_that("can convert date time types", { +test_that("arrow api can convert non-nested types", { sql <- "SELECT '\U0001f603' as unicode, datetime, @@ -106,7 +106,25 @@ test_that("can convert date time types", { # expect_identical(df$geography, wk::wkt("POINT(30 10)")) }) -test_that("the return type of integer columns is set by the bigint argument", { +test_that("arrow api can convert nested types", { + skip("https://github.com/meztez/bigrquerystorage/issues/54") + sql <- "SELECT + STRUCT(1.0 AS a, 'abc' AS b) as s, + [1.0, 2.0, 3.0] as a, + [STRUCT(1.0 as a, 'a' as b), STRUCT(2.0, 'b'), STRUCT(3, 'c')] as aos, + STRUCT([1.0, 2.0, 3.0] as a, ['a', 'b'] as b) as soa + " + + tb <- bq_project_query(bq_test_project(), sql, quiet = TRUE) + df <- bq_table_download(tb, api = "arrow") + + expect_equal(df$s, list(list(a = 1, b = "abc"))) + expect_equal(df$a, list(c(1, 2, 3))) + expect_equal(df$aos, list(tibble(a = c(1, 2, 3), b = c("a", "b", "c")))) + expect_equal(df$soa, list(list(a = c(1, 2, 3), b = c("a", "b")))) +}) + +test_that("arrow api respects bigint", { x <- c("-2147483648", "-2147483647", "-1", "0", "1", "2147483647", "2147483648") sql <- paste0("SELECT * FROM UNNEST ([", paste0(x, collapse = ","), "]) AS x"); qry <- bq_project_query(bq_test_project(), sql) From f777d65bc50fe74ef12ccd9c782b26d8cd08b921 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 19 Apr 2024 08:27:53 -0500 Subject: [PATCH 22/33] Test & improve argument warnings --- R/bq-download.R | 15 ++++++++++++--- tests/testthat/_snaps/bq-download.md | 13 +++++++++++++ tests/testthat/test-bq-download.R | 12 ++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/R/bq-download.R b/R/bq-download.R index a0428618..4d6face5 100644 --- a/R/bq-download.R +++ b/R/bq-download.R @@ -104,13 +104,22 @@ bq_table_download <- if (api == "arrow") { check_installed("bigrquerystorage", "required to download using arrow API") if (!missing(page_size)) { - cli::cli_warn('{.arg page_size} is ignored when {.code api == "arrow"}') + cli::cli_warn( + '{.arg page_size} is ignored when {.code api == "arrow"}', + call = environment() + ) } if (!missing(start_index)) { - cli::cli_warn('{.arg start_index} is ignored when {.code api == "arrow"}') + cli::cli_warn( + '{.arg start_index} is ignored when {.code api == "arrow"}', + call = environment() + ) } if (!missing(max_connections)) { - cli::cli_warn('{.arg max_connections} is ignored when {.code api == "arrow"}') + cli::cli_warn( + '{.arg max_connections} is ignored when {.code api == "arrow"}', + call = environment() + ) } return(bigrquerystorage::bqs_table_download( diff --git a/tests/testthat/_snaps/bq-download.md b/tests/testthat/_snaps/bq-download.md index b9496982..5d5d49a0 100644 --- a/tests/testthat/_snaps/bq-download.md +++ b/tests/testthat/_snaps/bq-download.md @@ -11,3 +11,16 @@ x 35,000 rows were requested, but only {n} rows were received. i Leave `page_size` unspecified or use an even smaller value. +# warns if supplying unnused arguments + + Code + . 
<- bq_table_download(tb, api = "arrow", page_size = 1, start_index = 1, + max_connections = 1) + Condition + Warning in `bq_table_download()`: + `page_size` is ignored when `api == "arrow"` + Warning in `bq_table_download()`: + `start_index` is ignored when `api == "arrow"` + Warning in `bq_table_download()`: + `max_connections` is ignored when `api == "arrow"` + diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index 7b58fcff..4520c6a1 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -73,6 +73,18 @@ test_that("uses arrow api if bigrquerystorage installed", { expect_equal(check_api(), "json") }) +test_that("warns if supplying unnused arguments", { + tb <- bq_project_query(bq_test_project(), "SELECT 1.0", quiet = TRUE) + expect_snapshot( + . <- bq_table_download(tb, + api = "arrow", + page_size = 1, + start_index = 1, + max_connections = 1 + ) + ) +}) + test_that("arrow api can convert non-nested types", { sql <- "SELECT '\U0001f603' as unicode, From 26104ba7486d32d5985dad426c288990bd3a78d6 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 19 Apr 2024 08:29:07 -0500 Subject: [PATCH 23/33] Drop R3.6 check since we're losing it soon anyway --- .github/workflows/R-CMD-check.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ee65ccb5..d9fced24 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -25,8 +25,6 @@ jobs: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - # Use 3.6 to trigger usage of RTools35 - - {os: windows-latest, r: '3.6'} # use 4.1 to check with rtools40's older compiler - {os: windows-latest, r: '4.1'} From 50b69c16968ad972b393eb3e32ed199d27c6ec91 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 19 Apr 2024 08:31:56 -0500 Subject: [PATCH 24/33] Re-document --- man/bq_table_download.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/bq_table_download.Rd b/man/bq_table_download.Rd index 201ced71..b0d8eaaf 100644 --- a/man/bq_table_download.Rd +++ b/man/bq_table_download.Rd @@ -76,8 +76,8 @@ much much faster, but requires the bigrquerystorage package. \subsection{Arrow API}{ The arrow API is much faster, but has heavier dependencies: bigrquerystorage -requires the arrow package, which can be tricky to compile on Linux (but in -general you can get a binary from +requires the arrow package, which can be tricky to compile on Linux (but you +usually should be able to get a binary from \href{https://posit.co/products/cloud/public-package-manager/}{Posit Public Package Manager}. 
There are two known limitations of \code{api = "arrow"}: From 7f59af7e0aed21806636d910e4e583744c50ff82 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 19 Sep 2024 17:40:34 -0500 Subject: [PATCH 25/33] Can now use CRAN nanoparquet --- DESCRIPTION | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a44b1746..f828baf0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,7 @@ Imports: prettyunits, rlang (>= 1.1.0), tibble, - nanoparquet (> 0.3.1) + nanoparquet (>= 0.3.1) Suggests: blob, covr, @@ -41,8 +41,6 @@ Suggests: testthat (>= 3.1.5), wk (>= 0.3.2), withr -Remotes: - r-lib/nanoparquet LinkingTo: cli, cpp11, From 27c3dc7439f3ed4c5a9825a42ea09c6f0f54ef51 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Sep 2024 07:32:08 -0500 Subject: [PATCH 26/33] Re-add accidentally dropped dep --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index f828baf0..06c17f43 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,6 +31,7 @@ Imports: tibble, nanoparquet (>= 0.3.1) Suggests: + bigrquerystorage (>= 1.1.0), blob, covr, dbplyr (>= 2.4.0), From 120d5a5012bc13c71985de5b974eed338abece44 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Sep 2024 07:34:38 -0500 Subject: [PATCH 27/33] Can use CRAN bigrquerystorage --- DESCRIPTION | 2 -- 1 file changed, 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 06c17f43..a061c365 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -83,5 +83,3 @@ Collate: 'import-standalone-types-check.R' 'utils.R' 'zzz.R' -Remotes: - meztez/bigrquerystorage#52 From 3858c5e6e793b4ba10709b3d59e16fb79aa5ece5 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Sep 2024 07:35:56 -0500 Subject: [PATCH 28/33] Fix merge issue in NEWS --- NEWS.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index d9a0888e..8ecfd063 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,21 +1,14 @@ # bigrquery (development version) -* The `bq_perform_upload()` function now allows users to choose the transmission format (JSON or PARQUET) for data sent to BigQuery (@apalacio9502, #608). -* bigrquery now requires R 4.0, in line with our version support principles. - -* The `bq_perform_upload()` function now allows users to choose the transmission format (JSON or PARQUET) for data sent to BigQuery (@apalacio9502, #608). -* bigrquery now requires R 4.0, in line with our version support principles. - -* `tbl()` uses a more efficient method to determine variable names. - -* New `bq_perform_query_schema()` to determine the schema of a query - without executing it. - * If the bigrquerystorage package is installed, `bq_table_download()` (and hence `collect()`, `dbGetQuery()` and `dbFetch()` will use it. This will drastically improve the speed of downloading large datasets. A big thanks to @meztez for creating the bigrquerystorage package! +* The `bq_perform_upload()` function now allows users to choose the transmission format (JSON or PARQUET) for data sent to BigQuery (@apalacio9502, #608). + +* bigrquery now requires R 4.0, in line with our version support principles. + # bigrquery 1.5.1 * Forward compatibility with upcoming dbplyr release (#601). 
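In practice the download path described in that NEWS entry needs no extra code: once bigrquerystorage is installed, `bq_table_download()` selects `api = "arrow"` on its own. A minimal sketch of calling it explicitly (the public table and the `bq_test_project()` billing project are placeholders for your own values):

    library(bigrquery)
    # api = "arrow" is chosen automatically when bigrquerystorage is installed;
    # spelling it out here only makes the choice explicit.
    df <- bq_table_download(
      "bigquery-public-data.utility_us.country_code_iso",
      n_max = 1000,
      billing = bq_test_project(),  # the arrow API needs a billing project for public data
      api = "arrow"
    )
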
From 9c513736cb42c1fc86a6234d9fcc8e2e9dafdf19 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Sep 2024 07:41:47 -0500 Subject: [PATCH 29/33] Polish docs --- R/bq-download.R | 30 +++++++++++++++--------------- man/bq_table_download.Rd | 8 ++++---- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/R/bq-download.R b/R/bq-download.R index 4d6face5..0cc48c3d 100644 --- a/R/bq-download.R +++ b/R/bq-download.R @@ -2,30 +2,30 @@ #' #' @description #' This function provides two ways to download data from BigQuery, transfering -#' data using either JSON or arrow, depending on the `api` argument. -#' `api = "json"` is much slower but requires no additional dependencies, -#' and is what bigrquery always used prior to v1.6.0. `api = "arrow"` is -#' much much faster, but requires the bigrquerystorage package. +#' data using either JSON or arrow, depending on the `api` argument. If +#' bigrquerystorage is installed, `api = "arrow"` will be used (because it's +#' so much faster, but see the limitions below), otherwise you can select +#' deliberately by using `api = "json"` or `api = "arrow"`. #' #' ## Arrow API #' #' The arrow API is much faster, but has heavier dependencies: bigrquerystorage #' requires the arrow package, which can be tricky to compile on Linux (but you -#' usually should be able to get a binary from +#' usually should be able to get a binary from #' [Posit Public Package Manager](https://posit.co/products/cloud/public-package-manager/). -#' +#' #' There are two known limitations of `api = "arrow"`: #' -#' * Geographic data is returned as a string; you'll need to parse yourself +#' * Geographic data is returned as a string; you'll need to parse yourself #' using `wkt::wkt()`. #' * When querying public data, you'll now need to provide a `billing` project. -#' +#' #' ## JSON API -#' -#' The JSON API retrieves rows in chunks of `page_size`. It is most suitable +#' +#' The JSON API retrieves rows in chunks of `page_size`. It is most suitable #' for results of smaller queries (<100 MB, say). Unfortunately due to -#' limitations in the BigQuery API, you may need to vary this parameter -#' depending on the complexity of the underlying data. +#' limitations in the BigQuery API, you may need to vary this parameter +#' depending on the complexity of the underlying data. #' #' The JSON API will convert nested and repeated columns in to list-columns #' as follows: @@ -41,8 +41,8 @@ #' @param x A [bq_table] #' @param n_max Maximum number of results to retrieve. Use `Inf` to retrieve all #' rows. -#' @param page_size (JSON only) The number of rows requested per chunk. It is -#' recommended to leave this unspecified until you have evidence that the +#' @param page_size (JSON only) The number of rows requested per chunk. It is +#' recommended to leave this unspecified until you have evidence that the #' `page_size` selected automatically by `bq_table_download()` is problematic. #' #' When `page_size = NULL` bigrquery determines a conservative, natural chunk @@ -50,7 +50,7 @@ #' chunk fits on one page, i.e. that the requested row limit is low enough to #' prevent the API from paginating based on response size. #' @param start_index (JSON only) Starting row index (zero-based). -#' @param max_connections (JSON only) Number of maximum simultaneous +#' @param max_connections (JSON only) Number of maximum simultaneous #' connections to BigQuery servers. #' @param api Which API to use? 
The `"json"` API works where ever bigrquery #' does, but is slow and can require fiddling with the `page_size` parameter. diff --git a/man/bq_table_download.Rd b/man/bq_table_download.Rd index b0d8eaaf..d4972994 100644 --- a/man/bq_table_download.Rd +++ b/man/bq_table_download.Rd @@ -69,10 +69,10 @@ a tibble. If you need a \code{data.frame}, coerce the results with } \description{ This function provides two ways to download data from BigQuery, transfering -data using either JSON or arrow, depending on the \code{api} argument. -\code{api = "json"} is much slower but requires no additional dependencies, -and is what bigrquery always used prior to v1.6.0. \code{api = "arrow"} is -much much faster, but requires the bigrquerystorage package. +data using either JSON or arrow, depending on the \code{api} argument. If +bigrquerystorage is installed, \code{api = "arrow"} will be used (because it's +so much faster, but see the limitions below), otherwise you can select +deliberately by using \code{api = "json"} or \code{api = "arrow"}. \subsection{Arrow API}{ The arrow API is much faster, but has heavier dependencies: bigrquerystorage From 9e6eceee7bd9fb13ebf1b939d039b578769a4c21 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Sep 2024 10:34:01 -0500 Subject: [PATCH 30/33] Switch back to dev version --- DESCRIPTION | 4 +++- R/bq-download.R | 7 ++----- man/bq_table_download.Rd | 8 ++------ 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a061c365..34bfa4ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,7 @@ Imports: tibble, nanoparquet (>= 0.3.1) Suggests: - bigrquerystorage (>= 1.1.0), + bigrquerystorage (>= 1.1.0.9000), blob, covr, dbplyr (>= 2.4.0), @@ -83,3 +83,5 @@ Collate: 'import-standalone-types-check.R' 'utils.R' 'zzz.R' +Remotes: + meztez/bigrquerystorage diff --git a/R/bq-download.R b/R/bq-download.R index 0cc48c3d..14d12a66 100644 --- a/R/bq-download.R +++ b/R/bq-download.R @@ -14,11 +14,8 @@ #' usually should be able to get a binary from #' [Posit Public Package Manager](https://posit.co/products/cloud/public-package-manager/). #' -#' There are two known limitations of `api = "arrow"`: -#' -#' * Geographic data is returned as a string; you'll need to parse yourself -#' using `wkt::wkt()`. -#' * When querying public data, you'll now need to provide a `billing` project. +#' There's one known limitation of `api = "arrow"`: when querying public data, +#' you'll now need to provide a `billing` project. #' #' ## JSON API #' diff --git a/man/bq_table_download.Rd b/man/bq_table_download.Rd index d4972994..939f8780 100644 --- a/man/bq_table_download.Rd +++ b/man/bq_table_download.Rd @@ -80,12 +80,8 @@ requires the arrow package, which can be tricky to compile on Linux (but you usually should be able to get a binary from \href{https://posit.co/products/cloud/public-package-manager/}{Posit Public Package Manager}. -There are two known limitations of \code{api = "arrow"}: -\itemize{ -\item Geographic data is returned as a string; you'll need to parse yourself -using \code{wkt::wkt()}. -\item When querying public data, you'll now need to provide a \code{billing} project. -} +There's one known limitation of \code{api = "arrow"}: when querying public data, +you'll now need to provide a \code{billing} project. 
} \subsection{JSON API}{ From 97e87abc8b02655859b66278b6816cf2e5a716ae Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Sep 2024 10:40:17 -0500 Subject: [PATCH 31/33] Restore more types supported by dev bigrquerystorage --- tests/testthat/test-bq-download.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index 4520c6a1..815d09b9 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -76,7 +76,7 @@ test_that("uses arrow api if bigrquerystorage installed", { test_that("warns if supplying unnused arguments", { tb <- bq_project_query(bq_test_project(), "SELECT 1.0", quiet = TRUE) expect_snapshot( - . <- bq_table_download(tb, + . <- bq_table_download(tb, api = "arrow", page_size = 1, start_index = 1, @@ -108,14 +108,14 @@ test_that("arrow api can convert non-nested types", { expect_equal(df$logicaltrue, TRUE) expect_equal(df$logicalfalse, FALSE) - expect_equal(unclass(df$bytes), list(as.raw(c(0x48, 0x69)))) + expect_equal(df$bytes, blob::as.blob(as.raw(c(0x48, 0x69)))) expect_equal(df$date, as.Date(base)) expect_equal(df$timestamp, base) - # expect_equal(df$datetime, base) + expect_equal(df$datetime, base) expect_equal(df$time, hms::hms(hours = 3, minutes = 4, seconds = 5.67)) - # expect_identical(df$geography, wk::wkt("POINT(30 10)")) + expect_identical(df$geography, wk::wkt("POINT(30 10)")) }) test_that("arrow api can convert nested types", { From 1446d9fd11334d562653e872d680dec57fd92b51 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Sep 2024 11:58:11 -0500 Subject: [PATCH 32/33] Improve tests --- R/bq-perform.R | 11 ++++------- tests/testthat/test-bq-download.R | 10 +++++----- tests/testthat/test-dplyr.R | 17 ++++++++++++++--- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/R/bq-perform.R b/R/bq-perform.R index 92b486e2..203d97a6 100644 --- a/R/bq-perform.R +++ b/R/bq-perform.R @@ -361,9 +361,8 @@ bq_perform_query_dry_run <- function(query, billing, query <- bq_perform_query_data( query = query, - billing = billing, default_dataset = default_dataset, - parameters = parameters, + parameters = parameters, use_legacy_sql = use_legacy_sql ) @@ -388,9 +387,8 @@ bq_perform_query_schema <- function(query, billing, query <- bq_perform_query_data( query = query, - billing = billing, default_dataset = default_dataset, - parameters = parameters, + parameters = parameters, use_legacy_sql = FALSE ) @@ -406,14 +404,13 @@ bq_perform_query_schema <- function(query, billing, res$statistics$query$schema$fields } -bq_perform_query_data <- function(query, billing, +bq_perform_query_data <- function(query, ..., default_dataset = NULL, parameters = NULL, - use_legacy_sql = FALSE, + use_legacy_sql = FALSE, call = caller_env()) { check_string(query, error_call = call) - check_string(billing, error_call = call) check_bool(use_legacy_sql, error_call = call) query <- list( diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index 815d09b9..9d07c660 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -100,7 +100,7 @@ test_that("arrow api can convert non-nested types", { " tb <- bq_project_query(bq_test_project(), sql, quiet = TRUE) - df <- bq_table_download(tb, api = "arrow") + df <- bq_table_download(tb, api = "arrow", quiet = TRUE) base <- ISOdatetime(2000, 1, 2, 3, 4, 5.67, tz = "UTC") expect_identical(df$unicode, "\U0001f603", ignore_encoding = FALSE) @@ -128,7 +128,7 @@ 
test_that("arrow api can convert nested types", { " tb <- bq_project_query(bq_test_project(), sql, quiet = TRUE) - df <- bq_table_download(tb, api = "arrow") + df <- bq_table_download(tb, api = "arrow", quiet = TRUE) expect_equal(df$s, list(list(a = 1, b = "abc"))) expect_equal(df$a, list(c(1, 2, 3))) @@ -141,13 +141,13 @@ test_that("arrow api respects bigint", { sql <- paste0("SELECT * FROM UNNEST ([", paste0(x, collapse = ","), "]) AS x"); qry <- bq_project_query(bq_test_project(), sql) - out_int64 <- bq_table_download(qry, bigint = "integer64", api = "arrow")$x + out_int64 <- bq_table_download(qry, bigint = "integer64", api = "arrow", quiet = TRUE)$x expect_identical(out_int64, bit64::as.integer64(x)) - out_dbl <- bq_table_download(qry, bigint = "numeric", api = "arrow")$x + out_dbl <- bq_table_download(qry, bigint = "numeric", api = "arrow", quiet = TRUE)$x expect_identical(out_dbl, as.double(x)) - out_chr <- bq_table_download(qry, bigint = "character", api = "arrow")$x + out_chr <- bq_table_download(qry, bigint = "character", api = "arrow", quiet = TRUE)$x expect_identical(out_chr, x) }) diff --git a/tests/testthat/test-dplyr.R b/tests/testthat/test-dplyr.R index 8b0c5774..01bfeeed 100644 --- a/tests/testthat/test-dplyr.R +++ b/tests/testthat/test-dplyr.R @@ -21,14 +21,25 @@ test_that("can work with literal SQL", { }) test_that("can work with nested table identifier", { - con_us <- DBI::dbConnect( + con1 <- DBI::dbConnect( bigquery(), project = "bigquery-public-data", billing = bq_test_project() ) + # As far as I can tell from the BigQuery API there's no way to provide + # a default project; you can either provide a default dataset + project or + # nothing + table_name <- I("bigquery-public-data.utility_us.country_code_iso") + expect_no_error(dplyr::collect(head(dplyr::tbl(con1, table_name)))) - expect_s3_class(dplyr::collect(head(dplyr::tbl(con_us, I("utility_us.country_code_iso")))), "tbl_df") - expect_error(dplyr::collect(head(dplyr::tbl(con_us, "utility_us.country_code_iso"))), "tbl_df") + + con2 <- DBI::dbConnect( + bigquery(), + project = "bigquery-public-data", + dataset = "utility_us", + billing = bq_test_project(), + ) + expect_no_error(dplyr::collect(head(dplyr::tbl(con2, "country_code_iso")))) }) test_that("can copy_to", { From fcfb00cf2480075cd1ddb3131eea6f7006e1f16d Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Sep 2024 15:48:49 -0500 Subject: [PATCH 33/33] Use correct function name --- tests/testthat/test-bq-download.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-bq-download.R b/tests/testthat/test-bq-download.R index 9d07c660..b7ee579a 100644 --- a/tests/testthat/test-bq-download.R +++ b/tests/testthat/test-bq-download.R @@ -108,7 +108,7 @@ test_that("arrow api can convert non-nested types", { expect_equal(df$logicaltrue, TRUE) expect_equal(df$logicalfalse, FALSE) - expect_equal(df$bytes, blob::as.blob(as.raw(c(0x48, 0x69)))) + expect_equal(df$bytes, blob::as_blob(as.raw(c(0x48, 0x69)))) expect_equal(df$date, as.Date(base)) expect_equal(df$timestamp, base)