From 7889c0c20c38b9405cfea1eca2098df1f17385ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Drago=C8=99=20Moldovan-Gr=C3=BCnfeld?= Date: Mon, 11 Apr 2022 12:15:04 -0500 Subject: [PATCH] ARROW-14442: [R] fix behaviour when converting timestamps with "" as tzone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #12240 from dragosmg/timestampts_missing_timezone Authored-by: Dragoș Moldovan-Grünfeld Signed-off-by: Jonathan Keane --- r/src/type_infer.cpp | 6 +++-- r/tests/testthat/test-Array.R | 27 +++++++++++++++++--- r/tests/testthat/test-dplyr-funcs-datetime.R | 4 --- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/r/src/type_infer.cpp b/r/src/type_infer.cpp index 2568757aa2d83..75b1e85c426fe 100644 --- a/r/src/type_infer.cpp +++ b/r/src/type_infer.cpp @@ -72,7 +72,8 @@ std::shared_ptr InferArrowTypeFromVector(SEXP x) { } else if (Rf_inherits(x, "POSIXct")) { auto tzone_sexp = Rf_getAttrib(x, symbols::tzone); if (Rf_isNull(tzone_sexp)) { - return timestamp(TimeUnit::MICRO); + auto systzone_sexp = cpp11::package("base")["Sys.timezone"]; + return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(systzone_sexp(), 0))); } else { return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(tzone_sexp, 0))); } @@ -88,7 +89,8 @@ std::shared_ptr InferArrowTypeFromVector(SEXP x) { if (Rf_inherits(x, "POSIXct")) { auto tzone_sexp = Rf_getAttrib(x, symbols::tzone); if (Rf_isNull(tzone_sexp)) { - return timestamp(TimeUnit::MICRO); + auto systzone_sexp = cpp11::package("base")["Sys.timezone"]; + return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(systzone_sexp(), 0))); } else { return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(tzone_sexp, 0))); } diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 15d6d79247a43..2f75efb3d6701 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -260,11 +260,11 @@ test_that("array supports POSIXct (ARROW-3340)", { expect_array_roundtrip(times2, timestamp("us", "US/Eastern")) }) -test_that("array supports POSIXct without timezone", { - # Make sure timezone is not set +test_that("array uses local timezone for POSIXct without timezone", { withr::with_envvar(c(TZ = ""), { times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10 - expect_array_roundtrip(times, timestamp("us", "")) + expect_equal(attr(times, "tzone"), NULL) + expect_array_roundtrip(times, timestamp("us", Sys.timezone())) # Also test the INTSXP code path skip("Ingest_POSIXct only implemented for REALSXP") @@ -272,6 +272,27 @@ test_that("array supports POSIXct without timezone", { attributes(times_int) <- attributes(times) expect_array_roundtrip(times_int, timestamp("us", "")) }) + + # If there is a timezone set, we record that + withr::with_timezone("Pacific/Marquesas", { + times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10 + expect_equal(attr(times, "tzone"), "Pacific/Marquesas") + expect_array_roundtrip(times, timestamp("us", "Pacific/Marquesas")) + + times_with_tz <- strptime( + "2019-02-03 12:34:56", + format = "%Y-%m-%d %H:%M:%S", + tz = "Asia/Katmandu") + 1:10 + expect_equal(attr(times, "tzone"), "Asia/Katmandu") + expect_array_roundtrip(times, timestamp("us", "Asia/Katmandu")) + }) + + # and although the TZ is NULL in R, we set it to the Sys.timezone() + withr::with_timezone(NA, { + times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10 + expect_equal(attr(times, "tzone"), NULL) + expect_array_roundtrip(times, timestamp("us", Sys.timezone())) + }) }) test_that("Timezone handling in Arrow roundtrip (ARROW-3543)", { diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 16e4958f1ccb7..c901742f65b2e 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -693,7 +693,6 @@ test_that("extract yday from date", { }) test_that("leap_year mirror lubridate", { - compare_dplyr_binding( .input %>% mutate(x = leap_year(date)) %>% @@ -721,7 +720,6 @@ test_that("leap_year mirror lubridate", { )) ) ) - }) test_that("am/pm mirror lubridate", { @@ -741,10 +739,8 @@ test_that("am/pm mirror lubridate", { ), format = "%Y-%m-%d %H:%M:%S" ) - ) ) - }) test_that("extract tz", {