From 3624e6d68d911f83af28e406fb74c76b4a9cfb2e Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 26 Apr 2023 10:12:54 +0100 Subject: [PATCH] GH-35131: [R] Test failure with dev waldo (#35308) This PR fixes the tests failing due to the dev version of the waldo package being stricter when comparing NaN and NA_real_ values. (n.b. our CI doesn't yet use the dev version of waldo, so this PR should be tested locally to verify tests pass). * Closes: #35131 Authored-by: Nic Crane Signed-off-by: Nic Crane --- r/tests/testthat/test-compute-sort.R | 17 ++++++--- .../testthat/test-dplyr-funcs-conditional.R | 37 ++++++++++++++----- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R index f521efeddc54a..ba3039c3313a9 100644 --- a/r/tests/testthat/test-compute-sort.R +++ b/r/tests/testthat/test-compute-sort.R @@ -108,29 +108,34 @@ test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results }) test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on floats", { + + test_vec <- tbl$dbl + # Arrow sorts NA and NaN differently, but it's not important, so eliminate here + test_vec[is.nan(test_vec)] <- NA_real_ + compare_expression( sort(.input, decreasing = TRUE, na.last = TRUE), - tbl$dbl + test_vec ) compare_expression( sort(.input, decreasing = FALSE, na.last = TRUE), - tbl$dbl + test_vec ) compare_expression( sort(.input, decreasing = TRUE, na.last = NA), - tbl$dbl + test_vec ) compare_expression( sort(.input, decreasing = TRUE, na.last = FALSE), - tbl$dbl, + test_vec, ) compare_expression( sort(.input, decreasing = FALSE, na.last = NA), - tbl$dbl + test_vec ) compare_expression( sort(.input, decreasing = FALSE, na.last = FALSE), - tbl$dbl, + test_vec, ) }) diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R index 85d21b73226be..b3d86da8b41c9 100644 --- 
a/r/tests/testthat/test-dplyr-funcs-conditional.R +++ b/r/tests/testthat/test-dplyr-funcs-conditional.R @@ -377,8 +377,11 @@ test_that("coalesce()", { y = c(NA_real_, 2.2, 3.3), z = c(1.1, 2.2, 3.3) ) - compare_dplyr_binding( - .input %>% + + # we can't use compare_dplyr_binding here as dplyr silently converts NaN to NA in coalesce() + # see https://github.com/tidyverse/dplyr/issues/6833 + expect_identical( + arrow_table(df) %>% mutate( cw = coalesce(w), cz = coalesce(z), @@ -387,21 +390,29 @@ test_that("coalesce()", { cwxyz = coalesce(w, x, y, z) ) %>% collect(), - df + mutate( + df, + cw = c(NA, NaN, NA), + cz = c(1.1, 2.2, 3.3), + cwx = c(NA, NaN, 3.3), + cwxy = c(NA, 2.2, 3.3), + cwxyz = c(1.1, 2.2, 3.3) + ) ) + # NaNs stay NaN and are not converted to NA in the results # (testing this requires expect_identical()) expect_identical( df %>% Table$create() %>% mutate(cwx = coalesce(w, x)) %>% collect(), - df %>% mutate(cwx = coalesce(w, x)) + df %>% mutate(cwx = c(NA, NaN, 3.3)) ) expect_identical( df %>% Table$create() %>% transmute(cw = coalesce(w)) %>% collect(), - df %>% transmute(cw = coalesce(w)) + df %>% transmute(cw = w) ) expect_identical( df %>% Table$create() %>% transmute(cn = coalesce(NaN)) %>% collect(), - df %>% transmute(cn = coalesce(NaN)) + df %>% transmute(cn = NaN) ) # singles stay single expect_equal( @@ -418,8 +429,8 @@ test_that("coalesce()", { float32() ) # with R literal values - compare_dplyr_binding( - .input %>% + expect_identical( + arrow_table(df) %>% mutate( c1 = coalesce(4.4), c2 = coalesce(NA_real_), @@ -429,7 +440,15 @@ test_that("coalesce()", { c6 = coalesce(w, x, y, NaN) ) %>% collect(), - df + mutate( + df, + c1 = 4.4, + c2 = NA_real_, + c3 = NaN, + c4 = c(5.5, 2.2, 3.3), + c5 = c(NA, 2.2, 3.3), + c6 = c(NaN, 2.2, 3.3) + ) ) # no arguments