From 3624e6d68d911f83af28e406fb74c76b4a9cfb2e Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Wed, 26 Apr 2023 10:12:54 +0100
Subject: [PATCH] GH-35131: [R] Test failure with dev waldo (#35308)

This PR fixes the tests that fail because the dev version of the waldo package is stricter when comparing NaN and NA_real_ values. (N.B. our CI doesn't yet use the dev version of waldo, so this PR should be tested locally to verify that the tests pass.)
* Closes: #35131

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Nic Crane <thisisnic@gmail.com>
---
 r/tests/testthat/test-compute-sort.R          | 17 ++++++---
 .../testthat/test-dplyr-funcs-conditional.R   | 37 ++++++++++++++-----
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
index f521efeddc54a..ba3039c3313a9 100644
--- a/r/tests/testthat/test-compute-sort.R
+++ b/r/tests/testthat/test-compute-sort.R
@@ -108,29 +108,34 @@ test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results
 })
 
 test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on floats", {
+
+  test_vec <- tbl$dbl
+  # Arrow sorts NA and NaN differently, but it's not important, so eliminate here
+  test_vec[is.nan(test_vec)] <- NA_real_
+
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = TRUE),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = TRUE),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = NA),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = TRUE, na.last = FALSE),
-    tbl$dbl,
+    test_vec,
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = NA),
-    tbl$dbl
+    test_vec
   )
   compare_expression(
     sort(.input, decreasing = FALSE, na.last = FALSE),
-    tbl$dbl,
+    test_vec,
   )
 })
 
diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R b/r/tests/testthat/test-dplyr-funcs-conditional.R
index 85d21b73226be..b3d86da8b41c9 100644
--- a/r/tests/testthat/test-dplyr-funcs-conditional.R
+++ b/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -377,8 +377,11 @@ test_that("coalesce()", {
     y = c(NA_real_, 2.2, 3.3),
     z = c(1.1, 2.2, 3.3)
   )
-  compare_dplyr_binding(
-    .input %>%
+
+  # we can't use compare_dplyr_binding here as dplyr silently converts NaN to NA in coalesce()
+  # see https://github.com/tidyverse/dplyr/issues/6833
+  expect_identical(
+    arrow_table(df) %>%
       mutate(
         cw = coalesce(w),
         cz = coalesce(z),
@@ -387,21 +390,29 @@ test_that("coalesce()", {
         cwxyz = coalesce(w, x, y, z)
       ) %>%
       collect(),
-    df
+    mutate(
+      df,
+      cw = c(NA, NaN, NA),
+      cz = c(1.1, 2.2, 3.3),
+      cwx = c(NA, NaN, 3.3),
+      cwxy = c(NA, 2.2, 3.3),
+      cwxyz = c(1.1, 2.2, 3.3)
+    )
   )
+
   # NaNs stay NaN and are not converted to NA in the results
   # (testing this requires expect_identical())
   expect_identical(
     df %>% Table$create() %>% mutate(cwx = coalesce(w, x)) %>% collect(),
-    df %>% mutate(cwx = coalesce(w, x))
+    df %>% mutate(cwx = c(NA, NaN, 3.3))
   )
   expect_identical(
     df %>% Table$create() %>% transmute(cw = coalesce(w)) %>% collect(),
-    df %>% transmute(cw = coalesce(w))
+    df %>% transmute(cw = w)
   )
   expect_identical(
     df %>% Table$create() %>% transmute(cn = coalesce(NaN)) %>% collect(),
-    df %>% transmute(cn = coalesce(NaN))
+    df %>% transmute(cn = NaN)
   )
   # singles stay single
   expect_equal(
@@ -418,8 +429,8 @@ test_that("coalesce()", {
     float32()
   )
   # with R literal values
-  compare_dplyr_binding(
-    .input %>%
+  expect_identical(
+    arrow_table(df) %>%
       mutate(
         c1 = coalesce(4.4),
         c2 = coalesce(NA_real_),
@@ -429,7 +440,15 @@ test_that("coalesce()", {
         c6 = coalesce(w, x, y, NaN)
       ) %>%
       collect(),
-    df
+    mutate(
+      df,
+      c1 = 4.4,
+      c2 = NA_real_,
+      c3 = NaN,
+      c4 = c(5.5, 2.2, 3.3),
+      c5 = c(NA, 2.2, 3.3),
+      c6 = c(NaN, 2.2, 3.3)
+    )
   )
 
   # no arguments