Fix rounding to zero error in stod on very small float numbers (#10672)

Fixes a rounding error on extremely small floating-point numbers in the range `1E-287` to `1E-307`. These values were incorrectly being rounded to zero due to the fix in #10622. The extra floating-point operation removed in #10622 is necessary for values in this range to keep them from being converted to zero. The fix adds a check so the extra floating-point operation is only used when the overall exponent falls below `std::numeric_limits<double>::min_exponent10` (which is `-307`). The `ToFloats64` gtest was also updated to include values in this range to ensure this error does not occur again. Additionally, the conversion now supports subnormal numbers, i.e. very small values in the range E-307 to E-324. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) - Mike Wilson (https://github.com/hyperbolic2346) URL: #10672
rapidsai · Apr 23, 2022 · 5264f95 · 5264f95
1 parent d6e3068
commit 5264f95
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 6 deletions.
diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu
@@ -124,16 +124,27 @@ __device__ inline double stod(string_view const& d_str)
   exp_ten *= exp_sign;
   exp_ten += exp_off;
   exp_ten += num_digits - 1;
-  if (exp_ten > std::numeric_limits<double>::max_exponent10)
+  if (exp_ten > std::numeric_limits<double>::max_exponent10) {
     return sign > 0 ? std::numeric_limits<double>::infinity()
                     : -std::numeric_limits<double>::infinity();
-  else if (exp_ten < std::numeric_limits<double>::min_exponent10)
-    return double{0};
+  }
+
+  double base = sign * static_cast<double>(digits);
 
   exp_ten += 1 - num_digits;
-  // exp10() is faster than pow(10.0,exp_ten)
+  // If 10^exp_ten would result in a subnormal value, the base and
+  // exponent should be adjusted so that 10^exp_ten is a normal value
+  auto const subnormal_shift = std::numeric_limits<double>::min_exponent10 - exp_ten;
+  if (subnormal_shift > 0) {
+    // Handle subnormal values. Ensure that both base and exponent are
+    // normal values before computing their product.
+    base = base / exp10(static_cast<double>(num_digits - 1 + subnormal_shift));
+    exp_ten += num_digits - 1;  // adjust exponent
+    auto const exponent = exp10(static_cast<double>(exp_ten + subnormal_shift));
+    return base * exponent;
+  }
+
   double const exponent = exp10(static_cast<double>(std::abs(exp_ten)));
-  double const base     = sign * static_cast<double>(digits);
   return exp_ten < 0 ? base / exponent : base * exponent;
 }
 

diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp
@@ -125,11 +125,15 @@ TEST_F(StringsConvertTest, FromFloats32)
 
 TEST_F(StringsConvertTest, ToFloats64)
 {
+  // clang-format off
   std::vector<const char*> h_strings{
     "1234",   nullptr,    "-876",     "543.2",         "-0.12",   ".25",
     "-.002",  "",         "-0.0",     "1.28e256",      "NaN",     "abc123",
     "123abc", "456e",     "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308",
-    "-Inf",   "-INFINITY"};
+    "-Inf",   "-INFINITY", "1.0",     "1.7976931348623157e+308",  "1.7976931348623157e-307",
+    // subnormal numbers:           v--- smallest double               v--- result is 0
+    "4e-308", "3.3333333333e-320", "4.940656458412465441765688e-324", "1.e-324" };
+  // clang-format on
   cudf::test::strings_column_wrapper strings(
     h_strings.begin(),
     h_strings.end(),