nv-legate · rkarim2 · Sep 4, 2022 · Jul 27, 2022 · Jul 28, 2022 · Jul 28, 2022
diff --git a/cunumeric/array.py b/cunumeric/array.py
@@ -41,6 +41,7 @@
 
 from .config import (
     BinaryOpCode,
+    ConvertCode,
     FFTDirection,
     FFTNormalization,
     FFTType,
@@ -4075,6 +4076,8 @@ def _perform_scan(
         out: Union[ndarray, None] = None,
         nan_to_identity: bool = False,
     ) -> ndarray:
+        if src.dtype.kind != "c" and src.dtype.kind != "f":
+            nan_to_identity = False
         if dtype is None:
             if out is None:
                 if src.dtype.kind == "i":
@@ -4084,12 +4087,6 @@ def _perform_scan(
                     dtype = src.dtype
             else:
                 dtype = out.dtype
-        if (src.dtype.kind in ("f", "c")) and np.issubdtype(dtype, np.integer):
-            # Needs changes to convert()
-            raise NotImplementedError(
-                "Integer output types currently not supported for "
-                "floating/complex inputs"
-            )
         # flatten input when axis is None
         if axis is None:
             axis = 0
@@ -4110,9 +4107,18 @@ def _perform_scan(
             out = ndarray(shape=src_arr.shape, dtype=dtype)
 
         if dtype != src_arr.dtype:
+            if nan_to_identity:
+                if op is ScanCode.SUM:
+                    nan_op = ConvertCode.SUM
+                else:
+                    nan_op = ConvertCode.PROD
+                # If convert is called, it will handle NAN conversion
+                nan_to_identity = False
+            else:
+                nan_op = ConvertCode.NOOP
             # convert input to temporary for type conversion
             temp = ndarray(shape=src_arr.shape, dtype=dtype)
-            temp._thunk.convert(src_arr._thunk)
+            temp._thunk.convert(src_arr._thunk, nan_op=nan_op)
             src_arr = temp
 
         out._thunk.scan(

diff --git a/cunumeric/config.py b/cunumeric/config.py
@@ -139,6 +139,9 @@ class _CunumericSharedLib:
     CUNUMERIC_CHOOSE: int
     CUNUMERIC_CONTRACT: int
     CUNUMERIC_CONVERT: int
+    CUNUMERIC_CONVERT_NAN_NOOP: int
+    CUNUMERIC_CONVERT_NAN_PROD: int
+    CUNUMERIC_CONVERT_NAN_SUM: int
     CUNUMERIC_CONVOLVE: int
     CUNUMERIC_DIAG: int
     CUNUMERIC_DOT: int
@@ -526,6 +529,14 @@ class ScanCode(IntEnum):
     SUM = _cunumeric.CUNUMERIC_SCAN_SUM
 
 
+# Match these to CuNumericConvertCode in cunumeric_c.h
+@unique
+class ConvertCode(IntEnum):
+    NOOP = _cunumeric.CUNUMERIC_CONVERT_NAN_NOOP  # copy values unchanged
+    PROD = _cunumeric.CUNUMERIC_CONVERT_NAN_PROD  # replace NaN with 1
+    SUM = _cunumeric.CUNUMERIC_CONVERT_NAN_SUM  # replace NaN with 0
+
+
 # Match these to BitGeneratorOperation in cunumeric_c.h
 @unique
 class BitGeneratorOperation(IntEnum):

diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py
@@ -44,6 +44,7 @@
     BitGeneratorDistribution,
     BitGeneratorOperation,
     Bitorder,
+    ConvertCode,
     CuNumericOpCode,
     CuNumericRedopCode,
     RandGenCode,
@@ -1145,7 +1146,12 @@ def swapaxes(self, axis1: int, axis2: int) -> DeferredArray:
 
     # Convert the source array to the destination array
     @auto_convert([1])
-    def convert(self, rhs: Any, warn: bool = True) -> None:
+    def convert(
+        self,
+        rhs: Any,
+        warn: bool = True,
+        nan_op: ConvertCode = ConvertCode.NOOP,
+    ) -> None:
         lhs_array = self
         rhs_array = rhs
         assert lhs_array.dtype != rhs_array.dtype
@@ -1165,7 +1171,7 @@ def convert(self, rhs: Any, warn: bool = True) -> None:
         task = self.context.create_auto_task(CuNumericOpCode.CONVERT)
         task.add_output(lhs)
         task.add_input(rhs)
-        task.add_dtype_arg(lhs_array.dtype)
+        task.add_scalar_arg(nan_op, ty.int32)
 
         task.add_alignment(lhs, rhs)
 

diff --git a/cunumeric/eager.py b/cunumeric/eager.py
@@ -33,6 +33,7 @@
     FFT_R2C,
     FFT_Z2D,
     BinaryOpCode,
+    ConvertCode,
     FFTDirection,
     ScanCode,
     UnaryOpCode,
@@ -485,15 +486,44 @@ def swapaxes(self, axis1: int, axis2: int) -> NumPyThunk:
         self.children.append(result)
         return result
 
-    def convert(self, rhs: Any, warn: bool = True) -> None:
+    def convert(
+        self,
+        rhs: Any,
+        warn: bool = True,
+        nan_op: ConvertCode = ConvertCode.NOOP,
+    ) -> None:
+        """Copy the contents of ``rhs`` into this array.
+
+        Parameters
+        ----------
+        rhs : Any
+            Source thunk; checked with ``check_eager_args``.
+        warn : bool
+            Only forwarded to ``self.deferred.convert``; the eager path
+            does not use it.
+        nan_op : ConvertCode
+            ``SUM`` writes 0 and ``PROD`` writes 1 wherever ``rhs`` holds
+            NaN; ``NOOP`` copies the values as they are.
+        """
         self.check_eager_args(rhs)
         if self.deferred is not None:
-            return self.deferred.convert(rhs, warn=warn)
+            # Pass nan_op along so the deferred path handles NaNs too.
+            return self.deferred.convert(rhs, warn=warn, nan_op=nan_op)
         else:
             if self.array.size == 1:
-                self.array.fill(rhs.array.item())
+                if nan_op is ConvertCode.SUM and np.isnan(rhs.array.item()):
+                    self.array.fill(0)
+                elif nan_op is ConvertCode.PROD and np.isnan(rhs.array.item()):
+                    self.array.fill(1)
+                else:
+                    self.array.fill(rhs.array.item())
             else:
-                self.array[:] = rhs.array
+                if nan_op is ConvertCode.SUM:
+                    self.array[:] = np.where(np.isnan(rhs.array), 0, rhs.array)
+                elif nan_op is ConvertCode.PROD:
+                    self.array[:] = np.where(np.isnan(rhs.array), 1, rhs.array)
+                else:
+                    self.array[:] = rhs.array
 
     def fill(self, value: Any) -> None:
         if self.deferred is not None:

diff --git a/cunumeric/thunk.py b/cunumeric/thunk.py
@@ -17,6 +17,8 @@
 from abc import ABC, abstractmethod, abstractproperty
 from typing import TYPE_CHECKING, Any, Optional, Sequence, Union
 
+from .config import ConvertCode
+
 if TYPE_CHECKING:
     import numpy as np
     import numpy.typing as npt
@@ -151,7 +153,12 @@ def swapaxes(self, axis1: int, axis2: int) -> NumPyThunk:
         ...
 
     @abstractmethod
-    def convert(self, rhs: Any, warn: bool = True) -> None:
+    def convert(
+        self,
+        rhs: Any,  # source thunk whose values are converted into self
+        warn: bool = True,
+        nan_op: ConvertCode = ConvertCode.NOOP,  # NaN replacement mode
+    ) -> None:
         ...
 
     @abstractmethod

diff --git a/src/cunumeric/cunumeric_c.h b/src/cunumeric/cunumeric_c.h
@@ -221,6 +221,14 @@ enum CuNumericScanCode {
   CUNUMERIC_SCAN_SUM,
 };
 
+// Match these to ConvertCode in config.py
+// Also, sort these alphabetically for easy lookup later
+enum CuNumericConvertCode {
+  CUNUMERIC_CONVERT_NAN_NOOP = 1,  // no NaN replacement
+  CUNUMERIC_CONVERT_NAN_PROD,      // NaN -> 1 in the eager Python path
+  CUNUMERIC_CONVERT_NAN_SUM,       // NaN -> 0 in the eager Python path
+};
+
 // Match these to BitGeneratorOperation in config.py
 enum CuNumericBitGeneratorOperation {
   CUNUMERIC_BITGENOP_CREATE       = 1,

diff --git a/src/cunumeric/scan/scan_global_util.h b/src/cunumeric/scan/scan_global_util.h
@@ -40,8 +40,6 @@ constexpr decltype(auto) op_dispatch(ScanCode op_code, Functor f, Fnargs&&... ar
   return f.template operator()<ScanCode::SUM>(std::forward<Fnargs>(args)...);
 }
 
-// RRRR not sure I fully understand these?
-
 template <ScanCode OP_CODE, legate::LegateTypeCode CODE>
 struct ScanOp {
 };

diff --git a/src/cunumeric/unary/convert.cc b/src/cunumeric/unary/convert.cc
@@ -22,9 +22,9 @@ namespace cunumeric {
 using namespace Legion;
 using namespace legate;
 
-template <LegateTypeCode DST_TYPE, LegateTypeCode SRC_TYPE, int DIM>
-struct ConvertImplBody<VariantKind::CPU, DST_TYPE, SRC_TYPE, DIM> {
-  using OP  = ConvertOp<DST_TYPE, SRC_TYPE>;
+template <ConvertCode NAN_OP, LegateTypeCode DST_TYPE, LegateTypeCode SRC_TYPE, int DIM>
+struct ConvertImplBody<VariantKind::CPU, NAN_OP, DST_TYPE, SRC_TYPE, DIM> {
+  using OP  = ConvertOp<NAN_OP, DST_TYPE, SRC_TYPE>;
   using SRC = legate_type_of<SRC_TYPE>;
   using DST = legate_type_of<DST_TYPE>;
 

diff --git a/src/cunumeric/unary/convert.cu b/src/cunumeric/unary/convert.cu
@@ -42,9 +42,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM)
   out[point] = func(in[point]);
 }
 
-template <LegateTypeCode DST_TYPE, LegateTypeCode SRC_TYPE, int DIM>
-struct ConvertImplBody<VariantKind::GPU, DST_TYPE, SRC_TYPE, DIM> {
-  using OP  = ConvertOp<DST_TYPE, SRC_TYPE>;
+template <ConvertCode NAN_OP, LegateTypeCode DST_TYPE, LegateTypeCode SRC_TYPE, int DIM>
+struct ConvertImplBody<VariantKind::GPU, NAN_OP, DST_TYPE, SRC_TYPE, DIM> {
+  using OP  = ConvertOp<NAN_OP, DST_TYPE, SRC_TYPE>;
   using SRC = legate_type_of<SRC_TYPE>;
   using DST = legate_type_of<DST_TYPE>;
 

diff --git a/src/cunumeric/unary/convert.h b/src/cunumeric/unary/convert.h
@@ -16,13 +16,15 @@
 
 #pragma once
 
+#include "cunumeric/unary/convert_util.h"
 #include "cunumeric/cunumeric.h"
 
 namespace cunumeric {
 
 struct ConvertArgs {
   const Array& out;
   const Array& in;
+  ConvertCode nan_op;  // NaN handling mode, read from the task's first scalar
 };
 
 class ConvertTask : public CuNumericTask<ConvertTask> {

diff --git a/src/cunumeric/unary/convert_omp.cc b/src/cunumeric/unary/convert_omp.cc
@@ -22,9 +22,9 @@ namespace cunumeric {
 using namespace Legion;
 using namespace legate;
 
-template <LegateTypeCode DST_TYPE, LegateTypeCode SRC_TYPE, int DIM>
-struct ConvertImplBody<VariantKind::OMP, DST_TYPE, SRC_TYPE, DIM> {
-  using OP  = ConvertOp<DST_TYPE, SRC_TYPE>;
+template <ConvertCode NAN_OP, LegateTypeCode DST_TYPE, LegateTypeCode SRC_TYPE, int DIM>
+struct ConvertImplBody<VariantKind::OMP, NAN_OP, DST_TYPE, SRC_TYPE, DIM> {
+  using OP  = ConvertOp<NAN_OP, DST_TYPE, SRC_TYPE>;
   using SRC = legate_type_of<SRC_TYPE>;
   using DST = legate_type_of<DST_TYPE>;
 

diff --git a/src/cunumeric/unary/convert_template.inl b/src/cunumeric/unary/convert_template.inl
@@ -26,15 +26,19 @@ namespace cunumeric {
 using namespace Legion;
 using namespace legate;
 
-template <VariantKind KIND, LegateTypeCode DST_TYPE, LegateTypeCode SRC_TYPE, int DIM>
+template <VariantKind KIND,
+          ConvertCode NAN_OP,
+          LegateTypeCode DST_TYPE,
+          LegateTypeCode SRC_TYPE,
+          int DIM>
 struct ConvertImplBody;
 
-template <VariantKind KIND, LegateTypeCode SRC_TYPE>
+template <VariantKind KIND, ConvertCode NAN_OP, LegateTypeCode SRC_TYPE>
 struct ConvertImpl {
   template <LegateTypeCode DST_TYPE, int DIM, std::enable_if_t<SRC_TYPE != DST_TYPE>* = nullptr>
   void operator()(ConvertArgs& args) const
   {
-    using OP  = ConvertOp<DST_TYPE, SRC_TYPE>;
+    using OP  = ConvertOp<NAN_OP, DST_TYPE, SRC_TYPE>;
     using SRC = legate_type_of<SRC_TYPE>;
     using DST = legate_type_of<DST_TYPE>;
 
@@ -57,7 +61,7 @@ struct ConvertImpl {
 #endif
 
     OP func{};
-    ConvertImplBody<KIND, DST_TYPE, SRC_TYPE, DIM>()(func, out, in, pitches, rect, dense);
+    ConvertImplBody<KIND, NAN_OP, DST_TYPE, SRC_TYPE, DIM>()(func, out, in, pitches, rect, dense);
   }
 
   template <LegateTypeCode DST_TYPE, int DIM, std::enable_if_t<SRC_TYPE == DST_TYPE>* = nullptr>
@@ -67,20 +71,42 @@ struct ConvertImpl {
   }
 };
 
+template <VariantKind KIND, LegateTypeCode SRC_TYPE>
+struct ConvertDispatch {
+  // True when the source type is floating-point or complex.
+  static constexpr bool SRC_HAS_NAN = legate::is_floating_point<SRC_TYPE>::value ||
+                                      legate::is_complex<legate::legate_type_of<SRC_TYPE>>::value;
+
+  template <ConvertCode NAN_OP,
+            std::enable_if_t<SRC_HAS_NAN || NAN_OP == ConvertCode::NOOP>* = nullptr>
+  void operator()(ConvertArgs& args) const
+  {
+    const auto ndim = std::max(1, args.out.dim());
+    double_dispatch(ndim, args.out.code(), ConvertImpl<KIND, NAN_OP, SRC_TYPE>{}, args);
+  }
+
+  template <ConvertCode NAN_OP,
+            std::enable_if_t<!(SRC_HAS_NAN || NAN_OP == ConvertCode::NOOP)>* = nullptr>
+  void operator()(ConvertArgs& args) const
+  {
+    assert(false);  // NaN op requested for a non-float, non-complex source
+  }
+};
+
 template <VariantKind KIND>
 struct SourceTypeDispatch {
   template <LegateTypeCode SRC_TYPE>
   void operator()(ConvertArgs& args) const
   {
-    auto dim = std::max(1, args.out.dim());
-    double_dispatch(dim, args.out.code(), ConvertImpl<KIND, SRC_TYPE>{}, args);
+    op_dispatch(args.nan_op, ConvertDispatch<KIND, SRC_TYPE>{}, args);
   }
 };
 
 template <VariantKind KIND>
 static void convert_template(TaskContext& context)
 {
-  ConvertArgs args{context.outputs()[0], context.inputs()[0]};
+  ConvertArgs args{
+    context.outputs()[0], context.inputs()[0], context.scalars()[0].value<ConvertCode>()};
   type_dispatch(args.in.code(), SourceTypeDispatch<KIND>{}, args);
 }