mfoerste4 · mfoerste4 · Mar 10, 2022 · Mar 2, 2022 · Mar 2, 2022 · Mar 2, 2022
diff --git a/cunumeric/array.py b/cunumeric/array.py
diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py
@@ -1071,7 +1071,7 @@ def choose(self, *args, rhs):
 
     # Create or extract a diagonal from a matrix
     @auto_convert([1])
-    def diag_helper(
+    def _diag_helper(
         self,
         rhs,
         offset,
@@ -1519,9 +1519,7 @@ def compute_strides(shape):
 
     @auto_convert([1])
     def cholesky(self, src, no_tril=False):
-        cholesky(self, src)
-        if not no_tril:
-            self.trilu(self, 0, True)
+        cholesky(self, src, no_tril)  # helper skips its tril pass if no_tril
 
     def unique(self):
         result = self.runtime.create_unbound_thunk(self.dtype)

diff --git a/cunumeric/eager.py b/cunumeric/eager.py
@@ -429,7 +429,7 @@ def choose(self, *args, rhs):
             choices = tuple(c.array for c in args)
             self.array[:] = np.choose(rhs.array, choices, mode="raise")
 
-    def diag_helper(
+    def _diag_helper(
         self,
         rhs,
         offset,
@@ -438,7 +438,7 @@ def diag_helper(
     ):
         self.check_eager_args(rhs)
         if self.deferred is not None:
-            self.deferred.diag_helper(
+            self.deferred._diag_helper(
                 rhs,
                 offset,
                 naxes,

diff --git a/cunumeric/linalg/cholesky.py b/cunumeric/linalg/cholesky.py
@@ -123,7 +123,23 @@ def choose_color_shape(runtime, shape):
         return (num_tiles, num_tiles)
 
 
-def cholesky(output, input):
+def tril(context, p_output, n):
+    """Launch a manual TRILU task over an (n, n) domain on ``p_output``."""
+    task = context.create_task(
+        CuNumericOpCode.TRILU, manual=True, launch_domain=Rect((n, n))
+    )
+    # p_output is registered as both the output and the input of the task.
+    task.add_output(p_output)
+    task.add_input(p_output)
+    # Presumably lower=True and k=0, as in trilu -- TODO confirm.
+    task.add_scalar_arg(True, bool)
+    task.add_scalar_arg(0, ty.int32)
+    # Add a fake task argument to indicate that this is for Cholesky
+    task.add_scalar_arg(True, bool)
+    task.execute()
+
+
+def cholesky(output, input, no_tril):
     shape = output.base.shape
     color_shape = choose_color_shape(output.runtime, shape)
     tile_shape = (shape + color_shape - 1) // color_shape
@@ -141,3 +157,8 @@ def cholesky(output, input):
         for k in range(i + 1, n):
             syrk(context, p_output, k, i)
             gemm(context, p_output, k, i, k + 1, n)
+
+    if no_tril:
+        return
+
+    tril(context, p_output, n)
diff --git a/cunumeric/linalg/linalg.py b/cunumeric/linalg/linalg.py
@@ -14,8 +14,8 @@
 #
 
 import numpy as np
-from cunumeric.array import ndarray
-from cunumeric.module import sqrt as _sqrt
+from cunumeric.array import convert_to_cunumeric_ndarray
+from cunumeric.module import ndarray, sqrt as _sqrt
 
 
 def cholesky(a):
@@ -55,7 +55,7 @@ def cholesky(a):
     Multiple GPUs, Multiple CPUs
     """
 
-    lg_array = ndarray.convert_to_cunumeric_ndarray(a)
+    lg_array = convert_to_cunumeric_ndarray(a)
     shape = lg_array.shape
     if len(shape) < 2:
         raise ValueError(
@@ -69,7 +69,7 @@ def cholesky(a):
         raise NotImplementedError(
             "cuNumeric needs to support stacked 2d arrays"
         )
-    return lg_array.cholesky()
+    return _cholesky(lg_array)
 
 
 def norm(x, ord=None, axis=None, keepdims=False):
@@ -116,7 +116,7 @@ def norm(x, ord=None, axis=None, keepdims=False):
     Multiple GPUs, Multiple CPUs
     """
 
-    lg_array = ndarray.convert_to_cunumeric_ndarray(x)
+    lg_array = convert_to_cunumeric_ndarray(x)
     if (axis is None and lg_array.ndim == 1) or type(axis) == int:
         # Handle the weird norm cases
         if ord == np.inf:
@@ -147,3 +147,51 @@ def norm(x, ord=None, axis=None, keepdims=False):
         raise NotImplementedError(
             "cuNumeric needs support for other kinds of norms"
         )
+
+
+def _cholesky(a, no_tril=False):
+    """Cholesky decomposition without input validation.
+
+    Return the factor `L` of the decomposition `L * L.H` of the square
+    matrix `a`, where `L` is lower-triangular and .H is the conjugate
+    transpose operator (the ordinary transpose if `a` is real-valued).
+    `a` must be Hermitian (symmetric if real-valued) and
+    positive-definite; neither property is checked here. An input whose
+    dtype is neither floating-point nor complex is converted to float64
+    before the factorization.
+
+    Parameters
+    ----------
+    a : cuNumeric ndarray
+        Hermitian (symmetric if all elements are real), positive-definite
+        input matrix. No shape validation is performed here.
+    no_tril : bool, optional
+        Forwarded to the thunk's ``cholesky``. If True, the final TRILU
+        pass in ``cunumeric/linalg/cholesky.py`` is skipped.
+
+    Returns
+    -------
+    L : cuNumeric ndarray
+        Cholesky factor of `a`, with the shape and dtype of the (possibly
+        converted) input.
+
+    Notes
+    -----
+    The current implementation kills the process when the decomposition fails.
+
+    See Also
+    --------
+    numpy.linalg.cholesky
+
+    Availability
+    ------------
+    Multiple GPUs, Multiple CPUs
+    """
+    src = a if a.dtype.kind in ("f", "c") else a.astype("float64")
+    output = ndarray(
+        shape=src.shape,
+        dtype=src.dtype,
+        inputs=(src,),
+    )
+    output._thunk.cholesky(src._thunk, no_tril=no_tril)
+    return output