rapidsai · JohnZed · Nov 13, 2020 · Jul 28, 2020 · Jul 30, 2020 · Aug 10, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,7 @@
 - PR #3112: Speed test_array
 - PR #3111: Adding Cython to Code Coverage
 - PR #3129:  Update notebooks README
+- PR #3040: Improved Array Conversion with CumlArrayDescriptor and Decorators
 
 ## Bug Fixes
 - PR #3065: Refactoring prims metrics function names from camelcase to underscore format

diff --git a/cpp/include/cuml/tsa/batched_arima.hpp b/cpp/include/cuml/tsa/batched_arima.hpp
@@ -23,6 +23,30 @@ namespace ML {
 
 enum LoglikeMethod { CSS, MLE };
 
+/**
+ * Pack separate parameter arrays into a single compact array
+ *
+ * @param[in]  handle     cuML handle (its stream is used for the operation)
+ * @param[in]  params     Parameter structure holding the separate arrays
+ * @param[in]  order      ARIMA order
+ * @param[in]  batch_size Batch size
+ * @param[out] param_vec  Compact parameter array that receives the result
+ */
+void pack(raft::handle_t& handle, const ARIMAParams<double>& params,
+          const ARIMAOrder& order, int batch_size, double* param_vec);
+
+/**
+ * Unpack a compact array into the separate parameter arrays
+ *
+ * @param[in]  handle     cuML handle (its stream is used for the operation)
+ * @param[out] params     Parameter structure receiving the separate arrays
+ * @param[in]  order      ARIMA order
+ * @param[in]  batch_size Batch size
+ * @param[in]  param_vec  Compact parameter array to read from
+ */
+void unpack(raft::handle_t& handle, ARIMAParams<double>& params,
+            const ARIMAOrder& order, int batch_size, const double* param_vec);
+
 /**
  * Compute the differenced series (seasonal and/or non-seasonal differences)
  * 

diff --git a/cpp/src/arima/batched_arima.cu b/cpp/src/arima/batched_arima.cu
@@ -40,6 +40,18 @@
 
 namespace ML {
 
+void pack(raft::handle_t& handle, const ARIMAParams<double>& params,
+          const ARIMAOrder& order, int batch_size, double* param_vec) {
+  // Delegate to the parameter structure, working on the handle's stream
+  params.pack(order, batch_size, param_vec, handle.get_stream());
+}
+
+void unpack(raft::handle_t& handle, ARIMAParams<double>& params,
+            const ARIMAOrder& order, int batch_size, const double* param_vec) {
+  // Delegate to the parameter structure, working on the handle's stream
+  params.unpack(order, batch_size, param_vec, handle.get_stream());
+}
+
 void batched_diff(raft::handle_t& handle, double* d_y_diff, const double* d_y,
                   int batch_size, int n_obs, const ARIMAOrder& order) {
   const auto stream = handle.get_stream();

diff --git a/notebooks/arima_demo.ipynb b/notebooks/arima_demo.ipynb
@@ -200,7 +200,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If we want to get the parameters that were fitted to the model, we can use `get_fit_params` or the corresponding properties:"
+    "If we want to get the parameters that were fitted to the model, we can use `get_fit_params` or the corresponding properties. The parameters are organized in 2D arrays: one row represents one parameter and the columns are different batch members."
    ]
   },
   {
@@ -211,7 +211,7 @@
    "source": [
     "param_mig = model_mig.get_fit_params()\n",
     "print(param_mig[\"ma\"])\n",
-    "print(model_mig.ma)"
+    "print(model_mig.ma_)"
    ]
   },
   {
@@ -230,23 +230,6 @@
     "print(model_mig.pack())"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The parameters are organized in 2D arrays: one row represents one parameter and the columns are different batch members."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Print the ma.L1 and ma.L2 parameters for each of 4 batch members\n",
-    "print(param_mig[\"ma\"])"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},

@@ -82,7 +82,7 @@
 
 # Output type configuration
 
-global_output_type = 'input'
+global_output_type = None
 
 from cuml.common.memory_utils import set_global_output_type, using_output_type
 

@@ -219,7 +219,8 @@ def _convert_to_gpuarray(data, order='F'):
         gs = cudf.Series.from_pandas(data)
         return cuda.as_cuda_array(gs)
     else:
-        return input_utils.input_to_dev_array(data, order=order)[0]
+        return input_utils.input_to_cuml_array(
+            data, order=order)[0].to_output("numba")
 
 
 def _convert_to_gpuarray_c(data):

@@ -30,6 +30,8 @@ from cuml.common.base import Base
 from cuml.common.doc_utils import generate_docstring
 from cuml.raft.common.handle cimport handle_t
 from cuml.common import input_to_cuml_array
+from cuml.common import using_output_type
+from cuml.common.array_descriptor import CumlArrayDescriptor
 
 from collections import defaultdict
 
@@ -186,6 +188,9 @@ class DBSCAN(Base):
     <http://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html>`_.
     """
 
+    labels_ = CumlArrayDescriptor()
+    core_sample_indices_ = CumlArrayDescriptor()
+
     def __init__(self, eps=0.5, handle=None, min_samples=5,
                  verbose=False, max_mbytes_per_batch=None,
                  output_type=None, calc_core_sample_indices=True):
@@ -196,18 +201,18 @@ class DBSCAN(Base):
         self.calc_core_sample_indices = calc_core_sample_indices
 
         # internal array attributes
-        self._labels_ = None  # accessed via estimator.labels_
+        self.labels_ = None
 
         # accessed via estimator._core_sample_indices_ when
-        # self.calc_core_sample_indices == True
-        self._core_sample_indices_ = None
+        # `self.calc_core_sample_indices == True`
+        self.core_sample_indices_ = None
 
         # C++ API expects this to be numeric.
         if self.max_mbytes_per_batch is None:
             self.max_mbytes_per_batch = 0
 
     @generate_docstring(skip_parameters_heading=True)
-    def fit(self, X, out_dtype="int32"):
+    def fit(self, X, out_dtype="int32") -> "DBSCAN":
         """
         Perform DBSCAN clustering from features.
 
@@ -218,11 +223,6 @@ class DBSCAN(Base):
             "int64", np.int64}.
 
         """
-        self._set_base_attributes(output_type=X, n_features=X)
-
-        if self._labels_ is not None:
-            del self._labels_
-
         if out_dtype not in ["int32", np.int32, "int64", np.int64]:
             raise ValueError("Invalid value for out_dtype. "
                              "Valid values are {'int32', 'int64', "
@@ -236,16 +236,16 @@ class DBSCAN(Base):
 
         cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()
 
-        self._labels_ = CumlArray.empty(n_rows, dtype=out_dtype)
-        cdef uintptr_t labels_ptr = self._labels_.ptr
+        self.labels_ = CumlArray.empty(n_rows, dtype=out_dtype)
+        cdef uintptr_t labels_ptr = self.labels_.ptr
 
         cdef uintptr_t core_sample_indices_ptr = <uintptr_t> NULL
 
         # Create the output core_sample_indices only if needed
         if self.calc_core_sample_indices:
-            self._core_sample_indices_ = \
+            self.core_sample_indices_ = \
                 CumlArray.empty(n_rows, dtype=out_dtype)
-            core_sample_indices_ptr = self._core_sample_indices_.ptr
+            core_sample_indices_ptr = self.core_sample_indices_.ptr
 
         if self.dtype == np.float32:
             if out_dtype is "int32" or out_dtype is np.int32:
@@ -303,20 +303,21 @@ class DBSCAN(Base):
         # Finally, resize the core_sample_indices array if necessary
         if self.calc_core_sample_indices:
 
-            # Temp convert to cupy array only once
-            core_samples_cupy = self._core_sample_indices_.to_output("cupy")
+            # Temp convert to cupy array (better than using `cupy.asarray`)
+            with using_output_type("cupy"):
 
-            # First get the min index. These have to monotonically increasing,
-            # so the min index should be the first returned -1
-            min_index = cp.argmin(core_samples_cupy).item()
+                # First get the min index. These have to be monotonically
+                # increasing, so the min index should be the first returned -1
+                min_index = cp.argmin(self.core_sample_indices_).item()
 
-            # Check for the case where there are no -1's
-            if (min_index == 0 and core_samples_cupy[min_index].item() != -1):
-                # Nothing to delete. The array has no -1's
-                pass
-            else:
-                self._core_sample_indices_ = \
-                    self._core_sample_indices_[:min_index]
+                # Check for the case where there are no -1's
+                if ((min_index == 0 and
+                     self.core_sample_indices_[min_index].item() != -1)):
+                    # Nothing to delete. The array has no -1's
+                    pass
+                else:
+                    self.core_sample_indices_ = \
+                        self.core_sample_indices_[:min_index]
 
         return self
 
@@ -325,7 +326,7 @@ class DBSCAN(Base):
                                        'type': 'dense',
                                        'description': 'Cluster labels',
                                        'shape': '(n_samples, 1)'})
-    def fit_predict(self, X, out_dtype="int32"):
+    def fit_predict(self, X, out_dtype="int32") -> CumlArray:
         """
         Performs clustering on X and returns cluster labels.