From 1a4f8a4b8ff9223425f13b83af5728b7ba56d396 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:50:35 +0200 Subject: [PATCH] Adopt dpnp interface to asynchronous dpctl execution (Part #1) (#1897) * Update manipulation functions * Update functions from the array creation container * Update dpnp array methods * Implement backward compatible solution * dpnp.meshgrid has to follow CFD and prohibit input arrays allocating on different SYCL queues * updated linspace, logspace and geomspace functions * Updated elementwise functions and astype * Updated counting and histogram functions * Switched back to use dppy/label/dev for coverage GH action * Removed dpnp_container.linspace since unused * Return dpnp ndarray for linspace, logspace and geomspace internal functions --- .github/workflows/generate_coverage.yaml | 2 +- dpnp/dpnp_algo/dpnp_arraycreation.py | 114 ++++++++++---------- dpnp/dpnp_algo/dpnp_elementwise_common.py | 125 +++++++++++++--------- dpnp/dpnp_array.py | 3 + dpnp/dpnp_container.py | 42 ++------ dpnp/dpnp_iface.py | 19 +++- dpnp/dpnp_iface_arraycreation.py | 77 +++++++++---- dpnp/dpnp_iface_counting.py | 7 +- dpnp/dpnp_iface_histograms.py | 68 ++++++++---- dpnp/dpnp_iface_manipulation.py | 64 +++++++---- tests/test_sycl_queue.py | 13 +-- 11 files changed, 318 insertions(+), 216 deletions(-) diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 1fa71fb479d..5a0480235a7 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -21,7 +21,7 @@ jobs: env: python-ver: '3.10' - CHANNELS: '-c dppy/label/coverage -c intel -c conda-forge --override-channels' + CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels' # Install the latest oneAPI compiler to work around an issue INSTALL_ONE_API: 'yes' diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py index 83cd9da4acf..b493efac993 100644 --- a/dpnp/dpnp_algo/dpnp_arraycreation.py +++ b/dpnp/dpnp_algo/dpnp_arraycreation.py @@ -1,12 +1,13 @@ import math import operator +import dpctl.tensor as dpt import dpctl.utils as dpu import numpy import dpnp -import dpnp.dpnp_container as dpnp_container import dpnp.dpnp_utils as utils +from dpnp.dpnp_array import dpnp_array __all__ = [ "dpnp_geomspace", @@ -16,6 +17,12 @@ ] +def _as_usm_ndarray(a, usm_type, sycl_queue): + if isinstance(a, dpnp_array): + return a.get_array() + return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue) + + def dpnp_geomspace( start, stop, @@ -40,14 +47,8 @@ def dpnp_geomspace( else: _usm_type = usm_type - if not dpnp.is_supported_array_type(start): - start = dpnp.asarray( - start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - if not dpnp.is_supported_array_type(stop): - stop = dpnp.asarray( - stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) + start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized) + stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized) dt = numpy.result_type(start, stop, float(num)) dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) @@ -57,8 +58,8 @@ def dpnp_geomspace( if dpnp.any(start == 0) or dpnp.any(stop == 0): raise ValueError("Geometric sequence cannot include zero") - out_sign = dpnp.ones( - dpnp.broadcast_arrays(start, stop)[0].shape, + out_sign = dpt.ones( + dpt.broadcast_arrays(start, stop)[0].shape, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized, @@ -72,15 
+73,15 @@ def dpnp_geomspace( stop[all_imag] = stop[all_imag].imag out_sign[all_imag] = 1j - both_negative = (dpnp.sign(start) == -1) & (dpnp.sign(stop) == -1) + both_negative = (dpt.sign(start) == -1) & (dpt.sign(stop) == -1) if dpnp.any(both_negative): - dpnp.negative(start[both_negative], out=start[both_negative]) - dpnp.negative(stop[both_negative], out=stop[both_negative]) - dpnp.negative(out_sign[both_negative], out=out_sign[both_negative]) + dpt.negative(start[both_negative], out=start[both_negative]) + dpt.negative(stop[both_negative], out=stop[both_negative]) + dpt.negative(out_sign[both_negative], out=out_sign[both_negative]) - log_start = dpnp.log10(start) - log_stop = dpnp.log10(stop) - result = dpnp_logspace( + log_start = dpt.log10(start) + log_stop = dpt.log10(stop) + res = dpnp_logspace( log_start, log_stop, num=num, @@ -89,19 +90,20 @@ def dpnp_geomspace( dtype=dtype, usm_type=_usm_type, sycl_queue=sycl_queue_normalized, - ) + ).get_array() if num > 0: - result[0] = start + res[0] = start if num > 1 and endpoint: - result[-1] = stop + res[-1] = stop - result = out_sign * result + res = out_sign * res if axis != 0: - result = dpnp.moveaxis(result, 0, axis) + res = dpt.moveaxis(res, 0, axis) - return result.astype(dtype, copy=False) + res = dpt.astype(res, dtype, copy=False) + return dpnp_array._create_from_usm_ndarray(res) def dpnp_linspace( @@ -129,14 +131,11 @@ def dpnp_linspace( else: _usm_type = usm_type - if not hasattr(start, "dtype") and not dpnp.isscalar(start): - start = dpnp.asarray( - start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - if not hasattr(stop, "dtype") and not dpnp.isscalar(stop): - stop = dpnp.asarray( - stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) + if not dpnp.isscalar(start): + start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized) + + if not dpnp.isscalar(stop): + stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized) dt = numpy.result_type(start, stop, float(num)) dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) @@ -155,7 +154,7 @@ def dpnp_linspace( if dpnp.isscalar(start) and dpnp.isscalar(stop): # Call linspace() function for scalars. 
- res = dpnp_container.linspace( + usm_res = dpt.linspace( start, stop, num, @@ -167,17 +166,17 @@ def dpnp_linspace( if retstep is True and step_nan is False: step = (stop - start) / step_num else: - _start = dpnp.asarray( + usm_start = dpt.asarray( start, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized, ) - _stop = dpnp.asarray( + usm_stop = dpt.asarray( stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized ) - res = dpnp_container.arange( + usm_res = dpt.arange( 0, stop=num, step=1, @@ -187,28 +186,29 @@ def dpnp_linspace( ) if step_nan is False: - step = (_stop - _start) / step_num - res = res.reshape((-1,) + (1,) * step.ndim) - res = res * step + _start + step = (usm_stop - usm_start) / step_num + usm_res = dpt.reshape(usm_res, (-1,) + (1,) * step.ndim, copy=False) + usm_res = usm_res * step + usm_res += usm_start if endpoint and num > 1: - res[-1] = dpnp_container.full(step.shape, _stop) + usm_res[-1] = dpt.full(step.shape, usm_stop) if axis != 0: - res = dpnp.moveaxis(res, 0, axis) + usm_res = dpt.moveaxis(usm_res, 0, axis) if numpy.issubdtype(dtype, dpnp.integer): - dpnp.floor(res, out=res) + dpt.floor(usm_res, out=usm_res) - res = res.astype(dtype, copy=False) + res = dpt.astype(usm_res, dtype, copy=False) + res = dpnp_array._create_from_usm_ndarray(res) if retstep is True: if dpnp.isscalar(step): - step = dpnp.asarray( + step = dpt.asarray( step, usm_type=res.usm_type, sycl_queue=res.sycl_queue ) - return (res, step) - + return res, dpnp_array._create_from_usm_ndarray(step) return res @@ -239,12 +239,15 @@ def dpnp_logspace( usm_type = "device" if usm_type_alloc is None else usm_type_alloc else: usm_type = usm_type - start = dpnp.asarray(start, usm_type=usm_type, sycl_queue=sycl_queue) - stop = dpnp.asarray(stop, usm_type=usm_type, sycl_queue=sycl_queue) - base = dpnp.asarray(base, usm_type=usm_type, sycl_queue=sycl_queue) - [start, stop, base] = dpnp.broadcast_arrays(start, stop, base) - base = dpnp.expand_dims(base, axis=axis) + start = _as_usm_ndarray(start, usm_type, sycl_queue) + stop = _as_usm_ndarray(stop, usm_type, sycl_queue) + base = _as_usm_ndarray(base, usm_type, sycl_queue) + + [start, stop, base] = dpt.broadcast_arrays(start, stop, base) + base = dpt.expand_dims(base, axis=axis) + + # assume res as not a tuple, because retstep is False res = dpnp_linspace( start, stop, @@ -254,11 +257,12 @@ def dpnp_logspace( sycl_queue=sycl_queue, endpoint=endpoint, axis=axis, - ) + ).get_array() - if dtype is None: - return dpnp.power(base, res) - return dpnp.power(base, res).astype(dtype, copy=False) + dpt.pow(base, res, out=res) + if dtype is not None: + res = dpt.astype(res, dtype, copy=False) + return dpnp_array._create_from_usm_ndarray(res) class dpnp_nd_grid: diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 374981a6303..b13ea56bc32 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -24,6 +24,7 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +import dpctl.tensor as dpt import numpy from dpctl.tensor._elementwise_common import ( BinaryElementwiseFunc, @@ -161,24 +162,27 @@ def __call__( f"Requested function={self.name_} only takes `out` or `dtype`" "as an argument, but both were provided." 
) + + if order is None: + order = "K" + elif order in "afkcAFKC": + order = order.upper() else: - if order is None: - order = "K" - elif order in "afkcAFKC": - order = order.upper() - else: - raise ValueError( - "order must be one of 'C', 'F', 'A', or 'K' " - f"(got '{order}')" - ) - if dtype is not None: - x = dpnp.astype(x, dtype=dtype, copy=False) - x_usm = dpnp.get_usm_ndarray(x) - out_usm = None if out is None else dpnp.get_usm_ndarray(out) - res_usm = super().__call__(x_usm, out=out_usm, order=order) - if out is not None and isinstance(out, dpnp_array): - return out - return dpnp_array._create_from_usm_ndarray(res_usm) + raise ValueError( + "order must be one of 'C', 'F', 'A', or 'K' " f"(got '{order}')" + ) + + x_usm = dpnp.get_usm_ndarray(x) + if dtype is not None: + x_usm = dpt.astype(x_usm, dtype, copy=False) + + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + res_usm = super().__call__(x_usm, out=out_usm, order=order) + + dpnp.synchronize_array_data(res_usm) + if out is not None and isinstance(out, dpnp_array): + return out + return dpnp_array._create_from_usm_ndarray(res_usm) class DPNPBinaryFunc(BinaryElementwiseFunc): @@ -311,35 +315,47 @@ def __call__( f"Requested function={self.name_} only takes `out` or `dtype`" "as an argument, but both were provided." ) + + if order is None: + order = "K" + elif order in "afkcAFKC": + order = order.upper() else: - if order is None: - order = "K" - elif order in "afkcAFKC": - order = order.upper() - else: - raise ValueError( - "order must be one of 'C', 'F', 'A', or 'K' " - f"(got '{order}')" + raise ValueError( + "order must be one of 'C', 'F', 'A', or 'K' (got '{order}')" + ) + + x1_usm = dpnp.get_usm_ndarray_or_scalar(x1) + x2_usm = dpnp.get_usm_ndarray_or_scalar(x2) + + if dtype is not None: + if dpnp.isscalar(x1): + x1_usm = dpt.asarray( + x1, + dtype=dtype, + sycl_queue=x2.sycl_queue, + usm_type=x2.usm_type, ) - if dtype is not None: - if dpnp.isscalar(x1): - x1 = dpnp.asarray(x1, dtype=dtype) - x2 = dpnp.astype(x2, dtype=dtype, copy=False) - elif dpnp.isscalar(x2): - x1 = dpnp.astype(x1, dtype=dtype, copy=False) - x2 = dpnp.asarray(x2, dtype=dtype) - else: - x1 = dpnp.astype(x1, dtype=dtype, copy=False) - x2 = dpnp.astype(x2, dtype=dtype, copy=False) - - x1_usm = dpnp.get_usm_ndarray_or_scalar(x1) - x2_usm = dpnp.get_usm_ndarray_or_scalar(x2) + x2_usm = dpt.astype(x2_usm, dtype, copy=False) + elif dpnp.isscalar(x2): + x1_usm = dpt.astype(x1_usm, dtype, copy=False) + x2_usm = dpt.asarray( + x2, + dtype=dtype, + sycl_queue=x1.sycl_queue, + usm_type=x1.usm_type, + ) + else: + x1_usm = dpt.astype(x1_usm, dtype, copy=False) + x2_usm = dpt.astype(x2_usm, dtype, copy=False) - out_usm = None if out is None else dpnp.get_usm_ndarray(out) - res_usm = super().__call__(x1_usm, x2_usm, out=out_usm, order=order) - if out is not None and isinstance(out, dpnp_array): - return out - return dpnp_array._create_from_usm_ndarray(res_usm) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + res_usm = super().__call__(x1_usm, x2_usm, out=out_usm, order=order) + + dpnp.synchronize_array_data(res_usm) + if out is not None and isinstance(out, dpnp_array): + return out + return dpnp_array._create_from_usm_ndarray(res_usm) def outer( self, @@ -463,7 +479,7 @@ def __init__( def __call__(self, x, deg=False): res = super().__call__(x) if deg is True: - res = res * (180 / dpnp.pi) + res *= 180 / dpnp.pi return res @@ -513,14 +529,21 @@ def __init__( def __call__(self, x, decimals=0, out=None, dtype=None): if decimals != 0: - if 
dpnp.issubdtype(x.dtype, dpnp.integer) and dtype is None: - dtype = x.dtype - res = dpnp.true_divide( - dpnp.rint(x * 10**decimals, out=out), 10**decimals, out=out - ) + x_usm = dpnp.get_usm_ndarray(x) + if dpnp.issubdtype(x_usm.dtype, dpnp.integer) and dtype is None: + dtype = x_usm.dtype + + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + x_usm = dpt.round(x_usm * 10**decimals, out=out_usm) + res_usm = dpt.divide(x_usm, 10**decimals, out=out_usm) + if dtype is not None: - res = res.astype(dtype) - return res + res_usm = dpt.astype(res_usm, dtype, copy=False) + + dpnp.synchronize_array_data(res_usm) + if out is not None and isinstance(out, dpnp_array): + return out + return dpnp_array._create_from_usm_ndarray(res_usm) else: return super().__call__(x, out=out, dtype=dtype) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index fd2d06f7428..d9936872a89 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -258,6 +258,8 @@ def __getitem__(self, key): res = self.__new__(dpnp_array) res._array_obj = item + if self._array_obj.usm_data is not res._array_obj.usm_data: + dpnp.synchronize_array_data(self) return res def __gt__(self, other): @@ -454,6 +456,7 @@ def __setitem__(self, key, val): val = val.get_array() self._array_obj.__setitem__(key, val) + dpnp.synchronize_array_data(self) # '__setstate__', # '__sizeof__', diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 5322df3324b..8f70e015393 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -47,7 +47,6 @@ "empty", "eye", "full", - "linspace", "ones", "tril", "triu", @@ -81,6 +80,7 @@ def arange( sycl_queue=sycl_queue_normalized, ) + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj) @@ -133,6 +133,7 @@ def asarray( if array_obj is x1_obj and isinstance(x1, dpnp_array): return x1 + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -142,6 +143,7 @@ def copy(x1, /, *, order="K"): order = "K" array_obj = dpt.copy(dpnp.get_usm_ndarray(x1), order=order) + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order="K") @@ -203,6 +205,7 @@ def eye( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -237,40 +240,10 @@ def full( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) -def linspace( - start, - stop, - /, - num, - *, - dtype=None, - device=None, - usm_type="device", - sycl_queue=None, - endpoint=True, -): - """Validate input parameters before passing them into `dpctl.tensor` module""" - dpu.validate_usm_type(usm_type, allow_none=False) - sycl_queue_normalized = dpnp.get_normalized_queue_device( - sycl_queue=sycl_queue, device=device - ) - - """Creates `dpnp_array` with evenly spaced numbers of specified interval.""" - array_obj = dpt.linspace( - start, - stop, - num, - dtype=dtype, - usm_type=usm_type, - sycl_queue=sycl_queue_normalized, - endpoint=endpoint, - ) - return dpnp_array(array_obj.shape, buffer=array_obj) - - def ones( shape, *, @@ -296,18 +269,21 @@ def ones( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) def tril(x1, /, *, k=0): """Creates `dpnp_array` as lower triangular part of an input array.""" 
array_obj = dpt.tril(dpnp.get_usm_ndarray(x1), k=k) + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order="K") def triu(x1, /, *, k=0): """Creates `dpnp_array` as upper triangular part of an input array.""" array_obj = dpt.triu(dpnp.get_usm_ndarray(x1), k=k) + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order="K") @@ -336,4 +312,6 @@ def zeros( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) + # TODO: uncomment once dpctl implements asynchronous call + # dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 49e7b41c01c..b3103869e8d 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -42,6 +42,7 @@ import dpctl import dpctl.tensor as dpt +import dpctl.utils as dpu import numpy from dpctl.tensor._device import normalize_queue_device @@ -69,6 +70,7 @@ "get_usm_ndarray_or_scalar", "is_supported_array_or_scalar", "is_supported_array_type", + "synchronize_array_data", ] from dpnp import float64, isscalar @@ -238,10 +240,10 @@ def astype(x1, dtype, order="K", casting="unsafe", copy=True, device=None): x1_obj, dtype, order=order, casting=casting, copy=copy, device=device ) - # return x1 if dpctl returns a zero copy of x1_obj + dpnp.synchronize_array_data(x1) if array_obj is x1_obj and isinstance(x1, dpnp_array): + # return x1 if dpctl returns a zero copy of x1_obj return x1 - return dpnp_array._create_from_usm_ndarray(array_obj) @@ -699,3 +701,16 @@ def is_supported_array_type(a): """ return isinstance(a, (dpnp_array, dpt.usm_ndarray)) + + +def synchronize_array_data(a): + """ + The dpctl interface was reworked to make asynchronous execution. + That function makes a synchronization call to ensure array data is valid + before exit from dpnp interface function. + + """ + + if hasattr(dpu, "SequentialOrderManager"): + check_supported_arrays_type(a) + dpu.SequentialOrderManager[a.sycl_queue].wait() diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 5cf63ea0fca..6698f3f782e 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -40,6 +40,7 @@ import operator +import dpctl.tensor as dpt import numpy import dpnp @@ -51,6 +52,10 @@ dpnp_logspace, dpnp_nd_grid, ) +from .dpnp_array import dpnp_array + +# pylint: disable=no-name-in-module +from .dpnp_utils import get_usm_allocations, map_dtype_to_device __all__ = [ "arange", @@ -2183,7 +2188,7 @@ def geomspace( """ - return dpnp_geomspace( + res = dpnp_geomspace( start, stop, num, @@ -2195,6 +2200,9 @@ def geomspace( axis=axis, ) + dpnp.synchronize_array_data(res) + return res + def identity( n, @@ -2402,7 +2410,7 @@ def linspace( """ - return dpnp_linspace( + res = dpnp_linspace( start, stop, num, @@ -2415,6 +2423,12 @@ def linspace( axis=axis, ) + if isinstance(res, tuple): # (result, step) is returning + dpnp.synchronize_array_data(res[0]) + else: + dpnp.synchronize_array_data(res) + return res + def loadtxt( fname, @@ -2629,7 +2643,7 @@ def logspace( """ - return dpnp_logspace( + res = dpnp_logspace( start, stop, num=num, @@ -2642,6 +2656,9 @@ def logspace( axis=axis, ) + dpnp.synchronize_array_data(res) + return res + # pylint: disable=redefined-outer-name def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): @@ -2720,21 +2737,30 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): "Unrecognized indexing keyword value, expecting 'xy' or 'ij'." 
) + if ndim < 1: + return [] + s0 = (1,) * ndim output = [ - dpnp.reshape(x, s0[:i] + (-1,) + s0[i + 1 :]) for i, x in enumerate(xi) + dpt.reshape(dpnp.get_usm_ndarray(x), s0[:i] + (-1,) + s0[i + 1 :]) + for i, x in enumerate(xi) ] + # input arrays must be allocated on the same queue + _, _ = get_usm_allocations(output) + if indexing == "xy" and ndim > 1: - output[0] = output[0].reshape((1, -1) + s0[2:]) - output[1] = output[1].reshape((-1, 1) + s0[2:]) + output[0] = dpt.reshape(output[0], (1, -1) + s0[2:]) + output[1] = dpt.reshape(output[1], (-1, 1) + s0[2:]) if not sparse: - output = dpnp.broadcast_arrays(*output) + output = dpt.broadcast_arrays(*output) if copy: - output = [x.copy() for x in output] + output = [dpt.copy(x) for x in output] + dpnp.synchronize_array_data(output[0]) + output = [dpnp_array._create_from_usm_ndarray(x) for x in output] return output @@ -3261,7 +3287,10 @@ def tri( _dtype = dpnp.default_float_type() if dtype in (dpnp.float, None) else dtype - m = dpnp.ones( + if usm_type is None: + usm_type = "device" + + m = dpt.ones( (N, M), dtype=_dtype, device=device, @@ -3469,28 +3498,34 @@ def vander( [125, 25, 5, 1]]), Device(level_zero:gpu:0), 'host') """ - x = dpnp.asarray(x, device=device, usm_type=usm_type, sycl_queue=sycl_queue) + if dpnp.is_supported_array_type(x): + x = dpnp.get_usm_ndarray(x) + usm_x = dpt.asarray( + x, device=device, usm_type=usm_type, sycl_queue=sycl_queue + ) + + x_sycl_queue = usm_x.sycl_queue + x_usm_type = usm_x.usm_type if N is not None and not isinstance(N, int): raise TypeError(f"An integer is required, but got {type(N)}") - if x.ndim != 1: + if usm_x.ndim != 1: raise ValueError("`x` must be a one-dimensional array or sequence.") if N is None: - N = x.size + N = usm_x.size + + _dtype = numpy.promote_types(usm_x.dtype, int) + _dtype = map_dtype_to_device(_dtype, x_sycl_queue.sycl_device) + m = dpnp.empty_like(usm_x, shape=(usm_x.size, N), dtype=_dtype) - _dtype = int if x.dtype == bool else x.dtype - m = empty( - (x.size, N), - dtype=_dtype, - usm_type=x.usm_type, - sycl_queue=x.sycl_queue, - ) tmp = m[:, ::-1] if not increasing else m dpnp.power( - x.reshape(-1, 1), - dpnp.arange(N, dtype=_dtype, sycl_queue=x.sycl_queue), + dpt.reshape(usm_x, (-1, 1)), + dpt.arange( + N, dtype=_dtype, usm_type=x_usm_type, sycl_queue=x_sycl_queue + ), out=tmp, ) return m diff --git a/dpnp/dpnp_iface_counting.py b/dpnp/dpnp_iface_counting.py index 8a90601ce8f..515cad08a06 100644 --- a/dpnp/dpnp_iface_counting.py +++ b/dpnp/dpnp_iface_counting.py @@ -37,6 +37,8 @@ """ +import dpctl.tensor as dpt + import dpnp __all__ = ["count_nonzero"] @@ -87,5 +89,6 @@ def count_nonzero(a, axis=None, *, keepdims=False): # TODO: might be improved by implementing an extension # with `count_nonzero` kernel - a = dpnp.astype(a, dpnp.bool, copy=False) - return a.sum(axis=axis, dtype=dpnp.intp, keepdims=keepdims) + usm_a = dpnp.get_usm_ndarray(a) + usm_a = dpt.astype(usm_a, dpnp.bool, copy=False) + return dpnp.sum(usm_a, axis=axis, dtype=dpnp.intp, keepdims=keepdims) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 1a1b4daf740..24c8b6aaf78 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -40,11 +40,17 @@ import operator import warnings +import dpctl.tensor as dpt import dpctl.utils as dpu import numpy import dpnp +from .dpnp_algo.dpnp_arraycreation import ( + dpnp_linspace, +) +from .dpnp_array import dpnp_array + __all__ = [ "digitize", "histogram", @@ -60,7 +66,7 @@ def _ravel_check_a_and_weights(a, weights): 
"""Check input `a` and `weights` arrays, and ravel both.""" # ensure that `a` array has supported type - dpnp.check_supported_arrays_type(a) + a = dpnp.get_usm_ndarray(a) usm_type = a.usm_type # ensure that the array is a "subtractable" dtype @@ -71,11 +77,11 @@ def _ravel_check_a_and_weights(a, weights): RuntimeWarning, stacklevel=3, ) - a = a.astype(numpy.uint8) + a = dpt.astype(a, numpy.uint8) if weights is not None: # check that `weights` array has supported type - dpnp.check_supported_arrays_type(weights) + weights = dpnp.get_usm_ndarray(weights) usm_type = dpu.get_coerced_usm_type([usm_type, weights.usm_type]) # check that arrays have the same allocation queue @@ -86,8 +92,9 @@ def _ravel_check_a_and_weights(a, weights): if weights.shape != a.shape: raise ValueError("weights should have the same shape as a.") - weights = weights.ravel() - a = a.ravel() + weights = dpt.reshape(weights, -1) + + a = dpt.reshape(a, -1) return a, weights, usm_type @@ -113,7 +120,7 @@ def _get_outer_edges(a, range): first_edge, last_edge = 0, 1 else: - first_edge, last_edge = a.min(), a.max() + first_edge, last_edge = dpt.min(a), dpt.max(a) if not (dpnp.isfinite(first_edge) and dpnp.isfinite(last_edge)): raise ValueError( f"autodetected range of [{first_edge}, {last_edge}] " @@ -157,9 +164,9 @@ def _get_bin_edges(a, bins, range, usm_type): "a and bins must be allocated on the same SYCL queue" ) - bin_edges = bins + bin_edges = dpnp.get_usm_ndarray(bins) else: - bin_edges = dpnp.asarray( + bin_edges = dpt.asarray( bins, sycl_queue=sycl_queue, usm_type=usm_type ) @@ -183,7 +190,7 @@ def _get_bin_edges(a, bins, range, usm_type): ) # bin edges must be computed - bin_edges = dpnp.linspace( + bin_edges = dpnp_linspace( first_edge, last_edge, n_equal_bins + 1, @@ -191,7 +198,7 @@ def _get_bin_edges(a, bins, range, usm_type): dtype=bin_type, sycl_queue=sycl_queue, usm_type=usm_type, - ) + ).get_array() return bin_edges, (first_edge, last_edge, n_equal_bins) return bin_edges, None @@ -204,8 +211,11 @@ def _search_sorted_inclusive(a, v): """ - return dpnp.concatenate( - (a.searchsorted(v[:-1], "left"), a.searchsorted(v[-1:], "right")) + return dpt.concat( + ( + dpt.searchsorted(a, v[:-1], side="left"), + dpt.searchsorted(a, v[-1:], side="right"), + ) ) @@ -297,8 +307,14 @@ def digitize(x, bins, right=False): # Use dpnp.searchsorted directly if bins are increasing return dpnp.searchsorted(bins, x, side=side) + usm_x = dpnp.get_usm_ndarray(x) + usm_bins = dpnp.get_usm_ndarray(bins) + # Reverse bins and adjust indices if bins are decreasing - return bins.size - dpnp.searchsorted(bins[::-1], x, side=side) + usm_res = usm_bins.size - dpt.searchsorted(usm_bins[::-1], usm_x, side=side) + + dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) def histogram(a, bins=10, range=None, density=None, weights=None): @@ -412,26 +428,36 @@ def histogram(a, bins=10, range=None, density=None, weights=None): else: # Compute via cumulative histogram if weights is None: - sa = dpnp.sort(a) + sa = dpt.sort(a) cum_n = _search_sorted_inclusive(sa, bin_edges) else: - zero = dpnp.zeros( + zero = dpt.zeros( 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=usm_type ) - sorting_index = dpnp.argsort(a) + sorting_index = dpt.argsort(a) sa = a[sorting_index] sw = weights[sorting_index] - cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype))) + cw = dpt.concat((zero, dpt.cumulative_sum(sw, dtype=ntype))) bin_index = _search_sorted_inclusive(sa, bin_edges) cum_n = cw[bin_index] n = dpnp.diff(cum_n) + # convert 
bin_edges to dpnp.ndarray + bin_edges = dpnp_array._create_from_usm_ndarray(bin_edges) + if density: # pylint: disable=possibly-used-before-assignment - db = dpnp.diff(bin_edges).astype(dpnp.default_float_type()) - return n / db / n.sum(), bin_edges + db = dpnp.diff(bin_edges) + db = dpt.astype(db.get_array(), dpnp.default_float_type()) + + usm_n = n.get_array() + hist = usm_n / db / dpt.sum(usm_n) + dpnp.synchronize_array_data(hist) + return dpnp_array._create_from_usm_ndarray(hist), bin_edges + + dpnp.synchronize_array_data(n) return n, bin_edges @@ -517,4 +543,6 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): a, weights, usm_type = _ravel_check_a_and_weights(a, weights) bin_edges, _ = _get_bin_edges(a, bins, range, usm_type) - return bin_edges + + dpnp.synchronize_array_data(bin_edges) + return dpnp_array._create_from_usm_ndarray(bin_edges) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index bf3c66d7fda..a4b7352d4e6 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -668,12 +668,15 @@ def concatenate( usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] usm_res = dpt.concat(usm_arrays, axis=axis) + res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: res = res.astype(dtype, casting=casting, copy=False) elif out is not None: dpnp.copyto(out, res, casting=casting) return out + + dpnp.synchronize_array_data(res) return res @@ -907,10 +910,11 @@ def expand_dims(a, axis): """ - usm_array = dpnp.get_usm_ndarray(a) - return dpnp_array._create_from_usm_ndarray( - dpt.expand_dims(usm_array, axis=axis) - ) + usm_a = dpnp.get_usm_ndarray(a) + usm_res = dpt.expand_dims(usm_a, axis=axis) + + dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) def flip(m, axis=None): @@ -1298,8 +1302,10 @@ def repeat(a, repeats, axis=None): a = dpnp.ravel(a) usm_arr = dpnp.get_usm_ndarray(a) - usm_arr = dpt.repeat(usm_arr, repeats, axis=axis) - return dpnp_array._create_from_usm_ndarray(usm_arr) + usm_res = dpt.repeat(usm_arr, repeats, axis=axis) + + dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) def reshape(a, /, newshape, order="C", copy=None): @@ -1374,9 +1380,11 @@ def reshape(a, /, newshape, order="C", copy=None): elif order not in "cfCF": raise ValueError(f"order must be one of 'C' or 'F' (got {order})") - usm_arr = dpnp.get_usm_ndarray(a) - usm_arr = dpt.reshape(usm_arr, shape=newshape, order=order, copy=copy) - return dpnp_array._create_from_usm_ndarray(usm_arr) + usm_a = dpnp.get_usm_ndarray(a) + usm_res = dpt.reshape(usm_a, shape=newshape, order=order, copy=copy) + + dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) def result_type(*arrays_and_dtypes): @@ -1483,10 +1491,12 @@ def roll(x, shift, axis=None): """ if axis is None: return roll(x.reshape(-1), shift, 0).reshape(x.shape) - usm_array = dpnp.get_usm_ndarray(x) - return dpnp_array._create_from_usm_ndarray( - dpt.roll(usm_array, shift=shift, axis=axis) - ) + + usm_x = dpnp.get_usm_ndarray(x) + usm_res = dpt.roll(usm_x, shift=shift, axis=axis) + + dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) def rollaxis(x, axis, start=0): @@ -1633,10 +1643,11 @@ def squeeze(a, /, axis=None): """ - usm_array = dpnp.get_usm_ndarray(a) - return dpnp_array._create_from_usm_ndarray( - dpt.squeeze(usm_array, axis=axis) - ) + usm_a = dpnp.get_usm_ndarray(a) + usm_res = dpt.squeeze(usm_a, axis=axis) + + 
dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"): @@ -1714,12 +1725,15 @@ def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"): usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] usm_res = dpt.stack(usm_arrays, axis=axis) + res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: res = res.astype(dtype, casting=casting, copy=False) elif out is not None: dpnp.copyto(out, res, casting=casting) return out + + dpnp.synchronize_array_data(res) return res @@ -1772,10 +1786,11 @@ def swapaxes(a, axis1, axis2): """ - usm_array = dpnp.get_usm_ndarray(a) - return dpnp_array._create_from_usm_ndarray( - dpt.swapaxes(usm_array, axis1=axis1, axis2=axis2) - ) + usm_a = dpnp.get_usm_ndarray(a) + usm_res = dpt.swapaxes(usm_a, axis1=axis1, axis2=axis2) + + dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) # pylint: disable=invalid-name @@ -1853,8 +1868,11 @@ def tile(A, reps): """ - usm_array = dpnp.get_usm_ndarray(A) - return dpnp_array._create_from_usm_ndarray(dpt.tile(usm_array, reps)) + usm_a = dpnp.get_usm_ndarray(A) + usm_res = dpt.tile(usm_a, reps) + + dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) def transpose(a, axes=None): diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 378ecaf9b19..f7c70320dbf 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -373,18 +373,13 @@ def test_array_creation_load_txt(device): @pytest.mark.parametrize( - "device_x", - valid_devices, - ids=[device.filter_string for device in valid_devices], -) -@pytest.mark.parametrize( - "device_y", + "device", valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_meshgrid(device_x, device_y): - x = dpnp.arange(100, device=device_x) - y = dpnp.arange(100, device=device_y) +def test_meshgrid(device): + x = dpnp.arange(100, device=device) + y = dpnp.arange(100, device=device) z = dpnp.meshgrid(x, y) assert_sycl_queue_equal(z[0].sycl_queue, x.sycl_queue) assert_sycl_queue_equal(z[1].sycl_queue, y.sycl_queue)
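
Note for readers following the change: the diff applies one recurring pattern across the dpnp interface — unwrap dpnp.ndarray inputs to dpctl usm_ndarray, dispatch the work through dpctl.tensor (which now submits kernels asynchronously), synchronize on the queue's SequentialOrderManager before returning, and wrap the result back into a dpnp.ndarray. Below is a minimal, self-contained sketch of that pattern, not code from the patch itself; the function name _example_unary is hypothetical, and dpt.negative merely stands in for whatever dpctl.tensor call a given dpnp function dispatches to.

    # Minimal sketch (not part of the patch) of the unwrap/dispatch/synchronize/wrap
    # pattern this PR applies throughout dpnp. `_example_unary` is a hypothetical name.
    import dpctl.tensor as dpt

    import dpnp
    from dpnp.dpnp_array import dpnp_array


    def _example_unary(a):
        # Unwrap a dpnp.ndarray into the underlying usm_ndarray
        # (usm_ndarray inputs are accepted as-is).
        usm_a = dpnp.get_usm_ndarray(a)

        # Dispatch to dpctl.tensor; with asynchronous dpctl this only submits work.
        usm_res = dpt.negative(usm_a)

        # Wait on the queue's SequentialOrderManager so the data is valid on return.
        dpnp.synchronize_array_data(usm_res)

        # Zero-copy wrap of the result back into a dpnp.ndarray.
        return dpnp_array._create_from_usm_ndarray(usm_res)

As the new dpnp.synchronize_array_data helper shows (see the dpnp/dpnp_iface.py hunk above), the wait is guarded by hasattr(dpu, "SequentialOrderManager"), so the call degrades to a no-op on dpctl builds that still execute synchronously — this is the backward compatible behavior mentioned in the commit message.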