Skip to content

Commit

Permalink
Merge pull request #199 from mfoerste4/sort_pr
Browse files Browse the repository at this point in the history
Sort pr
  • Loading branch information
mfoerste4 authored Mar 22, 2022
2 parents 3b62ade + 21d47b7 commit d97b567
Show file tree
Hide file tree
Showing 16 changed files with 1,896 additions and 0 deletions.
8 changes: 8 additions & 0 deletions cunumeric/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2682,6 +2682,14 @@ def setflags(self, write=None, align=None, uic=None):
"""
self.__array__().setflags(write=write, align=align, uic=uic)

def sort(self, axis=-1, kind="quicksort", order=None):
    """a.sort(axis=-1, kind="quicksort", order=None)

    Sort this array, writing the result back into the array itself.

    Parameters
    ----------
    axis : int, optional
        Axis along which to sort. Defaults to the last axis.
    kind : str, optional
        Requested sort algorithm; forwarded unchanged to the thunk.
    order : str or list of str, optional
        Forwarded unchanged to the thunk.

    """
    # The array's own thunk is both the source and the destination.
    thunk = self._thunk
    thunk.sort(rhs=thunk, axis=axis, kind=kind, order=order)

def argsort(self, axis=-1, kind="quicksort", order=None):
    """a.argsort(axis=-1, kind="quicksort", order=None)

    Return the indices that would sort this array.

    The array itself is left untouched; the indices are written into a
    newly created ``int64`` array, mirroring the module-level ``argsort``.

    Parameters
    ----------
    axis : int or None, optional
        Axis along which to sort. Defaults to the last axis.
    kind : str, optional
        Requested sort algorithm; forwarded unchanged to the thunk.
    order : str or list of str, optional
        Forwarded unchanged to the thunk.

    Returns
    -------
    index_array : ndarray of ints
        Indices that sort this array along the requested axis.

    """
    # Local import keeps this method self-contained regardless of how the
    # enclosing module spells its NumPy import.
    import numpy as np

    # Sorting into ``self._thunk`` with ``argsort=True`` would overwrite the
    # array's values with indices and return nothing, so use a separate
    # destination array and hand it back to the caller.
    result = ndarray(self.shape, np.int64)
    result._thunk.sort(
        rhs=self._thunk, argsort=True, axis=axis, kind=kind, order=order
    )
    return result

def squeeze(self, axis=None):
"""a.squeeze(axis=None)
Expand Down
1 change: 1 addition & 0 deletions cunumeric/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class CuNumericOpCode(IntEnum):
READ = _cunumeric.CUNUMERIC_READ
REPEAT = _cunumeric.CUNUMERIC_REPEAT
SCALAR_UNARY_RED = _cunumeric.CUNUMERIC_SCALAR_UNARY_RED
SORT = _cunumeric.CUNUMERIC_SORT
SYRK = _cunumeric.CUNUMERIC_SYRK
TILE = _cunumeric.CUNUMERIC_TILE
TRANSPOSE_COPY_2D = _cunumeric.CUNUMERIC_TRANSPOSE_COPY_2D
Expand Down
19 changes: 19 additions & 0 deletions cunumeric/deferred.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
UnaryRedCode,
)
from .linalg.cholesky import cholesky
from .sort import sort
from .thunk import NumPyThunk
from .utils import get_arg_value_dtype

Expand Down Expand Up @@ -1589,3 +1590,21 @@ def unique(self):
)

return result

@auto_convert([1])
def sort(self, rhs, argsort=False, axis=-1, kind="quicksort", order=None):
    """Sort ``rhs`` (or compute its sorting indices) into this array.

    Parameters
    ----------
    rhs : thunk
        Source of the values to sort.
    argsort : bool, optional
        Forwarded to the sort implementation as its "return indices" flag.
    axis : int or None, optional
        Axis to sort along; must satisfy ``-rhs.ndim <= axis < rhs.ndim``
        unless it is None.
    kind : str, optional
        ``"stable"`` and ``"mergesort"`` request a stable sort; any other
        value requests a sort without a stability guarantee.
    order : str or list of str, optional
        Not supported; must be None.

    Raises
    ------
    NotImplementedError
        If ``order`` is not None.
    ValueError
        If ``axis`` is outside the valid range for ``rhs``.

    """
    if order is not None:
        raise NotImplementedError(
            "cuNumeric does not support sorting with 'order' as "
            "ndarray only supports numeric values"
        )
    if axis is not None and (axis >= rhs.ndim or axis < -rhs.ndim):
        raise ValueError("invalid axis")

    # NumPy documents both "stable" and "mergesort" as stable sorts, so a
    # caller asking for "mergesort" must also get the stability guarantee.
    stable = kind in ("stable", "mergesort")

    # Inside the method body ``sort`` resolves to the module-level function
    # imported from ``.sort``, not to this method.
    sort(self, rhs, argsort, axis, stable)
10 changes: 10 additions & 0 deletions cunumeric/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,16 @@ def nonzero(self):
result += (EagerArray(self.runtime, array),)
return result

def sort(self, rhs, argsort=False, axis=-1, kind="quicksort", order=None):
    """Sort ``rhs`` into this eager array, or store its sorting indices.

    If a deferred counterpart exists the request is forwarded to it;
    otherwise the work is done directly with NumPy and the result replaces
    ``self.array``.
    """
    self.check_eager_args(rhs, axis, kind, order)

    if self.deferred is not None:
        self.deferred.sort(rhs, argsort, axis, kind, order)
        return

    # Pick the NumPy routine once; both take the same positional arguments.
    numpy_sorter = np.argsort if argsort else np.sort
    self.array = numpy_sorter(rhs.array, axis, kind, order)

def random_uniform(self):
if self.deferred is not None:
self.deferred.random_uniform()
Expand Down
162 changes: 162 additions & 0 deletions cunumeric/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -5992,6 +5992,168 @@ def unique(
# Sorting, searching, and counting
##################################

# Sorting


@add_boilerplate("a")
def argsort(a, axis=-1, kind="quicksort", order=None):
    """

    Returns the indices that would sort an array.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int or None, optional
        Axis to sort. Defaults to -1 (the last axis). If None, the
        flattened array is used.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
        Default is 'quicksort'. The underlying sort algorithm might vary;
        the implementation essentially distinguishes 'stable' from
        *not* 'stable'.
    order : str or list of str, optional
        Currently not supported.

    Returns
    -------
    index_array : ndarray of ints
        Array of indices that sort `a` along the specified axis. It has the
        same shape as `a`, or is flattened if `axis` is None.

    Notes
    -----
    The current implementation has only limited support for distributed
    data. Distributed 1-D or flattened data will be broadcasted.

    See Also
    --------
    numpy.argsort

    Availability
    ------------
    Multiple GPUs, Single CPU
    """
    # The indices are produced into a separate int64 array so that `a`
    # itself is not modified.
    indices = ndarray(a.shape, np.int64)
    indices._thunk.sort(
        rhs=a._thunk,
        argsort=True,
        axis=axis,
        kind=kind,
        order=order,
    )
    return indices


def msort(a):
    """

    Returns a copy of an array sorted along the first axis.

    Parameters
    ----------
    a : array_like
        Input array.

    Returns
    -------
    out : ndarray
        Sorted array with same dtype and shape as `a`.

    Notes
    -----
    The current implementation has only limited support for distributed
    data. Distributed 1-D data will be broadcasted.

    See Also
    --------
    numpy.msort

    Availability
    ------------
    Multiple GPUs, Single CPU
    """
    # Thin wrapper: a regular sort pinned to the first axis.
    first_axis_sorted = sort(a, axis=0)
    return first_axis_sorted


@add_boilerplate("a")
def sort(a, axis=-1, kind="quicksort", order=None):
    """

    Returns a sorted copy of an array.

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int or None, optional
        Axis to sort. Defaults to -1 (the last axis). If None, the
        flattened array is used.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
        Default is 'quicksort'. The underlying sort algorithm might vary;
        the implementation essentially distinguishes 'stable' from
        *not* 'stable'.
    order : str or list of str, optional
        Currently not supported.

    Returns
    -------
    out : ndarray
        Sorted array with same dtype and shape as `a`. If `axis` is None
        the result is flattened.

    Notes
    -----
    The current implementation has only limited support for distributed
    data. Distributed 1-D or flattened data will be broadcasted.

    See Also
    --------
    numpy.sort

    Availability
    ------------
    Multiple GPUs, Single CPU
    """
    # Sort into a separate array of matching shape and dtype so the input
    # is left as it was.
    sorted_copy = ndarray(a.shape, a.dtype)
    sorted_copy._thunk.sort(
        rhs=a._thunk,
        axis=axis,
        kind=kind,
        order=order,
    )
    return sorted_copy


@add_boilerplate("a")
def sort_complex(a):
    """

    Returns a sorted copy of an array sorted along the last axis. Sorts the
    real part first, the imaginary part second.

    Parameters
    ----------
    a : array_like
        Input array.

    Returns
    -------
    out : ndarray, complex
        Sorted array with same shape as `a`. Complex input keeps its dtype;
        8- and 16-bit integer input is returned as complex64; every other
        input is returned as complex128.

    Notes
    -----
    The current implementation has only limited support for distributed
    data. Distributed 1-D data will be broadcasted.

    See Also
    --------
    numpy.sort_complex

    Availability
    ------------
    Multiple GPUs, Single CPU
    """

    result = sort(a)

    # Already complex: nothing to convert.
    if np.issubdtype(result.dtype, np.complexfloating):
        return result

    # Follow NumPy's promotion: only 8/16-bit integers fit losslessly into
    # complex64; casting wider integers or float64 to complex64 would
    # silently drop precision, so everything else goes to complex128.
    if np.issubdtype(result.dtype, np.integer) and result.dtype.itemsize <= 2:
        return result.astype(np.complex64, copy=True)
    return result.astype(np.complex128, copy=True)


# Searching


Expand Down
105 changes: 105 additions & 0 deletions cunumeric/sort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright 2022 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


from cunumeric.config import CuNumericOpCode

from legate.core import types as ty


def sort_flattened(output, input, argsort, stable):
    """Sort ``input`` as a flattened 1-D array and store it in ``output``.

    ``input`` is reshaped to one dimension in C order, sorted into a newly
    created 1-D thunk, and ``output`` then takes over that thunk's ``base``.
    """
    flat_input = input.reshape((input.size,), order="C")

    # Destination for the 1-D result; it carries the dtype of ``output``.
    flat_result = output.runtime.create_empty_thunk(
        flat_input.shape, dtype=output.dtype, inputs=(flat_input,)
    )
    sort(flat_result, flat_input, argsort, stable=stable)

    # Point ``output`` at the flattened result and clear its ``numpy_array``.
    output.base = flat_result.base
    output.numpy_array = None


def sort_swapped(output, input, argsort, sort_axis, stable):
    """Sort along a non-last axis by moving that axis to the last position.

    The requested axis is swapped with the last one, the swapped view is
    deep-copied, the copy is sorted along its last axis, and the axes are
    swapped back before the result's ``base`` is handed to ``output``.

    ``sort_axis`` must already be normalized to ``0 <= sort_axis < ndim - 1``.
    """
    last_axis = input.ndim - 1
    assert 0 <= sort_axis < last_axis

    # Bring the sort axis to the end and materialize it with a deep copy.
    swapped_view = input.swapaxes(sort_axis, last_axis)
    swapped_copy = output.runtime.create_empty_thunk(
        swapped_view.shape, dtype=input.dtype, inputs=(input, swapped_view)
    )
    swapped_copy.copy(swapped_view, deep=True)

    if argsort is True:
        # Indices use the dtype of ``output``, so they need their own
        # destination thunk.
        sorted_thunk = output.runtime.create_empty_thunk(
            swapped_copy.shape, dtype=output.dtype, inputs=(swapped_copy,)
        )
    else:
        # Values can be sorted in place inside the copy.
        sorted_thunk = swapped_copy

    # Run the sort on the last axis.
    sort(sorted_thunk, swapped_copy, argsort, stable=stable)

    # Undo the axis swap and let ``output`` adopt the result.
    output.base = sorted_thunk.swapaxes(last_axis, sort_axis).base
    output.numpy_array = None


def sort_task(output, input, argsort, stable):
    """Build and execute the ``CuNumericOpCode.SORT`` task.

    The task receives ``input`` as its input and three scalar arguments:
    the ``argsort`` flag, the shape of ``input.base`` and the ``stable``
    flag. When more than one GPU is available and ``input`` is 1-D, the
    task writes into an unbound thunk whose ``base`` replaces
    ``output.base`` after execution.
    """
    task = output.context.create_task(CuNumericOpCode.SORT)

    runtime = output.runtime
    multi_gpu = runtime.num_gpus > 1
    needs_unbound_output = multi_gpu and input.ndim == 1

    if needs_unbound_output:
        unbound = runtime.create_unbound_thunk(dtype=output.dtype)
        task.add_output(unbound.base)
    else:
        task.add_output(output.base)
        task.add_alignment(output.base, input.base)

    task.add_input(input.base)

    if output.ndim > 1:
        # Broadcast constraint on the last dimension of the input.
        task.add_broadcast(input.base, input.ndim - 1)
    elif multi_gpu:
        task.add_nccl_communicator()
    elif runtime.num_gpus == 0 and runtime.num_procs > 1:
        # Distributed 1D sort on CPU not supported yet
        task.add_broadcast(input.base)

    # Scalar arguments, in the order they are registered with the task.
    task.add_scalar_arg(argsort, bool)  # return indices flag
    task.add_scalar_arg(input.base.shape, (ty.int32,))
    task.add_scalar_arg(stable, bool)
    task.execute()

    if needs_unbound_output:
        # Adopt the store written by the task and clear ``numpy_array``.
        output.base = unbound.base
        output.numpy_array = None


def sort(output, input, argsort, axis=-1, stable=False):
    """Dispatch a sort of ``input`` into ``output``.

    Parameters
    ----------
    output : thunk
        Destination of the sorted values (or of the indices).
    input : thunk
        Source of the values to sort.
    argsort : bool
        Forwarded to the helpers as the "return indices" flag.
    axis : int or None, optional
        Axis to sort along. None on a multi-dimensional input sorts the
        flattened data; None on other inputs is treated as axis 0.
        Negative values count from the last axis.
    stable : bool, optional
        Forwarded to the helpers as the stable-sort flag.

    """
    if axis is None and input.ndim > 1:
        sort_flattened(output, input, argsort, stable)
        return

    if axis is None:
        axis = 0
    elif axis < 0:
        axis = input.ndim + axis

    # Compare by value: an identity test on integers only works by accident
    # of CPython's small-int cache and is always unequal for NumPy integer
    # axes, which would route a last-axis sort into ``sort_swapped``.
    if axis != input.ndim - 1:
        sort_swapped(output, input, argsort, axis, stable)
    else:
        # run actual sort task
        sort_task(output, input, argsort, stable)
Loading

0 comments on commit d97b567

Please sign in to comment.