Implementing PUT routine #582

Merged
merged 40 commits on Oct 19, 2022
Changes from 14 commits

Commits (40)
d4b6d69
towards implementing put
ipdemes Sep 9, 2022
4dd090a
fixing errors and improving test
ipdemes Sep 12, 2022
fc6ebe7
updating documentation
ipdemes Sep 12, 2022
1d87fd7
code clean-up
ipdemes Sep 12, 2022
15db53e
adding missing pragmas for openmp
ipdemes Sep 13, 2022
d01e17c
fixing compile-time errors
ipdemes Sep 13, 2022
48d1920
fixing mypy errors
ipdemes Sep 13, 2022
458f1a2
fixing issue with converting futures for put + modifying tests
ipdemes Sep 13, 2022
e67d0f3
adding check for repeated entries in indices array
ipdemes Sep 13, 2022
5b8a301
fixing mypy errors
ipdemes Sep 13, 2022
dd5f0a3
Update error message for wrong clip mode
ipdemes Sep 19, 2022
ca965ed
update warning message
ipdemes Sep 19, 2022
6e05573
adding _warn_and_convert function
ipdemes Sep 19, 2022
a821bf6
Merge branch 'put' of github.com:ipdemes/cunumeric into put
ipdemes Sep 19, 2022
9ead87f
fixed formatting error
ipdemes Sep 19, 2022
24cb096
fixing mypy errors
ipdemes Sep 19, 2022
ba0ec13
Merge remote-tracking branch 'origin/branch-22.10' into put
ipdemes Sep 27, 2022
41e8406
addressing PR comments
ipdemes Sep 27, 2022
4814b4c
Avoid emitting new warnings
manopapad Sep 27, 2022
5f64cad
fixing logic for PUT in the case of transformed arrays
ipdemes Sep 27, 2022
d5c4414
_warn_and_convert checks the target type already
manopapad Sep 28, 2022
9a8a3ca
addressing PR comments
ipdemes Sep 28, 2022
c91042a
Typo
manopapad Sep 28, 2022
5d6c9fa
adding check for out-of-bounds indices
ipdemes Sep 28, 2022
4ffae77
fixing the case when scalar value needs to be wrapped
ipdemes Sep 28, 2022
f8af1de
adding bounds check to the cuda kernel
ipdemes Oct 6, 2022
f5b92d3
changing name of a bool variable in _convert_future_to_regionfield me…
ipdemes Oct 6, 2022
c893ee5
fixing the cases for scalar lhs in put operation
ipdemes Oct 7, 2022
9edae62
fixing out-of-bounds check for ZIP cuda kernel
ipdemes Oct 7, 2022
7f59b15
fixing logic for negative indices
ipdemes Oct 7, 2022
6a9545f
Merge remote-tracking branch 'origin/branch-22.12' into put
ipdemes Oct 10, 2022
ed60e72
Merge branch 'branch-22.12' into put
manopapad Oct 13, 2022
67aa0ad
Update a leftover use of auto_convert
manopapad Oct 13, 2022
88cca6d
addressing PR comments
ipdemes Oct 14, 2022
92cf417
fixing logic for the bounds check
ipdemes Oct 17, 2022
e489d56
addressing PR comments
ipdemes Oct 18, 2022
d363758
Merge remote-tracking branch 'origin/branch-22.12' into put
ipdemes Oct 18, 2022
58cd174
addressing PR comments
ipdemes Oct 18, 2022
34fc00b
Merge remote-tracking branch 'origin/branch-22.12' into put
ipdemes Oct 19, 2022
be9f556
Merge branch 'branch-22.12' into put
ipdemes Oct 19, 2022
75 changes: 63 additions & 12 deletions cunumeric/array.py
@@ -920,12 +920,8 @@ def _convert_key(self, key: Any, first: bool = True) -> Any:
key = convert_to_cunumeric_ndarray(key)
if key.dtype != bool and not np.issubdtype(key.dtype, np.integer):
raise TypeError("index arrays should be int or bool type")
if key.dtype != bool and key.dtype != np.int64:
runtime.warn(
"converting index array to int64 type",
category=RuntimeWarning,
)
key = key.astype(np.int64)
if key.dtype != bool:
key = key._warn_and_convert(np.int64)

return key._thunk

@@ -2093,12 +2089,8 @@ def compress(
raise ValueError(
"Dimension mismatch: condition must be a 1D array"
)
if condition.dtype != bool:
runtime.warn(
"converting condition to bool type",
category=RuntimeWarning,
)
condition = condition.astype(bool)

condition = condition._warn_and_convert(bool)

if axis is None:
axis = 0
@@ -2465,6 +2457,55 @@ def diagonal(
raise ValueError("Either axis1/axis2 or axes must be supplied")
return self._diag_helper(offset=offset, axes=axes, extract=extract)

@add_boilerplate("indices", "values")
def put(
self, indices: ndarray, values: ndarray, mode: str = "raise"
) -> None:
"""
Set storage-indexed locations to corresponding values.

See Also
--------
numpy.put

Availability
--------
Multiple GPUs, Multiple CPUs

"""

if values.size == 0 or indices.size == 0:
return

if mode not in ("raise", "wrap", "clip"):
raise ValueError(f"clipmode must be one of 'clip', 'raise', or 'wrap' (got {mode})")

")
if mode == "wrap":
indices = indices % self.size
elif mode == "clip":
indices = indices.clip(0, self.size - 1)

indices = indices._warn_and_convert(np.int64)

if indices.ndim > 1:
indices = indices.ravel()

# If there are repeated entries in the indices array,
# Legate doesn't guarantee the order in which values
# are updated
if indices.size > indices.unique().size:
runtime.warn(
"size of indices is larger than source array which"
" might yield results different from NumPy",
category=RuntimeWarning,
)
# call _wrap on the values if they need to be wrapped
if values.ndim != indices.ndim or values.size != indices.size:
values = values._wrap(indices.size)

self._thunk.put(indices._thunk, values._thunk)

@add_boilerplate()
def trace(
self,
@@ -3811,6 +3852,16 @@ def _maybe_convert(self, dtype: np.dtype[Any], hints: Any) -> ndarray:
copy._thunk.convert(self._thunk)
return copy

def _warn_and_convert(self, dtype: np.dtype[Any]) -> ndarray:
if self.dtype != dtype:
runtime.warn(
f"converting array to {dtype} type",
category=RuntimeWarning,
)
return self.astype(dtype)
else:
return self

# For performing normal/broadcast unary operations
@classmethod
def _perform_unary_op(
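For orientation, here is a minimal usage sketch of the new `ndarray.put` API added above. It is not part of the diff; it assumes the semantics mirror `numpy.put`, including the 'wrap' and 'clip' modes described in the docstring:

```python
import cunumeric as num

a = num.arange(5)                    # [0, 1, 2, 3, 4]
a.put([0, 2], [10, 20])              # a[0] = 10, a[2] = 20
num.put(a, [7], [99], mode="wrap")   # 7 % a.size == 2, so a[2] becomes 99
num.put(a, [9], [-1], mode="clip")   # 9 is clipped to a.size - 1 == 4
print(a)                             # [10, 1, 99, 3, -1]
```

Note that, per the warning added above, repeated entries in `indices` may be applied in any order, so results can differ from NumPy in that case.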
59 changes: 57 additions & 2 deletions cunumeric/deferred.py
@@ -781,10 +781,16 @@ def _broadcast(self, shape: NdShape) -> Any:

return result

def _convert_future_to_regionfield(self) -> DeferredArray:
def _convert_future_to_regionfield(
self, future: bool = False
) -> DeferredArray:
if future:
shape: NdShape = (1,)
else:
shape = self.shape
store = self.context.create_store(
self.dtype,
shape=self.shape,
shape=shape,
optimize_scalar=False,
)
thunk_copy = DeferredArray(
@@ -1657,6 +1663,54 @@ def _diag_helper(

task.execute()

@auto_convert([1, 2])
def put(self, indices: Any, values: Any) -> None:
if indices.base.kind == Future or indices.base.transformed:
indices = indices._convert_future_to_regionfield(
indices.base.kind == Future
)
if values.base.kind == Future or values.base.transformed:
values = values._convert_future_to_regionfield(
values.base.kind == Future
)
if self.base.kind == Future or self.base.transformed:
self = self._convert_future_to_regionfield(
self.base.kind == Future
)

assert indices.size == values.size

# First, we create an indirect array of PointN type that
# has (indices.size,) shape and is used to copy data from values
# to the target ND array (self)
N = self.ndim
pointN_dtype = self.runtime.get_point_type(N)
indirect = cast(
DeferredArray,
self.runtime.create_empty_thunk(
shape=indices.shape,
dtype=pointN_dtype,
inputs=[indices],
),
)

task = self.context.create_task(CuNumericOpCode.WRAP)
task.add_output(indirect.base)
task.add_scalar_arg(self.shape, (ty.int64,))
task.add_scalar_arg(True, bool) # has_input
task.add_input(indices.base)
task.add_alignment(indices.base, indirect.base)
task.execute()
if indirect.base.kind == Future or indirect.base.transformed:
indirect = indirect._convert_future_to_regionfield()

copy = self.context.create_copy()
copy.set_target_indirect_out_of_range(False)
copy.add_input(values.base)
copy.add_target_indirect(indirect.base)
copy.add_output(self.base)
copy.execute()

# Create an identity array with the ones offset from the diagonal by k
def eye(self, k: int) -> None:
assert self.ndim == 2 # Only 2-D arrays should be here
@@ -3341,6 +3395,7 @@ def _wrap(self, src: Any, new_len: int) -> None:
task = self.context.create_task(CuNumericOpCode.WRAP)
task.add_output(indirect.base)
task.add_scalar_arg(src.shape, (ty.int64,))
task.add_scalar_arg(False, bool) # has_input
task.execute()

copy = self.context.create_copy()
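As a rough mental model of the deferred path above — the WRAP task turns flat indices into PointN coordinates, and an indirect copy then scatters the values into the target store — here is an illustrative NumPy sketch. `put_via_indirection` is a hypothetical name; this is not the actual implementation:

```python
import numpy as np

def put_via_indirection(target: np.ndarray, indices: np.ndarray, values: np.ndarray) -> None:
    # Model of CuNumericOpCode.WRAP with has_input=True:
    # turn each flat index into an N-D point into `target`.
    points = np.stack(np.unravel_index(indices, target.shape), axis=-1)  # shape (k, ndim)
    # Model of the indirect copy: scatter values[i] into target at points[i].
    for point, value in zip(points, values):
        target[tuple(point)] = value

a = np.zeros((2, 3))
put_via_indirection(a, np.array([0, 4]), np.array([1.0, 2.0]))
# a is now [[1., 0., 0.], [0., 2., 0.]]
```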
6 changes: 6 additions & 0 deletions cunumeric/eager.py
@@ -620,6 +620,12 @@ def _diag_helper(
axes = tuple(range(ndims - naxes, ndims))
self.array = diagonal_reference(rhs.array, axes)

def put(self, indices: Any, values: Any) -> None:
if self.deferred is not None:
self.deferred.put(indices, values)
else:
np.put(self.array, indices.array, values.array)

def eye(self, k: int) -> None:
if self.deferred is not None:
self.deferred.eye(k)
32 changes: 32 additions & 0 deletions cunumeric/module.py
@@ -3446,6 +3446,38 @@ def diagonal(
)


@add_boilerplate("a", "indices", "values")
def put(
a: ndarray, indices: ndarray, values: ndarray, mode: str = "raise"
) -> None:
"""
Set storage-indexed locations to corresponding values.

Parameters
----------
a : array_like
Array to put data into.
indices : array_like
Target indices, interpreted as integers.
values : array_like
Values to place in `a` at target indices.
mode : {'raise', 'wrap', 'clip'}, optional
Specifies how out-of-bounds indices will behave.
'raise' : raise an error.
'wrap' : wrap around.
'clip' : clip to the range.

See Also
--------
numpy.put

Availability
--------
Multiple GPUs, Multiple CPUs
"""
a.put(indices=indices, values=values, mode=mode)


@add_boilerplate("a", "val")
def fill_diagonal(a: ndarray, val: ndarray, wrap: bool = False) -> None:
"""
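For completeness, a short sketch exercising the module-level entry point added above (illustrative only; it assumes out-of-bounds indices raise `IndexError` under the default mode="raise", as in NumPy):

```python
import cunumeric as num

a = num.zeros(4, dtype=int)
num.put(a, [1, 3], [7, 8])     # a is now [0, 7, 0, 8]
num.put(a, [-1], [9])          # negative indices count from the end: a[3] = 9
try:
    num.put(a, [10], [1])      # out of bounds under the default mode="raise"
except IndexError as err:
    print("rejected out-of-bounds index:", err)
```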
4 changes: 4 additions & 0 deletions cunumeric/thunk.py
@@ -197,6 +197,10 @@ def _diag_helper(
) -> None:
...

@abstractmethod
def put(self, indices: Any, values: Any) -> None:
...

@abstractmethod
def eye(self, k: int) -> None:
...
1 change: 1 addition & 0 deletions docs/cunumeric/source/api/indexing.rst
@@ -43,5 +43,6 @@ Inserting data into arrays
:toctree: generated/

fill_diagonal
put
put_along_axis
place
14 changes: 7 additions & 7 deletions src/cunumeric/index/wrap.cc
@@ -24,28 +24,28 @@ using namespace legate;

template <int DIM>
struct WrapImplBody<VariantKind::CPU, DIM> {
template <typename IND>
void operator()(const AccessorWO<Point<DIM>, 1>& out,
const Pitches<0>& pitches_out,
const Rect<1>& out_rect,
const Pitches<DIM - 1>& pitches_in,
const Rect<DIM>& in_rect,
const bool dense) const
const bool dense,
IND& indices) const
{
const int64_t start = out_rect.lo[0];
const int64_t end = out_rect.hi[0];
const auto in_volume = in_rect.volume();
if (dense) {
int64_t out_idx = 0;
auto outptr = out.ptr(out_rect);
for (int64_t i = start; i <= end; i++) {
const int64_t input_idx = i % in_volume;
const int64_t input_idx = compute_idx(i, in_volume, indices);
auto point = pitches_in.unflatten(input_idx, in_rect.lo);
outptr[out_idx] = point;
out_idx++;
outptr[i - start] = point;
}
} else {
for (int64_t i = start; i <= end; i++) {
const int64_t input_idx = i % in_volume;
const int64_t input_idx = compute_idx(i, in_volume, indices); // i % in_volume;
auto point = pitches_in.unflatten(input_idx, in_rect.lo);
out[i] = point;
}
26 changes: 15 additions & 11 deletions src/cunumeric/index/wrap.cu
@@ -23,7 +23,7 @@ namespace cunumeric {
using namespace Legion;
using namespace legate;

template <int DIM>
template <int DIM, typename IND>
__global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM)
wrap_kernel(const AccessorWO<Point<DIM>, 1> out,
const int64_t start,
@@ -32,40 +32,44 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM)
const Point<1> out_lo,
const Pitches<DIM - 1> pitches_in,
const Point<DIM> in_lo,
const size_t in_volume)
const size_t in_volume,
IND indices)
{
const auto idx = global_tid_1d();
if (idx >= volume) return;
const int64_t input_idx = (idx + start) % in_volume;
const int64_t input_idx = compute_idx((idx + start), in_volume, indices);
auto out_p = pitches_out.unflatten(idx, out_lo);
auto p = pitches_in.unflatten(input_idx, in_lo);
out[out_p] = p;
}

template <int DIM>
template <int DIM, typename IND>
__global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM)
wrap_kernel_dense(Point<DIM>* out,
const int64_t start,
const int64_t volume,
const Pitches<DIM - 1> pitches_in,
const Point<DIM> in_lo,
const size_t in_volume)
const size_t in_volume,
IND indices)
{
const auto idx = global_tid_1d();
if (idx >= volume) return;
const int64_t input_idx = (idx + start) % in_volume;
const int64_t input_idx = compute_idx((idx + start), in_volume, indices);
auto p = pitches_in.unflatten(input_idx, in_lo);
out[idx] = p;
}

template <int DIM>
struct WrapImplBody<VariantKind::GPU, DIM> {
template <typename IND>
void operator()(const AccessorWO<Point<DIM>, 1>& out,
const Pitches<0>& pitches_out,
const Rect<1>& out_rect,
const Pitches<DIM - 1>& pitches_in,
const Rect<DIM>& in_rect,
const bool dense) const
const bool dense,
IND& indices) const
{
auto stream = get_cached_stream();
const int64_t start = out_rect.lo[0];
@@ -74,11 +78,11 @@ struct WrapImplBody<VariantKind::GPU, DIM> {
const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
if (dense) {
auto outptr = out.ptr(out_rect);
wrap_kernel_dense<DIM><<<blocks, THREADS_PER_BLOCK, 0, stream>>>(
outptr, start, volume, pitches_in, in_rect.lo, in_volume);
wrap_kernel_dense<DIM, IND><<<blocks, THREADS_PER_BLOCK, 0, stream>>>(
outptr, start, volume, pitches_in, in_rect.lo, in_volume, indices);
} else {
wrap_kernel<DIM><<<blocks, THREADS_PER_BLOCK, 0, stream>>>(
out, start, volume, pitches_out, out_rect.lo, pitches_in, in_rect.lo, in_volume);
wrap_kernel<DIM, IND><<<blocks, THREADS_PER_BLOCK, 0, stream>>>(
out, start, volume, pitches_out, out_rect.lo, pitches_in, in_rect.lo, in_volume, indices);
}
CHECK_CUDA_STREAM(stream);
}
10 changes: 10 additions & 0 deletions src/cunumeric/index/wrap.h
@@ -25,6 +25,8 @@ struct WrapArgs {
// copy information from original array to the
// `wrapped` one
const Legion::DomainPoint shape; // shape of the original array
const bool has_input;
const Array& in;
};

class WrapTask : public CuNumericTask<WrapTask> {
Expand All @@ -41,4 +43,12 @@ class WrapTask : public CuNumericTask<WrapTask> {
#endif
};

__CUDA_HD__ static int64_t compute_idx(int64_t i, int64_t volume, bool&) { return i % volume; }

__CUDA_HD__ static int64_t compute_idx(int64_t i,
int64_t,
const legate::AccessorRO<int64_t, 1>& indices)
{
return indices[i];
}
} // namespace cunumeric
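The two `compute_idx` overloads select the indexing behavior per template instantiation: the bool overload keeps the original wrap-around behavior (`i % volume`), while the accessor overload reads the flat index supplied by `put`. A hedged Python sketch of the same dispatch (illustrative only, not the C++ API):

```python
def compute_idx(i, volume, indices=None):
    # has_input == False (bool overload): wrap linearly over the input volume.
    if indices is None:
        return i % volume
    # has_input == True (accessor overload): read the flat index supplied by put.
    return int(indices[i])
```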