Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge master into gold/2021 #1335

Merged
merged 3 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//*****************************************************************************
// Copyright (c) 2016-2020, Intel Corporation
// Copyright (c) 2016-2023, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -104,7 +104,7 @@

#endif

MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem& input2_elem)
MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem & input2_elem)
MACRO_2ARG_1TYPE_OP(dpnp_bitwise_or_c, input1_elem | input2_elem)
MACRO_2ARG_1TYPE_OP(dpnp_bitwise_xor_c, input1_elem ^ input2_elem)
MACRO_2ARG_1TYPE_OP(dpnp_left_shift_c, input1_elem << input2_elem)
Expand Down
110 changes: 98 additions & 12 deletions dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "dpnp_fptr.hpp"
#include "dpnp_iface.hpp"
#include "dpnp_iterator.hpp"
#include "dpnp_utils.hpp"
#include "dpnpc_memory_adapter.hpp"
#include "queue_sycl.hpp"
Expand All @@ -49,27 +50,66 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
sycl::event event;

DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size);
_DataType* array1 = input1_ptr.get_ptr();
_DataType* result = reinterpret_cast<_DataType*>(result1);
_DataType* input_data = static_cast<_DataType*>(array1_in);
_DataType* result = static_cast<_DataType*>(result1);

sycl::range<1> gws(size);
auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/
constexpr size_t lws = 64;
constexpr unsigned int vec_sz = 8;

auto gws_range = sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
auto lws_range = sycl::range<1>(lws);

auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
auto sg = nd_it.get_sub_group();
const auto max_sg_size = sg.get_max_local_range()[0];
const size_t start =
vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size);

if (start + static_cast<size_t>(vec_sz) * max_sg_size < size)
{
_DataType input_elem1 = array1[i];
result[i] = ~input_elem1;
using multi_ptrT = sycl::multi_ptr<_DataType, sycl::access::address_space::global_space>;

sycl::vec<_DataType, vec_sz> x = sg.load<vec_sz>(multi_ptrT(&input_data[start]));
sycl::vec<_DataType, vec_sz> res_vec;

if constexpr (std::is_same_v<_DataType, bool>)
{
#pragma unroll
for (size_t k = 0; k < vec_sz; ++k)
{
res_vec[k] = !(x[k]);
}
}
else
{
res_vec = ~x;
}

sg.store<vec_sz>(multi_ptrT(&result[start]), res_vec);
}
else
{
for (size_t k = start + sg.get_local_id()[0]; k < size; k += max_sg_size)
{
if constexpr (std::is_same_v<_DataType, bool>)
{
result[k] = !(input_data[k]);
}
else
{
result[k] = ~(input_data[k]);
}
}
}
};

auto kernel_func = [&](sycl::handler& cgh) {
cgh.parallel_for<class dpnp_invert_c_kernel<_DataType>>(gws, kernel_parallel_for_func);
cgh.parallel_for<class dpnp_invert_c_kernel<_DataType>>(sycl::nd_range<1>(gws_range, lws_range),
kernel_parallel_for_func);
};

event = q.submit(kernel_func);

event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);

return DPCTLEvent_Copy(event_ref);
}

Expand All @@ -84,6 +124,7 @@ void dpnp_invert_c(void* array1_in, void* result1, size_t size)
size,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType>
Expand All @@ -98,9 +139,11 @@ DPCTLSyclEventRef (*dpnp_invert_ext_c)(DPCTLSyclQueueRef,

static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
{
fmap[DPNPFuncName::DPNP_FN_INVERT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_default_c<bool>};
fmap[DPNPFuncName::DPNP_FN_INVERT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_default_c<int32_t>};
fmap[DPNPFuncName::DPNP_FN_INVERT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_default_c<int64_t>};

fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_ext_c<bool>};
fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_ext_c<int32_t>};
fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_ext_c<int64_t>};

Expand All @@ -114,6 +157,9 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
template <typename _KernelNameSpecialization> \
class __name__##_strides_kernel; \
\
template <typename _KernelNameSpecialization> \
class __name__##_broadcast_kernel; \
\
template <typename _DataType> \
DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \
void* result_out, \
Expand Down Expand Up @@ -152,6 +198,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
_DataType* input2_data = static_cast<_DataType*>(const_cast<void*>(input2_in)); \
_DataType* result = static_cast<_DataType*>(result_out); \
\
bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \
\
shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \
\
get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \
Expand All @@ -167,7 +215,42 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
sycl::event event; \
sycl::range<1> gws(result_size); \
\
if (use_strides) \
if (use_broadcasting) \
{ \
DPNPC_id<_DataType>* input1_it; \
const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType>); \
input1_it = reinterpret_cast<DPNPC_id<_DataType>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); \
new (input1_it) DPNPC_id<_DataType>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \
\
input1_it->broadcast_to_shape(result_shape, result_ndim); \
\
DPNPC_id<_DataType>* input2_it; \
const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType>); \
input2_it = reinterpret_cast<DPNPC_id<_DataType>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); \
new (input2_it) DPNPC_id<_DataType>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \
\
input2_it->broadcast_to_shape(result_shape, result_ndim); \
\
auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \
const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
{ \
const _DataType input1_elem = (*input1_it)[i]; \
const _DataType input2_elem = (*input2_it)[i]; \
result[i] = __operation__; \
} \
}; \
auto kernel_func = [&](sycl::handler& cgh) { \
cgh.parallel_for<class __name__##_broadcast_kernel<_DataType>>(gws, kernel_parallel_for_func); \
}; \
\
q.submit(kernel_func).wait(); \
\
input1_it->~DPNPC_id(); \
input2_it->~DPNPC_id(); \
\
return event_ref; \
} \
else if (use_strides) \
{ \
if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \
{ \
Expand Down Expand Up @@ -332,18 +415,21 @@ static void func_map_init_bitwise_2arg_1type(func_map_t& fmap)
fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_default<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_default<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_and_c_ext<bool>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_ext<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_ext<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_default<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_default<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_or_c_ext<bool>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_ext<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_ext<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_default<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_default<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_xor_c_ext<bool>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_ext<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_ext<int64_t>};

Expand Down
6 changes: 3 additions & 3 deletions dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# cython: language_level=3
# -*- coding: utf-8 -*-
# *****************************************************************************
# Copyright (c) 2016-2020, Intel Corporation
# Copyright (c) 2016-2023, Intel Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -68,8 +68,8 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj,
return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)


cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr):
return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape)
cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr, utils.dpnp_descriptor out=None):
return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape, out=out, func_name="invert")


cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj,
Expand Down
69 changes: 52 additions & 17 deletions dpnp/dpnp_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ def __abs__(self):
def __add__(self, other):
return dpnp.add(self, other)

# '__and__',
def __and__(self, other):
return dpnp.bitwise_and(self, other)

# '__array__',
# '__array_finalize__',
# '__array_function__',
Expand Down Expand Up @@ -193,9 +195,17 @@ def __gt__(self, other):

# '__hash__',
# '__iadd__',
# '__iand__',

def __iand__(self, other):
dpnp.bitwise_and(self, other, out=self)
return self

# '__ifloordiv__',
# '__ilshift__',

def __ilshift__(self, other):
dpnp.left_shift(self, other, out=self)
return self

# '__imatmul__',
# '__imod__',
# '__imul__',
Expand All @@ -209,18 +219,28 @@ def __index__(self):
def __int__(self):
return self._array_obj.__int__()

# '__invert__',
# '__ior__',
def __invert__(self):
return dpnp.invert(self)

def __ior__(self, other):
dpnp.bitwise_or(self, other, out=self)
return self

def __ipow__(self, other):
dpnp.power(self, other, out=self)
return self

# '__irshift__',
def __irshift__(self, other):
dpnp.right_shift(self, other, out=self)
return self

# '__isub__',
# '__iter__',
# '__itruediv__',
# '__ixor__',

def __ixor__(self, other):
dpnp.bitwise_xor(self, other, out=self)
return self

def __le__(self, other):
return dpnp.less_equal(self, other)
Expand All @@ -232,7 +252,8 @@ def __len__(self):

return self._array_obj.__len__()

# '__lshift__',
def __lshift__(self, other):
return dpnp.left_shift(self, other)

def __lt__(self, other):
return dpnp.less(self, other)
Expand All @@ -253,7 +274,10 @@ def __neg__(self):
return dpnp.negative(self)

# '__new__',
# '__or__',

def __or__(self, other):
return dpnp.bitwise_or(self, other)

# '__pos__',

def __pow__(self, other):
Expand All @@ -262,7 +286,9 @@ def __pow__(self, other):
def __radd__(self, other):
return dpnp.add(other, self)

# '__rand__',
def __rand__(self, other):
return dpnp.bitwise_and(other, self)

# '__rdivmod__',
# '__reduce__',
# '__reduce_ex__',
Expand All @@ -271,7 +297,9 @@ def __repr__(self):
return dpt.usm_ndarray_repr(self._array_obj, prefix="array")

# '__rfloordiv__',
# '__rlshift__',

def __rlshift__(self, other):
return dpnp.left_shift(other, self)

def __rmatmul__(self, other):
return dpnp.matmul(other, self)
Expand All @@ -282,21 +310,27 @@ def __rmod__(self, other):
def __rmul__(self, other):
return dpnp.multiply(other, self)

# '__ror__',

def __ror__(self, other):
return dpnp.bitwise_or(other, self)

def __rpow__(self, other):
return dpnp.power(other, self)

# '__rrshift__',
# '__rshift__',
def __rrshift__(self, other):
return dpnp.right_shift(other, self)

def __rshift__(self, other):
return dpnp.right_shift(self, other)

def __rsub__(self, other):
return dpnp.subtract(other, self)

def __rtruediv__(self, other):
return dpnp.true_divide(other, self)

# '__rxor__',
def __rxor__(self, other):
return dpnp.bitwise_xor(other, self)

# '__setattr__',

def __setitem__(self, key, val):
Expand Down Expand Up @@ -334,7 +368,8 @@ def __sub__(self, other):
def __truediv__(self, other):
return dpnp.true_divide(self, other)

# '__xor__',
def __xor__(self, other):
return dpnp.bitwise_xor(self, other)

@staticmethod
def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray):
Expand Down
3 changes: 2 additions & 1 deletion dpnp/dpnp_iface.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def from_dlpack(obj, /):
def get_dpnp_descriptor(ext_obj,
copy_when_strides=True,
copy_when_nondefault_queue=True,
alloc_dtype=None,
alloc_usm_type=None,
alloc_queue=None):
"""
Expand All @@ -274,7 +275,7 @@ def get_dpnp_descriptor(ext_obj,
# If input object is a scalar, it means it was allocated on host memory.
# We need to copy it to USM memory according to compute follows data paradigm.
if isscalar(ext_obj):
ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue)
ext_obj = array(ext_obj, dtype=alloc_dtype, usm_type=alloc_usm_type, sycl_queue=alloc_queue)

# while dpnp functions have no implementation with strides support
# we need to create a non-strided copy
Expand Down
Loading