This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[sparse] slice for csr on two dimensions, cpu implementation #8331

Merged: 16 commits merged on Nov 8, 2017
Changes from 11 commits
152 changes: 125 additions & 27 deletions src/operator/tensor/matrix_op-inl.h
@@ -443,8 +443,7 @@ inline bool SliceForwardInferStorageType(const nnvm::NodeAttrs& attrs,
dispatch_mode, DispatchMode::kFCompute);
}

- if (!dispatched && in_stype == kCSRStorage && param.begin.ndim() <= 1 &&
-     param.end.ndim() <= 1) {
+ if (!dispatched && in_stype == kCSRStorage) {
dispatched = storage_type_assign(&out_stype, kCSRStorage,
dispatch_mode, dispatch_ex);
}
@@ -551,37 +550,27 @@ void SliceCsrIndPtrImpl(const int begin, const int end, RunContext ctx,
}

/*
- * Slice a CSR NDArray
+ * Slice a CSR NDArray for first dimension
* Only implemented for CPU
*/
template<typename xpu>
- void SliceCsrImpl(const SliceParam &param, const OpContext& ctx,
-                   const NDArray &in, OpReqType req, const NDArray &out) {
+ void SliceDimOneCsrImpl(const TShape &begin, const TShape &end, const OpContext& ctx,
+                         const NDArray &in, const NDArray &out) {
using namespace mshadow;
using namespace mxnet_op;
using namespace csr;
CHECK((std::is_same<xpu, cpu>::value)) << "Slice for CSR input only implemented for CPU";
if (req == kNullOp) return;
CHECK_NE(req, kAddTo) << "kAddTo for Slice on CSR input is not supported";
CHECK_NE(req, kWriteInplace) << "kWriteInplace for Slice on CSR input is not supported";
const TShape ishape = in.shape();
int begin = *param.begin[0];
if (begin < 0) begin += ishape[0];
int end = *param.end[0];
if (end < 0) end += ishape[0];
int indptr_len = end - begin + 1;
CHECK((std::is_same<xpu, cpu>::value)) << "SliceDimOneCsrImpl is only implemented for CPU";
nnvm::dim_t begin_row = begin[0];
nnvm::dim_t end_row = end[0];
nnvm::dim_t indptr_len = end_row - begin_row + 1;
out.CheckAndAllocAuxData(kIndPtr, Shape1(indptr_len));
if (!in.storage_initialized()) {
Reviewer: What happens here if the input is a CSR array with all zeroes?
Author: Thanks for your comments. If the input is all zeros, the kernel launch returns immediately. A unit test for the all-zeros input case has been added.
Reviewer: Is that still true on GPU, when we add GPU support? PR #8470 is dealing with some bugs for zero inputs in the dot operator.
Author: For a CSRNDArray, storage_initialized() returns aux_shape(0).Size() != 0; I think that is always true for a valid CSRNDArray except for a rank-0 array.
Author: Changed to return csr zeros immediately if nnz == 0.
out.set_aux_shape(kIndPtr, Shape1(0));
return;
}
// assume idx indptr share the same type
MSHADOW_IDX_TYPE_SWITCH(in.aux_type(kIndPtr), RType, {
MSHADOW_IDX_TYPE_SWITCH(in.aux_type(kIdx), IType, {
MSHADOW_TYPE_SWITCH(in.dtype(), DType, {
auto in_indptr = in.aux_data(kIndPtr).dptr<RType>();
auto out_indptr = out.aux_data(kIndPtr).dptr<RType>();
Reviewer: Can we avoid auto for in_indptr and out_indptr?
- SliceCsrIndPtrImpl<cpu, RType>(begin, end, ctx.run_ctx, in_indptr, out_indptr);
+ SliceCsrIndPtrImpl<cpu, RType>(begin_row, end_row, ctx.run_ctx, in_indptr, out_indptr);

// retrieve nnz (CPU implementation)
int nnz = out_indptr[indptr_len - 1];
@@ -592,7 +581,7 @@ void SliceCsrImpl(const SliceParam &param, const OpContext& ctx,
auto out_idx = out.aux_data(kIdx).dptr<IType>();
auto in_data = in.data().dptr<DType>();
Reviewer: Can we avoid auto here and use IType and DType?
auto out_data = out.data().dptr<DType>();
- int offset = in_indptr[begin];
+ int offset = in_indptr[begin_row];
// this is also a CPU-only implementation
memcpy(out_idx, in_idx + offset, nnz * sizeof(IType));
memcpy(out_data, in_data + offset, nnz * sizeof(DType));
Expand All @@ -601,18 +590,127 @@ void SliceCsrImpl(const SliceParam &param, const OpContext& ctx,
});
}
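The row-only path above amounts to re-basing a window of indptr and copying one contiguous block of indices and data. A minimal NumPy sketch of that idea (illustrative only; slice_csr_rows and the sample arrays are hypothetical, not MXNet code):

import numpy as np

def slice_csr_rows(indptr, indices, data, begin_row, end_row):
    """Slice rows [begin_row, end_row) of a CSR matrix given as raw arrays."""
    # The new indptr is the old window re-based at zero.
    offset = indptr[begin_row]
    out_indptr = indptr[begin_row:end_row + 1] - offset
    nnz = out_indptr[-1]
    if nnz == 0:
        # All-zero slice: empty indices/data (mirrors the nnz == 0 early return discussed above).
        return out_indptr, np.empty(0, indices.dtype), np.empty(0, data.dtype)
    # Row slicing keeps whole rows, so indices/data form one contiguous block.
    out_indices = indices[offset:offset + nnz].copy()
    out_data = data[offset:offset + nnz].copy()
    return out_indptr, out_indices, out_data

# Example: 3x3 matrix [[1, 0, 2], [0, 3, 0], [4, 0, 5]] in CSR form, keep rows [1, 3).
indptr = np.array([0, 2, 3, 5])
indices = np.array([0, 2, 1, 0, 2])
data = np.array([1., 2., 3., 4., 5.])
print(slice_csr_rows(indptr, indices, data, 1, 3))
# -> (array([0, 1, 3]), array([1, 0, 2]), array([3., 4., 5.]))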

// slice a CSRNDArray for two dimensions
struct SliceDimTwoCsrAssign {
template<typename IType, typename RType, typename DType>
MSHADOW_XINLINE static void Map(int i, IType* out_idx, DType* out_data,
const RType* out_indptr, const IType* in_idx,
const DType* in_data, const RType* in_indptr,
const int begin, const int end) {
RType ind = out_indptr[i];
for (RType j = in_indptr[i]; j < in_indptr[i+1]; j++) {
if (in_idx[j] >= end) {
Reviewer: Any reason not to just do if (in_idx[j] >= begin_col && in_idx[j] < end_col)?
Author (@ZiyueHuang, Oct 31, 2017): The indices of a csr ndarray are in ascending order within each row, so once an index is >= end there is no need to continue the loop.
Reviewer: I was suggesting the change for readability. Also, you would be doing the check for every in_idx[j] < begin_col, which the change would avoid.
Author: I used the condition in_idx[j] >= begin_col && in_idx[j] < end_col at first, but following @eric-haibin-lin's comments the logic was changed to an if/else that can break out of the loop early, since indices are sorted per row.
break;
} else if (in_idx[j] >= begin) {
out_idx[ind] = in_idx[j] - begin;
out_data[ind] = in_data[j];
ind++;
}
}
}
};
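The kernel relies on column indices being sorted within each CSR row, so the scan of a row can stop at the first index that reaches end. A small Python sketch of the same per-row loop (filter_row is a hypothetical helper, shown only to illustrate the break / else-if logic):

def filter_row(in_idx, in_data, row_start, row_end, begin, end):
    """Collect the entries of one CSR row whose column index falls in [begin, end)."""
    out_idx, out_data = [], []
    for j in range(row_start, row_end):
        if in_idx[j] >= end:
            break                                 # indices are sorted per row, nothing further can match
        elif in_idx[j] >= begin:
            out_idx.append(in_idx[j] - begin)     # re-base columns at the slice origin
            out_data.append(in_data[j])
    return out_idx, out_data

# Row with columns [0, 2, 5], slice columns [1, 4): only column 2 survives, re-based to 1.
print(filter_row([0, 2, 5], [7.0, 8.0, 9.0], 0, 3, 1, 4))   # -> ([1], [8.0])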

/*
* Slice a CSR NDArray for two dimensions
* Only implemented for CPU
*/
template<typename xpu>
void SliceDimTwoCsrImpl(const TShape &begin, const TShape &end, const OpContext& ctx,
const NDArray &in, const NDArray &out) {
using namespace mshadow;
using namespace mxnet_op;
using namespace csr;
CHECK((std::is_same<xpu, cpu>::value)) << "SliceDimTwoCsrImpl is only implemented for CPU";
nnvm::dim_t begin_row = begin[0], end_row = end[0];
nnvm::dim_t begin_col = begin[1], end_col = end[1];
nnvm::dim_t indptr_len = end_row - begin_row + 1;
out.CheckAndAllocAuxData(kIndPtr, Shape1(indptr_len));
// assume idx indptr share the same type
MSHADOW_IDX_TYPE_SWITCH(in.aux_type(kIndPtr), RType, {
MSHADOW_IDX_TYPE_SWITCH(in.aux_type(kIdx), IType, {
MSHADOW_TYPE_SWITCH(in.dtype(), DType, {
RType *in_indptr = in.aux_data(kIndPtr).dptr<RType>();
IType *in_idx = in.aux_data(kIdx).dptr<IType>();
DType *in_data = in.data().dptr<DType>();
// retrieve nnz (CPU implementation)
RType *out_indptr = out.aux_data(kIndPtr).dptr<RType>();
int nnz = 0;
out_indptr[0] = 0;
for (nnvm::dim_t i = 0; i < indptr_len - 1; i++) {
out_indptr[i+1] = out_indptr[i];
for (RType j = in_indptr[i + begin_row];
j < in_indptr[i + begin_row + 1]; j++) {
if (in_idx[j] >= end_col) {
Reviewer: Can we add a one-line comment for the if/else-if logic here? Also, why not just if (in_idx[j] >= begin_col && in_idx[j] < end_col)?
break;
} else if (in_idx[j] >= begin_col) {
out_indptr[i+1]++;
nnz++;
}
}
}
out.CheckAndAllocAuxData(kIdx, Shape1(nnz));
out.CheckAndAllocData(Shape1(nnz));
IType *out_idx = out.aux_data(kIdx).dptr<IType>();
DType *out_data = out.data().dptr<DType>();

Stream<xpu> *s = ctx.get_stream<xpu>();
Kernel<SliceDimTwoCsrAssign, xpu>::Launch(s, indptr_len - 1, out_idx, out_data,
out_indptr, in_idx, in_data,
in_indptr + begin_row,
begin_col, end_col);
});
});
});
}
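SliceDimTwoCsrImpl is a two-pass scheme: the first pass walks the selected rows and counts the entries whose column falls in [begin_col, end_col), filling out_indptr and the total nnz; the second pass (the kernel above, parallelised per row) writes the re-based indices and values. A compact NumPy sketch of the same two passes (slice_csr_2d is a hypothetical helper, not the MXNet code):

import numpy as np

def slice_csr_2d(indptr, indices, data, begin_row, end_row, begin_col, end_col):
    nrows = end_row - begin_row
    out_indptr = np.zeros(nrows + 1, dtype=indptr.dtype)
    # Pass 1: count entries per selected row whose column lies in [begin_col, end_col).
    for i in range(nrows):
        row = indices[indptr[begin_row + i]:indptr[begin_row + i + 1]]
        keep = (row >= begin_col) & (row < end_col)
        out_indptr[i + 1] = out_indptr[i] + np.count_nonzero(keep)
    nnz = out_indptr[-1]
    out_indices = np.empty(nnz, dtype=indices.dtype)
    out_data = np.empty(nnz, dtype=data.dtype)
    # Pass 2: copy the surviving entries, re-basing column indices at begin_col.
    for i in range(nrows):
        lo, hi = indptr[begin_row + i], indptr[begin_row + i + 1]
        row_idx, row_val = indices[lo:hi], data[lo:hi]
        keep = (row_idx >= begin_col) & (row_idx < end_col)
        out_indices[out_indptr[i]:out_indptr[i + 1]] = row_idx[keep] - begin_col
        out_data[out_indptr[i]:out_indptr[i + 1]] = row_val[keep]
    return out_indptr, out_indices, out_data

# Example: [[1, 0, 2], [0, 3, 0], [4, 0, 5]] in CSR form; keep rows [0, 2) and columns [1, 3).
indptr = np.array([0, 2, 3, 5])
indices = np.array([0, 2, 1, 0, 2])
data = np.array([1., 2., 3., 4., 5.])
print(slice_csr_2d(indptr, indices, data, 0, 2, 1, 3))
# -> (array([0, 1, 2]), array([1, 0]), array([2., 3.]))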


template<typename xpu>
void SliceCsrImpl(const SliceParam &param, const OpContext& ctx,
const NDArray &in, OpReqType req, const NDArray &out) {
CHECK((std::is_same<xpu, cpu>::value)) << "Slice for CSR input only implemented for CPU";
if (req == kNullOp) return;
CHECK_NE(req, kAddTo) << "kAddTo for Slice on CSR input is not supported";
CHECK_NE(req, kWriteInplace) << "kWriteInplace for Slice on CSR input is not supported";

const TShape ishape = in.shape();
const TShape oshape = out.shape();

uint32_t N = ishape.ndim();
TShape begin(N), end(N);
for (uint32_t i = 0; i < N; ++i) {
int s = 0;
if (param.begin[i]) {
s = *param.begin[i];
if (s < 0) s += ishape[i];
}
begin[i] = s;
end[i] = s + oshape[i];
}
switch (N) {
case 1: {
SliceDimOneCsrImpl<xpu>(begin, end, ctx, in, out);
break;
}
case 2: {
SliceDimTwoCsrImpl<xpu>(begin, end, ctx, in, out);
break;
}
default:
LOG(FATAL) << "CSR is only for 2-D shape";
break;
}
}
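Before dispatching, SliceCsrImpl normalises the slice parameters: a missing begin defaults to 0, a negative begin wraps by the axis length, and end is recovered from the already-inferred output shape rather than from param.end. A Python sketch of that normalisation (normalize_slice is a hypothetical helper used only to spell out the index arithmetic):

def normalize_slice(param_begin, ishape, oshape):
    """Turn optional/negative per-axis begins into concrete [begin, end) ranges."""
    begin, end = [], []
    for axis in range(len(ishape)):
        s = param_begin[axis] if param_begin[axis] is not None else 0
        if s < 0:
            s += ishape[axis]             # negative index counts from the end of the axis
        begin.append(s)
        end.append(s + oshape[axis])      # the output extent fixes the end of the range
    return begin, end

# e.g. slicing a (4, 6) csr with begin=(-3, None) and an inferred output shape of (2, 6)
print(normalize_slice([-3, None], (4, 6), (2, 6)))   # -> ([1, 0], [3, 6])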

template<typename xpu>
void SliceEx(const nnvm::NodeAttrs& attrs,
(whitespace-only realignment of the parameter list; the indentation is lost in this plain-text rendering)
- const OpContext& ctx,
- const std::vector<NDArray>& inputs,
- const std::vector<OpReqType>& req,
- const std::vector<NDArray>& outputs) {
+ const OpContext& ctx,
+ const std::vector<NDArray>& inputs,
+ const std::vector<OpReqType>& req,
+ const std::vector<NDArray>& outputs) {
CHECK_EQ(inputs.size(), 1);
CHECK_EQ(outputs.size(), 1);
const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed);
auto in_stype = inputs[0].storage_type();
CHECK_NE(in_stype, kDefaultStorage)
<< "SliceEx is not expected to execute for input with default storage type";
if (in_stype == kCSRStorage) {
SliceCsrImpl<xpu>(param, ctx, inputs[0], req[0], outputs[0]);
} else {
[rest of file not shown]
2 changes: 1 addition & 1 deletion src/operator/tensor/matrix_op.cc
@@ -264,7 +264,7 @@ The resulting array's *k*-th dimension contains elements
from the *k*-th dimension of the input array with the open range ``[b_k, e_k)``.

For an input array of non-default storage type(e.g. `csr` or `row_sparse`), it only supports
Reviewer: I think row_sparse is not supported for slice. Let's remove this sentence from the doc.
The storage type of ``slice`` output depends on storage types of inputs:
   - slice(csr) = csr
   - slice(default) = default

- slicing on the first dimension.
+ slicing on the two dimensions for `csr`.

Example::

[rest of the docstring, including the example, not shown]
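A hedged usage sketch of the documented behaviour (this is not the docstring's elided example; the values are made up, and the csr output storage is the behaviour this PR adds):

import mxnet as mx

dense = mx.nd.array([[1, 0, 2],
                     [0, 3, 0],
                     [4, 0, 5]])
csr = dense.tostype('csr')

# Slice rows [0, 2) and columns [1, 3) directly on the csr input.
out = mx.nd.slice(csr, begin=(0, 1), end=(2, 3))
print(out.stype)      # expected: 'csr'
print(out.asnumpy())  # [[0. 2.]
                      #  [3. 0.]]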
44 changes: 18 additions & 26 deletions tests/python/unittest/test_sparse_ndarray.py
@@ -105,32 +105,24 @@ def check_sparse_nd_setitem(stype, shape, dst):


def test_sparse_nd_slice():
-    def check_sparse_nd_csr_slice(shape):
-        stype = 'csr'
-        A, _ = rand_sparse_ndarray(shape, stype)
-        A2 = A.asnumpy()
-        start = rnd.randint(0, shape[0] - 1)
-        end = rnd.randint(start + 1, shape[0])
-        assert same(A[start:end].asnumpy(), A2[start:end])
-        assert same(A[start - shape[0]:end].asnumpy(), A2[start:end])
-        assert same(A[start:].asnumpy(), A2[start:])
-        assert same(A[:end].asnumpy(), A2[:end])
-        ind = rnd.randint(-shape[0], shape[0] - 1)
-        assert same(A[ind].asnumpy(), A2[ind][np.newaxis, :])
-
-    def check_slice_nd_csr_fallback(shape):
-        stype = 'csr'
-        A, _ = rand_sparse_ndarray(shape, stype)
-        A2 = A.asnumpy()
-        start = rnd.randint(0, shape[0] - 1)
-        end = rnd.randint(start + 1, shape[0])
-        result = mx.nd.sparse.slice(A, begin=(start, shape[1] - 1), end=(end + 1, shape[1]))
-        result_dense = mx.nd.slice(mx.nd.array(A2), begin=(start, shape[1] - 1), end=(end + 1, shape[1]))
-        assert same(result_dense.asnumpy(), result.asnumpy())
-
-    shape = (rnd.randint(2, 10), rnd.randint(1, 10))
-    check_sparse_nd_csr_slice(shape)
-    check_slice_nd_csr_fallback(shape)
+    shape = (rnd.randint(2, 10), rnd.randint(2, 10))
+    stype = 'csr'
+    A, _ = rand_sparse_ndarray(shape, stype)
+    A2 = A.asnumpy()
+    start = rnd.randint(0, shape[0] - 1)
+    end = rnd.randint(start + 1, shape[0])
+    assert same(A[start:end].asnumpy(), A2[start:end])
+    assert same(A[start - shape[0]:end].asnumpy(), A2[start:end])
+    assert same(A[start:].asnumpy(), A2[start:])
+    assert same(A[:end].asnumpy(), A2[:end])
+    ind = rnd.randint(-shape[0], shape[0] - 1)
+    assert same(A[ind].asnumpy(), A2[ind][np.newaxis, :])
+
+    start_col = rnd.randint(0, shape[1] - 1)
+    end_col = rnd.randint(start_col + 1, shape[1])
+    result = mx.nd.slice(A, begin=(start, start_col), end=(end, end_col))
+    result_dense = mx.nd.slice(mx.nd.array(A2), begin=(start, start_col), end=(end, end_col))
+    assert same(result_dense.asnumpy(), result.asnumpy())


def test_sparse_nd_equal():
[rest of file not shown]