diff --git a/python/mxnet/sparse_ndarray.py b/python/mxnet/sparse_ndarray.py
index 5faddd979078..2923a4c25292 100644
--- a/python/mxnet/sparse_ndarray.py
+++ b/python/mxnet/sparse_ndarray.py
@@ -184,7 +184,9 @@ def __getitem__(self, key):
             if key.step is not None:
                 raise ValueError('NDArray only supports continuous slicing on axis 0')
             if key.start is not None or key.stop is not None:
-                return self._slice(key.start, key.stop)
+                begin = key.start if key.start else 0
+                end = key.stop if key.stop else self.shape[0]
+                return ndarray.slice(self, begin=begin, end=end)
             else:
                 return self
         if isinstance(key, tuple):
@@ -193,44 +195,6 @@ def __getitem__(self, key):
     def _sync_copyfrom(self, source_array):
         raise Exception('Not implemented for SparseND yet!')
 
-    def _slice(self, start, stop):
-        """Returns a read-only SparseNDArray slice that shares memory with current one.
-        To create a writable slice, please use ``mx.nd.slice`` instead. Currently only
-        `csr` storage type is supported.
-
-        Parameters
-        ----------
-        start : int
-            Starting index of slice.
-        stop : int
-            Finishing index of slice.
-
-        Example
-        ----------
-        >>> indptr = np.array([0, 2, 3, 6])
-        >>> indices = np.array([0, 2, 2, 0, 1, 2])
-        >>> data = np.array([1, 2, 3, 4, 5, 6])
-        >>> a = mx.sparse_nd.csr(data, indptr, indices, (3, 3))
-        >>> a.asnumpy()
-        array([[1, 0, 2],
-               [0, 0, 3],
-               [4, 5, 6]])
-
-        >>> a[1:2].asnumpy()
-        array([[0, 0, 3]])
-
-        """
-        stype = self.storage_type
-        assert(stype == 'csr'), "_slice for " + str(stype) + " not implemented yet"
-        warnings.warn('slicing SparseNDArray is not efficient', RuntimeWarning)
-        handle = NDArrayHandle()
-        start = mx_uint(start) if start else mx_uint(0)
-        stop = mx_uint(stop) if stop else mx_uint(self.shape[0])
-        check_call(_LIB.MXNDArraySlice(
-            self.handle, start, stop, ctypes.byref(handle)))
-        ret = _ndarray_cls(handle=handle, writable=False)
-        return ret
-
     def _at(self, idx):
         raise Exception('at operator for SparseND is not supported.')
 
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index c894f27c25b7..b08e6c659c1f 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -66,91 +66,33 @@ NDArray NDArray::Slice(index_t begin, index_t end) const {
   using namespace mshadow;
   CHECK(!is_none()) << "NDArray is not initialized";
   CHECK_GE(shape_[0], end) << "Slice end index out of range";
-  CHECK_NE(storage_type(), kUndefinedStorage);
-  if (storage_type() == kDefaultStorage) {
-    NDArray ret = *this;
-    auto stype = storage_type();
-    size_t length = shape_.ProdShape(1, shape_.ndim());
-    MSHADOW_TYPE_SWITCH(ret.dtype(), DType, {
-      ret.byte_offset_ += begin * length * sizeof(DType);
-    });
-    ret.shape_[0] = end - begin;
-    if (AutogradRuntime::Get()->IsTraining()) {
-      // fake a slice_axis op
-      ret.entry_.clear();
-      const nnvm::Op* op = nnvm::Op::Get("slice_axis");
-      nnvm::NodeAttrs attrs;
-      attrs.op = op;
-      attrs.dict.insert({"axis", "0"});
-      attrs.dict.insert({"begin", std::to_string(begin)});
-      attrs.dict.insert({"end", std::to_string(end)});
-      op->attr_parser(&attrs);
-      std::vector<NDArray> inputs, outputs;
-      inputs.emplace_back(*this);
-      outputs.emplace_back(std::move(ret));
-      AutogradRuntime::Get()->RecordImperativeFCompute(
-        op, attrs, &inputs, &outputs);
-      return outputs[0];
-    } else {
-      return ret;
-    }
-  } else if (storage_type() == kCSRStorage) {
-    // TODO(haibin) support auto_grad
-    TShape sliced_shape(Shape2(end-begin, shape()[1]));
-    using namespace csr;
-    NDArray ret(storage_type(), TShape(Shape2(end-begin, shape()[1])),
-                ctx(), true, dtype_, ptr_->aux_types,
-                {TShape(Shape1(0)), TShape(Shape1(0))});
-    NDArray src = *this;
-    // destination NDArray shares the same variable
-    ret.ptr_->var = var();
-
-    Engine::Get()->PushSync([src, ret, begin, end](RunContext ctx) {
-      NDArray dst = ret;
-      // create a new chunk for dst NDArray
-      NDArray::Chunk chunk = *src.ptr_;
-      // void indptr storage handle
-      chunk.aux_handles[kIndPtr] = Storage::Handle();
-      // shape for indptr is end - begin + 1
-      chunk.CheckAndAllocAuxData(kIndPtr, Shape1(end - begin + 1));
-      if (src.ctx().dev_mask() == cpu::kDevMask) {
-        MSHADOW_INT_TYPE_SWITCH(src.aux_type(kIndPtr), IType, {
-          MSHADOW_TYPE_SWITCH(src.dtype(), DType, {
-            // create new indptr
-            const IType* src_indptr = src.aux_data(kIndPtr).dptr<IType>();
-            IType* dst_indptr = static_cast<IType*>(chunk.aux_handles[kIndPtr].dptr);
-            op::SliceCsrIndPtrImpl(begin, end, ctx, src_indptr, dst_indptr);
-            // advance idx and values pointers (CPU implementation)
-            // TODO(haibin) refactor for GPU implementation later
-            IType offset = src_indptr[begin];
-            IType* idx = static_cast<IType*>(chunk.aux_handles[kIdx].dptr);
-            DType* values = static_cast<DType*>(chunk.shandle.dptr);
-            chunk.aux_handles[kIdx].dptr = idx + offset;
-            chunk.shandle.dptr = values + offset;
-            // update storage shape and aux shape (CPU implementation)
-            auto nnz = dst_indptr[end - begin];
-            chunk.aux_shapes[kIdx] = Shape1(nnz);
-            chunk.storage_shape = Shape1(nnz);
-            chunk.static_data = true;
-            chunk.skip_delete_var = true;
-            // update dst chunk
-            *dst.ptr_ = chunk;
-          });
-        });
-      } else {
-#if MXNET_USE_CUDA
-        LOG(FATAL) << "SliceEx CSR not implemented yet";
-#else
-        LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
-#endif
-      }
-    }, ctx(), {}, {var()},
-    FnProperty::kNormal, 0, PROFILER_MESSAGE_FUNCNAME);
-    return ret;
+  CHECK_EQ(storage_type(), kDefaultStorage);
+  NDArray ret = *this;
+  auto stype = storage_type();
+  size_t length = shape_.ProdShape(1, shape_.ndim());
+  MSHADOW_TYPE_SWITCH(ret.dtype(), DType, {
+    ret.byte_offset_ += begin * length * sizeof(DType);
+  });
+  ret.shape_[0] = end - begin;
+  if (AutogradRuntime::Get()->IsTraining()) {
+    // fake a slice_axis op
+    ret.entry_.clear();
+    const nnvm::Op* op = nnvm::Op::Get("slice_axis");
+    nnvm::NodeAttrs attrs;
+    attrs.op = op;
+    attrs.dict.insert({"axis", "0"});
+    attrs.dict.insert({"begin", std::to_string(begin)});
+    attrs.dict.insert({"end", std::to_string(end)});
+    op->attr_parser(&attrs);
+    std::vector<NDArray> inputs, outputs;
+    inputs.emplace_back(*this);
+    outputs.emplace_back(std::move(ret));
+    AutogradRuntime::Get()->RecordImperativeFCompute(
+      op, attrs, &inputs, &outputs);
+    return outputs[0];
   } else {
-    LOG(FATAL) << "Slice not yet implemented for storage " << storage_type();
+    return ret;
   }
-  return NDArray();
 }
 
 NDArray NDArray::At(index_t idx) const {
diff --git a/tests/python/unittest/test_io.py b/tests/python/unittest/test_io.py
index 4cbb4f19e40a..942be2c9d818 100644
--- a/tests/python/unittest/test_io.py
+++ b/tests/python/unittest/test_io.py
@@ -89,8 +89,30 @@ def test_NDArrayIter():
         else:
             assert(labelcount[i] == 100)
 
+def test_NDArrayIter_csr():
+    import scipy.sparse as sp
+    # creating toy data
+    num_rows = rnd.randint(5, 15)
+    num_cols = rnd.randint(1, 20)
+    batch_size = rnd.randint(1, num_rows)
+    shape = (num_rows, num_cols)
+    csr, _ = rand_sparse_ndarray(shape, 'csr')
+    dns = csr.asnumpy()
+
+    # make iterators
+    csr_iter = iter(mx.io.NDArrayIter(csr, csr, batch_size))
+    begin = 0
+    for batch in csr_iter:
+        expected = np.zeros((batch_size, num_cols))
+        end = begin + batch_size
+        expected[:num_rows - begin] = dns[begin:end]
+        if end > num_rows:
+            expected[num_rows - begin:] = dns[0:end - num_rows]
+        assert_almost_equal(batch.data[0].asnumpy(), expected)
+        begin += batch_size
+
 '''
-def test_libsvm():
+def test_LibSVMIter():
     #TODO(haibin) automatic the test instead of hard coded test
     cwd = os.getcwd()
     data_path = os.path.join(cwd, 'data.t')
@@ -128,4 +150,5 @@ def test_libsvm():
     test_NDArrayIter()
     test_MNISTIter()
     test_Cifar10Rec()
-    # test_libsvm()
+    # test_LibSVMIter()
+    test_NDArrayIter_csr()
diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py
index 55667225fd35..ba10ad830f23 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -165,8 +165,7 @@ def test_sparse_embedding():
 def test_sparse_slice():
     def check_csr_slice(shape, slice_input):
         storage_type = 'csr'
-        A, _ = rand_sparse_ndarray(shape, storage_type)
-        B = A._slice(1, shape[0] - 1) if slice_input else A
+        B, _ = rand_sparse_ndarray(shape, storage_type)
         np = B.asnumpy()
         begin = rnd.randint(0, B.shape[0] - 1)
         end = rnd.randint(begin + 1, B.shape[0])
diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh
index d0ee09312cd4..6b8f778e29ab 100755
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -99,15 +99,15 @@ if [ ${TASK} == "python_test" ]; then
     mkdir -p ${PWD}/data
 
    if [ ${TRAVIS_OS_NAME} == "osx" ]; then
-        python -m nose tests/python/unittest || exit -1
-        python3 -m nose tests/python/unittest || exit -1
+        python -m nose -v tests/python/unittest || exit -1
+        python3 -m nose -v tests/python/unittest || exit -1
         # make cython3
         # cython tests
         # export MXNET_ENFORCE_CYTHON=1
         # python3 -m nose tests/python/unittest || exit -1
-        python3 -m nose tests/python/train || exit -1
-        python -m nose tests/python/doctest || exit -1
-        python3 -m nose tests/python/doctest || exit -1
+        python3 -m nose -v tests/python/train || exit -1
+        python -m nose -v tests/python/doctest || exit -1
+        python3 -m nose -v tests/python/doctest || exit -1
     else
         nosetests -v tests/python/unittest || exit -1
         nosetests3 -v tests/python/unittest || exit -1