diff --git a/python/mxnet/sparse_ndarray.py b/python/mxnet/sparse_ndarray.py
index 5faddd979078..2923a4c25292 100644
--- a/python/mxnet/sparse_ndarray.py
+++ b/python/mxnet/sparse_ndarray.py
@@ -184,7 +184,9 @@ def __getitem__(self, key):
             if key.step is not None:
                 raise ValueError('NDArray only supports continuous slicing on axis 0')
             if key.start is not None or key.stop is not None:
-                return self._slice(key.start, key.stop)
+                begin = key.start if key.start else 0
+                end = key.stop if key.stop else self.shape[0]
+                return ndarray.slice(self, begin=begin, end=end)
             else:
                 return self
         if isinstance(key, tuple):
@@ -193,44 +195,6 @@ def __getitem__(self, key):
     def _sync_copyfrom(self, source_array):
         raise Exception('Not implemented for SparseND yet!')
 
-    def _slice(self, start, stop):
-        """Returns a read-only SparseNDArray slice that shares memory with current one.
-        To create a writable slice, please use ``mx.nd.slice`` instead. Currently only
-        `csr` storage type is supported.
-
-        Parameters
-        ----------
-        start : int
-            Starting index of slice.
-        stop : int
-            Finishing index of slice.
-
-        Example
-        ----------
-        >>> indptr = np.array([0, 2, 3, 6])
-        >>> indices = np.array([0, 2, 2, 0, 1, 2])
-        >>> data = np.array([1, 2, 3, 4, 5, 6])
-        >>> a = mx.sparse_nd.csr(data, indptr, indices, (3, 3))
-        >>> a.asnumpy()
-        array([[1, 0, 2],
-               [0, 0, 3],
-               [4, 5, 6]])
-
-        >>> a[1:2].asnumpy()
-        array([[0, 0, 3]])
-
-        """
-        stype = self.storage_type
-        assert(stype == 'csr'), "_slice for " + str(stype) + " not implemented yet"
-        warnings.warn('slicing SparseNDArray is not efficient', RuntimeWarning)
-        handle = NDArrayHandle()
-        start = mx_uint(start) if start else mx_uint(0)
-        stop = mx_uint(stop) if stop else mx_uint(self.shape[0])
-        check_call(_LIB.MXNDArraySlice(
-            self.handle, start, stop, ctypes.byref(handle)))
-        ret = _ndarray_cls(handle=handle, writable=False)
-        return ret
-
     def _at(self, idx):
         raise Exception('at operator for SparseND is not supported.')
 
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index c894f27c25b7..b08e6c659c1f 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -66,91 +66,33 @@ NDArray NDArray::Slice(index_t begin, index_t end) const {
   using namespace mshadow;
   CHECK(!is_none()) << "NDArray is not initialized";
   CHECK_GE(shape_[0], end) << "Slice end index out of range";
-  CHECK_NE(storage_type(), kUndefinedStorage);
-  if (storage_type() == kDefaultStorage) {
-    NDArray ret = *this;
-    auto stype = storage_type();
-    size_t length = shape_.ProdShape(1, shape_.ndim());
-    MSHADOW_TYPE_SWITCH(ret.dtype(), DType, {
-      ret.byte_offset_ += begin * length * sizeof(DType);
-    });
-    ret.shape_[0] = end - begin;
-    if (AutogradRuntime::Get()->IsTraining()) {
-      // fake a slice_axis op
-      ret.entry_.clear();
-      const nnvm::Op* op = nnvm::Op::Get("slice_axis");
-      nnvm::NodeAttrs attrs;
-      attrs.op = op;
-      attrs.dict.insert({"axis", "0"});
-      attrs.dict.insert({"begin", std::to_string(begin)});
-      attrs.dict.insert({"end", std::to_string(end)});
-      op->attr_parser(&attrs);
-      std::vector<NDArray> inputs, outputs;
-      inputs.emplace_back(*this);
-      outputs.emplace_back(std::move(ret));
-      AutogradRuntime::Get()->RecordImperativeFCompute(
-        op, attrs, &inputs, &outputs);
-      return outputs[0];
-    } else {
-      return ret;
-    }
-  } else if (storage_type() == kCSRStorage) {
-    // TODO(haibin) support auto_grad
-    TShape sliced_shape(Shape2(end-begin, shape()[1]));
-    using namespace csr;
-    NDArray ret(storage_type(), TShape(Shape2(end-begin, shape()[1])),
-                ctx(), true, dtype_, ptr_->aux_types,
-                {TShape(Shape1(0)), TShape(Shape1(0))});
-    NDArray src = *this;
-    // destination NDArray shares the same variable
-    ret.ptr_->var = var();
-
-    Engine::Get()->PushSync([src, ret, begin, end](RunContext ctx) {
-      NDArray dst = ret;
-      // create a new chunk for dst NDArray
-      NDArray::Chunk chunk = *src.ptr_;
-      // void indptr storage handle
-      chunk.aux_handles[kIndPtr] = Storage::Handle();
-      // shape for indptr is end - begin + 1
-      chunk.CheckAndAllocAuxData(kIndPtr, Shape1(end - begin + 1));
-      if (src.ctx().dev_mask() == cpu::kDevMask) {
-        MSHADOW_INT_TYPE_SWITCH(src.aux_type(kIndPtr), IType, {
-          MSHADOW_TYPE_SWITCH(src.dtype(), DType, {
-            // create new indptr
-            const IType* src_indptr = src.aux_data(kIndPtr).dptr<IType>();
-            IType* dst_indptr = static_cast<IType*>(chunk.aux_handles[kIndPtr].dptr);
-            op::SliceCsrIndPtrImpl(begin, end, ctx, src_indptr, dst_indptr);
-            // advance idx and values pointers (CPU implementation)
-            // TODO(haibin) refactor for GPU implementation later
-            IType offset = src_indptr[begin];
-            IType* idx = static_cast<IType*>(chunk.aux_handles[kIdx].dptr);
-            DType* values = static_cast<DType*>(chunk.shandle.dptr);
-            chunk.aux_handles[kIdx].dptr = idx + offset;
-            chunk.shandle.dptr = values + offset;
-            // update storage shape and aux shape (CPU implementation)
-            auto nnz = dst_indptr[end - begin];
-            chunk.aux_shapes[kIdx] = Shape1(nnz);
-            chunk.storage_shape = Shape1(nnz);
-            chunk.static_data = true;
-            chunk.skip_delete_var = true;
-            // update dst chunk
-            *dst.ptr_ = chunk;
-          });
-        });
-      } else {
-#if MXNET_USE_CUDA
-        LOG(FATAL) << "SliceEx CSR not implemented yet";
-#else
-        LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
-#endif
-      }
-    }, ctx(), {}, {var()},
-    FnProperty::kNormal, 0, PROFILER_MESSAGE_FUNCNAME);
-    return ret;
+  CHECK_EQ(storage_type(), kDefaultStorage);
+  NDArray ret = *this;
+  auto stype = storage_type();
+  size_t length = shape_.ProdShape(1, shape_.ndim());
+  MSHADOW_TYPE_SWITCH(ret.dtype(), DType, {
+    ret.byte_offset_ += begin * length * sizeof(DType);
+  });
+  ret.shape_[0] = end - begin;
+  if (AutogradRuntime::Get()->IsTraining()) {
+    // fake a slice_axis op
+    ret.entry_.clear();
+    const nnvm::Op* op = nnvm::Op::Get("slice_axis");
+    nnvm::NodeAttrs attrs;
+    attrs.op = op;
+    attrs.dict.insert({"axis", "0"});
+    attrs.dict.insert({"begin", std::to_string(begin)});
+    attrs.dict.insert({"end", std::to_string(end)});
+    op->attr_parser(&attrs);
+    std::vector<NDArray> inputs, outputs;
+    inputs.emplace_back(*this);
+    outputs.emplace_back(std::move(ret));
+    AutogradRuntime::Get()->RecordImperativeFCompute(
+      op, attrs, &inputs, &outputs);
+    return outputs[0];
   } else {
-    LOG(FATAL) << "Slice not yet implemented for storage " << storage_type();
+    return ret;
   }
-  return NDArray();
 }
 
 NDArray NDArray::At(index_t idx) const {
diff --git a/tests/python/unittest/test_io.py b/tests/python/unittest/test_io.py
index 4cbb4f19e40a..942be2c9d818 100644
--- a/tests/python/unittest/test_io.py
+++ b/tests/python/unittest/test_io.py
@@ -89,8 +89,30 @@ def test_NDArrayIter():
         else:
             assert(labelcount[i] == 100)
 
+def test_NDArrayIter_csr():
+    import scipy.sparse as sp
+    # creating toy data
+    num_rows = rnd.randint(5, 15)
+    num_cols = rnd.randint(1, 20)
+    batch_size = rnd.randint(1, num_rows)
+    shape = (num_rows, num_cols)
+    csr, _ = rand_sparse_ndarray(shape, 'csr')
+    dns = csr.asnumpy()
+
+    # make iterators
+    csr_iter = iter(mx.io.NDArrayIter(csr, csr, batch_size))
+    begin = 0
+    for batch in csr_iter:
+        expected = np.zeros((batch_size, num_cols))
+        end = begin + batch_size
+        expected[:num_rows - begin] = dns[begin:end]
+        if end > num_rows:
+            expected[num_rows - begin:] = dns[0:end - num_rows]
+        assert_almost_equal(batch.data[0].asnumpy(), expected)
+        begin += batch_size
+
 '''
-def test_libsvm():
+def test_LibSVMIter():
     #TODO(haibin) automatic the test instead of hard coded test
     cwd = os.getcwd()
     data_path = os.path.join(cwd, 'data.t')
@@ -128,4 +150,5 @@ def test_libsvm():
     test_NDArrayIter()
     test_MNISTIter()
     test_Cifar10Rec()
-    # test_libsvm()
+    # test_LibSVMIter()
+    test_NDArrayIter_csr()
diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py
index 55667225fd35..ba10ad830f23 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -165,8 +165,7 @@ def test_sparse_embedding():
 def test_sparse_slice():
     def check_csr_slice(shape, slice_input):
         storage_type = 'csr'
-        A, _ = rand_sparse_ndarray(shape, storage_type)
-        B = A._slice(1, shape[0] - 1) if slice_input else A
+        B, _ = rand_sparse_ndarray(shape, storage_type)
         np = B.asnumpy()
         begin = rnd.randint(0, B.shape[0] - 1)
         end = rnd.randint(begin + 1, B.shape[0])
diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh
index d0ee09312cd4..6b8f778e29ab 100755
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -99,15 +99,15 @@ if [ ${TASK} == "python_test" ]; then
     mkdir -p ${PWD}/data
 
    if [ ${TRAVIS_OS_NAME} == "osx" ]; then
-        python -m nose tests/python/unittest || exit -1
-        python3 -m nose tests/python/unittest || exit -1
+        python -m nose -v tests/python/unittest || exit -1
+        python3 -m nose -v tests/python/unittest || exit -1
         # make cython3
         # cython tests
         # export MXNET_ENFORCE_CYTHON=1
         # python3 -m nose tests/python/unittest || exit -1
-        python3 -m nose tests/python/train || exit -1
-        python -m nose tests/python/doctest || exit -1
-        python3 -m nose tests/python/doctest || exit -1
+        python3 -m nose -v tests/python/train || exit -1
+        python -m nose -v tests/python/doctest || exit -1
+        python3 -m nose -v tests/python/doctest || exit -1
     else
         nosetests -v tests/python/unittest || exit -1
         nosetests3 -v tests/python/unittest || exit -1