refactor conversion api (apache#25)
* add multiple stage test

* refactor GetFCompute

* adjust Jenkins Python version; pass all unit tests

* register cast storage type op

* refactor conversion interface
eric-haibin-lin authored Apr 24, 2017
1 parent 69aecce commit e65e577
Showing 24 changed files with 332 additions and 283 deletions.
6 changes: 3 additions & 3 deletions Jenkinsfile
@@ -201,17 +201,17 @@ del /Q *.7z
// Python unittest for CPU
def python_ut(docker_type) {
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests --with-timer --verbose tests/python/unittest"
sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-2.7 --with-timer --verbose tests/python/unittest"
// sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-3.4 --with-timer --verbose tests/python/unittest"
sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests --with-timer --verbose tests/python/train"
sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-2.7 --with-timer --verbose tests/python/train"
}
}

// GPU test has two parts. 1) run unittest on GPU, 2) compare the results on
// both CPU and GPU
def python_gpu_ut(docker_type) {
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests --with-timer --verbose tests/python/gpu"
sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-2.7 --with-timer --verbose tests/python/gpu"
// sh "${docker_run} ${docker_type} PYTHONPATH=./python/ nosetests-3.4 --with-timer --verbose tests/python/gpu"
}
}
5 changes: 0 additions & 5 deletions include/mxnet/c_api.h
@@ -272,11 +272,6 @@ MXNET_DLL int MXNDArrayCreateSparseEx(int storage_type,
int *aux_types,
NDArrayHandle *out);

// TEMPORARY API FOR TESTING PURPOSE. Conversion should be an op instead
MXNET_DLL int MXNDArrayConvert(NDArrayHandle in,
int storage_type,
NDArrayHandle *out);

/*!
* \brief create a NDArray handle that is loaded from raw bytes.
* \param buf the head of the raw bytes
48 changes: 5 additions & 43 deletions include/mxnet/ndarray.h
@@ -98,7 +98,7 @@ class NDArray {
}
/*! \brief constructor for NDArray with chunk type
*/
NDArray(NDArrayStorageType storage_type, const TShape &shape, Context ctx,
NDArray(const NDArrayStorageType storage_type, const TShape &shape, Context ctx,
bool delay_alloc = true, int dtype = mshadow::default_type_flag,
std::vector<int> aux_types = {})
: shape_(shape), offset_(0), dtype_(dtype), entry_({nullptr, 0, 0}) {
@@ -127,6 +127,7 @@ class NDArray {
Mkl_mem_ = std::make_shared<MKLMemHolder>();
#endif
}
// TODO this constructor should be removed
NDArray(NDArray data, const std::vector<NDArray> aux_data, Context ctx,
NDArrayStorageType storage_type, const TShape &shape)
: ptr_(std::make_shared<Chunk>(data, aux_data, ctx, storage_type)), shape_(shape),
@@ -137,11 +138,6 @@
CHECK(aux_data.size() == 1) << "Multiple aux_data not supported yet";
}

template<typename xpu>
NDArray ConvertTo(NDArrayStorageType storage_type, mshadow::Stream<xpu> *s) const {
CHECK_EQ(storage_type, kDefaultStorage) << "other storage type not supported yet";
return ToDefault<xpu>(s);
}
/*!
* \return the shape of current NDArray.
*/
@@ -487,44 +483,10 @@ class NDArray {

private:
friend class autograd::AutogradRuntime;
// Make a copy of the ndarray in dense format
template<typename xpu>
NDArray ToDefault(mshadow::Stream<xpu>* s) const {
NDArray result(shape_, ptr_->ctx, false, dtype());
this->WaitToRead();
if (storage_type() == kDefaultStorage) {
MSHADOW_TYPE_SWITCH(dtype(), DType, {
mshadow::Copy(result.data().FlatTo1D<xpu, DType>(), data().FlatTo1D<xpu, DType>());
});
return result;
}
CHECK(storage_type() == kRowSparseStorage);
MSHADOW_TYPE_SWITCH(dtype(), DType, {
MSHADOW_TYPE_SWITCH(aux_type(rowsparse::kIdx), AuxType, {
// Fill in zeros
result.data().FlatTo1D<xpu, DType>(s) = 0;
result.data().shape_ = shape_;
// data() is not empty
if (storage_shape().ndim() != 0) {
// Copy over
auto in_data = data().FlatTo2D<xpu, DType>(s);
auto out_data = result.data().FlatTo2D<xpu, DType>(s);
auto num_rows = aux_shape(rowsparse::kIdx)[0];
auto in_idx = aux_data(rowsparse::kIdx).FlatTo1D<xpu, AuxType>(s);
for (size_t i = 0; i < num_rows; i += 1) {
mshadow::Copy(out_data[in_idx[i]], in_data[i], s);
}
}
});
});
return result;
}

/*! \brief the real data chunk that backs NDArray */
// shandle is used to store the actual values in the NDArray
// aux_handles store the aux data(such as indices) if it's needed by non-default storage.
struct Chunk {
// TODO(haibin) Also specify the capacity & size of the chunk, we don't want to resize it
// every time a new element is added to a non default storage
/*! \brief storage handle from storage engine.
for non-default storage, shandle stores the data(value) array.
@@ -551,7 +513,7 @@ class NDArray {
// context of data
Context ctx;
// The shape of the chunk data.
// This might not be the same shape as the NDArray, since the chunk may be sparse.
// This might not be the same shape as the NDArray, since the storage may be sparse.
TShape storage_shape;
// The shape of aux data. The default value for the shape is 0.
std::vector<TShape> aux_shapes;
@@ -660,7 +622,7 @@ class NDArray {
CHECK_EQ(storage_type, kRowSparseStorage) << "Not yet implemented";
// calculate size, perform allocation
if (delay_alloc) {
// For row sparse chunk, aux_shape indicates the number of rows to allocate
// For row sparse storage, aux_shape indicates the number of rows to allocate
auto aux_shape = aux_shapes[0];
CHECK_EQ(aux_shape.ndim(), 1);
auto num_rows = aux_shape[0];
@@ -670,7 +632,7 @@ class NDArray {
shandle = Storage::Get()->Alloc(dbytes, ctx);
aux_handles.push_back(Storage::Get()->Alloc(aux_bytes, ctx));
delay_alloc = false;
// Initialize aux_shape and shape
// Initialize shapes
this->aux_shapes = aux_shapes;
storage_shape = shape;
storage_shape[0] = num_rows;
2 changes: 0 additions & 2 deletions include/mxnet/op_attr_types.h
@@ -66,8 +66,6 @@ using FCompute = std::function<void (const nnvm::NodeAttrs& attrs,
*
* \note Register under "FComputeEx<cpu, `storage_type`>" and "FComputeEx<gpu, `storage_type`>"
* e.g FComputeEx<cpu, row_sparse>
* TODO should probably change const std::vector<NDArray>& outputs to
std::vector<NDArray> *outputs
*/
using FComputeEx = std::function<void (const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
2 changes: 1 addition & 1 deletion nnvm
53 changes: 24 additions & 29 deletions python/mxnet/sparse_ndarray.py
@@ -92,11 +92,7 @@ class SparseNDArray(NDArray):
''' sparse ndarray '''
__slots__ = []

def __repr__(self):
"""Return a string representation of the array"""
# TODO(haibin) also print shape info
#shape_info = 'x'.join(['%d' % x for x in self.shape])
return '<%s>' % (self.__class__.__name__)
#def __repr__(self):
def __reduce__(self):
return (SparseNDArray, (None,), self.__getstate__())
def __add__(self, other):
@@ -160,26 +156,22 @@ def _sync_copyfrom(self, source_array):
def _slice(self, start, stop):
raise Exception('Not implemented for SparseND yet!')
def _at(self, idx):
raise Exception('Not implemented for SparseND yet!')
raise Exception('at operator for SparseND is not supported.')
def reshape(self, shape):
raise Exception('Not implemented for SparseND yet!')
def broadcast_to(self, shape):
raise Exception('Not implemented for SparseND yet!')
#def wait_to_read(self):
#inherited from parent
#@property
#def shape(self):
#inherited from parent

@property
def size(self):
raise Exception('Not implemented for SparseND yet!')
@property
def context(self):
raise Exception('Not implemented for SparseND yet!')
@property
def dtype(self):
raise Exception('Not implemented for SparseND yet!')
#@property
#def context(self):
#@property
#def dtype(self):
@property
# pylint: disable= invalid-name, undefined-variable
def T(self):
@@ -202,16 +194,16 @@ def as_in_context(self, context):
def to_dense(self):
return to_dense(self)

#TODO(haibin) also add aux_types. Not tested yet.
#We need a to_dense method to test it
def csr(values, indptr, idx, shape, ctx=Context.default_ctx, dtype=mx_real_t):
#TODO We need a to_dense method to test it
def csr(values, indptr, idx, shape, ctx=Context.default_ctx, dtype=mx_real_t, aux_types=None):
''' constructor '''
hdl = NDArrayHandle()
#TODO currently only supports NDArray input
assert(isinstance(values, NDArray))
assert(isinstance(index, NDArray))
indices = c_array(NDArrayHandle, [idx.handle, indptr.handle])
num_aux = mx_uint(2)
# TODO create an empty handle with specified types, then assign values
check_call(_LIB.MXNDArrayCreateSparse(
values.handle, num_aux, indices,
c_array(mx_uint, shape),
@@ -226,13 +218,14 @@ def csr(values, indptr, idx, shape, ctx=Context.default_ctx, dtype=mx_real_t):

# pylint: enable= no-member
#TODO(haibin) also specify aux_types
def row_sparse(values, index, shape, ctx=Context.default_ctx, dtype=mx_real_t):
def row_sparse(values, index, shape, ctx=Context.default_ctx, dtype=mx_real_t, aux_types=None):
''' constructor '''
hdl = NDArrayHandle()
assert(isinstance(values, NDArray))
assert(isinstance(index, NDArray))
indices = c_array(NDArrayHandle, [index.handle])
num_aux = mx_uint(1)
# TODO create an empty handle with specified types, then assign values
check_call(_LIB.MXNDArrayCreateSparse(
values.handle, num_aux, indices,
c_array(mx_uint, shape),
Expand All @@ -245,26 +238,27 @@ def row_sparse(values, index, shape, ctx=Context.default_ctx, dtype=mx_real_t):
ctypes.byref(hdl)))
return SparseNDArray(hdl)

def array(values, index_list, storage_type, shape, ctx=None, dtype=mx_real_t):
def array(values, index_list, storage_type, shape, ctx=None, dtype=mx_real_t, aux_types=None):
# TODO check input array types. Assume NDArray class for now
# TODO support other types
assert(storage_type == 'row_sparse')
if isinstance(shape, int):
shape = (shape, )
if ctx is None:
ctx = Context.default_ctx
arr = row_sparse(values, index_list[0], shape, ctx=ctx, dtype=dtype)
arr = row_sparse(values, index_list[0], shape, ctx=ctx, dtype=dtype, aux_types=aux_types)
return arr

# Temporary function for testing purpose
def to_dense(source):
hdl = NDArrayHandle()
return ndarray.cast_storage(source, storage_type=1)
'''hdl = NDArrayHandle()
check_call(_LIB.MXNDArrayConvert(
source.handle, _STORAGE_TYPE_STR_TO_ID['default'],
ctypes.byref(hdl)))
return ndarray.NDArray(handle=hdl, writable=True)
return ndarray.NDArray(handle=hdl, writable=True)'''

def zeros(shape, storage_type, ctx=None, dtype=mx_real_t):
def zeros(shape, storage_type, ctx=None, dtype=mx_real_t, aux_types=None):
"""Return a new array of given shape and type, filled with zeros.
Parameters
@@ -294,12 +288,13 @@ def zeros(shape, storage_type, ctx=None, dtype=mx_real_t):
"""
if ctx is None:
ctx = Context.default_ctx
if storage_type == 'row_sparse':
# pylint: disable= no-member, protected-access
out = SparseNDArray(_new_alloc_handle(storage_type, shape, ctx,
aux_types=_STORAGE_AUX_TYPES['row_sparse']))
return _internal._zeros(shape=shape, ctx=ctx, dtype=dtype, out=out)
return _internal._zeros(shape=shape, ctx=ctx, dtype=dtype)
assert(storage_type == 'row_sparse')
if aux_types == None:
aux_types = _STORAGE_AUX_TYPES['row_sparse']
# pylint: disable= no-member, protected-access
out = SparseNDArray(_new_alloc_handle(storage_type, shape, ctx,
aux_types=aux_types))
return _internal._zeros(shape=shape, ctx=ctx, dtype=dtype, out=out)
# pylint: enable= no-member, protected-access

_STORAGE_TYPE_TO_ND_CLASS = {
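For orientation, the refactored Python surface above can be exercised roughly as follows. This is a minimal sketch, not part of the commit: it assumes the module is importable as mxnet.sparse_ndarray with the signatures shown in this diff, and the values, shapes, and variable names are illustrative only.

    import mxnet as mx
    from mxnet import sparse_ndarray as sparse_nd

    # Two non-zero rows (row ids 0 and 2) of a 3x2 row_sparse array.
    # The index dtype may need to match _STORAGE_AUX_TYPES['row_sparse'].
    values = mx.nd.array([[1, 2], [3, 4]])
    index = mx.nd.array([0, 2])

    a = sparse_nd.array(values, [index], 'row_sparse', (3, 2))
    dense = sparse_nd.to_dense(a)              # routed through ndarray.cast_storage
    z = sparse_nd.zeros((3, 2), 'row_sparse')  # aux_types falls back to the row_sparse defaults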
12 changes: 1 addition & 11 deletions src/c_api/c_api.cc
@@ -131,6 +131,7 @@ int MXNDArrayCreate(const mx_uint *shape,
API_END();
}

// TODO remove this API
int MXNDArrayCreateSparse(NDArrayHandle data,
mx_uint num_aux,
NDArrayHandle *aux_vec,
@@ -155,16 +156,6 @@ int MXNDArrayCreateSparse(NDArrayHandle data,
API_END();
}

// TODO(haibin) Should also consider context
int MXNDArrayConvert(NDArrayHandle in,
int storage_type,
NDArrayHandle *out) {
API_BEGIN();
NDArray* nd = reinterpret_cast<NDArray*>(in);
*out = new NDArray(nd->ConvertTo<cpu>(static_cast<NDArrayStorageType>(storage_type), nullptr));
API_END();
}

int MXNDArrayCreateEx(const mx_uint *shape,
mx_uint ndim,
int dev_type,
@@ -363,7 +354,6 @@ int MXNDArrayGetStorageType(NDArrayHandle handle,
int *out_storage_type) {
API_BEGIN();
NDArray *arr = static_cast<NDArray*>(handle);
// Check is_none?
if (!arr->is_none()) {
*out_storage_type = arr->storage_type();
} else {
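With MXNDArrayConvert removed from the C API, storage conversion no longer has a dedicated entry point; it goes through the cast storage operator registered in this commit, like any other op. A minimal sketch of the replacement call path from the Python frontend, assuming ndarray.cast_storage accepts the integer storage type id used in to_dense above (1 meaning default/dense storage); the helper name is hypothetical:

    from mxnet import ndarray

    def to_dense_via_op(source):
        # Dispatches to the cast storage op instead of the old
        # MXNDArrayConvert C API call (see the removed ctypes path above).
        return ndarray.cast_storage(source, storage_type=1)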
2 changes: 1 addition & 1 deletion src/c_api/c_api_common.h
@@ -58,7 +58,7 @@ struct MXAPIThreadLocalEntry {
std::vector<TShape> arg_shapes, out_shapes, aux_shapes;
/*! \brief result holder for returning type flags */
std::vector<int> arg_types, out_types, aux_types;
/*! \brief result holder for returning chunk types */
/*! \brief result holder for returning storage types */
std::vector<int> arg_storage_types, out_storage_types, aux_storage_types;
/*! \brief result holder for returning shape dimensions */
std::vector<mx_uint> arg_shape_ndim, out_shape_ndim, aux_shape_ndim;
(remaining changed files not shown)