diff --git a/cmake/compiler_options.cmake b/cmake/compiler_options.cmake index c7c55f402..ef4a0dbcf 100644 --- a/cmake/compiler_options.cmake +++ b/cmake/compiler_options.cmake @@ -39,6 +39,9 @@ anakin_add_compile_option(-Wshadow) anakin_add_compile_option(-fpermissive) anakin_add_compile_option(-Wsign-promo) anakin_add_compile_option(-fdiagnostics-show-option) +if(USE_BM_PLACE) + anakin_add_compile_option(-lbmlib-asic) +endif() if(ENABLE_NOISY_WARNINGS) anakin_add_compile_option(-Wcast-align) diff --git a/cmake/find_modules.cmake b/cmake/find_modules.cmake index 6a94f32b0..a06ea879b 100644 --- a/cmake/find_modules.cmake +++ b/cmake/find_modules.cmake @@ -357,24 +357,22 @@ macro(anakin_find_openmp) endmacro() macro(anakin_find_bmlib) - find_path(BM_ROOT include/bmdnn/bmdnn_api.h ${CMAKE_SOURCE_DIR}/third-party/bm_lib/ $ENV{BM_ROOT}/) - find_path(BM_ROOT_INCLUDE_DNN bmdnn_api.h ${BM_ROOT}/include/bmdnn) - find_path(BM_ROOT_INCLUDE_RT bmruntime.h ${BM_ROOT}/include/bmruntime) - find_path(BM_ROOT_INCLUDE_LIB bmlib_runtime.h ${BM_ROOT}/include/bmlib) - if(BM_ROOT_INCLUDE_DNN AND BM_ROOT_INCLUDE_RT AND BM_ROOT_INCLUDE_LIB) - set(BM_FOUND TRUE) - endif() - if(BM_FOUND) - message(STATUS " Found bm_lib in ${BM_ROOT} ${BM_ROOT_INCLUDE_DNN} ${BM_ROOT_INCLUDE_RT} ${BM_ROOT_INCLUDE_LIB}") - include_directories(${BM_ROOT_INCLUDE_DNN}) - include_directories(${BM_ROOT_INCLUDE_RT}) - include_directories(${BM_ROOT_INCLUDE_LIB}) - set(BM_LIBRARIES "") - list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/app/libbmdnn_device.so) - list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/app/libbmlib_device.so) - list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/app/libbmrt.so) - list(APPEND ANAKIN_LINKER_LIBS ${BM_LIBRARIES}) - else() - message(FATAL_ERROR "Could not found bm_lib") - endif() + find_path(BM_ROOT include/bmlib/bmlib_runtime.h /usr/local/include/bm/ $ENV{BM_ROOT}/) + if(BM_ROOT) + set(BM_FOUND TRUE) + endif() + if(BM_FOUND) + message(STATUS " Found bm_lib in ${BM_ROOT}") + anakin_fetch_include_recursively(${BM_ROOT}/include) + set(BM_LIBRARIES "") + list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/bmlib.a) + list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/common-arm.a) + list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/common.a) + list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/fw-arm.a) + list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/fw-top.a) + list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/libbmlib-asic.so) + list(APPEND ANAKIN_LINKER_LIBS ${BM_LIBRARIES}) + else() + message(FATAL_ERROR "Could not found bm_lib") + endif() endmacro() diff --git a/framework/core/net/worker.cpp b/framework/core/net/worker.cpp index 0e8e436da..9d2b70b72 100644 --- a/framework/core/net/worker.cpp +++ b/framework/core/net/worker.cpp @@ -106,7 +106,8 @@ Worker::sync_prediction(std::vectorreshape(ins[i].valid_shape()); d_tensor_in_p->copy_from(ins[i]); d_tensor_in_p->set_seq_offset(ins[i].get_seq_offset()); - } + } +#ifdef NVIDIA_GPU Context ctx(0, 0, 0); saber::SaberTimer my_time; my_time.start(ctx); @@ -114,7 +115,7 @@ Worker::sync_prediction(std::vector ctx(0, 0, 0); saber::SaberTimer my_time; my_time.start(ctx); -#endif +#endif // ENABLE_OP_TIMER net.prediction(); my_time.end(ctx); @@ -127,7 +128,8 @@ Worker::sync_prediction(std::vector::type>> ret; ret.resize(_outputs_in_order.size()); @@ -226,6 +228,16 @@ template class Worker; template class Worker; #endif +#ifdef AMD_GPU +template class Worker; +template class Worker; +template class Worker; + +template class Worker; +template class Worker; +template class Worker; +#endif + #ifdef USE_X86_PLACE template class Worker; template class Worker; @@ -256,4 +268,3 @@ template class Worker; #endif } /* namespace */ - diff --git a/saber/CMakeLists.txt b/saber/CMakeLists.txt index c13f3df61..c31f7f941 100644 --- a/saber/CMakeLists.txt +++ b/saber/CMakeLists.txt @@ -92,6 +92,31 @@ if(USE_CUDA) ${WHOLE_ARCHIVE_END}) endif() +if(USE_BM_PLACE) + set(BIN_NAME bmkernel_bin) + set(LINK_CONFIG link/bm1682_ddr.lds) + add_custom_command(OUTPUT bm_kernel_tmp + COMMAND arm-none-eabi-gcc ${ANAKIN_SABER}/funcs/impl/bm/device/bmkernel_base.c -mcpu=arm926ej-s -mfpu=vfp -fno-short-enums -std=gnu99 -O2 -Wall -Werror -ffunction-sections -fdata-sections -nostdlib -DENABLE_PRINT -I${BM_ROOT}/include/config -I${BM_ROOT}/include/common -I${BM_ROOT}/include/c_model -I${BM_ROOT}/include/firmware_core -I${BM_ROOT}/include/bmlib -c -o ${BIN_NAME}.o + COMMAND arm-none-eabi-gcc -T ${BM_ROOT}/${LINK_CONFIG} -mcpu=arm926ej-s -mfpu=vfp -fno-short-enums -Wl,--check-sections -Wl,--gc-sections -Wl,--unresolved-symbols=report-all -Wl,--no-enum-size-warning -o ${BIN_NAME}.elf -Wl,--start-group -lc -lm ${BIN_NAME}.o ${BM_ROOT}/lib/device/fw-top.a ${BM_ROOT}/lib/device/fw-arm.a -Wl,--end-group + COMMAND arm-none-eabi-objcopy -O binary -R *.slow* ${BIN_NAME}.elf ${BIN_NAME}_itcm.bin + COMMAND hexdump -v -e '1/4 \"%08x\\n\"' ${BIN_NAME}_itcm.bin > ${BIN_NAME}_itcm.hex.sim + COMMAND arm-none-eabi-objcopy -O binary -j *.slow* ${BIN_NAME}.elf ${BIN_NAME}_ddr.bin + COMMAND hexdump -v -e '1/4 \"%08x\\n\"' ${BIN_NAME}_ddr.bin > ${BIN_NAME}_ddr.hex.sim + COMMAND printf "%x" 0xAABBCCDD > ${BIN_NAME}.bin + COMMAND printf "%x" 0x0 >> ${BIN_NAME}.bin + COMMAND printf "%x" 0x0 >> ${BIN_NAME}.bin + COMMAND printf "%x" 0x0 >> ${BIN_NAME}.bin + + COMMAND printf \"%x\" `wc -c < ${BIN_NAME}_itcm.hex.sim` >> ${BIN_NAME}.bin + + COMMAND cat ${BIN_NAME}_itcm.hex.sim >> ${BIN_NAME}.bin + COMMAND cat ${BIN_NAME}_ddr.hex.sim >> ${BIN_NAME}.bin + COMMAND cp ${ANAKIN_ROOT}/build/saber/bmkernel_bin.bin /var/tmp/${BIN_NAME}.bin + COMMENT "BM Kernel compilation..." + ) + add_custom_target(ANAKIN ALL DEPENDS bm_kernel_tmp) +endif() + # add saber library to static if(UNIX OR APPLE) if (USE_ARM_PLACE) diff --git a/saber/core/common.h b/saber/core/common.h index 73316bcf0..3a64b0d5d 100644 --- a/saber/core/common.h +++ b/saber/core/common.h @@ -179,11 +179,9 @@ const char* cudnn_get_errorstring(cudnnStatus_t status); #ifdef USE_BM_PLACE #include "bmlib_runtime.h" -#include "bmdnn_api.h" -#include "bmdnn_ext_api.h" #include "bmlib_utils.h" -#define BMDNN_CHECK(condition) \ +#define BM_CHECK(condition) \ do { \ bm_status_t error = condition; \ CHECK_EQ(error, BM_SUCCESS) << " Failed with error code:" << error; \ diff --git a/saber/core/context.h b/saber/core/context.h index 0cc032f2f..fc21bc755 100644 --- a/saber/core/context.h +++ b/saber/core/context.h @@ -35,7 +35,7 @@ class Context final{ * @param compute_stream_id */ Context(int device_id = 0, int data_stream_id = 0, int compute_stream_id = 0){ -#ifdef USE_BM +#ifdef USE_BM_PLACE if(std::is_same::value){ LOG(INFO) << "context init for BM"; int dev_count = 0; @@ -69,7 +69,7 @@ class Context final{ } Context(const Context& ctx){ -#ifdef USE_BM +#ifdef USE_BM_PLACE if(std::is_same::value){ LOG(INFO) << "context init for BM"; _bm_handle = ctx._bm_handle; @@ -98,7 +98,7 @@ class Context final{ this->_act_ids = ctx._act_ids; this->_mode = ctx._mode; #endif -#ifdef USE_BM +#ifdef USE_BM_PLACE this->_bm_handle = ctx._bm_handle; #endif return *this; @@ -109,7 +109,7 @@ class Context final{ comp_eq = comp_eq && (_device_id == right._device_id); comp_eq = comp_eq && (_data_stream_id == right._data_stream_id); comp_eq = comp_eq && (_compute_stream_id == right._compute_stream_id); -#ifdef USE_BM +#ifdef USE_BM_PLACE comp_eq = comp_eq && (_bm_handle == right._bm_handle); #endif return comp_eq; @@ -151,7 +151,7 @@ class Context final{ //std::vector get_act_ids(); #endif -#ifdef USE_BM +#ifdef USE_BM_PLACE bm_handle_t get_handle() { return _bm_handle; } @@ -170,7 +170,7 @@ class Context final{ PowerMode _mode{SABER_POWER_HIGH}; std::vector _act_ids{0}; #endif -#ifdef USE_BM +#ifdef USE_BM_PLACE bm_handle_t _bm_handle; #endif }; diff --git a/saber/core/data_traits.h b/saber/core/data_traits.h index c552ce258..342331caf 100644 --- a/saber/core/data_traits.h +++ b/saber/core/data_traits.h @@ -20,7 +20,6 @@ #ifdef USE_BM_PLACE #include "bmlib_runtime.h" -#include "bmdnn_api.h" #include "bmlib_utils.h" #endif diff --git a/saber/core/impl/bm/bm_impl.cpp b/saber/core/impl/bm/bm_impl.cpp index 1505acbf9..dd1a194c8 100644 --- a/saber/core/impl/bm/bm_impl.cpp +++ b/saber/core/impl/bm/bm_impl.cpp @@ -45,19 +45,19 @@ typedef TargetWrapper BM_API; // Init handle only once in the lifetime static bm_handle_t handle; -static bm_status_t init_handle{bmdnn_init(&handle)}; +static bm_status_t init_handle{bmlib_kernel_init(&handle)}; bm_handle_t BM_API::get_handle() { return handle; }; void BM_API::get_device_count(int& count) { - BMDNN_CHECK(bm_dev_getcount(&count)); + BM_CHECK(bm_dev_getcount(&count)); } void BM_API::set_device(int id) { //(bm_handle_t &handle, bool bmkernel_used, int id){ - //BMDNN_CHECK(bm_dev_request(&handle, 0, id)); + //BM_CHECK(bm_dev_request(&handle, 0, id)); } //TODO: Do we have this functionality? @@ -69,12 +69,12 @@ void BM_API::mem_alloc(TPtr* ptr, size_t n) { /* bm_device_mem_t *mem = reinterpret_cast(*ptr); */ // bm_device_mem_t *mem = new bm_device_mem_t(); bm_device_mem_t mem; - BMDNN_CHECK(bm_malloc_device_byte(handle, &mem, n)); + BM_CHECK(bm_malloc_device_byte(handle, &mem, n)); *ptr = TPtr(mem); } void BM_API::mem_free(TPtr ptr) { - if ((ptr != BM_MEM_NULL)) { + if (bm_mem_get_type(ptr) == BM_MEM_TYPE_SYSTEM) { bm_free_device(handle, ptr); // delete ptr; } @@ -82,9 +82,9 @@ void BM_API::mem_free(TPtr ptr) { void BM_API::mem_set(TPtr ptr, int value, size_t n) { //(bm_handle_t handle, const int value, bm_device_mem_t mem){ - BMDNN_CHECK(bm_memset_device(handle, value, ptr)); + BM_CHECK(bm_memset_device(handle, value, ptr)); //bm_device_mem_t* pmem = (struct bm_mem_desc *)(ptr); - //BMDNN_CHECK(bm_memset_device(handle, value, *pmem)); + //BM_CHECK(bm_memset_device(handle, value, *pmem)); } void BM_API::sync_memcpy(TPtr dst, size_t dst_offset, int dst_id, \ @@ -92,8 +92,8 @@ void BM_API::sync_memcpy(TPtr dst, size_t dst_offset, int dst_id, \ size_t count, __DtoD) { if(count==0) return; - //BMDNN_CHECK(bm_memcpy_d2d(handle, bm_mem_from_device(dst), dst_id, bm_mem_from_device(src), src_id, count)); - BMDNN_CHECK(bm_memcpy_d2d(handle, dst, dst_offset, src, src_offset, count)); + //BM_CHECK(bm_memcpy_d2d(handle, bm_mem_from_device(dst), dst_id, bm_mem_from_device(src), src_id, count)); + BM_CHECK(bm_memcpy_d2d(handle, dst, dst_offset, src, src_offset, count)); }; void BM_API::sync_memcpy(TPtr dst, size_t dst_offset, int dst_id, \ @@ -101,7 +101,7 @@ void BM_API::sync_memcpy(TPtr dst, size_t dst_offset, int dst_id, \ size_t count, __HtoD) { if(count==0) return; - BMDNN_CHECK(bm_memcpy_s2d(handle, dst+dst_offset, bm_mem_from_system(const_cast(src)+src_offset))); + BM_CHECK(bm_memcpy_s2d(handle, dst+dst_offset, bm_mem_from_system(const_cast(src)+src_offset))); #ifdef DEBUG @@ -118,7 +118,7 @@ void BM_API::sync_memcpy(void* dst, size_t dst_offset, int dst_id, \ if(count==0) return; // LOG(INFO)<<"host ptr = "<<(dst)<<",dst_offset = "< temp_tensor(tensor.valid_shape(),tensor.get_dtype()); + LOG(INFO) << "BM device tensor data:"; + Tensor temp_tensor(tensor.shape(), tensor.get_dtype()); + temp_tensor.set_shape(tensor.valid_shape()); temp_tensor.copy_from(tensor); print_tensor(temp_tensor); } @@ -41,7 +45,8 @@ void print_tensor_valid(Tensor& tensor, typename Tensor::API::stream template<> double tensor_mean_value(Tensor& tensor, typename Tensor::API::stream_t stream = NULL) { - Tensor temp_tensor(tensor.valid_shape(),tensor.get_dtype()); + Tensor temp_tensor(tensor.shape(), tensor.get_dtype()); + temp_tensor.set_shape(tensor.valid_shape()); temp_tensor.copy_from(tensor); return tensor_mean_value(temp_tensor); } @@ -49,7 +54,8 @@ double tensor_mean_value(Tensor& tensor, typename Tensor::API::strea template<> double tensor_mean_value_valid(Tensor& tensor, typename Tensor::API::stream_t stream = NULL) { - Tensor temp_tensor(tensor.valid_shape(),tensor.get_dtype()); + Tensor temp_tensor(tensor.shape(), tensor.get_dtype()); + temp_tensor.set_shape(tensor.valid_shape()); temp_tensor.copy_from(tensor); return tensor_mean_value(temp_tensor); } diff --git a/saber/core/target_wrapper.h b/saber/core/target_wrapper.h index b888a5e24..6991119c8 100644 --- a/saber/core/target_wrapper.h +++ b/saber/core/target_wrapper.h @@ -450,7 +450,7 @@ struct TargetWrapper { static bm_handle_t get_handle(); }; -#endif //USE_BM +#endif //USE_BM_PLACE #ifdef AMD_GPU diff --git a/saber/funcs/impl/bm/device/bm_common.h b/saber/funcs/impl/bm/device/bm_common.h new file mode 100644 index 000000000..b10230dad --- /dev/null +++ b/saber/funcs/impl/bm/device/bm_common.h @@ -0,0 +1,156 @@ +#ifndef ANAKIN_SABER_FUNCS_IMPL_BM_DEVICE_BM_COMMON_H +#define ANAKIN_SABER_FUNCS_IMPL_BM_DEVICE_BM_COMMON_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "bm_config.h" +#include "op_code.h" +#include "bm_memmap.h" +#include "firmware_core_kernel.h" +#ifdef __cplusplus +extern "C" { +#endif + +//#define DEBUG_MESSAGE +#ifdef DEBUG_MESSAGE +#define MSG_DBG(fmt, ...) printf("MSG: "fmt, ##__VA_ARGS__) +#else +#define MSG_DBG(fmt, ...) +#endif + + +#define INLINE inline + +#define UNUSED(x) (void)(x) + +#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) +#define ALIGN(x,a) __ALIGN_MASK(x,(__typeof__(x))(a)-1) + +#define ROUND_UP(A, B) ((A)/(B) + ((A) % (B) == 0 ? 0 : 1)) + +#define bm_min(x, y) ((x) < (y) ? (x) : (y)) +#define bm_max(x, y) ((x) > (y) ? (x) : (y)) + + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; + +typedef union { + int ival; + float fval; +} IF_VAL; + +typedef u32 tuple4_u32[4]; + +typedef struct tensor_info{ + u32 n,c,h,w; + u32 w_stride, n_stride, c_stride, h_stride; + u32 address; + u32 data_format; + u32 neuron_matrix; //0: neuron, 1: matrix + u32 matrix_col_magin; //the magin is not 0, when column_num%w_param!=0 +}TENSOR_INFO; + +#define FLOAT_SIZE 4 +#define INT8_SIZE 1 +#define FLOAT_BITWIDTH 32 +#define GET_U64(U32_H, U32_L) (((u64)(U32_H) << 32) | (u64)(U32_L)) + +typedef enum { + CAFFE_SUPPORT = 0, + TENSORFLOW_SUPPORT = 1 +} PLATFORM_SUPPORT; + +typedef enum { + NODECHIP_REG = 0, + HOST_REG = 1 +} REG_TYPE; + +typedef struct kernel_param{ + int g; + int oc; + int ic; + int h; + int w; +} bm_kernel_param_t; + +typedef struct bm_conv_param{ + int stride_h; + int stride_w; + int pad_h; + int pad_w; + int dilation_h; + int dilation_w; + bool result_add; +} bm_conv_param_t; + +typedef struct conv_secs_info{ + int ocsecs; + int icsecs; + int nsecs; + int hsecs; +} conv_secs_info_t; + +static INLINE int ceiling_func(int numerator, int denominator) +{ + return (numerator + denominator - 1) / denominator; +} + +static INLINE int ceiling_func_shift(int numerator, int shift) +{ + return (numerator + (1 << shift) - 1) >> shift; +} + +static int INLINE calc_offset(int *shape, int *offset) +{ + return ((offset[0] * shape[1] + offset[1]) * shape[2] + offset[2]) + * shape[3] + offset[3]; +} + +//All the size are in the units of bytes +static int INLINE get_index_csize_global(int h, int w, int index_bitwidth) +{ + int size = h * w * index_bitwidth; + //32 bit align + return (((size >> 5)) + ((size & 0x1f) != 0)) * FLOAT_SIZE; +} + +static int INLINE get_index_cstride_global(int h, int w, int index_bitwidth) +{ + int size = h * w * index_bitwidth; + //32 bit align + return (((size >> 5)) + + ((size & 0x1f) != 0)) * FLOAT_BITWIDTH / index_bitwidth; +} + +static int INLINE get_neuron_csize_local(int h, int w) +{ + int size = h * w; + //EU_NUM neurons align + return ALIGN(size,EU_NUM) * FLOAT_SIZE; +} + +static int INLINE addr_EU_align(int addr){ + addr = addr / FLOAT_SIZE; + return ALIGN( addr, EU_NUM ) * FLOAT_SIZE; +} + +static int INLINE get_cstride_local(int h, int w) +{ + int size = h * w; + //EU_NUM neurons align + return ALIGN(size,EU_NUM); +} + +#ifdef __cplusplus +} +#endif +#endif /* ANAKIN_SABER_FUNCS_IMPL_BM_DEVICE_BM_COMMON_H */ diff --git a/saber/funcs/impl/bm/device/bm_memmap.h b/saber/funcs/impl/bm/device/bm_memmap.h new file mode 100644 index 000000000..18d8185e6 --- /dev/null +++ b/saber/funcs/impl/bm/device/bm_memmap.h @@ -0,0 +1,61 @@ +#ifndef ANAKIN_SABER_FUNCS_IMPL_BM_DEVICE_BM_MEMMAP_H +#define ANAKIN_SABER_FUNCS_IMPL_BM_DEVICE_BM_MEMMAP_H + +#define ITCM_MEM_START_ADDR 0x00000000 +#define ITCM_MEM_SIZE 0x00080000 // 512KB +#define DTCM_MEM_START_ADDR 0x02000000 +#define DTCM_MEM_SIZE 0x00010000 // 64KB +#define SHARE_MEM_START_ADDR (DTCM_MEM_START_ADDR + DTCM_MEM_SIZE) +#define SHARE_MEM_SIZE 0x00010000 // 64KB + +#define LOCAL_MEM_ADDRWIDTH 18 +#define LOCAL_MEM_START_ADDR 0x04000000 +//#define LOCAL_MEM_SIZE (1< +/** + * bmkernel_func is the user entry to BMKERNEL just like "main" to some applications. + * + * \param args - Pointer to arguments that user sends from host. + * op - Flag to determine the operation type. + */ + +int bmkernel_func(void *args) +{ + bmkernel_api_base* param = (bmkernel_api_base *)args; + switch (param->op) { + case ACTIVATION: { + // bm_activation_fwd(param) + return 0; + } + case CONV: { + // bm_api_conv_forward* api = (bm_api_conv_forward *)param->opParam; + return 0; + } + default: { + printf("op %d is not supported by BM yet.\n", param->op); + return -1; + } + } +} diff --git a/saber/funcs/impl/bm/device/bmkernel_base.h b/saber/funcs/impl/bm/device/bmkernel_base.h new file mode 100644 index 000000000..5a192127c --- /dev/null +++ b/saber/funcs/impl/bm/device/bmkernel_base.h @@ -0,0 +1,47 @@ +#ifndef ANAKIN_SABER_FUNCS_IMPL_BM_DEVICE_BMKERNEL_BASE_H +#define ANAKIN_SABER_FUNCS_IMPL_BM_DEVICE_BMKERNEL_BASE_H +#ifdef __cplusplus +extern "C" { +#endif + +enum BmOpType { + ACTIVATION, + CONV +}; + +typedef struct { + unsigned long long ifmap_offset_global; + unsigned long long ofmap_offset_global; + unsigned long long weight_offset_global; + unsigned long long bias_offset_global; + int input_n; // note this is total input_n + int input_c; + int input_h; + int input_w; + int groups; + int output_c; + int kh; + int kw; + int dh; + int dw; + int pad_h; + int pad_w; + int stride_h; + int stride_w; + int using_bias; + int result_add; + int icsecs; + int ocsecs; + int nsecs; + int hsecs; +} __attribute__((packed)) bm_api_conv_forward; + +typedef struct { + enum BmOpType op; // Flag to determine the operation type. + void* opParam; +} __attribute__((packed)) bmkernel_api_base; + +#ifdef __cplusplus +} +#endif +#endif /* ANAKIN_SABER_FUNCS_IMPL_BM_DEVICE_BMKERNEL_BASE_H */ diff --git a/test/saber/test_saber_buffer.cpp b/test/saber/test_saber_buffer.cpp index f1afc3a9b..ac49f4f20 100644 --- a/test/saber/test_saber_buffer.cpp +++ b/test/saber/test_saber_buffer.cpp @@ -138,7 +138,7 @@ TEST(TestSaberFunc, test_saber_buffer) { test_buffer(); #endif -#ifdef USE_BM +#ifdef USE_BM_PLACE LOG(INFO) << "test BM FP32 buffer"; //test_buffer(); #endif diff --git a/test/saber/test_saber_context.cpp b/test/saber/test_saber_context.cpp index db2e7a6f9..b7c41b768 100644 --- a/test/saber/test_saber_context.cpp +++ b/test/saber/test_saber_context.cpp @@ -57,12 +57,12 @@ TEST(TestSaberFunc, test_arm_context) { } #endif //USE_ARM_PLACE -#ifdef USE_BM +#ifdef USE_BM_PLACE TEST(TestSaberFunc, test_BM_context) { Context ctx; CHECK_NOTNULL(ctx.get_handle()) << "Failed to get BM handle"; } -#endif //USE_BM +#endif //USE_BM_PLACE int main(int argc, const char** argv) { // initial logger diff --git a/test/saber/test_saber_conv.cpp b/test/saber/test_saber_conv.cpp index 52411fbb3..6fa1bede6 100644 --- a/test/saber/test_saber_conv.cpp +++ b/test/saber/test_saber_conv.cpp @@ -11,7 +11,6 @@ using namespace anakin::saber; #define CHECK_RESULT //#define CHECK_SPEED -#if 0 #ifdef USE_BM_PLACE TEST(TestSaberFunc, test_saber_conv_results_bm) { Env::env_init(); @@ -70,7 +69,6 @@ TEST(TestSaberFunc, test_saber_conv_results_bm) { } } #endif -#endif TEST(TestSaberFunc, test_saber_conv_results) { #ifdef USE_CUDA diff --git a/test/saber/test_saber_tensor.cpp b/test/saber/test_saber_tensor.cpp index b4917ec31..f1114f415 100644 --- a/test/saber/test_saber_tensor.cpp +++ b/test/saber/test_saber_tensor.cpp @@ -622,7 +622,6 @@ TEST(TestSaberFunc, test_saber_tensor_shape) { #ifdef USE_BM Env::env_init(); - Env::env_init(); LOG(INFO) << "test BM tensor shape API"; test_tensor_shape(); #endif //USE_BM @@ -745,7 +744,7 @@ TEST(TestSaberFunc, test_tensor_reshape_realloc) { Env::env_init(); Env::env_init(); LOG(INFO) << "test BM FP32 tensor reshape realloc"; - //tensor_reshape_realloc(); + tensor_reshape_realloc(); #endif //USE_BM } #endif