This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Bitmain: use BM Kernel for op implementation #464

Status: Open. Wants to merge 355 commits into base branch dev_v2.
355 commits
e532873
Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …
SophonTPU Jun 28, 2018
56271d4
Fix d2d mem copy
Jun 28, 2018
80654f2
Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …
SophonTPU Jun 28, 2018
c5a30a7
Add batch norm operation
guangzhixie Jun 28, 2018
b5cdc73
Implement batch norm for BM
guangzhixie Jun 28, 2018
5c6ec7f
Use template specifications instead of macro
guangzhixie Jun 28, 2018
22a8b32
Merge pull request #151 from guangzhixie/bitmain
LittleMaer Jun 28, 2018
597fc4c
conv test
SophonTPU Jun 28, 2018
a941292
Add test for batch norm
guangzhixie Jun 28, 2018
64325fe
Use specialization
guangzhixie Jun 28, 2018
da713a9
Update batch norm test for BM
guangzhixie Jun 28, 2018
efd4524
Update batch norm test for BM
guangzhixie Jun 28, 2018
b500c82
Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …
SophonTPU Jun 28, 2018
44e1395
Update BM batch norm test
guangzhixie Jun 28, 2018
609bcd8
Use vender scale for test
guangzhixie Jun 28, 2018
16b6f6e
Update BM scale
guangzhixie Jun 28, 2018
20f7ed0
update BM bias input
guangzhixie Jun 28, 2018
b729f54
BM scale test with bias
guangzhixie Jun 28, 2018
28a35e9
fix bias in scale
hongliu104 Jun 29, 2018
7291e21
Update BM scale ops
guangzhixie Jun 29, 2018
c89d92c
cleanup
guangzhixie Jun 29, 2018
d8f4d47
Update BM scale test
guangzhixie Jun 29, 2018
d3cef11
cleanup
guangzhixie Jun 29, 2018
ec90295
flush before next operation
guangzhixie Jun 29, 2018
c38bf09
check BM conv bias
guangzhixie Jun 29, 2018
02b5e95
Merge pull request #162 from guangzhixie/bitmain
LittleMaer Jun 29, 2018
8dbb4b4
Update BM tensor test
guangzhixie Jul 5, 2018
a19e6fa
Implement fc for BM
guangzhixie Jul 10, 2018
e340b1b
Implement eltwise for BM
guangzhixie Jul 11, 2018
2fe6ca0
Add test for BM eltwise
guangzhixie Jul 11, 2018
8125c94
test eltwise PROD for BM
guangzhixie Jul 11, 2018
0596def
Initial checkin for BM device support
Jun 18, 2018
a8c7357
Fix cmake issues
Jun 18, 2018
d7a941c
Resolve BM library compilation issue
Jun 19, 2018
be88f93
Remove unnecessary files
Jun 19, 2018
ec7ed85
Put empty implementation for BM sync_mem for now
Jun 19, 2018
953f99a
Fix wrong input param
Jun 19, 2018
8543546
Fix param type issue
Jun 20, 2018
db24efb
Initialize BM handler
Jun 20, 2018
4a509e2
Add more unit test for tensor
Jun 20, 2018
e93ec78
Update Dtype for host
Jun 20, 2018
7172074
Conversion from void* to bm_device_mem_t*
Jun 20, 2018
8f131ab
Convert from void* to bm_device_mem_t*
Jun 20, 2018
190dc30
Revert back first
Jun 20, 2018
dcdfa8a
test
Jun 20, 2018
76c64b7
Revert "test"
Jun 20, 2018
54dfffd
Debug on copy_from
Jun 20, 2018
a21ed77
Revert "Revert "test""
Jun 20, 2018
f46db51
Print tensor for BM
Jun 21, 2018
f3d589d
Revert "Revert "Revert "test"""
Jun 21, 2018
fd54ed8
Passing through BM handler
Jun 21, 2018
eefea6d
Implement copy_from for BM; Add back test_TargetWrapper_BM
Jun 21, 2018
4c2fc50
check tensor target type
Jun 21, 2018
c652479
Change back to compliable version
Jun 21, 2018
4594e9a
modify activation op and test
SophonTPU Jun 21, 2018
2fa9cc0
Enable copy from tensor with different Dtype
Jun 21, 2018
71f0c6f
Complete copy_from method
Jun 22, 2018
d7ed46b
const_cast the immutable target data pointer
Jun 22, 2018
d8119d5
Revert back to compilable version
Jun 22, 2018
fd07e72
Modify handle usage & mem_alloc function
Jun 22, 2018
1261fa2
Modify handle usage & mem_alloc function
Jun 22, 2018
90efca0
Modify test_TargetWrapper
Jun 22, 2018
9c8d71a
fill activation and fc op; compile error
SophonTPU Jun 22, 2018
cddbb5b
allow copy from tensor with different data type
Jun 23, 2018
849006d
AK_BM size should return 1
Jun 23, 2018
5ce796d
Comment out specialization of _type_len for now.
Jun 23, 2018
6e48d84
Add implementation for copy_from between device and system
Jun 23, 2018
56433bc
Redefine _type_len as function so that we can do specialization
Jun 23, 2018
2a963f6
Fix mem_free function
Jun 24, 2018
4e944ab
Fix mem_free function
Jun 24, 2018
f8fe05c
change mem_free_BM to mem_free; tensor test passed
SophonTPU Jun 24, 2018
cf2daea
remove stream test in context
Jun 25, 2018
366e01f
Update buffer test for BM
Jun 25, 2018
4134a33
Specialization for Env<BM>
Jun 25, 2018
e147561
env skip bm
SophonTPU Jun 25, 2018
6d86453
modify mem_alloc for void*
SophonTPU Jun 25, 2018
0a4a48f
Specialization for copy_from
Jun 25, 2018
630e58e
Revert speical handling for Env<BM>
Jun 25, 2018
74f27b5
add conv op, did't test
SophonTPU Jun 25, 2018
6346b23
Add sync_memcpy function & fix test_saber_buffer_BM
Jun 25, 2018
f0c6e78
init handle for tensor test
guangzhixie Jun 25, 2018
595677d
init handle for BM context test
guangzhixie Jun 25, 2018
43c3a68
handle init rearrange
Jun 26, 2018
4fbb951
add pooling wrapper, didn't test
SophonTPU Jun 26, 2018
222f562
ptr2 should be from buf2
Jun 26, 2018
0ebed06
Restrict copy_from for different types
Jun 26, 2018
cbb2878
Implement fill_tensor_device_rand & fill_tensor_device_const for BM
Jun 26, 2018
599cf50
get handle directly by calling get_handler()
guangzhixie Jun 26, 2018
5bbc2ee
modify pooling, test failed
SophonTPU Jun 26, 2018
ccfa11b
Implement print_tensor_device for BM
guangzhixie Jun 26, 2018
0038414
Update BM tensor test
guangzhixie Jun 26, 2018
9ca8735
fix pooling api error
SophonTPU Jun 26, 2018
62565be
Update pooling test
guangzhixie Jun 26, 2018
7f1a4f3
Skip context init for BM
guangzhixie Jun 26, 2018
154d5ad
remove flush action in print
guangzhixie Jun 26, 2018
d84c51e
ignore set_device for BM for now
guangzhixie Jun 26, 2018
fe30322
Update logs for copy_from
guangzhixie Jun 26, 2018
a6088e3
Initialize bm handle only in one place
guangzhixie Jun 26, 2018
42d7ee0
chage tensor type_len
hlzy Jun 26, 2018
e048078
Implement conv for BM
guangzhixie Jun 26, 2018
a394d60
Comment out last conv test for now
guangzhixie Jun 26, 2018
6f752bb
Modify sync_memcpy & add bm_mem_from_device
Jun 26, 2018
8925da3
Update BM conv params
guangzhixie Jun 27, 2018
3b8ceed
Init handle in init function
guangzhixie Jun 27, 2018
abb123e
Include BM conv implementation
guangzhixie Jun 27, 2018
27ba06b
remove unecessary include
guangzhixie Jun 27, 2018
88d7ced
empty create function
guangzhixie Jun 27, 2018
7d9bc02
unit test for BM conv
guangzhixie Jun 27, 2018
5ce9056
Update BM tensor print function
guangzhixie Jun 27, 2018
838a285
modify activation op, test pass
SophonTPU Jun 27, 2018
272ef52
tensor_test
hlzy Jun 27, 2018
033a6ab
Fix sync_memcpy functions & test_saber_buffer_BM all passes
Jun 27, 2018
9bba50e
Implement BM softmax
guangzhixie Jun 27, 2018
1a8861b
only print in DEBUG
guangzhixie Jun 27, 2018
2103811
reduce iteration
guangzhixie Jun 27, 2018
67e9bbd
Revert "reduce iteration"
guangzhixie Jun 27, 2018
ceccee4
modify fc op, compile error
SophonTPU Jun 27, 2018
944214d
Update for BM softmax
guangzhixie Jun 27, 2018
6d5c486
xRevert "modify fc op, compile error"
SophonTPU Jun 27, 2018
8a7a8d7
change tensor_test_bm
hlzy Jun 27, 2018
5aef6ab
tensor test update
hlzy Jun 28, 2018
6af8e17
Add back missing files
guangzhixie Jun 28, 2018
1d98f9f
Add back missing files
guangzhixie Jun 28, 2018
360433a
Implement BM scale
guangzhixie Jun 28, 2018
702b59c
pooling test
SophonTPU Jun 28, 2018
4cabbce
Fix d2d mem copy
Jun 28, 2018
4b125ce
Add batch norm operation
guangzhixie Jun 28, 2018
1bda81d
Implement batch norm for BM
guangzhixie Jun 28, 2018
0c2f59b
Use template specifications instead of macro
guangzhixie Jun 28, 2018
03dfabb
conv test
SophonTPU Jun 28, 2018
bdb0dac
Add test for batch norm
guangzhixie Jun 28, 2018
be07f10
Use specialization
guangzhixie Jun 28, 2018
b1ae58a
Update batch norm test for BM
guangzhixie Jun 28, 2018
0d454d5
Update batch norm test for BM
guangzhixie Jun 28, 2018
1f4c082
Update BM batch norm test
guangzhixie Jun 28, 2018
b96989f
Use vender scale for test
guangzhixie Jun 28, 2018
ebfcd88
Update BM scale
guangzhixie Jun 28, 2018
4d5dfaa
update BM bias input
guangzhixie Jun 28, 2018
d6f5cbb
BM scale test with bias
guangzhixie Jun 28, 2018
3645cb3
fix bias in scale
hongliu104 Jun 29, 2018
df2b2b2
Update BM scale ops
guangzhixie Jun 29, 2018
d345de4
cleanup
guangzhixie Jun 29, 2018
fb6af6b
Update BM scale test
guangzhixie Jun 29, 2018
e7d2d04
cleanup
guangzhixie Jun 29, 2018
9729c00
flush before next operation
guangzhixie Jun 29, 2018
ff9f16c
check BM conv bias
guangzhixie Jun 29, 2018
16bba5b
Update BM tensor test
guangzhixie Jul 5, 2018
8059ee1
Implement fc for BM
guangzhixie Jul 10, 2018
a93a77e
Implement eltwise for BM
guangzhixie Jul 11, 2018
0eee023
Add test for BM eltwise
guangzhixie Jul 11, 2018
3403329
test eltwise PROD for BM
guangzhixie Jul 11, 2018
51cb5a2
update gitignore
guangzhixie Aug 6, 2018
2994f0b
update gitignore
guangzhixie Aug 6, 2018
0178c02
Merge branch 'bitmain' of https://github.com/guangzhixie/Anakin into …
guangzhixie Aug 6, 2018
f8c859a
Refactor according to new template scheme
guangzhixie Aug 7, 2018
4002ede
Specialize AK_BM for DataTrait
guangzhixie Aug 7, 2018
16383d8
Update copy_from template
guangzhixie Aug 7, 2018
cb06b3a
cast data type
guangzhixie Aug 7, 2018
ccf5156
cast data type
guangzhixie Aug 7, 2018
d5d7399
set default value
guangzhixie Aug 7, 2018
3587f90
Update restrictions
guangzhixie Aug 7, 2018
4cb9112
Update BM conv impl
guangzhixie Aug 7, 2018
84d2021
Keep in correct order
guangzhixie Aug 7, 2018
03ae49b
comment out BM activation op
guangzhixie Aug 7, 2018
4413241
Refactor
guangzhixie Aug 7, 2018
82e61f2
Update gitignore
guangzhixie Aug 7, 2018
e86bc25
cmake update for BM
guangzhixie Aug 7, 2018
5c3b32b
cmake update for BM
guangzhixie Aug 7, 2018
4e43d91
Revert "cmake update for BM"
guangzhixie Aug 7, 2018
f4e9d42
Revert "cmake update for BM"
guangzhixie Aug 7, 2018
7236538
cleanup
guangzhixie Aug 7, 2018
7017d02
TEST
guangzhixie Aug 7, 2018
f73a88c
Revert "TEST"
guangzhixie Aug 7, 2018
f7f4cc7
cmake for test
guangzhixie Aug 7, 2018
95dd7e1
Revert "cmake for test"
guangzhixie Aug 8, 2018
abf819d
implementation according to data types
guangzhixie Aug 8, 2018
24d3ef1
implementation according to data types
guangzhixie Aug 8, 2018
9922c83
cmake updates
guangzhixie Aug 8, 2018
ea13097
Remove AK_BM and use AK_FLOAT instead
guangzhixie Aug 13, 2018
76b35cc
Upgrade BM SDK; New way to get BM handle
guangzhixie Aug 13, 2018
b47d575
New way to get BM handle
guangzhixie Aug 13, 2018
f86b95e
Update tests for BM
guangzhixie Aug 13, 2018
43bf9ef
BM tensor test
guangzhixie Aug 14, 2018
c75c35a
Remove all BM ops for now
guangzhixie Aug 15, 2018
6a9fd33
Merge branch 'bitmain' into bitmain
guangzhixie Aug 15, 2018
ab08104
Merge branch 'bitmain' into dev_v2
guangzhixie Aug 16, 2018
224f025
Updates according to dev_v2
guangzhixie Aug 16, 2018
e5c96fb
Cleanup
guangzhixie Aug 16, 2018
934128f
Update BM header file path
guangzhixie Aug 16, 2018
a66072a
Check device count when init context
guangzhixie Aug 16, 2018
b87eca8
Check device count when init context
guangzhixie Aug 16, 2018
deb0bd6
User system SDK
guangzhixie Aug 16, 2018
dc1bdfe
Fix bug for tensor_reshape_realloc test. Init host tensor with proper…
guangzhixie Aug 17, 2018
dceb78b
Merge branch 'dev_v2' into dev_v2
LittleMaer Aug 20, 2018
2115134
Use BM Kernel instead of BMDNN
guangzhixie Aug 21, 2018
126fbcd
Merge branch 'dev_v2' into dev_v2
xyoungli Aug 22, 2018
7558ec0
Revert "Use BM Kernel instead of BMDNN"
guangzhixie Aug 28, 2018
a23d99f
Merge remote-tracking branch 'upstream/dev_v2' into dev_v2
guangzhixie Aug 28, 2018
97fdd33
Merge branch 'dev_v2' into dev_v2
cyj1986 Aug 28, 2018
71c7c87
Merge remote-tracking branch 'upstream/dev_v2' into dev_master
guangzhixie Aug 28, 2018
5153c15
Merge branch 'dev_v2' into dev_v2
guangzhixie Aug 31, 2018
0cf065d
Merge branch 'dev_v2' into dev_v2
guangzhixie Sep 3, 2018
99f5f73
Merge branch 'dev_v2' of https://github.com/guangzhixie/Anakin into d…
guangzhixie Sep 6, 2018
6deecef
Merge remote-tracking branch 'upstream/dev_v2' into dev_v2
guangzhixie Sep 6, 2018
4c9409b
Merge branch 'dev_v2' into dev_master
guangzhixie Sep 7, 2018
30e64c6
Revert "Revert "Use BM Kernel instead of BMDNN""
guangzhixie Sep 7, 2018
de1fdbe
bm kernel implementation for saber op
guangzhixie Sep 7, 2018
eaf7302
Update bmkernel_api_base
guangzhixie Sep 7, 2018
c1c061c
Add namespace
guangzhixie Sep 7, 2018
a8a83a6
revert namespace first
guangzhixie Sep 7, 2018
156dfa8
switch for bm kernel op
guangzhixie Sep 7, 2018
27dbdda
switch for bm kernel op
guangzhixie Sep 7, 2018
afc072a
Use enum for bm op type
guangzhixie Sep 7, 2018
6ce0724
Add BM conv implementation
guangzhixie Sep 7, 2018
10ec524
update bm bin path
guangzhixie Sep 10, 2018
eff245e
host bm kernel bin at bm root system directory
guangzhixie Sep 10, 2018
25b2f1a
Merge branch 'dev_master' into bmk_conv
guangzhixie Sep 10, 2018
62301c6
Update bm kernel bin path
guangzhixie Sep 10, 2018
ed296e3
Merge remote-tracking branch 'upstream/dev_v2' into dev_v2
guangzhixie Sep 10, 2018
256dd90
Merge branch 'dev_v2' into dev_master
guangzhixie Sep 10, 2018
c5ddf4b
Merge branch 'dev_master' into bmk_conv
guangzhixie Sep 10, 2018
b957a81
Cleanup after merge
guangzhixie Sep 11, 2018
8d0b900
Merge branch 'dev_master' into bmk_conv
guangzhixie Sep 11, 2018
59476a1
comment out configure_file first
guangzhixie Sep 11, 2018
b07a5dc
Comment out code with issue
guangzhixie Sep 11, 2018
895d0e2
Merge branch 'dev_master' into bmk_conv
guangzhixie Sep 11, 2018
a0a9618
uncomment bm conv test
guangzhixie Sep 11, 2018
7beca13
test
guangzhixie Sep 13, 2018
0e45e32
Revert "test"
guangzhixie Sep 13, 2018
c3ac7c9
Fix BM bin compilation issue
guangzhixie Sep 13, 2018
3e7048d
Fix issue for BM bin
guangzhixie Sep 14, 2018
bafff06
Merge branch 'dev_master' into bmk_conv
guangzhixie Sep 14, 2018
179d6f4
scripting permission
guangzhixie Sep 14, 2018
dcd473c
Merge branch 'dev_master' into bmk_conv
guangzhixie Sep 14, 2018
bf6e712
Fix bm bin compilation issue
guangzhixie Sep 14, 2018
b49e9de
Merge branch 'dev_master' into bmk_conv
guangzhixie Sep 14, 2018
46aa154
BM conv host implementation
guangzhixie Sep 18, 2018
60decef
BM conv device implementation
guangzhixie Sep 19, 2018
73a9326
Refactor
guangzhixie Sep 19, 2018
e72aa26
Update to new version to bm kernel APIs
guangzhixie Oct 1, 2018
81467b2
Update BM Kernel dependencies
guangzhixie Oct 1, 2018
015cd28
Implement conv with new version of BM Kernel
guangzhixie Oct 1, 2018
6473856
Remove redundancy
guangzhixie Oct 1, 2018
e89f3d5
Refactor
guangzhixie Oct 1, 2018
97afe14
refactor
guangzhixie Oct 5, 2018
e9f6bb6
refactor
guangzhixie Oct 8, 2018
9b63a03
Refactor according to new APIs
guangzhixie Oct 11, 2018
a8cc6c5
merge from upstream
guangzhixie Oct 11, 2018
b42ca19
revert bm conv
guangzhixie Oct 11, 2018
2a1c06a
resolve permission issue
guangzhixie Oct 23, 2018
3 changes: 3 additions & 0 deletions cmake/compiler_options.cmake
@@ -39,6 +39,9 @@ anakin_add_compile_option(-Wshadow)
anakin_add_compile_option(-fpermissive)
anakin_add_compile_option(-Wsign-promo)
anakin_add_compile_option(-fdiagnostics-show-option)
+if(USE_BM_PLACE)
+anakin_add_compile_option(-lbmlib-asic)
+endif()

if(ENABLE_NOISY_WARNINGS)
anakin_add_compile_option(-Wcast-align)
38 changes: 18 additions & 20 deletions cmake/find_modules.cmake
@@ -357,24 +357,22 @@ macro(anakin_find_openmp)
endmacro()

macro(anakin_find_bmlib)
-find_path(BM_ROOT include/bmdnn/bmdnn_api.h ${CMAKE_SOURCE_DIR}/third-party/bm_lib/ $ENV{BM_ROOT}/)
-find_path(BM_ROOT_INCLUDE_DNN bmdnn_api.h ${BM_ROOT}/include/bmdnn)
-find_path(BM_ROOT_INCLUDE_RT bmruntime.h ${BM_ROOT}/include/bmruntime)
-find_path(BM_ROOT_INCLUDE_LIB bmlib_runtime.h ${BM_ROOT}/include/bmlib)
-if(BM_ROOT_INCLUDE_DNN AND BM_ROOT_INCLUDE_RT AND BM_ROOT_INCLUDE_LIB)
-set(BM_FOUND TRUE)
-endif()
-if(BM_FOUND)
-message(STATUS " Found bm_lib in ${BM_ROOT} ${BM_ROOT_INCLUDE_DNN} ${BM_ROOT_INCLUDE_RT} ${BM_ROOT_INCLUDE_LIB}")
-include_directories(${BM_ROOT_INCLUDE_DNN})
-include_directories(${BM_ROOT_INCLUDE_RT})
-include_directories(${BM_ROOT_INCLUDE_LIB})
-set(BM_LIBRARIES "")
-list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/app/libbmdnn_device.so)
-list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/app/libbmlib_device.so)
-list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/app/libbmrt.so)
-list(APPEND ANAKIN_LINKER_LIBS ${BM_LIBRARIES})
-else()
-message(FATAL_ERROR "Could not found bm_lib")
-endif()
+find_path(BM_ROOT include/bmlib/bmlib_runtime.h /usr/local/include/bm/ $ENV{BM_ROOT}/)
+if(BM_ROOT)
+set(BM_FOUND TRUE)
+endif()
+if(BM_FOUND)
+message(STATUS " Found bm_lib in ${BM_ROOT}")
+anakin_fetch_include_recursively(${BM_ROOT}/include)
+set(BM_LIBRARIES "")
+list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/bmlib.a)
+list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/common-arm.a)
+list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/common.a)
+list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/fw-arm.a)
+list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/fw-top.a)
+list(APPEND BM_LIBRARIES ${BM_ROOT}/lib/device/libbmlib-asic.so)
+list(APPEND ANAKIN_LINKER_LIBS ${BM_LIBRARIES})
+else()
+message(FATAL_ERROR "Could not found bm_lib")
+endif()
endmacro()
19 changes: 15 additions & 4 deletions framework/core/net/worker.cpp
@@ -106,15 +106,16 @@ Worker<Ttype, Ptype, RunType>::sync_prediction(std::vector<Tensor4d<typename tar
d_tensor_in_p->reshape(ins[i].valid_shape());
d_tensor_in_p->copy_from(ins[i]);
d_tensor_in_p->set_seq_offset(ins[i].get_seq_offset());
}
}
#ifdef NVIDIA_GPU
Context<NV> ctx(0, 0, 0);
saber::SaberTimer<NV> my_time;
my_time.start(ctx);
#ifdef ENABLE_OP_TIMER
Context<NV> ctx(0, 0, 0);
saber::SaberTimer<NV> my_time;
my_time.start(ctx);
#endif
#endif // ENABLE_OP_TIMER
net.prediction();

my_time.end(ctx);
@@ -127,7 +128,8 @@ Worker<Ttype, Ptype, RunType>::sync_prediction(std::vector<Tensor4d<typename tar
_thead_id_to_prediction_times_vec_in_ms[std::this_thread::get_id()].push_back(my_time.get_average_ms());
LOG(ERROR) << " exec << time: " << my_time.get_average_ms() << " ms ";
}
#endif
#endif // ENABLE_OP_TIMER
#endif // NVIDIA_GPU
// get outputs of graph
std::vector<Tensor4d<typename target_host<Ttype>::type>> ret;
ret.resize(_outputs_in_order.size());
@@ -226,6 +228,16 @@ template class Worker<NV, Precision::FP16, OpRunType::SYNC>;
template class Worker<NV, Precision::INT8, OpRunType::SYNC>;
#endif

+#ifdef AMD_GPU
+template class Worker<AMD, Precision::FP32, OpRunType::ASYNC>;
+template class Worker<AMD, Precision::FP16, OpRunType::ASYNC>;
+template class Worker<AMD, Precision::INT8, OpRunType::ASYNC>;
+
+template class Worker<AMD, Precision::FP32, OpRunType::SYNC>;
+template class Worker<AMD, Precision::FP16, OpRunType::SYNC>;
+template class Worker<AMD, Precision::INT8, OpRunType::SYNC>;
+#endif
+
#ifdef USE_X86_PLACE
template class Worker<X86, Precision::FP32, OpRunType::ASYNC>;
template class Worker<X86, Precision::FP16, OpRunType::ASYNC>;
@@ -256,4 +268,3 @@ template class Worker<ARM, Precision::INT8, OpRunType::SYNC>;
#endif

} /* namespace */
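The hunks above wrap the NV timer setup in an NVIDIA_GPU guard, nest ENABLE_OP_TIMER inside it, and label each #endif with the condition it closes. A compact, hedged sketch of that guard pattern, reassembled from the changed lines only (not the file's exact final layout):

    #ifdef NVIDIA_GPU
    #ifdef ENABLE_OP_TIMER
        // timer only exists in CUDA builds with per-op timing enabled
        Context<NV> ctx(0, 0, 0);
        saber::SaberTimer<NV> my_time;
        my_time.start(ctx);
    #endif // ENABLE_OP_TIMER
    #endif // NVIDIA_GPU

        net.prediction();

The timing report after net.prediction() is closed the same way, with #endif // ENABLE_OP_TIMER followed by #endif // NVIDIA_GPU.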

25 changes: 25 additions & 0 deletions saber/CMakeLists.txt
@@ -92,6 +92,31 @@ if(USE_CUDA)
${WHOLE_ARCHIVE_END})
endif()

+if(USE_BM_PLACE)
+set(BIN_NAME bmkernel_bin)
+set(LINK_CONFIG link/bm1682_ddr.lds)
+add_custom_command(OUTPUT bm_kernel_tmp
+COMMAND arm-none-eabi-gcc ${ANAKIN_SABER}/funcs/impl/bm/device/bmkernel_base.c -mcpu=arm926ej-s -mfpu=vfp -fno-short-enums -std=gnu99 -O2 -Wall -Werror -ffunction-sections -fdata-sections -nostdlib -DENABLE_PRINT -I${BM_ROOT}/include/config -I${BM_ROOT}/include/common -I${BM_ROOT}/include/c_model -I${BM_ROOT}/include/firmware_core -I${BM_ROOT}/include/bmlib -c -o ${BIN_NAME}.o
+COMMAND arm-none-eabi-gcc -T ${BM_ROOT}/${LINK_CONFIG} -mcpu=arm926ej-s -mfpu=vfp -fno-short-enums -Wl,--check-sections -Wl,--gc-sections -Wl,--unresolved-symbols=report-all -Wl,--no-enum-size-warning -o ${BIN_NAME}.elf -Wl,--start-group -lc -lm ${BIN_NAME}.o ${BM_ROOT}/lib/device/fw-top.a ${BM_ROOT}/lib/device/fw-arm.a -Wl,--end-group
+COMMAND arm-none-eabi-objcopy -O binary -R *.slow* ${BIN_NAME}.elf ${BIN_NAME}_itcm.bin
+COMMAND hexdump -v -e '1/4 \"%08x\\n\"' ${BIN_NAME}_itcm.bin > ${BIN_NAME}_itcm.hex.sim
+COMMAND arm-none-eabi-objcopy -O binary -j *.slow* ${BIN_NAME}.elf ${BIN_NAME}_ddr.bin
+COMMAND hexdump -v -e '1/4 \"%08x\\n\"' ${BIN_NAME}_ddr.bin > ${BIN_NAME}_ddr.hex.sim
+COMMAND printf "%x" 0xAABBCCDD > ${BIN_NAME}.bin
+COMMAND printf "%x" 0x0 >> ${BIN_NAME}.bin
+COMMAND printf "%x" 0x0 >> ${BIN_NAME}.bin
+COMMAND printf "%x" 0x0 >> ${BIN_NAME}.bin
+
+COMMAND printf \"%x\" `wc -c < ${BIN_NAME}_itcm.hex.sim` >> ${BIN_NAME}.bin
+
+COMMAND cat ${BIN_NAME}_itcm.hex.sim >> ${BIN_NAME}.bin
+COMMAND cat ${BIN_NAME}_ddr.hex.sim >> ${BIN_NAME}.bin
+COMMAND cp ${ANAKIN_ROOT}/build/saber/bmkernel_bin.bin /var/tmp/${BIN_NAME}.bin
+COMMENT "BM Kernel compilation..."
+)
+add_custom_target(ANAKIN ALL DEPENDS bm_kernel_tmp)
+endif()
+
# add saber library to static
if(UNIX OR APPLE)
if (USE_ARM_PLACE)
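The block above cross-compiles bmkernel_base.c for the BM1682's embedded ARM core (arm926ej-s), links it against the device firmware archives with the bm1682_ddr.lds script, then splits the ELF into an ITCM image and a DDR image and concatenates ASCII-hex dumps of both behind a small header, installing the result at /var/tmp/bmkernel_bin.bin. A hedged C++ mirror of that header, inferred purely from the printf commands above rather than from any BM loader documentation:

    #include <cstdint>

    // Field order in bmkernel_bin.bin. Note printf "%x" writes ASCII hex with
    // no separators, so the result is a text container, not packed binary.
    constexpr uint32_t kBmKernelMagic = 0xAABBCCDD; // written first
    constexpr int kReservedZeroWords = 3;           // three "%x"-formatted zeros
    // Fifth field: byte count of the ITCM hex dump (output of wc -c), followed
    // by the ITCM dump and then the DDR dump, one "%08x" word per line.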
4 changes: 1 addition & 3 deletions saber/core/common.h
@@ -179,11 +179,9 @@ const char* cudnn_get_errorstring(cudnnStatus_t status);
#ifdef USE_BM_PLACE

#include "bmlib_runtime.h"
-#include "bmdnn_api.h"
-#include "bmdnn_ext_api.h"
#include "bmlib_utils.h"

-#define BMDNN_CHECK(condition) \
+#define BM_CHECK(condition) \
do { \
bm_status_t error = condition; \
CHECK_EQ(error, BM_SUCCESS) << " Failed with error code:" << error; \
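With the BMDNN headers dropped, the status-checking macro is renamed from BMDNN_CHECK to BM_CHECK. A hedged usage sketch built only from bmlib calls that appear elsewhere in this PR (bm_dev_getcount, plus the bm_dev_request signature quoted in a comment in bm_impl.cpp); exact signatures may vary by SDK version:

    #include "bmlib_runtime.h"

    // Request a handle on a BM device, aborting with the bm_status_t error
    // code if any call does not return BM_SUCCESS.
    bm_handle_t request_bm_handle(int dev_id) {
        int count = 0;
        BM_CHECK(bm_dev_getcount(&count));
        CHECK_LT(dev_id, count) << "invalid BM device id " << dev_id;
        bm_handle_t handle;
        BM_CHECK(bm_dev_request(&handle, 0 /*bmkernel_used*/, dev_id));
        return handle;
    }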
12 changes: 6 additions & 6 deletions saber/core/context.h
@@ -35,7 +35,7 @@ class Context final{
* @param compute_stream_id
*/
Context(int device_id = 0, int data_stream_id = 0, int compute_stream_id = 0){
-#ifdef USE_BM
+#ifdef USE_BM_PLACE
if(std::is_same<TargetType, BM>::value){
LOG(INFO) << "context init for BM";
int dev_count = 0;
@@ -69,7 +69,7 @@ }
}

Context(const Context<TargetType>& ctx){
-#ifdef USE_BM
+#ifdef USE_BM_PLACE
if(std::is_same<TargetType, BM>::value){
LOG(INFO) << "context init for BM";
_bm_handle = ctx._bm_handle;
@@ -98,7 +98,7 @@ class Context final{
this->_act_ids = ctx._act_ids;
this->_mode = ctx._mode;
#endif
-#ifdef USE_BM
+#ifdef USE_BM_PLACE
this->_bm_handle = ctx._bm_handle;
#endif
return *this;
@@ -109,7 +109,7 @@ class Context final{
comp_eq = comp_eq && (_device_id == right._device_id);
comp_eq = comp_eq && (_data_stream_id == right._data_stream_id);
comp_eq = comp_eq && (_compute_stream_id == right._compute_stream_id);
-#ifdef USE_BM
+#ifdef USE_BM_PLACE
comp_eq = comp_eq && (_bm_handle == right._bm_handle);
#endif
return comp_eq;
@@ -151,7 +151,7 @@ class Context final{
//std::vector<int> get_act_ids();
#endif

-#ifdef USE_BM
+#ifdef USE_BM_PLACE
bm_handle_t get_handle() {
return _bm_handle;
}
@@ -170,7 +170,7 @@ class Context final{
PowerMode _mode{SABER_POWER_HIGH};
std::vector<int> _act_ids{0};
#endif
-#ifdef USE_BM
+#ifdef USE_BM_PLACE
bm_handle_t _bm_handle;
#endif
};
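With the guards corrected to USE_BM_PLACE, a BM context participates in the same Context<TargetType> API as the other targets. A hedged usage sketch (include path and enclosing namespaces are assumed for illustration):

    #include "saber/core/context.h"  // path assumed

    using namespace anakin::saber;   // namespaces assumed

    void run_on_bm() {
        // The constructor logs "context init for BM", checks the device
        // count, and stores the shared bm_handle_t.
        Context<BM> ctx(0 /*device_id*/, 0 /*data_stream_id*/, 0 /*compute_stream_id*/);
        bm_handle_t h = ctx.get_handle();  // copies of ctx share this handle
        (void)h;  // pass to bmlib / BM Kernel launches
    }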
1 change: 0 additions & 1 deletion saber/core/data_traits.h
@@ -20,7 +20,6 @@

#ifdef USE_BM_PLACE
#include "bmlib_runtime.h"
-#include "bmdnn_api.h"
#include "bmlib_utils.h"
#endif

22 changes: 11 additions & 11 deletions saber/core/impl/bm/bm_impl.cpp
@@ -45,19 +45,19 @@ typedef TargetWrapper<BM, __device_target> BM_API;

// Init handle only once in the lifetime
static bm_handle_t handle;
-static bm_status_t init_handle{bmdnn_init(&handle)};
+static bm_status_t init_handle{bmlib_kernel_init(&handle)};

bm_handle_t BM_API::get_handle() {
return handle;
};

void BM_API::get_device_count(int& count) {
-BMDNN_CHECK(bm_dev_getcount(&count));
+BM_CHECK(bm_dev_getcount(&count));
}

void BM_API::set_device(int id) {
//(bm_handle_t &handle, bool bmkernel_used, int id){
-//BMDNN_CHECK(bm_dev_request(&handle, 0, id));
+//BM_CHECK(bm_dev_request(&handle, 0, id));
}

//TODO: Do we have this functionality?
@@ -69,39 +69,39 @@ void BM_API::mem_alloc(TPtr* ptr, size_t n) {
/* bm_device_mem_t *mem = reinterpret_cast<struct bm_mem_desc *>(*ptr); */
// bm_device_mem_t *mem = new bm_device_mem_t();
bm_device_mem_t mem;
-BMDNN_CHECK(bm_malloc_device_byte(handle, &mem, n));
+BM_CHECK(bm_malloc_device_byte(handle, &mem, n));
*ptr = TPtr(mem);
}

void BM_API::mem_free(TPtr ptr) {
-if ((ptr != BM_MEM_NULL)) {
+if (bm_mem_get_type(ptr) == BM_MEM_TYPE_SYSTEM) {
bm_free_device(handle, ptr);
// delete ptr;
}
}

void BM_API::mem_set(TPtr ptr, int value, size_t n) {
//(bm_handle_t handle, const int value, bm_device_mem_t mem){
-BMDNN_CHECK(bm_memset_device(handle, value, ptr));
+BM_CHECK(bm_memset_device(handle, value, ptr));
//bm_device_mem_t* pmem = (struct bm_mem_desc *)(ptr);
-//BMDNN_CHECK(bm_memset_device(handle, value, *pmem));
+//BM_CHECK(bm_memset_device(handle, value, *pmem));
}

void BM_API::sync_memcpy(TPtr dst, size_t dst_offset, int dst_id, \
const TPtr src, size_t src_offset, int src_id, \
size_t count, __DtoD) {
if(count==0)
return;
-//BMDNN_CHECK(bm_memcpy_d2d(handle, bm_mem_from_device(dst), dst_id, bm_mem_from_device(src), src_id, count));
-BMDNN_CHECK(bm_memcpy_d2d(handle, dst, dst_offset, src, src_offset, count));
+//BM_CHECK(bm_memcpy_d2d(handle, bm_mem_from_device(dst), dst_id, bm_mem_from_device(src), src_id, count));
+BM_CHECK(bm_memcpy_d2d(handle, dst, dst_offset, src, src_offset, count));
};

void BM_API::sync_memcpy(TPtr dst, size_t dst_offset, int dst_id, \
const void* src, size_t src_offset, int src_id, \
size_t count, __HtoD) {
if(count==0)
return;
-BMDNN_CHECK(bm_memcpy_s2d(handle, dst+dst_offset, bm_mem_from_system(const_cast<void*>(src)+src_offset)));
+BM_CHECK(bm_memcpy_s2d(handle, dst+dst_offset, bm_mem_from_system(const_cast<void*>(src)+src_offset)));

#ifdef DEBUG

@@ -118,7 +118,7 @@ void BM_API::sync_memcpy(void* dst, size_t dst_offset, int dst_id, \
if(count==0)
return;
// LOG(INFO)<<"host ptr = "<<(dst)<<",dst_offset = "<<dst_offset<<", dev ptr = "<<(src)<<",dev offset = "<<src_offset;
-BMDNN_CHECK(bm_memcpy_d2s(handle, bm_mem_from_system(dst+dst_offset), src+src_offset));
+BM_CHECK(bm_memcpy_d2s(handle, bm_mem_from_system(dst+dst_offset), src+src_offset));

#ifdef DEBUG

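Taken together, the renamed wrappers give a host-to-device round trip like the hedged sketch below. BM_API and __HtoD are the aliases defined in this file; the device-to-host tag is assumed to be a matching __DtoH type, and TPtr wraps bm_device_mem_t:

    void bm_roundtrip_demo() {
        float host_src[16] = {1.f};  // remaining elements zero-initialized
        float host_dst[16] = {0.f};

        TPtr dev = BM_MEM_NULL;
        BM_API::mem_alloc(&dev, sizeof(host_src));  // bm_malloc_device_byte underneath

        // host -> device uses bm_memcpy_s2d; device -> host uses bm_memcpy_d2s
        BM_API::sync_memcpy(dev, 0, 0, host_src, 0, 0, sizeof(host_src), __HtoD());
        BM_API::sync_memcpy(host_dst, 0, 0, dev, 0, 0, sizeof(host_dst), __DtoH());  // tag name assumed

        BM_API::mem_free(dev);
    }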
20 changes: 13 additions & 7 deletions saber/core/impl/bm/tensor_op_bm.cpp
@@ -6,29 +6,33 @@ namespace saber {
template<>
void fill_tensor_const<BM>(Tensor<BM>& tensor, float value,
typename Tensor<BM>::API::stream_t stream = NULL) {
-Tensor<X86> temp_tensor(tensor.valid_shape(),tensor.get_dtype());
+Tensor<X86> temp_tensor(tensor.shape(), tensor.get_dtype());
+temp_tensor.set_shape(tensor.valid_shape());
fill_tensor_const(temp_tensor, value);
tensor.copy_from(temp_tensor);
}
template<>
void fill_tensor_rand<BM>(Tensor<BM>& tensor, typename Tensor<BM>::API::stream_t stream = NULL) {
-Tensor<X86> temp_tensor(tensor.valid_shape(),tensor.get_dtype());
+Tensor<X86> temp_tensor(tensor.shape(), tensor.get_dtype());
+temp_tensor.set_shape(tensor.valid_shape());
fill_tensor_rand(temp_tensor);
tensor.copy_from(temp_tensor);
}

template<>
void fill_tensor_rand<BM>(Tensor<BM>& tensor, float vstart, float vend,
typename Tensor<BM>::API::stream_t stream = NULL) {
-Tensor<X86> temp_tensor(tensor.valid_shape(),tensor.get_dtype());
+Tensor<X86> temp_tensor(tensor.shape(), tensor.get_dtype());
+temp_tensor.set_shape(tensor.valid_shape());
fill_tensor_rand(temp_tensor, vstart, vend);
tensor.copy_from(temp_tensor);
}

template<>
void print_tensor<BM>(Tensor<BM>& tensor, typename Tensor<BM>::API::stream_t stream = NULL) {
-LOG(INFO) << "device tensor data";
-Tensor<X86> temp_tensor(tensor.valid_shape(),tensor.get_dtype());
+LOG(INFO) << "BM device tensor data:";
+Tensor<X86> temp_tensor(tensor.shape(), tensor.get_dtype());
+temp_tensor.set_shape(tensor.valid_shape());
temp_tensor.copy_from(tensor);
print_tensor(temp_tensor);
}
@@ -41,15 +45,17 @@ void print_tensor_valid<BM>(Tensor<BM>& tensor, typename Tensor<BM>::API::stream

template<>
double tensor_mean_value<BM>(Tensor<BM>& tensor, typename Tensor<BM>::API::stream_t stream = NULL) {
-Tensor<X86> temp_tensor(tensor.valid_shape(),tensor.get_dtype());
+Tensor<X86> temp_tensor(tensor.shape(), tensor.get_dtype());
+temp_tensor.set_shape(tensor.valid_shape());
temp_tensor.copy_from(tensor);
return tensor_mean_value(temp_tensor);
}

template<>
double tensor_mean_value_valid<BM>(Tensor<BM>& tensor,
typename Tensor<BM>::API::stream_t stream = NULL) {
-Tensor<X86> temp_tensor(tensor.valid_shape(),tensor.get_dtype());
+Tensor<X86> temp_tensor(tensor.shape(), tensor.get_dtype());
+temp_tensor.set_shape(tensor.valid_shape());
temp_tensor.copy_from(tensor);
return tensor_mean_value(temp_tensor);
}
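Every utility here follows the same staging pattern: allocate a host (X86) tensor over the full shape(), narrow it with set_shape(valid_shape()), do the work on the host, and copy across. A hedged caller-side sketch (the Shape constructor shown is illustrative):

    Tensor<BM> t(Shape({1, 3, 8, 8}));   // device-side tensor; Shape ctor assumed

    fill_tensor_rand(t, -1.f, 1.f);      // fills an X86 staging tensor, then copy_from() to BM
    print_tensor(t);                     // stages back to X86 and prints
    double mean = tensor_mean_value(t);  // same round trip, reduced on the host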
2 changes: 1 addition & 1 deletion saber/core/target_wrapper.h
@@ -450,7 +450,7 @@ struct TargetWrapper<BM, __device_target> {
static bm_handle_t get_handle();

};
-#endif //USE_BM
+#endif //USE_BM_PLACE

#ifdef AMD_GPU
