Skip to content

Commit

Permalink
[ cpu_backend ] Refactor blas_interface considering arch-dep
Browse files Browse the repository at this point in the history
1. Replace `blas_interface.h` with `cpu_backend.h`, which declares the virtual functions of `blas_interface.h`.
2. The actual implementations live in `arm_compute_backend`, `x86_compute_backend`, and `fallback`, and are included according to the target CPU architecture. `cblas.h` is used by both of them for fp32 computation.
3. There are some differences (unsupported intrinsics or data types) across the versions, and they are managed under the respective `arm` or `x86` directory.

**Self evaluation:**
1. Build test:     [X]Passed [ ]Failed [ ]Skipped
2. Run test:     [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: skykongkong8 <[email protected]>
  • Loading branch information
skykongkong8 committed Aug 30, 2024
1 parent 6d6e924 commit 738bec4
Show file tree
Hide file tree
Showing 97 changed files with 4,899 additions and 1,999 deletions.
3 changes: 3 additions & 0 deletions Applications/AlexNet/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
10 changes: 4 additions & 6 deletions Applications/Android/NNDetector/app/src/main/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,15 @@ include $(CLEAR_VARS)
NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/include/nntrainer
SIMPLESHOT_DIR = .


LOCAL_ARM_NEON := true
LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib
LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions
LOCAL_CFLAGS += -std=c++17 -Ofast -mcpu=cortex-a53 -Ilz4-nougat/lib -DARM=1
LOCAL_LDFLAGS += -Llz4-nougat/lib/obj/local/$(TARGET_ARCH_ABI)/ -DARM=1
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions -fopenmp -static-openmp -DARM=1
LOCAL_CFLAGS += -pthread -fexceptions -fopenmp -static-openmp
LOCAL_LDFLAGS += -fexceptions -fopenmp -static-openmp
LOCAL_MODULE_TAGS := optional
LOCAL_ARM_MODE := arm
LOCAL_MODULE := simpleshot_jni
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp -ljnigraphics
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp -ljnigraphics -DARM=1

LOCAL_SRC_FILES := simpleshot.cpp simpleshot_jni.cpp dataloader.cpp image.cpp
LOCAL_SHARED_LIBRARIES := ccapi-nntrainer nntrainer
Expand Down
3 changes: 3 additions & 0 deletions Applications/Custom/LayerClient/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
${ML_API_COMMON_INCLUDES}
Expand Down
3 changes: 3 additions & 0 deletions Applications/LLaMA/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Layers/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Multi_input/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/PicoGPT/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/ProductRatings/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer/include \
$(NNTRAINER_ROOT)/nntrainer/layers \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor
Expand Down
3 changes: 3 additions & 0 deletions Applications/ReinforcementLearning/DeepQ/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/Resnet/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/models \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
Expand Down
3 changes: 3 additions & 0 deletions Applications/VGG/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/compiler \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/YOLOv2/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
3 changes: 3 additions & 0 deletions Applications/YOLOv3/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer \
$(NNTRAINER_ROOT)/nntrainer/graph \
$(NNTRAINER_ROOT)/nntrainer/optimizers \
$(NNTRAINER_ROOT)/nntrainer/tensor \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/fallback \
$(NNTRAINER_ROOT)/nntrainer/tensor/cpu_backend/arm \
$(NNTRAINER_ROOT)/nntrainer/utils \
$(NNTRAINER_ROOT)/api \
$(NNTRAINER_ROOT)/api/ccapi/include \
Expand Down
5 changes: 4 additions & 1 deletion debian/nntrainer-dev.install
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
/usr/include/nntrainer/short_tensor.h
/usr/include/nntrainer/float_tensor.h
/usr/include/nntrainer/tensor_wrap_specs.h
/usr/include/nntrainer/blas_interface.h
usr/include/nntrainer/fallback_internal.h
usr/include/nntrainer/cblas_interface.h
usr/include/nntrainer/x86_compute_backend.h
/usr/include/nntrainer/cpu_backend.h
/usr/include/nntrainer/var_grad.h
/usr/include/nntrainer/weight.h
# todo: update dataset headers
Expand Down
6 changes: 6 additions & 0 deletions jni/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ and_conf.set('VERSION_MAJOR', nntrainer_version_split[0])
and_conf.set('VERSION_MINOR', nntrainer_version_split[1])
and_conf.set('VERSION_MICRO', nntrainer_version_split[2])

arch = host_machine.cpu_family()
and_conf.set('ARM', 1)
if arch == 'arm'
and_conf.set('ARMV7', 1)
endif

if get_option('enable-capi').enabled()
and_conf.set('MESON_CAPI_NNTRAINER_SRCS', ' '.join(capi_src))
and_conf.set('MESON_CAPI_NNTRAINER_INCS', ' '.join(capi_inc_abs))
Expand Down
21 changes: 15 additions & 6 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,23 @@ warning_c_flags = [
'-Wno-error=varargs'
]

arch = host_machine.cpu_family()
if arch == 'arm' or arch == 'aarch64' or get_option('platform') == 'android'
message('Build for ARM architecture')
extra_defines += '-DARM=1'
if arch == 'arm'
extra_defines += '-DARMV7=1'
endif
elif arch == 'x86' or arch == 'x86_64'
message('Build for X86 architecture')
if get_option('enable-fp16')
add_project_arguments(['-march=native'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
extra_defines += '-DX86=1'
endif

if get_option('enable-fp16')
arch = host_machine.cpu_family()
if get_option('platform') == 'android'
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
extra_defines += '-DENABLE_FP16=1'
Expand Down Expand Up @@ -110,11 +124,6 @@ if get_option('enable-fp16')
if cc.version().version_compare('>=12.1.0')
message ('Float16 for x86_64 enabled. Modern gcc-x64 generally supports float16 with _Float16.')
extra_defines += '-DENABLE_FP16=1'
if get_option('enable-avx')
extra_defines += '-DUSE_AVX=1'
add_project_arguments(['-march=native'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
else
warning ('Float16 for x86_64 enabled. However, software emulation is applied for fp16, making it slower and inconsistent. Use GCC 12+ for FP16 support. This build will probably fail unless you bring a compiler that supports fp16 for x64.')
endif
Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/acti_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#define __ACTI_FUNC_H__
#ifdef __cplusplus

#include <blas_interface.h>
#include <common_properties.h>
#include <cpu_backend.h>

namespace nntrainer {

Expand Down
5 changes: 2 additions & 3 deletions nntrainer/layers/activation_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#include <vector>

#include <activation_layer.h>
#include <blas_interface.h>
#include <common_properties.h>
#include <cpu_backend.h>
#include <layer_context.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
Expand All @@ -32,8 +32,7 @@

namespace nntrainer {
ActivationLayer::ActivationLayer() :
Layer(),
activation_props(new PropTypes(props::Activation())) {
Layer(), activation_props(new PropTypes(props::Activation())) {
acti_func.setActiFunc(ActivationType::ACT_NONE);
}

Expand Down
2 changes: 1 addition & 1 deletion nntrainer/layers/conv2d_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#include <limits>
#include <string>

#include <blas_interface.h>
#include <conv2d_layer.h>
#include <cpu_backend.h>
#include <layer_context.h>
#include <lazy_tensor.h>
#include <nntr_threads.h>
Expand Down
Loading

0 comments on commit 738bec4

Please sign in to comment.