diff --git a/api/ccapi/include/layer.h b/api/ccapi/include/layer.h
index 8bd13e01bc..3740500aa6 100644
--- a/api/ccapi/include/layer.h
+++ b/api/ccapi/include/layer.h
@@ -131,6 +131,9 @@ class Layer {
    */
   virtual const std::string getType() const = 0;

+  /**
+   * @brief Initialize layer
+   */
   virtual void initialize() = 0;

   /**
diff --git a/debian/nntrainer-dev.install b/debian/nntrainer-dev.install
index 6390d02b0a..fea2a1b5b5 100644
--- a/debian/nntrainer-dev.install
+++ b/debian/nntrainer-dev.install
@@ -24,6 +24,7 @@
 /usr/include/nntrainer/layer_context.h
 /usr/include/nntrainer/layer_devel.h
 /usr/include/nntrainer/layer_impl.h
+/usr/include/nntrainer/acti_func.h
 # custom layer kits
 /usr/include/nntrainer/app_context.h
 # logger
diff --git a/meson.build b/meson.build
index c4d94e8e9b..98e2cae9f6 100644
--- a/meson.build
+++ b/meson.build
@@ -88,6 +88,7 @@ if get_option('enable-fp16')
   # compatible with armv8.0 machines.
   if cxx.has_argument('-mfp16-format=ieee')
     add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
+    add_project_arguments('-march=armv8.2-a+fp16', language: ['c', 'cpp'])
   else
     message ('The compiler does not support -mfp16-format=ieee. However, according to https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Half-Precision.html, gcc may use IEEE fp16 anyway. Thus, we will proceed without the option for FP16 support.')
   endif
diff --git a/nnstreamer/meson.build b/nnstreamer/meson.build
index 9a7735d614..8b42476aef 100644
--- a/nnstreamer/meson.build
+++ b/nnstreamer/meson.build
@@ -3,5 +3,5 @@ if get_option('enable-nnstreamer-tensor-filter').enabled()
   subdir('tensor_filter')
 endif
 if get_option('enable-nnstreamer-tensor-trainer').enabled()
-# subdir('tensor_trainer')
+  subdir('tensor_trainer')
 endif
diff --git a/nntrainer/layers/layer_context.cpp b/nntrainer/layers/layer_context.cpp
index e6753285d7..6748637e01 100644
--- a/nntrainer/layers/layer_context.cpp
+++ b/nntrainer/layers/layer_context.cpp
@@ -541,9 +541,10 @@ bool RunLayerContext::validate(bool skip_input, bool skip_label) {
   } else if (val->getVariableRef().getTensorType().data_type ==
              TensorDim::DataType::FP16) {
 #ifdef ENABLE_FP16
-    tensor_map[val->getName()] = val->getVariableRef().getData<_FP16>();
+    tensor_map[val->getName()] =
+      val->getVariableRef().template getData<_FP16>();
     tensor_map[val->getGradientName()] =
-      val->getGradientRef().getData<_FP16>();
+      val->getGradientRef().template getData<_FP16>();
 #else
     throw std::invalid_argument("Error: enable-fp16 is not enabled");
 #endif
diff --git a/nntrainer/layers/layer_context.h b/nntrainer/layers/layer_context.h
index b1f297f6ee..2022692874 100644
--- a/nntrainer/layers/layer_context.h
+++ b/nntrainer/layers/layer_context.h
@@ -438,9 +438,6 @@ class RunLayerContext {
       d.setDataType(o_t);
       w = Tensor(d, true);
     }
-    unsigned int o_ax = getWeightObject(idx).getOutputAxis();
-
-    // t_w.dequantize(w, o_ax);

     return;
   }
diff --git a/nntrainer/layers/layer_devel.h b/nntrainer/layers/layer_devel.h
index be02b5bc2f..e59d1e9936 100644
--- a/nntrainer/layers/layer_devel.h
+++ b/nntrainer/layers/layer_devel.h
@@ -160,6 +160,9 @@ class Layer {
    */
   virtual void finalize(InitLayerContext &context) = 0;

+  /**
+   * @brief Initialize the layer
+   */
   virtual void initialize(RunLayerContext &context){};

   /**
diff --git a/nntrainer/layers/meson.build b/nntrainer/layers/meson.build
index c612d8c177..0902adb6e4 100644
--- a/nntrainer/layers/meson.build
+++ b/nntrainer/layers/meson.build
@@ -51,6 +51,7 @@ layer_headers = [
   'layer_context.h',
   'layer_devel.h',
   'layer_impl.h',
+  'acti_func.h',
   'common_properties.h',
 ]
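Note on the newly documented initialize() hook: finalize(InitLayerContext &) still runs at graph compile time, while initialize(RunLayerContext &) gives a layer a chance to prepare run-time state before the first pass. A minimal sketch of a hypothetical custom layer, using only the signatures visible in this patch (the remaining pure-virtual members of nntrainer::Layer are omitted for brevity, so this is illustrative rather than instantiable on its own):

#include <nntrainer/layer_devel.h>

class MyCustomLayer : public nntrainer::Layer {
public:
  const std::string getType() const override { return "my_custom_layer"; }

  void finalize(nntrainer::InitLayerContext &context) override {
    // compile-time setup: declare weights/tensors, set output dimensions
  }

  void initialize(nntrainer::RunLayerContext &context) override {
    // run-time setup: one-time preparation before the first forwarding pass
  }
};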
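Note on the layer_context.cpp change: the added `template` keyword is C++'s disambiguator for member templates. It is required when a member template is called on an expression whose type depends on a template parameter, and harmless otherwise. A self-contained sketch (hypothetical names) of the case where it is mandatory:

#include <iostream>

struct Tensor {
  template <typename T> T getData() const { return T{42}; }
};

template <typename TensorLike> void print_data(const TensorLike &t) {
  // Without `template`, `t.getData<float>()` parses as `(t.getData < float) > ()`
  // and fails to compile; the keyword tells the parser that getData is a template.
  std::cout << t.template getData<float>() << '\n';
}

int main() {
  print_data(Tensor{});
  return 0;
}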
diff --git a/nntrainer/tensor/hgemm/hgemm_pack.cpp b/nntrainer/tensor/hgemm/hgemm_pack.cpp
index c19fde6ecd..813a2bbd77 100644
--- a/nntrainer/tensor/hgemm/hgemm_pack.cpp
+++ b/nntrainer/tensor/hgemm/hgemm_pack.cpp
@@ -367,10 +367,10 @@ void packing_B8(unsigned int K, unsigned int N, const __fp16 *src,
                 unsigned int ldb, const __fp16 *dst) {
   assert(K != 0 && N != 0 && N % 8 == 0);

-  for (int i = 0; i < K; i++) {
+  for (unsigned int i = 0; i < K; i++) {
     const __fp16 *a_off = src + i * ldb;
     __fp16 *b_off = (__fp16 *)dst + i * 8;
-    for (int j = 0; j < N; j += 8) {
+    for (unsigned int j = 0; j < N; j += 8) {
       float16x8_t v = vld1q_f16(a_off);
       a_off += 8;

@@ -384,10 +384,10 @@ void packing_B16(unsigned int K, unsigned int N, const __fp16 *src,
                  unsigned int ldb, const __fp16 *dst) {
   assert(K != 0 && N != 0 && N % 16 == 0);

-  for (int i = 0; i < K; i++) {
+  for (unsigned int i = 0; i < K; i++) {
     const __fp16 *a_off = src + i * ldb;
     __fp16 *b_off = (__fp16 *)dst + i * 16;
-    for (int j = 0; j < N; j += 16) {
+    for (unsigned int j = 0; j < N; j += 16) {
       float16x8_t v0_7 = vld1q_f16(a_off);
       float16x8_t v8_15 = vld1q_f16(a_off + 8);
       a_off += 16;
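For context, packing_B8 copies K rows of a row-major B matrix into contiguous 8-wide column panels so the HGEMM microkernel can stream them sequentially; the patch itself only switches the loop indices to unsigned int so they match the unsigned K/N bounds and avoid signed/unsigned comparison warnings. A scalar sketch of the same layout, assuming a destination panel stride of 8 * K (the stores that advance b_off fall outside this hunk, so that stride is an assumption):

#include <cassert>

// Scalar model of 8-wide panel packing: element (i, j) of the row-major
// source lands in panel j / 8, at row i, lane j % 8 of the destination.
// The NEON version loads and stores 8 half-precision values at a time
// instead of this per-element copy.
void packing_B8_scalar(unsigned int K, unsigned int N, const float *src,
                       unsigned int ldb, float *dst) {
  assert(K != 0 && N != 0 && N % 8 == 0);
  for (unsigned int i = 0; i < K; i++) {
    for (unsigned int j = 0; j < N; j++) {
      dst[(j / 8) * 8 * K + i * 8 + (j % 8)] = src[i * ldb + j];
    }
  }
}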
diff --git a/packaging/nntrainer.spec b/packaging/nntrainer.spec
index 9e4ff0067e..deaafebd1b 100644
--- a/packaging/nntrainer.spec
+++ b/packaging/nntrainer.spec
@@ -131,13 +131,13 @@ BuildRequires: tensorflow2-lite-devel
 BuildRequires: tensorflow2-lite-devel
 %endif # support_tflite_interpreter

-%define enable_nnstreamer_tensor_filter -Denable-nnstreamer-tensor-filter=false
-%define enable_nnstreamer_tensor_trainer -Denable-nnstreamer-tensor-trainer=false
+%define enable_nnstreamer_tensor_filter -Denable-nnstreamer-tensor-filter=disabled
+%define enable_nnstreamer_tensor_trainer -Denable-nnstreamer-tensor-trainer=disabled

 %if 0%{?nnstreamer_filter}
 Requires: nnstreamer-nntrainer = %{version}-%{release}
 BuildRequires: nnstreamer-devel
-%define enable_nnstreamer_tensor_filter -Denable-nnstreamer-tensor-filter=true
+%define enable_nnstreamer_tensor_filter -Denable-nnstreamer-tensor-filter=enabled

 %if 0%{?unit_test}
 %if 0%{tizen_version_major}%{tizen_version_minor} > 60
@@ -151,7 +151,7 @@ BuildRequires: python
 %if 0%{?nnstreamer_trainer}
 Requires: nnstreamer-nntrainer = %{version}-%{release}
 BuildRequires: nnstreamer-devel
-%define enable_nnstreamer_tensor_trainer -Denable-nnstreamer-tensor-trainer=true
+%define enable_nnstreamer_tensor_trainer -Denable-nnstreamer-tensor-trainer=enabled
 %endif # nnstreamer_trainer
 %endif # tizen
@@ -413,8 +413,8 @@ meson --buildtype=plain --prefix=%{_prefix} --sysconfdir=%{_sysconfdir} \
     %{enable_profile} %{enable_nnstreamer_backbone} %{enable_tflite_backbone} \
     %{enable_tflite_interpreter} %{capi_ml_pkg_dep_resolution} \
     %{enable_reduce_tolerance} %{configure_subplugin_install_path} %{enable_debug} \
-    -Dml-api-support=enabled -Denable-nnstreamer-tensor-filter=enabled \
-    -Denable-nnstreamer-tensor-trainer=enabled -Denable-capi=enabled \
+    -Dml-api-support=enabled \
+    -Denable-capi=enabled \
     %{fp16_support} %{neon_support} build

 ninja -C build %{?_smp_mflags}
@@ -565,9 +565,18 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/
 %{_includedir}/nntrainer/util_func.h
 %{_includedir}/nntrainer/fp16.h
 %{_includedir}/nntrainer/util_simd.h
+# In the current version, Neon SIMD is enabled only when FP16 is enabled with AArch64.
+# This may be subject to change in future versions.
+%ifarch aarch64
 %if 0%{?enable_fp16}
 %{_includedir}/nntrainer/util_simd_neon.h
+%{_includedir}/nntrainer/blas_neon.h
+%{_includedir}/nntrainer/hgemm.h
+%{_includedir}/nntrainer/hgemm_util.h
+%endif
 %endif
+%{_includedir}/nntrainer/acti_func.h
+
 %files devel-static
 %{_libdir}/libnntrainer*.a
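Closing note on the fp16/AArch64 gating: the spec now installs the NEON and HGEMM headers only for aarch64 builds with fp16 enabled, which lines up with the -march=armv8.2-a+fp16 flag added in meson.build. That flag is what makes the half-precision arithmetic NEON intrinsics available; plain loads such as vld1q_f16 need only fp16 storage support. A minimal sketch, assuming a toolchain targeting armv8.2-a+fp16:

#include <arm_neon.h>

// Fused multiply-add on eight half-precision lanes: acc + a * b per lane.
// vfmaq_f16 is only defined when the fp16 vector-arithmetic extension is
// enabled, e.g. via -march=armv8.2-a+fp16.
float16x8_t fma8(float16x8_t acc, float16x8_t a, float16x8_t b) {
  return vfmaq_f16(acc, a, b);
}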