[Wait for #2663][Mixed Precision] Fix gradient clipping logic #2749

Open: wants to merge 52 commits into base: main

Commits (52):
a5d16a4  [SWAP] Add swap mode property (jihochu, Aug 31, 2023)
23e40da  [SWAP] Add inference mode (jihochu, Aug 31, 2023)
ba67088  [SWAP] Modify cache for inference mode (jihochu, Aug 31, 2023)
a568c93  [ hgemm ] Add hgemm experimental kernel (skykongkong8, Aug 1, 2024)
19da71d  [ Weight ] Add Var32 Tensor in Weight. (jijoongmoon, May 2, 2024)
5406860  [ Mixed ] Create weight with var32 tensor (jijoongmoon, May 7, 2024)
b4c663e  [ Layers ] Update Layers to support FP16 (jijoongmoon, May 7, 2024)
5444fa0  [ Test ] Mixed Precision Test Case (jijoongmoon, May 7, 2024)
292eb71  [ Optimizer ] Update Optimizer / Adam to support Mixed training (jijoongmoon, May 9, 2024)
ae868ef  [ Tensor ] add is_NaN check in Tensor (jijoongmoon, May 8, 2024)
e0596ef  [ Context ] Add loss scale in Context & using mse loss (jijoongmoon, May 11, 2024)
4b7a3ba  [ Mixed Precision ] Enable Mixed Precision (jijoongmoon, May 13, 2024)
76efb04  [ Tensor ] Add inifinity check in Tensor (jijoongmoon, May 14, 2024)
376e67a  [ MSE ] Fix for better MSE loss precision (jijoongmoon, May 17, 2024)
d9242f1  [ TEST ] Add Torch Mixed Precision Model Test (jijoongmoon, May 17, 2024)
7d664ff  [ TEST ] add torch input and output test data for mixed precision (jijoongmoon, May 20, 2024)
afc2757  [ TEST ] Add more unittest and fixes for mixed precsion (jijoongmoon, May 24, 2024)
8c39c64  [ Layer ] Update Conv2D to support Mixed Precision (jijoongmoon, May 29, 2024)
f38b831  [ Layer ] enable Mixed Precision in LSTM Layer (jijoongmoon, May 30, 2024)
d47102a  [ Model ] Add Execution Mode in Compile (jijoongmoon, May 31, 2024)
07b48ec  [ Layer ] Mixed Precision support for BN Layer (jijoongmoon, Jun 3, 2024)
b201807  [layer] enable mixed precision - reshape_layer (DonghakPark, May 30, 2024)
825f07c  [Layer] Enable mixed precision - pooling2d_layer (DonghakPark, Jun 3, 2024)
2184452  [ Model ] Fix the gradient clipping for the FP16 or Low bit Gradient (jijoongmoon, Jun 9, 2024)
81fd9cb  [ Layer ] Add mu and var backup up tensor. (jijoongmoon, Jun 9, 2024)
003a5ce  [ Layer ] prevent randomize when it restore the data (jijoongmoon, Jun 9, 2024)
57a03ab  [ Context ] add check if it needs restore previous data (jijoongmoon, Jun 9, 2024)
b540642  [ Tensor ] remove sscal to set zero. (jijoongmoon, Jun 9, 2024)
16e3a55  [ Mixed ] set initialize gradient in layers and bugfixes (jijoongmoon, Jun 10, 2024)
aedb11c  [ Mixed Training ] add is_mixed variable in weight (jijoongmoon, Jun 19, 2024)
cbefcd9  [ BUG FIX ] Fix bug for mixed precision (jijoongmoon, Jun 20, 2024)
d39f9d8  [ hgemm ] Use aligned memory allocation in transpose / padding gemm (skykongkong8, Jun 20, 2024)
57e2759  [TEST] using builddir/android_build_result to build test (jijoongmoon, Jul 2, 2024)
592253f  [Mixed Precision] Fix mixed precsion to use Tensor V2 (jijoongmoon, Jul 29, 2024)
913a7fe  temporary code for layer initialization (lhs8928, Mar 21, 2024)
1860cd2  [ SPEC ] chagne fp16 (jijoongmoon, Apr 22, 2024)
df4baa4  [ NNStreamer ] disable nnstreamer trainer (jijoongmoon, Apr 22, 2024)
faddcdd  [Tizen7.0] Tizen7.0 Backporting (EunjuYang, Aug 26, 2024)
ec06943  [bugfix] fix coverity issues (djeong20, Aug 29, 2024)
145a726  [ Tizen7.0 ] Include neuralnet.h in -dev header (EunjuYang, Sep 5, 2024)
e6830cc  [CI] Fix meson ubuntu ci build (DonghakPark, Aug 23, 2024)
19d7dc1  [Tizen7.0] Tizen7.0 Backporting (EunjuYang, Aug 26, 2024)
15cb248  [ Tizen7.0 ] Include some headers in -dev header for neuralnet.h (EunjuYang, Sep 11, 2024)
2ab0371  [enhance] Registering OpenCL kernels at cl_context (s-debadri, Sep 11, 2024)
1f6a2c0  [enhance/gpu] Removing layer_context dependency (s-debadri, Sep 13, 2024)
a182f84  [ FC ] update incremental_forwarding to support LoRA and multi-batch (EunjuYang, Sep 2, 2024)
710a160  [ LORA ] Bugfix in LoRA support in FC Layer (EunjuYang, Sep 5, 2024)
e0f5d3f  [bugfix] Fix memcheck in CacheLoader unit tests (djeong20, Sep 30, 2024)
c43937c  [gpu/enhance] Utility for registering Blas kernels during initialization (s-debadri, Sep 24, 2024)
f54120f  [ App ] Multi-Input Example Update (EunjuYang, Oct 2, 2024)
0f043a4  [ Print ] Update print result of model summary (EunjuYang, Oct 2, 2024)
2d4e347  [Mixed Precision] Fix gradient clipping logic (DonghakPark, Oct 8, 2024)
13 changes: 10 additions & 3 deletions .github/workflows/ubuntu_clean_meson_build.yml
@@ -25,11 +25,18 @@ jobs:
run: sudo apt-get update && sudo apt-get install -y gcc g++ pkg-config libopenblas-dev libiniparser-dev libjsoncpp-dev libcurl3-dev tensorflow2-lite-dev nnstreamer-dev libglib2.0-dev libgstreamer1.0-dev libgtest-dev ml-api-common-dev flatbuffers-compiler ml-inference-api-dev libunwind-dev
- name: install additional packages for features
run: sudo apt-get install -y python3-dev python3-numpy python3
- name: gcc version change
run: |
sudo add-apt-repository ppa:ubuntu-toolchain-r/test
sudo apt-get install build-essential
sudo apt update
sudo apt install -y gcc-13
sudo apt install -y g++-13
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 1000
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 1000
sudo update-alternatives --set gcc /usr/bin/gcc-13
- name: install build systems
run: sudo apt install meson ninja-build
- run: meson setup build/
env:
CC: gcc
- run: |
meson \
--buildtype=plain \
2 changes: 1 addition & 1 deletion Applications/KNN/jni/meson.build
@@ -15,4 +15,4 @@ e = executable('knn_sample',
install_dir: application_install_dir
)

test('app_knn', e, args: [nntr_app_resdir / 'KNN'])
test('app_knn', e, args: [nntr_app_resdir / 'KNN/'])
27 changes: 27 additions & 0 deletions Applications/Multi_input/Readme.md
@@ -0,0 +1,27 @@
# Multi_Input example

- This example demonstrates how to use the `multi_input` layer.
- NNTrainer supports networks that take multiple tensors as inputs.
- Users can create multiple `input` layers for the network with their own names and build the network accordingly.
- This code includes an example of training with the following network structure:

```
+-----------+
| output |
+-----------+
|
+---------------------------------------------------+
| flatten |
+---------------------------------------------------+
|
+---------------------------------------------------+
| concat0 |
+---------------------------------------------------+
| | |
+-----------+ +-----------+ +-----------+
| input 2 | | input 1 | | input 0 |
+-----------+ +-----------+ +-----------+

```

- **[Note]** Users should feed the multiple inputs in reverse order because the model is structured in a reversed manner internally (see the sketch below). This is a known issue, and we plan to address it soon.
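To make the reverse-order requirement concrete, here is a minimal user-side sketch (an illustration only, not code from this PR) that fills the per-input buffers by walking the declared input shapes backwards. The shapes mirror those used in `main.cpp` below (input0 = 1:2:2, input1 = 1:4:2, input2 = 1:8:2); the `fill_inputs` helper and the constant fill value are assumptions made for the sketch.

```cpp
// Illustrative sketch only: feed buffers in reverse order of the declared
// input layers (input0 -> 1:2:2, input1 -> 1:4:2, input2 -> 1:8:2).
#include <array>
#include <cstddef>

// Feature lengths (C * H * W) of input0, input1, input2, in declaration order.
constexpr std::array<std::size_t, 3> feature_lens = {1 * 2 * 2, 1 * 4 * 2,
                                                     1 * 8 * 2};

void fill_inputs(float **input, float value) {
  const std::size_t num_inputs = feature_lens.size();
  for (std::size_t i = 0; i < num_inputs; ++i) {
    // Buffer 0 receives input2's data, buffer 1 receives input1's, and
    // buffer 2 receives input0's, matching the reversed internal ordering.
    const std::size_t len = feature_lens[num_inputs - 1 - i];
    for (std::size_t j = 0; j < len; ++j)
      input[i][j] = value;
  }
}
```

The `MultiDataLoader::next()` change in this PR follows the same idea: it advances the buffer pointer forward while indexing `input_shapes` from the back.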
18 changes: 11 additions & 7 deletions Applications/Multi_input/jni/main.cpp
@@ -63,14 +63,18 @@ ModelHandle createMultiInputModel() {
layers.push_back(createLayer(
"input", {withKey("name", "input0"), withKey("input_shape", "1:2:2")}));
layers.push_back(createLayer(
"input", {withKey("name", "input1"), withKey("input_shape", "1:2:2")}));
"input", {withKey("name", "input1"), withKey("input_shape", "1:4:2")}));
layers.push_back(createLayer(
"input", {withKey("name", "input2"), withKey("input_shape", "1:2:2")}));
"input", {withKey("name", "input2"), withKey("input_shape", "1:8:2")}));

layers.push_back(
createLayer("concat", {withKey("name", "concat0"), withKey("axis", "3"),
createLayer("concat", {withKey("name", "concat0"), withKey("axis", "2"),
withKey("input_layers", "input0, input1, input2")}));

layers.push_back(
createLayer("flatten", {withKey("name", "flatten0"),
withKey("input_layers", "concat0")}));

layers.push_back(createLayer(
"fully_connected", {withKey("unit", 5), withKey("activation", "softmax")}));

@@ -123,16 +123,16 @@ std::array<UserDataType, 1>
createFakeMultiDataGenerator(unsigned int batch_size,
unsigned int simulated_data_size) {
UserDataType train_data(new nntrainer::util::MultiDataLoader(
{{batch_size, 1, 2, 2}, {batch_size, 1, 2, 2}, {batch_size, 1, 2, 2}},
{{batch_size, 1, 2, 2}, {batch_size, 1, 4, 2}, {batch_size, 1, 8, 2}},
{{batch_size, 1, 1, 5}}, simulated_data_size));

return {std::move(train_data)};
}

int main(int argc, char *argv[]) {
unsigned int total_data_size = 16;
unsigned int batch_size = 2;
unsigned int epoch = 2;
unsigned int total_data_size = 32;
unsigned int batch_size = 4;
unsigned int epoch = 10;

std::array<UserDataType, 1> user_datas;

7 changes: 4 additions & 3 deletions Applications/Multi_input/jni/multi_loader.cpp
@@ -78,10 +78,11 @@ void MultiDataLoader::next(float **input, float **label, bool *last) {
};

float **cur_input_tensor = input;
const auto num_input = input_shapes.size() - 1;
for (unsigned int i = 0; i < input_shapes.size(); ++i) {
fill_input(*cur_input_tensor, input_shapes.at(i).getFeatureLen(),
indicies[count]);
cur_input_tensor++;
fill_input(*cur_input_tensor,
input_shapes.at(num_input - i).getFeatureLen(), indicies[count]);
++cur_input_tensor;
}

float **cur_label_tensor = label;
8 changes: 7 additions & 1 deletion api/ccapi/include/layer.h
@@ -131,6 +131,11 @@ class Layer {
*/
virtual const std::string getType() const = 0;

/**
* @brief Initialize layer
*/
virtual void initialize() = 0;

/**
* @brief Default allowed properties
* - input shape : string
@@ -261,7 +266,8 @@ createLayer(const LayerType &type,
*/
std::unique_ptr<Layer>
createLayer(const std::string &type,
const std::vector<std::string> &properties = {});
const std::vector<std::string> &properties = {},
const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU);

/**
* @brief General Layer Factory function to register Layer
17 changes: 9 additions & 8 deletions api/ccapi/include/model.h
@@ -136,7 +136,7 @@ class Model {
* @retval #ML_ERROR_NONE Successful.
* @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
*/
virtual int compile() = 0;
virtual int compile(ExecutionMode exec_mode_ = ExecutionMode::TRAIN) = 0;

/**
* @brief Initialize Network. This should be called after setting the
@@ -188,13 +188,14 @@
* @details This function accepts vector of properties in the format -
* { std::string property_name, void * property_val, ...}
*/
virtual int train(const std::vector<std::string> &values = {},
std::function<bool(void *)> stop_cb =
[](void *stop_user_data) { return false; },
void *stop_user_data = nullptr,
std::function<void(void *)> epoch_complete_cb =
[](void *epoch_user_data) { return false; },
void *epoch_user_data = nullptr) = 0;
virtual int train(
const std::vector<std::string> &values = {},
std::function<bool(void *)> stop_cb =
[](void *stop_user_data) { return false; },
void *stop_user_data = nullptr,
std::function<void(void *)> epoch_complete_cb =
[](void *epoch_user_data) { return false; },
void *epoch_user_data = nullptr) = 0;

/**
* @brief Run Model train with callback function by user
5 changes: 3 additions & 2 deletions api/ccapi/src/factory.cpp
@@ -40,8 +40,9 @@ std::unique_ptr<Layer> createLayer(const LayerType &type,
* @brief Factory creator with constructor for layer
*/
std::unique_ptr<Layer> createLayer(const std::string &type,
const std::vector<std::string> &properties) {
return nntrainer::createLayerNode(type, properties);
const std::vector<std::string> &properties,
const LayerComputeEngine &compute_engine) {
return nntrainer::createLayerNode(type, properties, compute_engine);
}

std::unique_ptr<Optimizer>
26 changes: 26 additions & 0 deletions debian/nntrainer-dev.install
@@ -17,13 +17,16 @@
/usr/include/nntrainer/blas_interface.h
/usr/include/nntrainer/var_grad.h
/usr/include/nntrainer/weight.h
/usr/include/nntrainer/blas_avx.h
# todo: update dataset headers
/usr/include/nntrainer/databuffer.h
/usr/include/nntrainer/databuffer_factory.h
# layer headers
/usr/include/nntrainer/layer_context.h
/usr/include/nntrainer/layer_devel.h
/usr/include/nntrainer/layer_impl.h
/usr/include/nntrainer/loss_layer.h
/usr/include/nntrainer/acti_func.h
# custom layer kits
/usr/include/nntrainer/app_context.h
# logger
@@ -41,3 +44,26 @@
/usr/include/nntrainer/util_func.h
/usr/include/nntrainer/fp16.h
/usr/include/nntrainer/util_simd.h
# model
/usr/include/nntrainer/neuralnet.h
## neuralnet.h : forwarding() / backwarding() support
/usr/include/nntrainer/compiler_fwd.h
/usr/include/nntrainer/dynamic_training_optimization.h
/usr/include/nntrainer/layer_node.h
/usr/include/nntrainer/graph_node.h
/usr/include/nntrainer/model_common_properties.h
/usr/include/nntrainer/network_graph.h
/usr/include/nntrainer/graph_core.h
/usr/include/nntrainer/graph_node.h
/usr/include/nntrainer/manager.h
/usr/include/nntrainer/basic_planner.h
/usr/include/nntrainer/memory_planner.h
/usr/include/nntrainer/tensor_pool.h
/usr/include/nntrainer/cache_loader.h
/usr/include/nntrainer/task.h
/usr/include/nntrainer/task_executor.h
/usr/include/nntrainer/cache_pool.h
/usr/include/nntrainer/cache_elem.h
/usr/include/nntrainer/memory_pool.h
/usr/include/nntrainer/swap_device.h
/usr/include/nntrainer/optimizer_wrapped.h
18 changes: 12 additions & 6 deletions meson.build
@@ -68,9 +68,19 @@ warning_c_flags = [
'-Wno-error=varargs'
]

arch = host_machine.cpu_family()

if get_option('enable-avx')
extra_defines += '-DUSE_AVX=1'
if get_option('platform') == 'tizen'
add_project_arguments(['-mavx2'], language: ['c','cpp'])
else
add_project_arguments(['-march=native'], language: ['c','cpp'])
endif
message('-march=native added for AVX hardware acceleration.')
endif

if get_option('enable-fp16')
arch = host_machine.cpu_family()
if get_option('platform') == 'android'
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
extra_defines += '-DENABLE_FP16=1'
@@ -88,6 +98,7 @@ if get_option('enable-fp16')
# comaptible with armv8.0 machines.
if cxx.has_argument('-mfp16-format=ieee')
add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
add_project_arguments('-march=armv8.2-a+fp16', language: ['c', 'cpp'])
else
message ('The compiler does not support -mfp16-format=ieee. However, according to https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Half-Precision.html, gcc may use IEEE fp16 anyway. Thus, we will proceed without the option for FP16 support.')
endif
@@ -109,11 +120,6 @@ if get_option('enable-fp16')
if cc.version().version_compare('>=12.1.0')
message ('Float16 for x86_64 enabled. Modern gcc-x64 generally supports float16 with _Float16.')
extra_defines += '-DENABLE_FP16=1'
if get_option('enable-avx')
extra_defines += '-DUSE_AVX=1'
add_project_arguments(['-march=native'], language: ['c','cpp'])
message('-march=native added for AVX hardware acceleration.')
endif
else
warning ('Float16 for x86_64 enabled. However, software emulation is applied for fp16, making it slower and inconsistent. Use GCC 12+ for FP16 support. This build will probably fail unless you bring a compiler that supports fp16 for x64.')
endif
2 changes: 1 addition & 1 deletion meson_options.txt
@@ -41,7 +41,7 @@ option('enable-fp16', type: 'boolean', value: false)
option('enable-cublas', type: 'boolean', value: false)
option('enable-openmp', type: 'boolean', value: true)
option('enable-neon', type: 'boolean', value: false)
option('enable-avx', type: 'boolean', value: false)
option('enable-avx', type: 'boolean', value: true)
option('enable-opencl', type: 'boolean', value: false)

# ml-api dependency (to enable, install capi-inference from github.com/nnstreamer/api )
2 changes: 1 addition & 1 deletion nnstreamer/meson.build
@@ -3,5 +3,5 @@ if get_option('enable-nnstreamer-tensor-filter').enabled()
subdir('tensor_filter')
endif
if get_option('enable-nnstreamer-tensor-trainer').enabled()
subdir('tensor_trainer')
subdir('tensor_trainer')
endif