[Wait for #2615] Enable Mixed Precision Training in NNTrainer @open sesame 11/09 15:18 #2663

Merged: 32 commits, Nov 11, 2024. Changes from all commits are shown below.

Commits:
- b3bb95a [SWAP] Add swap mode property (jihochu, Aug 31, 2023)
- 9f168a5 [SWAP] Add inference mode (jihochu, Aug 31, 2023)
- 7e74984 [SWAP] Modify cache for inference mode (jihochu, Aug 31, 2023)
- 239ca4e [ Weight ] Add Var32 Tensor in Weight. (jijoongmoon, May 2, 2024)
- 649c92c [ Mixed ] Create weight with var32 tensor (jijoongmoon, May 7, 2024)
- 4e37e89 [ Layers ] Update Layers to support FP16 (jijoongmoon, May 7, 2024)
- b2c2e11 [ Test ] Mixed Precision Test Case (jijoongmoon, May 7, 2024)
- f669054 [ Optimizer ] Update Optimizer / Adam to support Mixed training (jijoongmoon, May 9, 2024)
- 122d86c [ Tensor ] add is_NaN check in Tensor (jijoongmoon, May 8, 2024)
- 8afa85b [ Context ] Add loss scale in Context & using mse loss (jijoongmoon, May 11, 2024)
- 757cea7 [ Mixed Precision ] Enable Mixed Precision (jijoongmoon, May 13, 2024)
- 40cf748 [ Tensor ] Add inifinity check in Tensor (jijoongmoon, May 14, 2024)
- e104427 [ MSE ] Fix for better MSE loss precision (jijoongmoon, May 17, 2024)
- 6cf1a09 [ TEST ] Add Torch Mixed Precision Model Test (jijoongmoon, May 17, 2024)
- a4dada0 [ TEST ] add torch input and output test data for mixed precision (jijoongmoon, May 20, 2024)
- 1e40557 [ TEST ] Add more unittest and fixes for mixed precsion (jijoongmoon, May 24, 2024)
- ae24fa3 [ Layer ] Update Conv2D to support Mixed Precision (jijoongmoon, May 29, 2024)
- e319919 [ Layer ] enable Mixed Precision in LSTM Layer (jijoongmoon, May 30, 2024)
- 6f1e370 [ Model ] Add Execution Mode in Compile (jijoongmoon, May 31, 2024)
- 58bdb58 [ Layer ] Mixed Precision support for BN Layer (jijoongmoon, Jun 3, 2024)
- 766481d [layer] enable mixed precision - reshape_layer (DonghakPark, May 30, 2024)
- 1b3f3af [Layer] Enable mixed precision - pooling2d_layer (DonghakPark, Jun 3, 2024)
- e050e83 [ Model ] Fix the gradient clipping for the FP16 or Low bit Gradient (jijoongmoon, Jun 9, 2024)
- 224b3e5 [ Layer ] Add mu and var backup up tensor. (jijoongmoon, Jun 9, 2024)
- 13a6d1e [ Layer ] prevent randomize when it restore the data (jijoongmoon, Jun 9, 2024)
- b64a8c1 [ Context ] add check if it needs restore previous data (jijoongmoon, Jun 9, 2024)
- bd5ff2d [ Tensor ] remove sscal to set zero. (jijoongmoon, Jun 9, 2024)
- c4fd54f [ Mixed ] set initialize gradient in layers and bugfixes (jijoongmoon, Jun 10, 2024)
- 59ad4fc [ Mixed Training ] add is_mixed variable in weight (jijoongmoon, Jun 19, 2024)
- 51fe049 [ BUG FIX ] Fix bug for mixed precision (jijoongmoon, Jun 20, 2024)
- f024585 [TEST] using builddir/android_build_result to build test (jijoongmoon, Jul 2, 2024)
- ea4dd22 [Mixed Precision] Fix mixed precsion to use Tensor V2 (jijoongmoon, Jul 29, 2024)
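Taken together, the commits follow the standard mixed-precision training recipe: keep an FP32 master copy of each FP16 weight (the Var32 tensor), scale the loss before backpropagation, check gradients for NaN/inf, and skip the optimizer step on overflow. The following is a minimal, framework-free C++ sketch of that control flow; it is not NNTrainer's API, and every name in it is hypothetical.

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a mixed-precision weight: an FP16 copy is used
// for forward/backward, while this FP32 master copy (the role of the Var32
// tensor in this PR) receives the optimizer updates.
struct MixedWeight {
  std::vector<float> fp32_master;
};

// One loss-scaled step. `grad` holds gradients of a loss that was multiplied
// by loss_scale before backward. Returns false when the step is skipped
// because a gradient overflowed, mirroring the is_NaN / infinity checks.
bool train_step(MixedWeight &w, const std::vector<float> &grad,
                float &loss_scale, float lr) {
  for (float g : grad) {
    if (std::isnan(g) || std::isinf(g)) {
      loss_scale *= 0.5f; // overflow: shrink the scale and skip this update
      return false;
    }
  }
  for (std::size_t i = 0; i < grad.size(); ++i)
    w.fp32_master[i] -= lr * (grad[i] / loss_scale); // unscale, then apply
  return true; // the FP16 copy would be refreshed from fp32_master afterwards
}
```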
Applications/KNN/jni/meson.build (1 addition, 1 deletion)

```diff
@@ -15,4 +15,4 @@ e = executable('knn_sample',
   install_dir: application_install_dir
 )

-test('app_knn', e, args: [nntr_app_resdir / 'KNN'])
+test('app_knn', e, args: [nntr_app_resdir / 'KNN/'])
```
api/ccapi/include/model.h (9 additions, 8 deletions)

```diff
@@ -136,7 +136,7 @@ class Model {
    * @retval #ML_ERROR_NONE Successful.
    * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
    */
-  virtual int compile() = 0;
+  virtual int compile(ExecutionMode exec_mode_ = ExecutionMode::TRAIN) = 0;

   /**
    * @brief Initialize Network. This should be called after setting the
@@ -188,13 +188,14 @@ class Model {
    * @details This function accepts vector of properties in the format -
    * { std::string property_name, void * property_val, ...}
    */
-  virtual int train(const std::vector<std::string> &values = {},
-                    std::function<bool(void *)> stop_cb =
-                      [](void *stop_user_data) { return false; },
-                    void *stop_user_data = nullptr,
-                    std::function<void(void *)> epoch_complete_cb =
-                      [](void *epoch_user_data) { return false; },
-                    void *epoch_user_data = nullptr) = 0;
+  virtual int train(
+    const std::vector<std::string> &values = {},
+    std::function<bool(void *)> stop_cb =
+      [](void *stop_user_data) { return false; },
+    void *stop_user_data = nullptr,
+    std::function<void(void *)> epoch_complete_cb =
+      [](void *epoch_user_data) { return false; },
+    void *epoch_user_data = nullptr) = 0;

   /**
    * @brief Run Model train with callback function by user
```
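With this change a caller selects the execution mode when compiling instead of always building the training graph. Below is a hedged usage sketch against the ccapi; `createModel` and the `ExecutionMode::TRAIN` default come from the header above, while the `INFERENCE` value, the include path, and the omitted model setup are assumptions.

```cpp
#include <model.h>

int main() {
  auto model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
  // ... add layers or load a model description here ...

  // New in this PR: pick the execution mode at compile time.
  model->compile(ml::train::ExecutionMode::TRAIN);
  model->initialize();

  // The callback defaults are unchanged, so a stop callback stays optional:
  model->train({}, [](void *) { return false; } /* stop_cb */);
  return 0;
}
```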
debian/nntrainer-dev.install (2 additions)

```diff
@@ -17,6 +17,7 @@
 /usr/include/nntrainer/blas_interface.h
 /usr/include/nntrainer/var_grad.h
 /usr/include/nntrainer/weight.h
+/usr/include/nntrainer/blas_avx.h
 # todo: update dataset headers
 /usr/include/nntrainer/databuffer.h
 /usr/include/nntrainer/databuffer_factory.h
@@ -26,6 +27,7 @@
 /usr/include/nntrainer/layer_impl.h
 /usr/include/nntrainer/operation_layer.h
 /usr/include/nntrainer/acti_func.h
+/usr/include/nntrainer/loss_layer.h
 # custom layer kits
 /usr/include/nntrainer/app_context.h
 # logger
```
meson.build (15 additions, 6 deletions)

```diff
@@ -68,9 +68,23 @@ warning_c_flags = [
   '-Wno-error=varargs'
 ]

+arch = host_machine.cpu_family()
+
+target = target_machine.cpu_family()
+
+if get_option('enable-avx')
+  if get_option('platform') != 'android'
+    if target == 'x86_64' or target == 'x86'
+      extra_defines += '-DUSE_AVX=1'
+      add_project_arguments(['-march=native'], language: ['c','cpp'])
+      add_project_arguments(['-mavx2'], language: ['c','cpp'])
+      message('-march=native added for AVX hardware acceleration.')
+    endif
+    message('This arch does not support avx2')
+  endif
+endif
+
 if get_option('enable-fp16')
-  arch = host_machine.cpu_family()
   if get_option('platform') == 'android'
     add_project_arguments('-mfp16-format=ieee', language: ['c', 'cpp'])
     extra_defines += '-DENABLE_FP16=1'
@@ -110,11 +124,6 @@ if get_option('enable-fp16')
     if cc.version().version_compare('>=12.1.0')
       message ('Float16 for x86_64 enabled. Modern gcc-x64 generally supports float16 with _Float16.')
       extra_defines += '-DENABLE_FP16=1'
-      if get_option('enable-avx')
-        extra_defines += '-DUSE_AVX=1'
-        add_project_arguments(['-march=native'], language: ['c','cpp'])
-        message('-march=native added for AVX hardware acceleration.')
-      endif
     else
       warning ('Float16 for x86_64 enabled. However, software emulation is applied for fp16, making it slower and inconsistent. Use GCC 12+ for FP16 support. This build will probably fail unless you bring a compiler that supports fp16 for x64.')
     endif
```
meson_options.txt (1 addition, 1 deletion)

```diff
@@ -41,7 +41,7 @@ option('enable-fp16', type: 'boolean', value: false)
 option('enable-cublas', type: 'boolean', value: false)
 option('enable-openmp', type: 'boolean', value: true)
 option('enable-neon', type: 'boolean', value: false)
-option('enable-avx', type: 'boolean', value: false)
+option('enable-avx', type: 'boolean', value: true)
 option('enable-opencl', type: 'boolean', value: false)

 # ml-api dependency (to enable, install capi-inference from github.com/nnstreamer/api )
```

Review thread on this line:

> **Member:** Pure question: is enabling AVX going to be the default from now on?
>
> **Collaborator (author):** I will modify the meson build so AVX is used only if it is available.
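The author's reply points at gating AVX on actual compiler support rather than assuming it. A possible meson sketch of that guard, shown only as an illustration of the compiler-probe pattern, not the code that eventually landed:

```meson
# Honor enable-avx only when the C compiler actually accepts -mavx2.
cc = meson.get_compiler('c')
if get_option('enable-avx') and cc.has_argument('-mavx2')
  extra_defines += '-DUSE_AVX=1'
  add_project_arguments(['-mavx2'], language: ['c', 'cpp'])
else
  message('AVX2 unavailable; building without AVX acceleration.')
endif
```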
nntrainer/app_context.cpp (3 additions, 1 deletion)

```diff
@@ -559,6 +559,7 @@ AppContext::registerPluggableFromDirectory(const std::string &base_path) {
   struct dirent *entry;

   std::vector<int> keys;
+
   while ((entry = readdir(dir)) != NULL) {
     if (endswith(entry->d_name, solib_suffix)) {
       if (endswith(entry->d_name, layerlib_suffix)) {
@@ -581,7 +582,8 @@
     }
   }

-  closedir(dir);
+  if (dir != NULL)
+    closedir(dir);

   return keys;
 }
```
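The guard brings the function in line with the usual POSIX dirent pattern: opendir may return NULL, and only a successfully opened handle should be passed to closedir. A minimal self-contained version of that pattern (names are illustrative, not NNTrainer's):

```cpp
#include <cstdio>
#include <dirent.h>

void list_dir(const char *path) {
  DIR *dir = opendir(path);
  if (dir == NULL)
    return; // open failed: nothing to read, nothing to close

  struct dirent *entry;
  while ((entry = readdir(dir)) != NULL)
    std::printf("%s\n", entry->d_name);

  closedir(dir); // dir is known to be non-NULL here
}
```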
nntrainer/graph/graph_core.cpp (9 additions)

```diff
@@ -35,6 +35,10 @@ GraphCore::getSortedNode(unsigned int ith) const {
   return Sorted.at(ith);
 }

+const unsigned int GraphCore::getSortedNodeIdx(const std::string &name) const {
+  return sorted_node_map.at(name);
+}
+
 void GraphCore::makeAdjacencyList(
   std::vector<std::list<std::shared_ptr<GraphNode>>> &adj) {
   /** initialize the adj list */
@@ -93,6 +97,11 @@ void GraphCore::topologicalSort() {

   if (Sorted.size() != node_list.size())
     throw std::runtime_error("Internal error in topologicalSort");
+  unsigned int idx = 0;
+  for (auto &n : Sorted) {
+    sorted_node_map[n->getName()] = idx;
+    idx++;
+  }
 }

 const std::shared_ptr<GraphNode> &
```
nntrainer/graph/graph_core.h (11 additions, 3 deletions)

```diff
@@ -91,6 +91,13 @@ class GraphCore {
    */
   const std::shared_ptr<GraphNode> &getSortedNode(unsigned int ith) const;

+  /**
+   * @brief getter of Sorted GraphNode index with name
+   * @param[in] layer name
+   * @ret index
+   */
+  const unsigned int getSortedNodeIdx(const std::string &name) const;
+
   /**
    * @brief getter of GraphNode with node name
    * @param[in] node name
@@ -249,9 +256,10 @@ class GraphCore {
 private:
   std::vector<std::shared_ptr<GraphNode>> input_list;
   std::vector<std::shared_ptr<GraphNode>> output_list;
-  std::vector<std::shared_ptr<GraphNode>>
-    node_list; /**< Unordered Node List */
-  std::unordered_map<std::string, int> node_map; /**< Unordered Node map */
+  std::vector<std::shared_ptr<GraphNode>> node_list; /**< Unordered Node List */
+  std::unordered_map<std::string, int> node_map; /**< Unordered Node map */
+  std::unordered_map<std::string, int>
+    sorted_node_map; /**< Unordered Node map */
   std::vector<std::shared_ptr<GraphNode>> Sorted; /**< Ordered Node List */
   bool sorted; /** if the node_list is sorted */
```

Review thread on sorted_node_map:

> **Contributor:** Quick question: what does this sorted_node_map do? Is it simply the sorted version of the existing node_map?
>
> **Collaborator (author):** Actually, we have two node lists at compile time: the node list defined by the user and the node list ordered by topological sort. Until compilation finishes, we do not know whether the user-defined node list will be used, so we keep both lists, and therefore we need two node-list maps as well.
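As the author explains, the graph keeps a name-to-index map per ordering. A self-contained sketch of that design, with only `getSortedNodeIdx` taken from the diff above and everything else simplified for illustration:

```cpp
#include <string>
#include <unordered_map>
#include <vector>

// Simplified analog of GraphCore's two orderings: the user-defined list and
// the topologically sorted list, each with its own name -> index map.
struct TinyGraph {
  std::vector<std::string> node_list;                   // user-defined order
  std::unordered_map<std::string, int> node_map;        // name -> user index
  std::vector<std::string> sorted;                      // topological order
  std::unordered_map<std::string, int> sorted_node_map; // name -> sorted index

  // Mirrors GraphCore::getSortedNodeIdx: O(1) lookup of a node's position
  // in execution order, by name.
  unsigned int getSortedNodeIdx(const std::string &name) const {
    return sorted_node_map.at(name);
  }
};
```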
