Fix im2col assignment in TrainLib_Deployer
dnadalini committed Sep 25, 2023
1 parent de6a17b commit 8e3b344
Showing 4 changed files with 37 additions and 41 deletions.
2 changes: 1 addition & 1 deletion tools/TrainLib_Deployer/TrainLib_Deployer.py
@@ -40,7 +40,7 @@

# GENERAL PROPERTIES
project_name = 'Test_CNN'
project_path = '../../../../TrainLib_Examples/'
project_path = '../../../DNN_Tests/'
proj_folder = project_path + project_name + '/'
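For reference, a quick sanity check (no new behavior, just evaluating the lines above): with the updated project_path, the deployment folder resolves as follows.

project_name = 'Test_CNN'
project_path = '../../../DNN_Tests/'
proj_folder = project_path + project_name + '/'   # -> '../../../DNN_Tests/Test_CNN/'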

# TRAINING PROPERTIES
48 changes: 25 additions & 23 deletions tools/TrainLib_Deployer/utils/deployment_utils.py
@@ -103,7 +103,7 @@ def compute_im2col_memocc_bytes(layers_l, in_ch_l, out_ch_l, hk_l, wk_l, hin_l,
hout = math.floor( (hin_l[layer]-hk_l[layer]+2*h_pad_l[layer]+h_str_l[layer])/h_str_l[layer] )
wout = math.floor( (win_l[layer]-wk_l[layer]+2*w_pad_l[layer]+w_str_l[layer])/w_str_l[layer] )
# Find max im2col size
if layers_l[layer] == 'conv2d' or layers_l[layer] == 'DW':
if layers_l[layer] == 'conv2d': # or layers_l[layer] == 'DW':
im2col_size = 0
size_FW = hk_l[layer] * wk_l[layer] * in_ch_l[layer] * hout * wout * byte_size
size_BW = out_ch_l[layer] * hk_l[layer] * wk_l[layer] * hin_l[layer] * win_l[layer] * byte_size
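For illustration, here is a minimal standalone sketch (hypothetical layer sizes, not taken from the repository) of how the output dimensions and the forward/backward im2col buffer sizes follow from the formulas above for a single conv2d layer; the final max() is an assumption about how the largest buffer is selected.

import math

# Hypothetical shapes for one conv2d layer (illustration only)
hin, win = 16, 16            # input height/width
hk, wk = 3, 3                # kernel height/width
in_ch, out_ch = 8, 8
h_pad, w_pad = 0, 0
h_str, w_str = 1, 1
byte_size = 4                # FP32

# Output sizes, as computed in compute_im2col_memocc_bytes
hout = math.floor((hin - hk + 2*h_pad + h_str) / h_str)    # 14
wout = math.floor((win - wk + 2*w_pad + w_str) / w_str)    # 14

# Forward and backward im2col buffer sizes in bytes
size_FW = hk * wk * in_ch * hout * wout * byte_size        # 3*3*8*14*14*4 = 56448
size_BW = out_ch * hk * wk * hin * win * byte_size         # 8*3*3*16*16*4 = 73728
im2col_size = max(size_FW, size_BW)                        # assumed: the larger buffer is kept

With the conditional narrowed to conv2d, DW layers no longer contribute to this maximum.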
@@ -289,29 +289,31 @@ def GenerateMakefile(proj_folder_path, project_name, layers_l, NUM_CORES, data_t

f.write('# SOURCES\n')
if check_FP32 == True:
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_dw_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_linear_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_dw_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_pooling_fp32.c\n\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_pooling_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_residual_fp32.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp32.c\n\n')
if check_FP16 == True:
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_dw_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv2d_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_im2col_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_linear_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_pw_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_conv_dw_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_losses_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_matmul_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_optimizers_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_act_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_pooling_fp16.c\n\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_pooling_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_residual_fp16.c\n')
f.write('APP_SRCS += $(TRAIN_LIB_SRCS)/pulp_train_utils_fp16.c\n\n')
# if (check_FP16 and check_FP32) == False:
# print("[deployment_utils.GenerateMakefile] Data format not implemented!!\n")
# exit()
@@ -356,6 +358,13 @@ def GenerateGM(proj_folder_path, project_name,
f.write("import math\n")
f.write("\n")

# Define hyperparameters
f.write("# Define hyperparameters\n")
f.write("learning_rate = "+str(learning_rate)+"\n")
f.write("batch_size = "+str(batch_size)+"\n")
f.write("epochs = "+str(epochs)+"\n")
f.write("\n")

# Write sizes
for layer in range(len(layers_l)):
f.write("# LAYER "+str(layer)+" SIZES\n")
@@ -391,13 +400,6 @@
f.write("f.write('#define Tpad_W_l"+str(layer)+" '+str(l"+str(layer)+"_wpad)+'\\n')\n")
f.write("f.close()\n\n")

# Define hyperparameters
f.write("# Define hyperparameters\n")
f.write("learning_rate = "+str(learning_rate)+"\n")
f.write("batch_size = "+str(batch_size)+"\n")
f.write("epochs = "+str(epochs)+"\n")
f.write("\n")

# Write hyperparameters to header
f.write("f = open('init-defines.h', 'a')\n")
f.write("f.write('\\n// HYPERPARAMETERS\\n')\n")
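As a rough illustration of what GenerateGM emits after this change (hyperparameter values are hypothetical; the real ones come from the Deployer configuration), the top of the generated Golden Model script now defines the hyperparameters before the per-layer sizes and before the init-defines.h writes:

# Define hyperparameters
learning_rate = 0.001        # hypothetical value
batch_size = 1               # hypothetical value
epochs = 5                   # hypothetical value

# LAYER 0 SIZES
# ... per-layer size definitions and init-defines.h writes follow ...

f = open('init-defines.h', 'a')
f.write('\n// HYPERPARAMETERS\n')
# ... hyperparameter defines are appended here (not shown) ...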
@@ -825,7 +827,7 @@ def GenerateNet(proj_folder_path, project_name,
im2col_byte_length = 0
im2col_max_data_type = 'FP32'
for layer in range(len(layers_l)):
if layers_l[layer] == 'conv2d' or layers_l[layer] == 'DW':
if layers_l[layer] == 'conv2d': # or layers_l[layer] == 'DW':
if data_type_l[layer] == 'FP32':
im2col_byte_length = 4
elif data_type_l[layer] == 'FP16':
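A minimal sketch (hypothetical layer lists; the FP16 branch body and the handling of im2col_max_data_type are assumptions, since the hunk is truncated here) of the buffer-sizing pattern GenerateNet applies after this change, where only conv2d layers reserve im2col space:

# Hypothetical network description (illustration only)
layers_l    = ['conv2d', 'DW', 'linear']
data_type_l = ['FP32', 'FP16', 'FP32']

im2col_byte_length = 0
im2col_max_data_type = 'FP32'
for layer in range(len(layers_l)):
    if layers_l[layer] == 'conv2d':            # DW layers no longer count towards the im2col buffer
        if data_type_l[layer] == 'FP32':
            im2col_byte_length = 4
        elif data_type_l[layer] == 'FP16':
            im2col_byte_length = 2             # assumption: 2-byte FP16 elements
        # im2col_max_data_type and the per-layer maximum are updated in the full loop (not shown)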
20 changes: 10 additions & 10 deletions tools/TrainLib_Deployer/utils/net_templates.py
@@ -283,17 +283,17 @@ def DW_config_template(layer_number, pad_h, pad_w, stride_h, stride_w, skip_in_g
template += " l"+str(layer_number)+"_args.Rpad = "+str(pad_w)+";\n"
template += " l"+str(layer_number)+"_args.Upad = "+str(pad_h)+";\n"
template += " l"+str(layer_number)+"_args.Dpad = "+str(pad_h)+";\n"
if DATA_TYPE == 'FP32':
template += " l"+str(layer_number)+"_args.i2c_buffer = (float*) im2col_buffer;\n"
elif DATA_TYPE == 'FP16':
template += " l"+str(layer_number)+"_args.i2c_buffer = (fp16*) im2col_buffer;\n"
else:
print("[net_templates.DW_config_template]: Invalid data type!")
exit()
#if DATA_TYPE == 'FP32':
# template += " l"+str(layer_number)+"_args.i2c_buffer = (float*) im2col_buffer;\n"
#elif DATA_TYPE == 'FP16':
# template += " l"+str(layer_number)+"_args.i2c_buffer = (fp16*) im2col_buffer;\n"
#else:
# print("[net_templates.DW_config_template]: Invalid data type!")
# exit()
template += " l"+str(layer_number)+"_args.HWC = 0;\n"
template += " l"+str(layer_number)+"_args.opt_matmul_type_fw = MATMUL_TYPE_FW_L"+str(layer_number)+";\n"
template += " l"+str(layer_number)+"_args.opt_matmul_type_wg = MATMUL_TYPE_WG_L"+str(layer_number)+";\n"
template += " l"+str(layer_number)+"_args.opt_matmul_type_ig = MATMUL_TYPE_IG_L"+str(layer_number)+";\n"
#template += " l"+str(layer_number)+"_args.opt_matmul_type_fw = MATMUL_TYPE_FW_L"+str(layer_number)+";\n"
#template += " l"+str(layer_number)+"_args.opt_matmul_type_wg = MATMUL_TYPE_WG_L"+str(layer_number)+";\n"
#template += " l"+str(layer_number)+"_args.opt_matmul_type_ig = MATMUL_TYPE_IG_L"+str(layer_number)+";\n"
return template

def PW_config_template(layer_number, skip_in_grad, DATA_TYPE):
8 changes: 1 addition & 7 deletions tools/memory_footprint_tool/memory_footprint_eval.py
@@ -154,7 +154,6 @@
# Compute DW memory occupation (FORWARD)
in_act = dw_in_H * dw_in_W * dw_inout_ch
ker = dw_ker_H * dw_ker_W * dw_inout_ch
im2colF = dw_ker_H * dw_ker_W * dw_inout_ch * (dw_in_H-dw_ker_H+1) * (dw_in_W-dw_ker_W+1)
out_act = dw_inout_ch * (dw_in_H-dw_ker_H+1) * (dw_in_W-dw_ker_W+1)
tot_FW = in_act + ker + im2colF + out_act
f.write("-------------------------------------------\n")
@@ -167,38 +166,33 @@
f.write("-------------------------------------------\n")
f.write("| ### FORWARD ###\n|\n")
f.write("| IN: \t\t\t\t{} ({} bytes)\n".format(in_act, in_act*data_size))
f.write("| IM2COL BUFFER: \t{} ({} bytes)\n".format(im2colF, im2colF*data_size))
f.write("| KER: \t\t\t\t{} ({} bytes)\n".format(ker, ker*data_size))
f.write("| OUT: \t\t\t\t{} ({} bytes)\n".format(out_act, out_act*data_size))
f.write("| \n| TOTAL FORWARD: \t{} ({} bytes)\n".format(tot_FW, tot_FW*data_size))
f.write("-------------------------------------------\n")
# Compute DW memory occupation (WEIGHT GRADIENT)
in_act = dw_in_H * dw_in_W * dw_inout_ch
ker = dw_ker_H * dw_ker_W * dw_inout_ch
im2colW = dw_ker_H * dw_ker_W * dw_inout_ch * (dw_in_H-dw_ker_H+1) * (dw_in_W-dw_ker_W+1)
out_act = dw_inout_ch * (dw_in_H-dw_ker_H+1) * (dw_in_W-dw_ker_W+1)
tot_WGT = in_act + ker + im2colW + out_act
f.write("| ### WEIGHT GRADIENT ###\n|\n")
f.write("| IN: \t\t\t\t{} ({} bytes)\n".format(in_act, in_act*data_size))
f.write("| IM2COL BUFFER: \t{} ({} bytes)\n".format(im2colW, im2colW*data_size))
f.write("| KER: \t\t\t\t{} ({} bytes)\n".format(ker, ker*data_size))
f.write("| OUT DIFF: \t\t{} ({} bytes)\n".format(out_act, out_act*data_size))
f.write("| \n| TOTAL WGT GRAD: \t{} ({} bytes)\n".format(tot_WGT, tot_WGT*data_size))
f.write("-------------------------------------------\n")
# Compute DW memory occupation (IN GRADIENT)
in_act = dw_in_H * dw_in_W * dw_inout_ch
ker = dw_ker_H * dw_ker_W * dw_inout_ch
im2colI = dw_in_H * dw_in_W * dw_inout_ch * dw_ker_H * dw_ker_W
out_act = dw_inout_ch * (dw_in_H-dw_ker_H+1) * (dw_in_W-dw_ker_W+1)
tot_ING = in_act + ker + im2colI + out_act
f.write("| ### INPUT GRADIENT ###\n|\n")
f.write("| IN: \t\t\t\t{} ({} bytes)\n".format(in_act, in_act*data_size))
f.write("| IM2COL BUFFER: \t{} ({} bytes)\n".format(im2colI, im2colI*data_size))
f.write("| KER: \t\t\t\t{} ({} bytes)\n".format(ker, ker*data_size))
f.write("| OUT DIFF: \t\t{} ({} bytes)\n".format(out_act, out_act*data_size))
f.write("| \n| TOTAL IN GRAD: \t{} ({} bytes)\n".format(tot_ING, tot_ING*data_size))
f.write("-------------------------------------------\n")
tot_MEM = tot_FW + tot_WGT + tot_ING - im2colF - im2colW - im2colI + max(im2colF, im2colW, im2colI)
tot_MEM = tot_FW + tot_WGT
f.write("DEPTHWISE CONV TOTAL OCCUPATION: \t{} ({} bytes)\n".format(tot_MEM, (tot_MEM)*data_size))
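As a quick worked example (hypothetical sizes, FP32, and assuming the depthwise totals no longer include an im2col term, as the updated tot_MEM line suggests), the element counts reported for the forward pass work out as:

# Hypothetical depthwise layer sizes (illustration only)
dw_in_H, dw_in_W, dw_inout_ch = 16, 16, 8
dw_ker_H, dw_ker_W = 3, 3
data_size = 4     # FP32 bytes per element

in_act  = dw_in_H * dw_in_W * dw_inout_ch                                     # 2048 elements
ker     = dw_ker_H * dw_ker_W * dw_inout_ch                                   # 72 elements
out_act = dw_inout_ch * (dw_in_H - dw_ker_H + 1) * (dw_in_W - dw_ker_W + 1)   # 8*14*14 = 1568 elements

tot_FW = in_act + ker + out_act     # 3688 elements -> 14752 bytes without an im2col buffer
print(tot_FW, tot_FW * data_size)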

