Commit b273d34

started split layer (#26)
* started split layer

* working with main branch now (for current tests)

* added test for optimiser

* working on improving resource modelling

* updated resource models

* changes to split layer

* fixed merge conflict

* updated visualiser and fn model for splitlayer

Co-authored-by: AlexMontgomerie <[email protected]>
3 people authored and Ben Biggs committed Aug 26, 2021
1 parent 9474ee3 commit b273d34
Showing 125 changed files with 1,751 additions and 311 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -16,6 +16,7 @@ outputs/
onnx/

.eggs/
*.egg-info/

docs/fpgaconvnet_optimiser/

8 changes: 8 additions & 0 deletions README.md
@@ -18,6 +18,14 @@ sudo apt-get install protobuf-compiler libprotoc-dev
python -m pip install .
```

## Testing

A suite of tests has been created for the optimiser repo. To run all of them, use the following:

```
python -m unittest discover tests/
```
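
For per-test output, test discovery also accepts unittest's standard verbosity flag (plain `unittest` behaviour, nothing repo-specific):

```
python -m unittest discover tests/ -v
```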

## Optimiser Framework

The main tool is the optimisation script, which generates an optimised hardware topology for a given model and platform. Several components are needed for this: a model of the hardware, transforms that map the model onto the hardware, and an optimisation scheme that chooses the best mapping. These are outlined later.
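
As one illustration of the optimisation-scheme component, `cli.py` below imports a `SimulatedAnnealing` optimiser. The following is a minimal standalone sketch of such a loop, not the repo's implementation; the toy `cost` and `apply_random_transform` are hypothetical stand-ins for the hardware model and the transforms:

```
import copy, math, random

def cost(net):
    # hypothetical stand-in for the hardware model's objective
    return sum(net)

def apply_random_transform(net):
    # hypothetical stand-in for a transform that perturbs the mapping
    i = random.randrange(len(net))
    net[i] += random.choice([-1, 1])

def anneal(net, T=10.0, T_min=0.01, cool=0.99):
    # classic simulated annealing: accept a worse mapping with
    # probability exp(-delta/T) to escape local minima, then cool
    while T > T_min:
        candidate = copy.deepcopy(net)
        apply_random_transform(candidate)
        delta = cost(candidate) - cost(net)
        if delta < 0 or random.random() < math.exp(-delta / T):
            net = candidate
        T *= cool
    return net

print(anneal([5, 3, 7]))  # toy "mapping": just a list of integers
```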
6 changes: 6 additions & 0 deletions fpgaconvnet_optimiser/cli.py
@@ -13,6 +13,8 @@
from fpgaconvnet_optimiser.optimiser import SimulatedAnnealing
from fpgaconvnet_optimiser.optimiser import Improve

import fpgaconvnet_optimiser.tools.graphs as graphs

def main():
parser = argparse.ArgumentParser(description="Optimiser Script")
parser.add_argument('-n','--name',metavar='PATH',required=True,
@@ -104,6 +106,10 @@ def main():
for partition_index in range(len(net.partitions)):
net.partitions[partition_index].apply_max_weights_reloading()


#for partition in net.partitions:
# graphs.print_graph(partition.graph)

# run optimiser
net.run_optimiser()

Binary file added fpgaconvnet_optimiser/coefficients/accum_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/accum_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/accum_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/accum_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/accum_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/conv_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/conv_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/conv_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/conv_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/conv_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/fork_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/fork_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/fork_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/fork_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/fork_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/glue_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/glue_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/glue_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/glue_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/glue_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/pool_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/pool_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/pool_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/pool_lut.npy
Binary file modified fpgaconvnet_optimiser/coefficients/pool_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/relu_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/relu_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/relu_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/relu_lut.npy
5 further binary files changed (names not shown)
Binary file added fpgaconvnet_optimiser/coefficients/squeeze_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/squeeze_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/squeeze_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/squeeze_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/squeeze_rsc_coef.npy
87 changes: 45 additions & 42 deletions fpgaconvnet_optimiser/models/layers/ConvolutionLayer.py
@@ -13,22 +13,21 @@
class ConvolutionLayer(Layer):
def __init__(
self,
rows,
cols,
channels,
filters,
filters: int,
rows: int,
cols: int,
channels: int,
coarse_in: int,
coarse_out: int,
k_size =3,
stride =1,
groups =1,
pad =0,
coarse_in =1,
coarse_out =1,
fine =1,
data_width =16,
sa =0.5,
sa_out =0.5
):
Layer.__init__(self, [rows], [cols], [channels], [coarse_in], [coarse_out], data_width)

# initialise parent class
super().__init__([rows],[cols],[channels],[coarse_in],[coarse_out])

# update flags
self.flags['channel_dependant'] = True
@@ -42,41 +41,43 @@ def __init__(
self.stride = stride
self.groups = groups
self.pad = pad
self.pad_top = pad - (self.rows[0] - k_size + 2*pad) % stride
self.pad_right = pad - (self.cols[0] - k_size + 2*pad) % stride
self.pad_top = pad - (self.rows_in(0) - k_size + 2*pad) % stride
self.pad_right = pad - (self.cols_in(0) - k_size + 2*pad) % stride
self.pad_bottom = pad
self.pad_left = pad
self.fine = fine
self.filters = filters

# init modules
self.modules = {
"sliding_window" : SlidingWindow(rows, cols, channels, k_size, stride, self.pad_top, self.pad_right, self.pad_bottom, self.pad_left, data_width),
"fork" : Fork(self.rows_out(0), self.cols_out(0), self.filters,k_size,coarse_out),
"conv" : Conv(self.rows_out(0), self.cols_out(0), self.filters,filters,fine,k_size,groups),
"accum" : Accum(self.rows_out(0), self.cols_out(0), self.filters,filters,groups),
"glue" : Glue(self.rows_out(0), self.cols_out(0), self.filters,filters,coarse_in,coarse_out)
"sliding_window" : SlidingWindow(self.rows_in(0), self.cols_in(0), self.channels_in(0), k_size, stride,
self.pad_top, self.pad_right, self.pad_bottom, self.pad_left, self.data_width),
"fork" : Fork(self.rows_out(0), self.cols_out(0), self.filters, k_size, self.coarse_out),
"conv" : Conv(self.rows_out(0), self.cols_out(0), self.filters, filters, fine, k_size, groups),
"accum" : Accum(self.rows_out(0), self.cols_out(0), self.filters, filters, groups),
"glue" : Glue(self.rows_out(0), self.cols_out(0), self.filters, filters, self.coarse_in[0], self.coarse_out[0])
}
self.update()
#self.load_coef()

# switching activity
self.sa = sa
self.sa_out = sa_out

def rows_out(self):
def rows_out(self, port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return int(math.floor((self.rows_in(0)-self.k_size+2*self.pad)/self.stride)+1)

def cols_out(self):
def cols_out(self, port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return int(math.floor((self.cols_in(0)-self.k_size+2*self.pad)/self.stride)+1)

def channels_out(self):
def channels_out(self, port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return self.filters

def rate_in(self,index):
def rate_in(self,port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return abs(self.balance_module_rates(self.rates_graph())[0,0])

def rate_out(self,index):
def rate_out(self,port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return abs(self.balance_module_rates(self.rates_graph())[4,5])

## LAYER INFO ##
@@ -141,14 +142,14 @@ def rates_graph(self):
rates_graph[0,0] = 1
rates_graph[0,1] = 1
else:
rates_graph[0,0] = self.modules['sliding_window'].rate_in(0)
rates_graph[0,1] = self.modules['sliding_window'].rate_out(0)
rates_graph[0,0] = self.modules['sliding_window'].rate_in()
rates_graph[0,1] = self.modules['sliding_window'].rate_out()
# fork
rates_graph[1,1] = self.modules['fork'].rate_in(0)
rates_graph[1,2] = self.modules['fork'].rate_out(0)
rates_graph[1,1] = self.modules['fork'].rate_in()
rates_graph[1,2] = self.modules['fork'].rate_out()
# conv
rates_graph[2,2] = self.modules['conv'].rate_in(0)
rates_graph[2,3] = self.modules['conv'].rate_out(0)
rates_graph[2,2] = self.modules['conv'].rate_in()
rates_graph[2,3] = self.modules['conv'].rate_out()
# accum
rates_graph[3,3] = self.modules['accum'].rate_in(0)
rates_graph[3,4] = self.modules['accum'].rate_out(0)
@@ -158,17 +159,19 @@ def rates_graph(self):

return rates_graph

def get_coarse_in_feasible(self,wr_factor=1):
def get_coarse_in_feasible(self,port_index,wr_factor=1):
assert port_index == 0
return self.get_factors(int(self.channels_in(0)/(self.groups*wr_factor)))

def get_coarse_out_feasible(self,wr_factor=1):
def get_coarse_out_feasible(self,port_index,wr_factor=1):
assert port_index == 0
return self.get_factors(int(self.channels_out(0)/(self.groups*wr_factor)))

def update_coarse_in(self, coarse_in):
self.coarse_in = coarse_in
self.coarse_in[0] = coarse_in

def update_coarse_out(self, coarse_out):
self.coarse_out = coarse_out
self.coarse_out[0] = coarse_out

def get_fine_feasible(self):
#return self.get_factors(int(self.k_size*self.k_size))
@@ -178,15 +181,15 @@ def get_weights_reloading_feasible(self):
return self.get_factors(int(self.filters/(self.groups*self.coarse_out[0])))

def get_parameters_size(self):
weights_size = self.channels[0] * int( self.filters / self.groups ) * self.k_size * self.k_size
weights_size = self.channels_in(0) * int( self.filters / self.groups ) * self.k_size * self.k_size
bias_size = 0
return {
"weights" : weights_size,
"bias" : bias_size
}

def get_operations(self):
return self.k_size*self.k_size*self.channels_in()*self.filters*self.rows_out()*self.cols_out()
return self.k_size*self.k_size*self.channels_in(0)*self.filters*self.rows_out(0)*self.cols_out(0)

def resource(self):

@@ -206,7 +209,7 @@ def resource(self):
glue_rsc = {"LUT" : 0,"BRAM" : 0,"DSP" : 0,"FF" : 0}

# weight usage
n_filters = float(self.filters*self.channels[0]*self.k_size*self.k_size)/float(self.fine*self.groups*self.coarse_in[0]*self.coarse_out[0])
n_filters = float(self.filters*self.channels_in(0)*self.k_size*self.k_size)/float(self.fine*self.groups*self.coarse_in[0]*self.coarse_out[0])
weights_bram_usage = int(math.ceil((self.weight_width*n_filters)/18000))*self.coarse_in[0]*self.coarse_out[0]*self.fine

# Total
@@ -264,9 +267,9 @@ def visualise(self,name):

def functional_model(self,data,weights,bias,batch_size=1):

assert data.shape[0] == self.rows[0] , "ERROR (data): invalid row dimension"
assert data.shape[1] == self.cols[0] , "ERROR (data): invalid column dimension"
assert data.shape[2] == self.channels[0], "ERROR (data): invalid channel dimension"
assert data.shape[0] == self.rows_in(0) , "ERROR (data): invalid row dimension"
assert data.shape[1] == self.cols_in(0) , "ERROR (data): invalid column dimension"
assert data.shape[2] == self.channels_in(0), "ERROR (data): invalid channel dimension"

assert weights.shape[0] == self.filters , "ERROR (weights): invalid filter dimension"
assert weights.shape[1] == int(self.channels[0]/self.groups), "ERROR (weights): invalid channel dimension"
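The arithmetic in this file is easy to sanity-check in isolation. Below is a small standalone example (mine, not from the repo) of the output-dimension formula used by `rows_out`/`cols_out` and the BRAM18 weight-storage estimate from the "weight usage" block in `resource()`:

```
import math

def conv_output_dim(dim_in, k_size, pad, stride):
    # same formula as rows_out/cols_out above
    return int(math.floor((dim_in - k_size + 2 * pad) / stride) + 1)

def weights_bram(filters, channels, k_size, fine, groups,
                 coarse_in, coarse_out, weight_width=8):
    # parameters per parallel stream, packed into 18Kb BRAM primitives,
    # then scaled back up by the unrolling factors (as in resource())
    n_filters = (filters * channels * k_size * k_size) \
                / (fine * groups * coarse_in * coarse_out)
    return int(math.ceil((weight_width * n_filters) / 18000)) \
           * coarse_in * coarse_out * fine

assert conv_output_dim(32, 3, 1, 1) == 32   # "same" padding keeps 32x32
assert conv_output_dim(28, 5, 0, 1) == 24   # valid 5x5 conv, LeNet-style
print(weights_bram(64, 64, 3, 1, 1, 2, 2))  # 64ch -> 64 filters, coarse 2x2
```

The `weight_width=8` default mirrors the 8-bit weight width set in `InnerProductLayer` below; the convolution layer's own `weight_width` is set elsewhere in the class, so treat it as a parameter here.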
63 changes: 30 additions & 33 deletions fpgaconvnet_optimiser/models/layers/InnerProductLayer.py
@@ -13,59 +13,56 @@
class InnerProductLayer(Layer):
def __init__(
self,
rows,
cols,
channels,
filters,
coarse_in =1,
coarse_out =1,
data_width =16,
sa =0.5,
sa_out =0.5
filters: int,
rows: int,
cols: int,
channels: int,
coarse_in: int,
coarse_out: int,
):
Layer.__init__(self,[rows],[cols],[channels],[coarse_in],[coarse_out],data_width)

# initialise parent class
super().__init__([rows], [cols], [channels], [coarse_in], [coarse_out])

self.weight_width = 8

# update flags
self.flags['channel_dependant'] = True
self.flags['transformable'] = True

# save parameters
self.filters = filters

# init modules
self.modules = {
"fork" : Fork( self.rows[0],self.cols[0], self.channels[0],1,coarse_out),
"conv" : Conv( 1,1,self.channels[0]*self.rows[0]*self.cols[0],filters,1,1,1),
"accum" : Accum(1,1,self.channels[0]*self.rows[0]*self.cols[0],filters,1),
"glue" : Glue( 1,1,self.channels[0]*self.rows[0]*self.cols[0],filters,coarse_in,coarse_out)
"fork" : Fork( self.rows_in(0),self.cols_in(0), self.channels_in(0),1,self.coarse_out[0]),
"conv" : Conv( 1,1,self.channels_in(0)*self.rows_in(0)*self.cols_in(0),filters,1,1,1),
"accum" : Accum(1,1,self.channels_in(0)*self.rows_in(0)*self.cols_in(0),filters,1),
"glue" : Glue( 1,1,self.channels_in(0)*self.rows_in(0)*self.cols_in(0),
filters, self.coarse_in[0], self.coarse_out[0])
}
self.update()

# switching activity
self.sa = sa
self.sa_out = sa_out

def rows_out(self):
def rows_out(self, port_index):
return 1

def cols_out(self):
def cols_out(self, port_index):
return 1

def channels_out(self):
def channels_out(self, port_index):
return self.filters

def rate_in(self,index):
def rate_in(self, port_index):
return abs(self.balance_module_rates(self.rates_graph())[0,0])

def rate_out(self,index):
def rate_out(self, port_index):
return abs(self.balance_module_rates(self.rates_graph())[3,4])

def update_coarse_in(self, coarse_in):
self.coarse_in = coarse_in
self.coarse_in[0] = coarse_in

def update_coarse_out(self, coarse_out):
self.coarse_out = coarse_out
self.coarse_out[0] = coarse_out

## LAYER INFO ##
def layer_info(self,parameters,batch_size=1):
@@ -110,17 +107,17 @@ def update(self): # TODO: update all parameters
def rates_graph(self):
rates_graph = np.zeros( shape=(4,5) , dtype=float )
# fork
rates_graph[0,0] = self.modules['fork'].rate_in(0)
rates_graph[0,1] = self.modules['fork'].rate_out(0)
rates_graph[0,0] = self.modules['fork'].rate_in()
rates_graph[0,1] = self.modules['fork'].rate_out()
# conv
rates_graph[1,1] = self.modules['conv'].rate_in(0)
rates_graph[1,2] = self.modules['conv'].rate_out(0)
rates_graph[1,1] = self.modules['conv'].rate_in()
rates_graph[1,2] = self.modules['conv'].rate_out()
# accum
rates_graph[2,2] = self.modules['accum'].rate_in(0)
rates_graph[2,3] = self.modules['accum'].rate_out(0)
rates_graph[2,2] = self.modules['accum'].rate_in()
rates_graph[2,3] = self.modules['accum'].rate_out()
# glue
rates_graph[3,3] = self.modules['glue'].rate_in(0)
rates_graph[3,4] = self.modules['glue'].rate_out(0)
rates_graph[3,3] = self.modules['glue'].rate_in()
rates_graph[3,4] = self.modules['glue'].rate_out()

return rates_graph

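As the module instantiation above shows, the layer treats a `rows x cols x channels` input as a 1x1 "image" with `rows*cols*channels` channels feeding `filters` dot products. A minimal functional sketch under that reading (the flattening order is an assumption and may differ from the repo's actual `functional_model`):

```
import numpy as np

def inner_product(data, weights):
    # data: (rows, cols, channels); weights: (filters, rows*cols*channels)
    return weights @ data.flatten()

data = np.random.rand(4, 4, 8)
weights = np.random.rand(10, 4 * 4 * 8)
assert inner_product(data, weights).shape == (10,)
```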
(diffs for the remaining changed files not shown)