Commit b273d34

started split layer (#26)
* started split layer

* working with main branch now (for current tests)

* added test for optimiser

* working on improving resource modelling

* updated resource models

* changes to split layer

* fixed merge conflict

* updated visualiser and fn model for splitlayer

Co-authored-by: AlexMontgomerie <[email protected]>
3 people authored and Ben Biggs committed Aug 26, 2021
1 parent 9474ee3 commit b273d34
Showing 125 changed files with 1,751 additions and 311 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -16,6 +16,7 @@ outputs/
onnx/

.eggs/
*.egg-info/

docs/fpgaconvnet_optimiser/

8 changes: 8 additions & 0 deletions README.md
@@ -18,6 +18,14 @@ sudo apt-get install protobuf-compiler libprotoc-dev
python -m pip install .
```

## Testing

A suite of tests has been created for the optimiser repo. To run all of them, use the following:

```
python -m unittest discover tests/
```
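
For per-test output, test discovery also accepts unittest's standard verbosity flag (plain `unittest` behaviour, nothing repo-specific):

```
python -m unittest discover tests/ -v
```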

## Optimiser Framework

The main tool is the optimisation script, which generates an optimised hardware topology for a given model and platform. Several components are needed for this: a model of the hardware, transforms that map the model onto the hardware, and an optimisation scheme that chooses the best mapping. These are outlined later.
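
As one illustration of the optimisation-scheme component, `cli.py` below imports a `SimulatedAnnealing` optimiser. The following is a minimal standalone sketch of such a loop, not the repo's implementation; the toy `cost` and `apply_random_transform` are hypothetical stand-ins for the hardware model and the transforms:

```
import copy, math, random

def cost(net):
    # hypothetical stand-in for the hardware model's objective
    return sum(net)

def apply_random_transform(net):
    # hypothetical stand-in for a transform that perturbs the mapping
    i = random.randrange(len(net))
    net[i] += random.choice([-1, 1])

def anneal(net, T=10.0, T_min=0.01, cool=0.99):
    # classic simulated annealing: accept a worse mapping with
    # probability exp(-delta/T) to escape local minima, then cool
    while T > T_min:
        candidate = copy.deepcopy(net)
        apply_random_transform(candidate)
        delta = cost(candidate) - cost(net)
        if delta < 0 or random.random() < math.exp(-delta / T):
            net = candidate
        T *= cool
    return net

print(anneal([5, 3, 7]))  # toy "mapping": just a list of integers
```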
6 changes: 6 additions & 0 deletions fpgaconvnet_optimiser/cli.py
@@ -13,6 +13,8 @@
from fpgaconvnet_optimiser.optimiser import SimulatedAnnealing
from fpgaconvnet_optimiser.optimiser import Improve

import fpgaconvnet_optimiser.tools.graphs as graphs

def main():
parser = argparse.ArgumentParser(description="Optimiser Script")
parser.add_argument('-n','--name',metavar='PATH',required=True,
@@ -104,6 +106,10 @@ def main():
for partition_index in range(len(net.partitions)):
net.partitions[partition_index].apply_max_weights_reloading()


#for partition in net.partitions:
# graphs.print_graph(partition.graph)

# run optimiser
net.run_optimiser()

Binary file added fpgaconvnet_optimiser/coefficients/accum_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/accum_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/accum_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/accum_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/accum_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/conv_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/conv_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/conv_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/conv_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/conv_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/fork_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/fork_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/fork_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/fork_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/fork_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/glue_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/glue_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/glue_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/glue_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/glue_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/pool_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/pool_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/pool_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/pool_lut.npy
Binary file modified fpgaconvnet_optimiser/coefficients/pool_rsc_coef.npy
Binary file added fpgaconvnet_optimiser/coefficients/relu_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/relu_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/relu_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/relu_lut.npy
5 further binary files changed (names not shown)
Binary file added fpgaconvnet_optimiser/coefficients/squeeze_bram.npy
Binary file added fpgaconvnet_optimiser/coefficients/squeeze_dsp.npy
Binary file added fpgaconvnet_optimiser/coefficients/squeeze_ff.npy
Binary file added fpgaconvnet_optimiser/coefficients/squeeze_lut.npy
Binary file removed fpgaconvnet_optimiser/coefficients/squeeze_rsc_coef.npy
87 changes: 45 additions & 42 deletions fpgaconvnet_optimiser/models/layers/ConvolutionLayer.py
@@ -13,22 +13,21 @@
class ConvolutionLayer(Layer):
def __init__(
self,
rows,
cols,
channels,
filters,
filters: int,
rows: int,
cols: int,
channels: int,
coarse_in: int,
coarse_out: int,
k_size =3,
stride =1,
groups =1,
pad =0,
coarse_in =1,
coarse_out =1,
fine =1,
data_width =16,
sa =0.5,
sa_out =0.5
):
Layer.__init__(self, [rows], [cols], [channels], [coarse_in], [coarse_out], data_width)

# initialise parent class
super().__init__([rows],[cols],[channels],[coarse_in],[coarse_out])

# update flags
self.flags['channel_dependant'] = True
@@ -42,41 +41,43 @@ def __init__(
self.stride = stride
self.groups = groups
self.pad = pad
self.pad_top = pad - (self.rows[0] - k_size + 2*pad) % stride
self.pad_right = pad - (self.cols[0] - k_size + 2*pad) % stride
self.pad_top = pad - (self.rows_in(0) - k_size + 2*pad) % stride
self.pad_right = pad - (self.cols_in(0) - k_size + 2*pad) % stride
self.pad_bottom = pad
self.pad_left = pad
self.fine = fine
self.filters = filters

# init modules
self.modules = {
"sliding_window" : SlidingWindow(rows, cols, channels, k_size, stride, self.pad_top, self.pad_right, self.pad_bottom, self.pad_left, data_width),
"fork" : Fork(self.rows_out(0), self.cols_out(0), self.filters,k_size,coarse_out),
"conv" : Conv(self.rows_out(0), self.cols_out(0), self.filters,filters,fine,k_size,groups),
"accum" : Accum(self.rows_out(0), self.cols_out(0), self.filters,filters,groups),
"glue" : Glue(self.rows_out(0), self.cols_out(0), self.filters,filters,coarse_in,coarse_out)
"sliding_window" : SlidingWindow(self.rows_in(0), self.cols_in(0), self.channels_in(0), k_size, stride,
self.pad_top, self.pad_right, self.pad_bottom, self.pad_left, self.data_width),
"fork" : Fork(self.rows_out(0), self.cols_out(0), self.filters, k_size, self.coarse_out),
"conv" : Conv(self.rows_out(0), self.cols_out(0), self.filters, filters, fine, k_size, groups),
"accum" : Accum(self.rows_out(0), self.cols_out(0), self.filters, filters, groups),
"glue" : Glue(self.rows_out(0), self.cols_out(0), self.filters, filters, self.coarse_in[0], self.coarse_out[0])
}
self.update()
#self.load_coef()

# switching activity
self.sa = sa
self.sa_out = sa_out

def rows_out(self):
def rows_out(self, port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return int(math.floor((self.rows_in(0)-self.k_size+2*self.pad)/self.stride)+1)

def cols_out(self):
def cols_out(self, port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return int(math.floor((self.cols_in(0)-self.k_size+2*self.pad)/self.stride)+1)

def channels_out(self):
def channels_out(self, port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return self.filters

def rate_in(self,index):
def rate_in(self,port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return abs(self.balance_module_rates(self.rates_graph())[0,0])

def rate_out(self,index):
def rate_out(self,port_index):
assert port_index == 0, "convolution layers are only allowed a single port"
return abs(self.balance_module_rates(self.rates_graph())[4,5])

## LAYER INFO ##
@@ -141,14 +142,14 @@ def rates_graph(self):
rates_graph[0,0] = 1
rates_graph[0,1] = 1
else:
rates_graph[0,0] = self.modules['sliding_window'].rate_in(0)
rates_graph[0,1] = self.modules['sliding_window'].rate_out(0)
rates_graph[0,0] = self.modules['sliding_window'].rate_in()
rates_graph[0,1] = self.modules['sliding_window'].rate_out()
# fork
rates_graph[1,1] = self.modules['fork'].rate_in(0)
rates_graph[1,2] = self.modules['fork'].rate_out(0)
rates_graph[1,1] = self.modules['fork'].rate_in()
rates_graph[1,2] = self.modules['fork'].rate_out()
# conv
rates_graph[2,2] = self.modules['conv'].rate_in(0)
rates_graph[2,3] = self.modules['conv'].rate_out(0)
rates_graph[2,2] = self.modules['conv'].rate_in()
rates_graph[2,3] = self.modules['conv'].rate_out()
# accum
rates_graph[3,3] = self.modules['accum'].rate_in(0)
rates_graph[3,4] = self.modules['accum'].rate_out(0)
@@ -158,17 +159,19 @@ def rates_graph(self):

return rates_graph

def get_coarse_in_feasible(self,wr_factor=1):
def get_coarse_in_feasible(self,port_index,wr_factor=1):
assert port_index == 0
return self.get_factors(int(self.channels_in(0)/(self.groups*wr_factor)))

def get_coarse_out_feasible(self,wr_factor=1):
def get_coarse_out_feasible(self,port_index,wr_factor=1):
assert port_index == 0
return self.get_factors(int(self.channels_out(0)/(self.groups*wr_factor)))

def update_coarse_in(self, coarse_in):
self.coarse_in = coarse_in
self.coarse_in[0] = coarse_in

def update_coarse_out(self, coarse_out):
self.coarse_out = coarse_out
self.coarse_out[0] = coarse_out

def get_fine_feasible(self):
#return self.get_factors(int(self.k_size*self.k_size))
@@ -178,15 +181,15 @@ def get_weights_reloading_feasible(self):
return self.get_factors(int(self.filters/(self.groups*self.coarse_out[0])))

def get_parameters_size(self):
weights_size = self.channels[0] * int( self.filters / self.groups ) * self.k_size * self.k_size
weights_size = self.channels_in(0) * int( self.filters / self.groups ) * self.k_size * self.k_size
bias_size = 0
return {
"weights" : weights_size,
"bias" : bias_size
}

def get_operations(self):
return self.k_size*self.k_size*self.channels_in()*self.filters*self.rows_out()*self.cols_out()
return self.k_size*self.k_size*self.channels_in(0)*self.filters*self.rows_out(0)*self.cols_out(0)

def resource(self):

@@ -206,7 +209,7 @@ def resource(self):
glue_rsc = {"LUT" : 0,"BRAM" : 0,"DSP" : 0,"FF" : 0}

# weight usage
n_filters = float(self.filters*self.channels[0]*self.k_size*self.k_size)/float(self.fine*self.groups*self.coarse_in[0]*self.coarse_out[0])
n_filters = float(self.filters*self.channels_in(0)*self.k_size*self.k_size)/float(self.fine*self.groups*self.coarse_in[0]*self.coarse_out[0])
weights_bram_usage = int(math.ceil((self.weight_width*n_filters)/18000))*self.coarse_in[0]*self.coarse_out[0]*self.fine

# Total
@@ -264,9 +267,9 @@ def visualise(self,name):

def functional_model(self,data,weights,bias,batch_size=1):

assert data.shape[0] == self.rows[0] , "ERROR (data): invalid row dimension"
assert data.shape[1] == self.cols[0] , "ERROR (data): invalid column dimension"
assert data.shape[2] == self.channels[0], "ERROR (data): invalid channel dimension"
assert data.shape[0] == self.rows_in(0) , "ERROR (data): invalid row dimension"
assert data.shape[1] == self.cols_in(0) , "ERROR (data): invalid column dimension"
assert data.shape[2] == self.channels_in(0), "ERROR (data): invalid channel dimension"

assert weights.shape[0] == self.filters , "ERROR (weights): invalid filter dimension"
assert weights.shape[1] == int(self.channels[0]/self.groups), "ERROR (weights): invalid channel dimension"
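The arithmetic in this file is easy to sanity-check in isolation. Below is a small standalone example (mine, not from the repo) of the output-dimension formula used by `rows_out`/`cols_out` and the BRAM18 weight-storage estimate from the "weight usage" block in `resource()`:

```
import math

def conv_output_dim(dim_in, k_size, pad, stride):
    # same formula as rows_out/cols_out above
    return int(math.floor((dim_in - k_size + 2 * pad) / stride) + 1)

def weights_bram(filters, channels, k_size, fine, groups,
                 coarse_in, coarse_out, weight_width=8):
    # parameters per parallel stream, packed into 18Kb BRAM primitives,
    # then scaled back up by the unrolling factors (as in resource())
    n_filters = (filters * channels * k_size * k_size) \
                / (fine * groups * coarse_in * coarse_out)
    return int(math.ceil((weight_width * n_filters) / 18000)) \
           * coarse_in * coarse_out * fine

assert conv_output_dim(32, 3, 1, 1) == 32   # "same" padding keeps 32x32
assert conv_output_dim(28, 5, 0, 1) == 24   # valid 5x5 conv, LeNet-style
print(weights_bram(64, 64, 3, 1, 1, 2, 2))  # 64ch -> 64 filters, coarse 2x2
```

The `weight_width=8` default mirrors the 8-bit weight width set in `InnerProductLayer` below; the convolution layer's own `weight_width` is set elsewhere in the class, so treat it as a parameter here.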
63 changes: 30 additions & 33 deletions fpgaconvnet_optimiser/models/layers/InnerProductLayer.py
@@ -13,59 +13,56 @@
class InnerProductLayer(Layer):
def __init__(
self,
rows,
cols,
channels,
filters,
coarse_in =1,
coarse_out =1,
data_width =16,
sa =0.5,
sa_out =0.5
filters: int,
rows: int,
cols: int,
channels: int,
coarse_in: int,
coarse_out: int,
):
Layer.__init__(self,[rows],[cols],[channels],[coarse_in],[coarse_out],data_width)

# initialise parent class
super().__init__([rows], [cols], [channels], [coarse_in], [coarse_out])

self.weight_width = 8

# update flags
self.flags['channel_dependant'] = True
self.flags['transformable'] = True

# save parameters
self.filters = filters

# init modules
self.modules = {
"fork" : Fork( self.rows[0],self.cols[0], self.channels[0],1,coarse_out),
"conv" : Conv( 1,1,self.channels[0]*self.rows[0]*self.cols[0],filters,1,1,1),
"accum" : Accum(1,1,self.channels[0]*self.rows[0]*self.cols[0],filters,1),
"glue" : Glue( 1,1,self.channels[0]*self.rows[0]*self.cols[0],filters,coarse_in,coarse_out)
"fork" : Fork( self.rows_in(0),self.cols_in(0), self.channels_in(0),1,self.coarse_out[0]),
"conv" : Conv( 1,1,self.channels_in(0)*self.rows_in(0)*self.cols_in(0),filters,1,1,1),
"accum" : Accum(1,1,self.channels_in(0)*self.rows_in(0)*self.cols_in(0),filters,1),
"glue" : Glue( 1,1,self.channels_in(0)*self.rows_in(0)*self.cols_in(0),
filters, self.coarse_in[0], self.coarse_out[0])
}
self.update()

# switching activity
self.sa = sa
self.sa_out = sa_out

def rows_out(self):
def rows_out(self, port_index):
return 1

def cols_out(self):
def cols_out(self, port_index):
return 1

def channels_out(self):
def channels_out(self, port_index):
return self.filters

def rate_in(self,index):
def rate_in(self, port_index):
return abs(self.balance_module_rates(self.rates_graph())[0,0])

def rate_out(self,index):
def rate_out(self, port_index):
return abs(self.balance_module_rates(self.rates_graph())[3,4])

def update_coarse_in(self, coarse_in):
self.coarse_in = coarse_in
self.coarse_in[0] = coarse_in

def update_coarse_out(self, coarse_out):
self.coarse_out = coarse_out
self.coarse_out[0] = coarse_out

## LAYER INFO ##
def layer_info(self,parameters,batch_size=1):
@@ -110,17 +107,17 @@ def update(self): # TODO: update all parameters
def rates_graph(self):
rates_graph = np.zeros( shape=(4,5) , dtype=float )
# fork
rates_graph[0,0] = self.modules['fork'].rate_in(0)
rates_graph[0,1] = self.modules['fork'].rate_out(0)
rates_graph[0,0] = self.modules['fork'].rate_in()
rates_graph[0,1] = self.modules['fork'].rate_out()
# conv
rates_graph[1,1] = self.modules['conv'].rate_in(0)
rates_graph[1,2] = self.modules['conv'].rate_out(0)
rates_graph[1,1] = self.modules['conv'].rate_in()
rates_graph[1,2] = self.modules['conv'].rate_out()
# accum
rates_graph[2,2] = self.modules['accum'].rate_in(0)
rates_graph[2,3] = self.modules['accum'].rate_out(0)
rates_graph[2,2] = self.modules['accum'].rate_in()
rates_graph[2,3] = self.modules['accum'].rate_out()
# glue
rates_graph[3,3] = self.modules['glue'].rate_in(0)
rates_graph[3,4] = self.modules['glue'].rate_out(0)
rates_graph[3,3] = self.modules['glue'].rate_in()
rates_graph[3,4] = self.modules['glue'].rate_out()

return rates_graph

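As the module instantiation above shows, the layer treats a `rows x cols x channels` input as a 1x1 "image" with `rows*cols*channels` channels feeding `filters` dot products. A minimal functional sketch under that reading (the flattening order is an assumption and may differ from the repo's actual `functional_model`):

```
import numpy as np

def inner_product(data, weights):
    # data: (rows, cols, channels); weights: (filters, rows*cols*channels)
    return weights @ data.flatten()

data = np.random.rand(4, 4, 8)
weights = np.random.rand(10, 4 * 4 * 8)
assert inner_product(data, weights).shape == (10,)
```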
(diffs for the remaining changed files not shown)