attempt to add support for conv1d transpose
add new files for conv1dtranspose resource

clean up so that conv code is reached. Still need to get the actual implementation to match Keras

implement conv1dtranspose super inefficiently (gets correct answer though)

try to fix indices to make code work

make the c code work for conv1dtranspose

reduce weight dimensions to properly reflect transposed kernel size

clean up so that transpose filter width is passed around from config

fix code such that simple transpose layer gets synthesized

move variables out of loops, optimize slightly and add in alternative method of computation to compute by kernel (that option is not optimized as of now)

add in conv1d transpose linebuffer format code. seems to work; unsure if it is optimized yet

trying to fix stream behavior

get transpose compilation working mostly as expected. weird jump in latency from reuse 1 to 2 still exists

initial conv2dtranspose addition. Output is permuted as of now.

output in correct order. using large array to buffer output though

fix up conv1dtranspose a bit to pad correctly. fix up stream instructions for both 1d and 2d transposes

fix allowed reuse factors for transpose layers

update to new conv methods for io_parallel. Still some issues with multiple filters as well as some padding issues

clean up error with multiple filters and larger kernels

optimize conv transpose resource to get it working reasonably well. may still have slight optimization left

fix output to conv1d transpose resource

add conv2dtranspose io_parallel implementation. Can still be optimized

small changeup to data storage in conv1d parallel

fix zero padding pass addition for transpose stream layers

move transposing of weight matrix to resource_strategy for transpose layers

change how stream loads in weights to be like parallel for conv transposes. unroll all stride steps completely

fix output of 1d transpose parallel to be faster

change 1d transpose weight input to be 2-dimensional (passed from python code)

change 2d transpose weight input to be 3-dimensional (passed from python code)

small changes to transposes

Revert "fix nondefault project name handling (fastmachinelearning#626)". The commit breaks the Vivado Accelerator workflow, and the fix is unclear to me right now.

This reverts commit e8f048a.

steps towards getting integer inputs to work
Jonathan-Shoemaker authored and jmduarte committed Mar 18, 2023
1 parent d54843d commit 5175e1a
Showing 29 changed files with 1,881 additions and 233 deletions.
161 changes: 161 additions & 0 deletions hls4ml/backends/fpga/fpga_backend.py
@@ -79,6 +79,22 @@ def get_layer_mult_size(self, layer):
n_out = layer.get_attr('n_out')
return n_in, n_out

if 'Conv1DTranspose' in layer.class_name:
trfilt_width = (layer.get_attr('filt_width') + layer.get_attr('stride_width') - 1) \
// layer.get_attr('stride_width')
n_in = layer.get_attr('n_chan') * trfilt_width
n_out = layer.get_attr('n_filt')
return n_in, n_out

if 'Conv2DTranspose' in layer.class_name:
trfilt_width = (layer.get_attr('filt_width') + layer.get_attr('stride_width') - 1) \
// layer.get_attr('stride_width')
trfilt_height = (layer.get_attr('filt_height') + layer.get_attr('stride_height') - 1) \
// layer.get_attr('stride_height')
n_in = layer.get_attr('n_chan') * trfilt_height * trfilt_width
n_out = layer.get_attr('n_filt')
return n_in, n_out
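
Both branches above compute the transposed-kernel extent as the ceiling division ceil(filt / stride): each output phase of a stride-s transposed convolution only ever touches every s-th kernel tap. A minimal sanity check of the integer form (a sketch, not part of the commit):

# ceil(filt / stride) via integer arithmetic, as used for trfilt_width
# and trfilt_height above.
def trfilt(filt, stride):
    return (filt + stride - 1) // stride

assert trfilt(4, 2) == 2  # kernel 4, stride 2: 2 taps per output phase
assert trfilt(3, 1) == 3  # stride 1 reduces to the ordinary kernel width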

if 'Conv1D' in layer.class_name:
n_in = layer.get_attr('n_chan') * layer.get_attr('filt_width')
n_out = layer.get_attr('n_filt')
@@ -476,7 +492,67 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke
" ) {{\n"
).format(index=layer_idx)
indent = ' '
for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
for pixel_idx, arr in enumerate(partition):
buffer_stmts = []
for j, v in enumerate(arr):
if v == 0:
val = '0'
else:
val = 'data[{}]'.format(int(v-1))
buffer_stmts.append('buffer[{}][{}] = {:>10};'.format(pixel_idx, j, val))
generated_code += indent * 3 + ' '.join(buffer_stmts) + '\n'
generated_code += '\n' + indent * 2 + '}\n'

generated_code += indent + '}\n'
generated_code += '};\n'

return generated_code

def _compute_conv1d_tr_im2col(self, input_shape, out_w, kernel=3, stride=1):
W, C = input_shape

tr_kernel = (kernel+stride-1)//stride

input_img = np.arange(1, W * C + 1)
im_matrix = np.zeros((tr_kernel * C * out_w, ))

index = 0
for i_ow in range(out_w):
for i_kw in range(tr_kernel):
for i_c in range(C):
# input column is just the output column shifted
input_col = i_ow - (tr_kernel-1) + i_kw
if (input_col >= 0 and input_col < W):
im_matrix[index] = input_img[input_col * C + i_c]
else:
im_matrix[index] = 0
index += 1
im_matrix = im_matrix.reshape(out_w, -1)
return im_matrix
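
The helper above builds one im2col row per output pixel, holding the 1-based positions of the tr_kernel * C inputs that pixel reads, with 0 wherever a tap falls off the input. A standalone mirror of the same indexing rule on hypothetical toy sizes (illustrative only, not the commit's code path):

import numpy as np

def conv1d_tr_im2col(W, C, out_w, kernel, stride):
    # mirrors _compute_conv1d_tr_im2col above
    tr_kernel = (kernel + stride - 1) // stride
    img = np.arange(1, W * C + 1)  # 1-based input positions
    rows = []
    for i_ow in range(out_w):
        for i_kw in range(tr_kernel):
            for i_c in range(C):
                col = i_ow - (tr_kernel - 1) + i_kw  # shifted input column
                rows.append(img[col * C + i_c] if 0 <= col < W else 0)
    return np.asarray(rows).reshape(out_w, -1)

print(conv1d_tr_im2col(4, 1, 4, kernel=3, stride=2))
# [[0 1]
#  [1 2]
#  [2 3]
#  [3 4]]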


def generate_conv1d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, out_W, kernel=3, stride=1):

im2col_matrix = self._compute_conv1d_tr_im2col(
(in_W, in_C),
out_W,
kernel,
stride,
)

generated_code = (
"template<class data_T, typename CONFIG_T>\n"
"class fill_buffer_{index} : public FillConv1DBuffer<data_T, CONFIG_T> {{\n"
" public:\n"
" static void fill_buffer(\n"
" data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],\n"
" data_T buffer[CONFIG_T::n_pixels][CONFIG_T::trfilt_width * CONFIG_T::n_chan],\n"
" const unsigned partition\n"
" ) {{\n"
).format(index=layer_idx)
indent = ' '
for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
for pixel_idx, arr in enumerate(partition):
@@ -622,6 +698,91 @@ def generate_conv2d_line_buffer_fn(self, layer_idx, n_partitions, in_H, in_W, in

return generated_code

def _compute_conv2d_tr_im2col(self, input_shape, out_shape, kernel=(3, 3), stride=(1, 1)):
H, W, C = input_shape
kernel_h, kernel_w = kernel
stride_h, stride_w = stride
out_h, out_w = out_shape

tr_kernel_h = (kernel_h+stride_h-1)//stride_h
tr_kernel_w = (kernel_w+stride_w-1)//stride_w

input_img = np.arange(1, H * W * C + 1)
im_matrix = np.zeros((tr_kernel_h * tr_kernel_w * C * out_h * out_w, ))

index = 0
for i_oh in range(out_h):
for i_ow in range(out_w):
for i_kh in range(tr_kernel_h):
input_row = i_oh - (tr_kernel_h-1) + i_kh
for i_kw in range(tr_kernel_w):
for i_c in range(C):
if (input_row < 0 or input_row >= H):
im_matrix[index] = 0
else:
input_col = i_ow - (tr_kernel_w-1) + i_kw
if (input_col >= 0 and input_col < W):
im_matrix[index] = input_img[input_row * W * C + input_col * C + i_c]
else:
im_matrix[index] = 0
index += 1

im_matrix = im_matrix.reshape(out_h * out_w, -1)
return im_matrix


def generate_conv2d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_H, in_W, in_C, out_H, out_W, kernel=(3, 3), stride=(1, 1)):
if isinstance(kernel, Iterable):
kernel_height = kernel[0]
kernel_width = kernel[1]
else:
kernel_height = kernel
kernel_width = kernel

if isinstance(stride, Iterable):
stride_height = stride[0]
stride_width = stride[1]
else:
stride_height = stride
stride_width = stride

im2col_matrix = self._compute_conv2d_tr_im2col(
(in_H, in_W, in_C),
(out_H, out_W),
(kernel_height, kernel_width),
(stride_height, stride_width),
)

generated_code = (
"template<class data_T, typename CONFIG_T>\n"
"class fill_buffer_{index} : public FillConv2DBuffer<data_T, CONFIG_T> {{\n"
" public:\n"
" static void fill_buffer(\n"
" data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],\n"
" data_T buffer[CONFIG_T::n_pixels][CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_chan],\n"
" const unsigned partition\n"
" ) {{\n"
).format(index=layer_idx)
indent = ' '

for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
for pixel_idx, arr in enumerate(partition):
buffer_stmts = []
for j, v in enumerate(arr):
if v == 0:
val = '0'
else:
val = 'data[{}]'.format(int(v-1))
buffer_stmts.append('buffer[{}][{}] = {:>10};'.format(pixel_idx, j, val))
generated_code += indent * 3 + ' '.join(buffer_stmts) + '\n'
generated_code += '\n' + indent * 2 + '}\n'

generated_code += indent + '}\n'
generated_code += '};\n'

return generated_code
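
Both generators carve the im2col rows into n_partitions equal groups with numpy.split, so the output-pixel count must divide evenly by n_partitions; each group becomes one "if (partition == k)" branch of straight-line buffer assignments. A quick illustration of that partitioning (hypothetical sizes):

import numpy as np

rows = np.arange(8 * 6).reshape(8, 6)  # 8 output pixels, 6 taps each
parts = np.split(rows, 4)              # raises ValueError on uneven splits
print(len(parts), parts[0].shape)      # 4 (2, 6): 2 pixels per partition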

@model_optimizer()
def write_hls(self, model):
self.writer.write_hls(model)
9 changes: 9 additions & 0 deletions hls4ml/backends/fpga/fpga_types.py
@@ -326,6 +326,15 @@ def __init__(self, type_converter):

class StaticWeightVariableDefinition(VariableDefinition):
def definition_cpp(self, name_suffix='', as_reference=False):
if self.keep_dims > 0:
size_str = ''
for dim in range(self.keep_dims):
size_str += '[{cur_dim}]'.format(cur_dim=self.shape[dim])
final_dim = 1
for dim in range(self.keep_dims, len(self.shape)):
final_dim *= self.shape[dim]
size_str += '[{last_dim}]'.format(last_dim=final_dim)
return '{type} {name}{sizes}'.format(type=self.type.name, name=self.name, sizes=size_str)
return '{type} {name}[{size}]'.format(type=self.type.name, name=self.name, size=self.data_length)
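
With keep_dims set, the leading keep_dims axes of the weight tensor keep their own C array dimensions and the remaining axes are flattened into one, which is how the transpose layers receive their 2- and 3-dimensional weight arrays. A sketch of the sizing logic on a hypothetical shape:

# keep_dims=1 on a hypothetical shape [4, 3, 8] weight tensor
shape, keep_dims = [4, 3, 8], 1
size_str = ''.join('[{}]'.format(shape[d]) for d in range(keep_dims))
final_dim = 1
for d in range(keep_dims, len(shape)):
    final_dim *= shape[d]
size_str += '[{}]'.format(final_dim)
print('weight_t w' + size_str)  # -> weight_t w[4][24]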

class StaticWeightVariableConverter(object):
38 changes: 35 additions & 3 deletions hls4ml/backends/fpga/passes/codegen.py
@@ -1,17 +1,21 @@
from hls4ml.model.optimizer import OptimizerPass
from hls4ml.model.layers import Conv1D, Conv2D
from hls4ml.model.layers import Conv1D, Conv2D, Conv1DTranspose, Conv2DTranspose
from hls4ml.model.types import Source

class GenerateConvIm2col(OptimizerPass):
''' Generates code for the im2col step of 1D/2D convolution '''
def match(self, node):
return isinstance(node, (Conv1D, Conv2D)) and \
return isinstance(node, (Conv1D, Conv2D, Conv1DTranspose, Conv2DTranspose)) and \
node.model.config.get_config_value('IOType') == 'io_parallel'

def transform(self, model, node):
node_class = node.__class__.__name__
if '1D' in node_class:
if '1DTranspose' in node_class:
self._generate_im2col_1d_transpose(node)
elif '1D' in node_class:
self._generate_im2col_1d(node)
elif '2DTranspose' in node_class:
self._generate_im2col_2d_transpose(node)
elif '2D' in node_class:
self._generate_im2col_2d(node)
else:
@@ -30,6 +34,19 @@ def _generate_im2col_1d(self, node):

node.set_attr('line_buffer_codegen', Source(code_str))

def _generate_im2col_1d_transpose(self, node):
code_str = node.model.config.backend.generate_conv1d_tr_line_buffer_fn(
node.get_attr('index'),
node.get_attr('n_partitions'),
node.get_input_variable().shape[0],
node.get_input_variable().shape[1],
node.get_attr('proc_width'),
kernel=node.get_attr('filt_width'),
stride=node.get_attr('stride_width'),
)

node.set_attr('line_buffer_codegen', Source(code_str))

def _generate_im2col_2d(self, node):
code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
node.get_attr('index'),
@@ -43,3 +60,18 @@ def _generate_im2col_2d(self, node):
)

node.set_attr('line_buffer_codegen', Source(code_str))

def _generate_im2col_2d_transpose(self, node):
code_str = node.model.config.backend.generate_conv2d_tr_line_buffer_fn(
node.get_attr('index'),
node.get_attr('n_partitions'),
node.get_input_variable().shape[0],
node.get_input_variable().shape[1],
node.get_input_variable().shape[2],
node.get_attr('proc_height'),
node.get_attr('proc_width'),
kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
)

node.set_attr('line_buffer_codegen', Source(code_str))
112 changes: 111 additions & 1 deletion hls4ml/backends/vivado/passes/conv_same_pad.py
@@ -1,5 +1,5 @@
from hls4ml.model.optimizer import OptimizerPass
from hls4ml.model.layers import Conv1D, SeparableConv1D, Conv2D, SeparableConv2D
from hls4ml.model.layers import Conv1D, SeparableConv1D, Conv2D, SeparableConv2D, Conv1DTranspose, Conv2DTranspose

class InsertZeroPaddingBeforeConv1D(OptimizerPass):
name = 'insert_zero_padding_before_conv1d'
@@ -46,6 +46,53 @@ def transform(self, model, node):

return True

class InsertZeroPaddingBeforeConv1DTranspose(OptimizerPass):
name = 'insert_zero_padding_before_conv1dtranspose'

def match(self, node):
is_match = isinstance(node, (Conv1DTranspose)) and \
node.get_attr('padding') == 'same' and \
node.get_attr('filt_width') != 1
return is_match

def transform(self, model, node):
if model.config.get_config_value('IOType') != 'io_stream':
return False

# Get the padding parameters from Conv1D layer
pad_left = node.get_attr('pad_left')
pad_right = node.get_attr('pad_right')
convtr_out_width = node.get_attr('out_width')
in_width = node.get_attr('in_width')
stride_width = node.get_attr('stride_width')
trfilt_width = (node.get_attr('filt_width') + node.get_attr('stride_width') - 1) \
// node.get_attr('stride_width')

add_right = (convtr_out_width + pad_left)//stride_width - (in_width-1)

out_width = in_width + add_right + trfilt_width-1

attrs = {
'pad_left': trfilt_width-1,
'pad_right': add_right,
'in_width': in_width,
'out_width': out_width,
'n_chan': node.get_attr('n_chan'),
'data_format': node.get_attr('data_format', 'channels_last')
}

# Switch Conv1DTranspose to be 'valid'. I think this is wrong
node.set_attr('padding', 'valid')
node.set_attr('in_width', out_width)
node.set_attr('pad_left', pad_left + (trfilt_width-1)*stride_width)

# Insert new ZeroPadding1D node above Conv1DTranspose
padding_layer = model.make_node('ZeroPadding1D', 'zp1d_' + node.name, attrs, node.inputs.copy())
padding_layer.get_output_variable().type.precision = node.get_input_variable().type.precision
model.insert_node(padding_layer)

return True
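
The arithmetic above front-loads trfilt_width - 1 zeros so the first output pixel sees a full window, and appends add_right zeros to cover the strided tail. A worked example with hypothetical layer attributes (pad_left and the Keras output width are assumed values):

# Hypothetical 'same' Conv1DTranspose: in_width=8, stride=2, kernel=4,
# pad_left=1, Keras output width 16.
in_width, stride_width, filt_width, pad_left = 8, 2, 4, 1
convtr_out_width = 16
trfilt_width = (filt_width + stride_width - 1) // stride_width              # 2
add_right = (convtr_out_width + pad_left) // stride_width - (in_width - 1)  # 1
out_width = in_width + add_right + trfilt_width - 1                         # 10
print(trfilt_width, add_right, out_width)  # 2 1 10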

class InsertZeroPaddingBeforeConv2D(OptimizerPass):
name = 'insert_zero_padding_before_conv2d'

@@ -100,3 +147,66 @@ def transform(self, model, node):
model.insert_node(padding_layer, before=node)

return True

class InsertZeroPaddingBeforeConv2DTranspose(OptimizerPass):
name = 'insert_zero_padding_before_conv2dtranspose'

def match(self, node):
is_match = isinstance(node, Conv2DTranspose) and \
node.get_attr('padding') == 'same' and \
node.get_attr('filt_width') != 1
return is_match

def transform(self, model, node):
if model.config.get_config_value('IOType') != 'io_stream':
return False

# Get the padding parameters from Conv2DTranspose layer
pad_left = node.get_attr('pad_left')
pad_right = node.get_attr('pad_right')
pad_top = node.get_attr('pad_top')
pad_bottom = node.get_attr('pad_bottom')
convtr_out_width = node.get_attr('out_width')
convtr_out_height = node.get_attr('out_height')
in_width = node.get_attr('in_width')
in_height = node.get_attr('in_height')
stride_width = node.get_attr('stride_width')
stride_height = node.get_attr('stride_height')
trfilt_width = (node.get_attr('filt_width') + node.get_attr('stride_width') - 1) \
// node.get_attr('stride_width')
trfilt_height = (node.get_attr('filt_height') + node.get_attr('stride_height') - 1) \
// node.get_attr('stride_height')

add_right = (convtr_out_width + pad_left)//stride_width-(in_width-1)
add_bottom = (convtr_out_height + pad_top)//stride_height-(in_height-1)

out_width = in_width + add_right + trfilt_width-1
out_height = in_height + add_bottom + trfilt_height-1

attrs = {
'pad_left': trfilt_width-1,
'pad_right': add_right,
'pad_top': trfilt_height-1,
'pad_bottom': add_bottom,
'in_width': in_width,
'in_height': in_height,
'out_width': out_width,
'out_height': out_height,
'n_chan': node.get_attr('n_chan'),
'data_format': node.get_attr('data_format', 'channels_last')
}

# switch Conv2DTranspose to be 'valid'. This is technically not true though
node.set_attr('padding', 'valid')
node.set_attr('in_width', out_width)
node.set_attr('in_height', out_height)
node.set_attr('pad_left', pad_left + (trfilt_width-1)*stride_width)
node.set_attr('pad_top', pad_top + (trfilt_height-1)*stride_height)

# insert new ZeroPadding2D node above Conv2DTranspose
padding_layer = model.make_node('ZeroPadding2D', 'zp2d_' + node.name, attrs, node.inputs.copy())
padding_layer.get_output_variable().type.precision = node.get_input_variable().type.precision
model.insert_node(padding_layer, before=node)

return True

