Skip to content

Commit

Permalink
Merge pull request fastmachinelearning#629 from vloncar/vitis_port
Browse files Browse the repository at this point in the history
Vitis HLS backend
  • Loading branch information
jmitrevs authored Mar 31, 2023
2 parents c947ec9 + 5be3146 commit 0da2d8e
Show file tree
Hide file tree
Showing 72 changed files with 1,923 additions and 346 deletions.
2 changes: 2 additions & 0 deletions hls4ml/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from hls4ml.backends.vivado.vivado_backend import VivadoBackend
from hls4ml.backends.vivado_accelerator.vivado_accelerator_backend import VivadoAcceleratorBackend
from hls4ml.backends.vivado_accelerator.vivado_accelerator_config import VivadoAcceleratorConfig
from hls4ml.backends.vitis.vitis_backend import VitisBackend
from hls4ml.backends.quartus.quartus_backend import QuartusBackend

register_backend('Vivado', VivadoBackend)
register_backend('VivadoAccelerator', VivadoAcceleratorBackend)
register_backend('Vitis', VitisBackend)
register_backend('Quartus', QuartusBackend)
Empty file.
Empty file.
28 changes: 28 additions & 0 deletions hls4ml/backends/vitis/passes/feature_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from hls4ml.model.optimizer import OptimizerPass


class ValidateConvImplementation(OptimizerPass):
    """Replace the "encoded" convolution implementation with "linebuffer".

    The pass matches every node whose class name contains ``Conv`` and, when
    the node's ``implementation`` attribute is ``'encoded'``, prints a warning
    and overwrites the attribute with ``'linebuffer'``.
    """

    def match(self, node):
        """Return True for any node whose class name contains ``'Conv'``."""
        return 'Conv' in node.class_name

    def transform(self, model, node):
        """Switch an "encoded" node to "linebuffer", warning the user.

        Nodes that carry any other implementation (or none at all, in which
        case ``'linebuffer'`` is assumed as the default) are left untouched.
        """
        implementation = node.get_attr('implementation', 'linebuffer')
        if implementation != 'encoded':
            return

        warning = (
            f'WARNING: "Encoded" implementation in "{node.name}" ({node.class_name}) '
            'is not supported in Vitis backend. '
            'Switching to "LineBuffer" implementation.'
        )
        print(warning)
        node.set_attr('implementation', 'linebuffer')


class ValidateStrategy(OptimizerPass):
    """Warn about reuse factors that may synthesize poorly with "Resource" strategy.

    Only nodes whose class name contains one of ``_resource_layer_cls`` and
    for which the model configuration reports the resource strategy are
    inspected. The pass never modifies the node; it only prints a warning.
    """

    # Substrings of layer class names that this check applies to.
    _resource_layer_cls = ['Conv1D', 'Conv2D', 'Dense']

    def match(self, node):
        """Return a truthy value for Conv1D/Conv2D/Dense-like nodes using the resource strategy."""
        class_name = node.class_name
        is_resource_layer = any(cls_name in class_name for cls_name in self._resource_layer_cls)
        is_resource_strategy = node.model.config.is_resource_strategy(node)

        return is_resource_layer and is_resource_strategy

    def transform(self, model, node):
        """Print a warning when the reuse factor exceeds ``n_in`` without being a multiple of it."""
        # Only the first element returned by the backend (n_in) is needed here.
        n_in = model.config.backend.get_layer_mult_size(node)[0]
        reuse_factor = node.get_attr('reuse_factor')

        # Nothing to report when RF fits within n_in or is an exact multiple of it.
        if reuse_factor <= n_in or reuse_factor % n_in <= 0:
            return

        warning = (
            f'WARNING: "Resource" strategy in "{node.name}" ({node.class_name}) '
            'may have suboptimal QoR in Vitis backend due to use of "urem" cores. '
            'Consider using a different ReuseFactor or switching to "Latency" strategy.'
        )
        print(warning)
46 changes: 46 additions & 0 deletions hls4ml/backends/vitis/vitis_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import os
import sys

from hls4ml.backends import VivadoBackend
from hls4ml.model.flow import register_flow, get_flow
from hls4ml.report import parse_vivado_report


class VitisBackend(VivadoBackend):
    """Backend that drives Vitis HLS, reusing most of the Vivado backend.

    The class registers its own ``validation``, ``apply_templates``, ``write``
    and ``ip`` flows under the ``Vitis`` backend name; the ``ip`` flow is built
    from the requirements of the existing ``vivado:ip`` flow.
    """

    def __init__(self):
        # The MRO lookup intentionally starts *after* VivadoBackend, so
        # VivadoBackend.__init__ is bypassed and the next initializer in the
        # chain receives name='Vitis'. The registration steps are then run
        # explicitly below.
        # NOTE(review): presumably this avoids registering under the Vivado
        # name -- confirm against VivadoBackend.__init__.
        super(VivadoBackend, self).__init__(name='Vitis')
        self._register_layer_attributes()
        self._register_flows()

    def _register_flows(self):
        """Register the Vitis-specific flows and store the writer/default flows."""
        validation_passes = [
            'vitis:validate_conv_implementation',
            'vitis:validate_strategy',
        ]
        validation_flow = register_flow('validation', validation_passes, requires=['vivado:init_layers'], backend=self.name)

        # Any potential templates registered specifically for Vitis backend
        template_flow = register_flow('apply_templates', self._get_layer_templates, requires=['vivado:init_layers'], backend=self.name)

        writer_passes = ['make_stamp', 'vitis:write_hls']
        self._writer_flow = register_flow('write', writer_passes, requires=['vitis:ip'], backend=self.name)

        # Start from a copy of the Vivado IP flow requirements so the Vivado
        # flow itself is not mutated, then splice the Vitis flows in directly
        # before their Vivado counterparts (list.insert places the new item
        # ahead of the element currently at that index).
        ip_flow_requirements = get_flow('vivado:ip').requires.copy()
        ip_flow_requirements.insert(ip_flow_requirements.index('vivado:init_layers'), validation_flow)
        ip_flow_requirements.insert(ip_flow_requirements.index('vivado:apply_templates'), template_flow)

        self._default_flow = register_flow('ip', None, requires=ip_flow_requirements, backend=self.name)

    def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False):
        """Run ``vitis_hls`` on the project in the model's output directory.

        Args:
            model: Model whose ``config.get_output_dir()`` names the directory
                containing ``build_prj.tcl``.
            reset, csim, synth, cosim, validation, export, vsynth: Flags passed
                verbatim (as ``name=value`` pairs) to ``build_prj.tcl``; their
                interpretation is up to that script.

        Returns:
            The result of ``parse_vivado_report`` for the output directory.

        Raises:
            Exception: On Linux, when ``vitis_hls`` cannot be found on PATH.
        """
        if 'linux' in sys.platform:
            found = os.system('command -v vitis_hls > /dev/null')
            if found != 0:
                raise Exception('Vitis HLS installation not found. Make sure "vitis_hls" is on PATH.')

        output_dir = model.config.get_output_dir()
        build_options = (
            f'reset={reset} csim={csim} synth={synth} cosim={cosim} '
            f'validation={validation} export={export} vsynth={vsynth}'
        )

        curr_dir = os.getcwd()
        os.chdir(output_dir)
        try:
            os.system(f'vitis_hls -f build_prj.tcl "{build_options}"')
        finally:
            # Changing directory is a process-wide side effect; always undo it,
            # even if the call above is interrupted or raises.
            os.chdir(curr_dir)

        return parse_vivado_report(output_dir)
6 changes: 6 additions & 0 deletions hls4ml/backends/vivado/passes/convolution_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
static const unsigned n_out = {n_out};
static const unsigned reuse_factor = {reuse};
static const unsigned strategy = nnet::{strategy};
static const unsigned n_zeros = 0;
static const unsigned multiplier_limit = DIV_ROUNDUP(n_in * n_out, reuse_factor) - n_zeros / reuse_factor;
typedef {accum_t.name} accum_t;
typedef {bias_t.name} bias_t;
typedef {weight_t.name} weight_t;
Expand Down Expand Up @@ -123,6 +125,7 @@ def format(self, node):
static const unsigned out_width = {out_width};
static const unsigned reuse_factor = {reuse};
static const unsigned n_zeros = {nzeros};
static const unsigned multiplier_limit = DIV_ROUNDUP(kernel_size * n_chan * n_filt, reuse_factor) - n_zeros / reuse_factor;
static const bool store_weights_in_bram = false;
static const unsigned strategy = nnet::{strategy};
static const nnet::conv_implementation implementation = nnet::conv_implementation::{implementation};
Expand Down Expand Up @@ -363,6 +366,9 @@ def format(self, node):

# Depthwise config
params = self._default_config_params(node)
# Override bias and bias_t since these are zeros in depthwise step of SepConv2D
params['bias'] = params['zero_bias']
params['bias_t'] = params['zero_bias_t']
params['n_filt'] = params['n_chan'] # In depthwise step n_chan == n_filt
params['dilation'] = node.get_attr('dilation', 1)
params['nzeros'] = node.get_weights('depthwise').nzeros
Expand Down
2 changes: 2 additions & 0 deletions hls4ml/backends/vivado/passes/core_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
static const unsigned reuse_factor = {reuse};
static const unsigned n_zeros = {nzeros};
static const unsigned n_nonzeros = {nonzeros};
static const unsigned multiplier_limit = DIV_ROUNDUP(n_in * n_out, reuse_factor) - n_zeros / reuse_factor;
static const bool store_weights_in_bram = false;
typedef {accum_t.name} accum_t;
typedef {bias_t.name} bias_t;
Expand Down Expand Up @@ -63,6 +64,7 @@ def format(self, node):
static const unsigned n_scale_bias = (n_filt == -1) ? n_in : n_filt;
static const unsigned io_type = nnet::{iotype};
static const unsigned reuse_factor = {reuse};
static const unsigned multiplier_limit = DIV_ROUNDUP(n_in, reuse_factor);
static const bool store_weights_in_bram = false;
typedef {bias_t.name} bias_t;
typedef {scale_t.name} scale_t;
Expand Down
1 change: 1 addition & 0 deletions hls4ml/backends/vivado/passes/merge_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def format(self, node):
static const unsigned n_in = {n_in};
static const unsigned n_out = {n_out};
static const unsigned reuse_factor = {reuse};
static const unsigned multiplier_limit = DIV_ROUNDUP(n_in, reuse_factor);
typedef {accum_t.name} accum_t;
template<class x_T, class y_T>
using product = nnet::product::{product_type}<x_T, y_T>;
Expand Down
1 change: 1 addition & 0 deletions hls4ml/backends/vivado/passes/recurrent_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
static const unsigned reuse_factor = {reuse};
static const unsigned n_zeros = {nzeros};
static const unsigned n_nonzeros = {nonzeros};
static const unsigned multiplier_limit = DIV_ROUNDUP(n_in * n_out, reuse_factor) - n_zeros / reuse_factor;
static const bool store_weights_in_bram = false;
typedef {accum_t.name} accum_t;
typedef {bias_t.name} bias_t;
Expand Down
27 changes: 19 additions & 8 deletions hls4ml/report/vivado_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,21 @@ def _find_solutions(sln_dir):
solutions = []

if os.path.isfile(sln_dir + '/vivado_hls.app'):
with open(sln_dir + '/vivado_hls.app') as f:
# Get rid of namespaces (workaround to support two types of vivado_hls.app files)
xmlstring = re.sub(' xmlns="[^"]+"', '', f.read(), count=1)
sln_file = 'vivado_hls.app'
elif os.path.isfile(sln_dir + '/hls.app'):
sln_file = 'hls.app'
else:
return solutions

with open(sln_dir + '/' + sln_file) as f:
# Get rid of namespaces (workaround to support two types of vivado_hls.app files)
xmlstring = re.sub(' xmlns="[^"]+"', '', f.read(), count=1)

root = ET.fromstring(xmlstring)
for sln_tag in root.findall('solutions/solution'):
sln_name = sln_tag.get('name')
if sln_name is not None and os.path.isdir(sln_dir + '/' + sln_name):
solutions.append(sln_name)
root = ET.fromstring(xmlstring)
for sln_tag in root.findall('solutions/solution'):
sln_name = sln_tag.get('name')
if sln_name is not None and os.path.isdir(sln_dir + '/' + sln_name):
solutions.append(sln_name)

return solutions

Expand Down Expand Up @@ -172,8 +178,13 @@ def parse_vivado_report(hls_dir):
# Area
area_node = root.find('./AreaEstimates')
for child in area_node.find('./Resources'):
# DSPs are called 'DSP48E' in Vivado and just 'DSP' in Vitis. Overriding here to have consistent keys
if child.tag == 'DSP48E':
child.tag = 'DSP'
c_synth_report[child.tag] = child.text
for child in area_node.find('./AvailableResources'):
if child.tag == 'DSP48E':
child.tag = 'DSP'
c_synth_report['Available' + child.tag] = child.text
report['CSynthesisReport'] = c_synth_report
else:
Expand Down
102 changes: 102 additions & 0 deletions hls4ml/templates/vitis/nnet_utils/nnet_conv1d_resource.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#ifndef NNET_CONV1D_RESOURCE_H_
#define NNET_CONV1D_RESOURCE_H_

#include "nnet_common.h"
#include "nnet_dense.h"

namespace nnet {

// 1D convolution using the "resource" strategy (io_parallel style, array arguments).
//
// data    - input of size in_width * n_chan
// res     - output of size out_width * n_filt, written sequentially
// weights - flat array of filt_width * n_chan * n_filt weights
// biases  - one bias per output filter (n_filt)
//
// The output is produced in CONFIG_T::n_partitions steps; each step fills a
// buffer of CONFIG_T::n_pixels pixels, accumulates the products over
// CONFIG_T::reuse_factor pipelined iterations and writes the casted results.
template<class data_T, class res_T, typename CONFIG_T>
void conv_1d_resource_cl(
data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],
typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt])
{
// Dimensions of the per-pixel matrix-vector product.
constexpr unsigned mult_n_in = CONFIG_T::filt_width * CONFIG_T::n_chan;
constexpr unsigned mult_n_out = CONFIG_T::n_filt;
// Number of multiplications performed in each reuse iteration
// (DIV_ROUNDUP is defined elsewhere; presumably ceiling division).
constexpr unsigned block_factor = DIV_ROUNDUP(mult_n_in * mult_n_out, CONFIG_T::reuse_factor);
// Number of consecutive blocks that accumulate into the same output (see i_acc / i_out below).
constexpr unsigned multscale = block_factor / mult_n_out;

// NOTE(review): combined with the second assert (RF <= mult_n_in), the
// "reuse_factor >= mult_n_in" alternative can only hold when RF == mult_n_in.
assert((block_factor % mult_n_out == 0 || CONFIG_T::reuse_factor >= mult_n_in) && "The current Reuse Factor is not allowed");
assert((CONFIG_T::reuse_factor <= CONFIG_T::filt_width * CONFIG_T::n_chan) && "This function is correct only for RF <= FILT_WIDTH * N_CHAN");

// Treating weights as 2d is required to make sure Vitis doesn't use urem cores to calculate indices.
// Also, we don't apply ARRAY_RESHAPE pragma as Vitis figures this out on its own.
// The flat weight array is viewed as rows of reuse_factor elements and indexed as [i_blk][i_rf].
typename CONFIG_T::weight_t (*weights_2d)[CONFIG_T::reuse_factor] = (typename CONFIG_T::weight_t (*)[CONFIG_T::reuse_factor]) weights;

// Per-partition input buffer: one row of mult_n_in values for each pixel.
data_T data_buf[CONFIG_T::n_pixels][mult_n_in];
#pragma HLS ARRAY_PARTITION variable=data_buf complete dim=0

#pragma HLS ARRAY_PARTITION variable=biases complete

// Accumulators: one per (pixel, output filter) pair.
typename CONFIG_T::accum_t acc[CONFIG_T::n_pixels][mult_n_out];
#pragma HLS ARRAY_PARTITION variable=acc complete dim=0

PartitionLoop:
for (unsigned i_part = 0; i_part < CONFIG_T::n_partitions; i_part++) {
//#pragma HLS UNROLL // We don't want this loop unrolled

// Load the inputs needed for this partition into data_buf
// (fill_buffer is supplied by CONFIG_T and defined elsewhere).
CONFIG_T::template fill_buffer<data_T, CONFIG_T>::fill_buffer(data, data_buf, i_part);

// Start every accumulator from the bias of its output filter.
PixelInitAccumLoop:
for (unsigned i_pxl = 0; i_pxl < CONFIG_T::n_pixels; i_pxl++) {
#pragma HLS UNROLL

InitAccumLoop:
for (unsigned i_acc = 0; i_acc < mult_n_out; i_acc++) {
#pragma HLS UNROLL
acc[i_pxl][i_acc] = (typename CONFIG_T::accum_t) biases[i_acc];
}
}

// One pipelined iteration per reuse step; the inner loops are fully unrolled.
ReuseLoop:
for (unsigned i_rf = 0; i_rf < CONFIG_T::reuse_factor; i_rf++) {
#pragma HLS PIPELINE II=1 rewind

// Running indices, maintained with add/compare instead of modulo/division.
unsigned i_in = i_rf;
unsigned i_out = 0;
unsigned i_acc = 0;

MultLoop:
for (unsigned i_blk = 0; i_blk < block_factor; i_blk++) {
#pragma HLS UNROLL

// The same weight is applied to every pixel of the partition.
PixelMultLoop:
for (unsigned i_pxl = 0; i_pxl < CONFIG_T::n_pixels; i_pxl++) {
#pragma HLS UNROLL

acc[i_pxl][i_out] += static_cast<typename CONFIG_T::accum_t>(
CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(data_buf[i_pxl][i_in], weights_2d[i_blk][i_rf]));
}

// Increment i_in
// (advance by reuse_factor and wrap back to i_rf once mult_n_in is reached)
i_in += CONFIG_T::reuse_factor;
if (i_in >= mult_n_in) {
i_in = i_rf;
}
// Increment i_out
// (move to the next output after multscale blocks have been accumulated)
if (i_acc + 1 >= multscale) {
i_acc = 0;
i_out++;
} else {
i_acc++;
}
}
}

// Write the results of this partition; res is advanced as a pointer, so
// consecutive partitions append to the output.
PixelResultLoop:
for (unsigned i_pxl = 0; i_pxl < CONFIG_T::n_pixels; i_pxl++) {
#pragma HLS UNROLL
// Cast to "res_t" type
ResultLoop:
for (unsigned i_res = 0; i_res < mult_n_out; i_res++) {
#pragma HLS UNROLL
*(res++) = cast<data_T, res_T, typename CONFIG_T::mult_config>(acc[i_pxl][i_res]);
}
}
}
}

}
#endif
36 changes: 36 additions & 0 deletions hls4ml/templates/vitis/nnet_utils/nnet_conv1d_stream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#ifndef NNET_CONV1D_STREAM_H_
#define NNET_CONV1D_STREAM_H_

#include "nnet_common.h"
#include "nnet_conv_stream.h"
#include "hls_stream.h"

namespace nnet {

// Streaming 1D convolution for the Vitis backend.
//
// data    - input stream; one element is read per loop iteration (in_width reads in total)
// res     - output stream, passed on to compute_output_buffer_1d
// weights - flat array of filt_width * n_chan * n_filt weights
// biases  - one bias per output filter (n_filt)
//
// Only the "linebuffer" implementation without left/right padding is accepted
// (enforced by the asserts below).
template<class data_T, class res_T, typename CONFIG_T>
void conv_1d_cl(
hls::stream<data_T> &data,
hls::stream<res_T> &res,
typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
typename CONFIG_T::bias_t biases[CONFIG_T::n_filt])
{
assert(CONFIG_T::implementation == conv_implementation::linebuffer && "Only \"linebuffer\" implementation is supported in Vitis HLS.");

// This function itself does not handle padding.
assert(CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0);

// compute_output_buffer_1d is defined elsewhere (presumably in nnet_conv_stream.h).
if (CONFIG_T::strategy == nnet::latency) {
// Latency strategy: the input loop is pipelined with II equal to the reuse factor.
ReadInputWidth: for (unsigned i_iw = 0; i_iw < CONFIG_T::in_width; i_iw++) {
#pragma HLS PIPELINE II=CONFIG_T::reuse_factor
compute_output_buffer_1d<data_T, res_T, CONFIG_T>(data.read(), res, weights, biases);
}
} else {
// Any other strategy: same loop body, but no PIPELINE pragma is applied to the loop.
ReadInputWidthSerial: for (unsigned i_iw = 0; i_iw < CONFIG_T::in_width; i_iw++) {
compute_output_buffer_1d<data_T, res_T, CONFIG_T>(data.read(), res, weights, biases);
}
}

}


}
#endif
Loading

0 comments on commit 0da2d8e

Please sign in to comment.