From 1bb98fe34926c2acf6d29b8d19eb4fa069f7107d Mon Sep 17 00:00:00 2001 From: joshlerner Date: Sat, 24 Jun 2023 01:28:39 -0700 Subject: [PATCH 1/3] added necessary weight sources and non default precision convert for GarNet --- .gitignore | 1 + hls4ml/backends/vivado/passes/garnet_templates.py | 3 ++- hls4ml/converters/keras/graph.py | 14 +++++++++----- hls4ml/model/layers.py | 5 +---- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index ef4f0a88a..22c8ff685 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ docs/_build docs/autodoc/* hls4mlprj_* *~ +*.ipynb_checkpoints/ diff --git a/hls4ml/backends/vivado/passes/garnet_templates.py b/hls4ml/backends/vivado/passes/garnet_templates.py index 0d42ec941..4b968b0f4 100644 --- a/hls4ml/backends/vivado/passes/garnet_templates.py +++ b/hls4ml/backends/vivado/passes/garnet_templates.py @@ -114,7 +114,8 @@ def format(self, node): params[f'{vname}_t'], type_name = node.model.config.get_precision(node, var=vname) if type_name.endswith('default_t'): params[f'{vname}_t'] = precision_converter.convert(default_precision).definition_cpp() - + else: + params[f'{vname}_t'] = precision_converter.convert(params[f'{vname}_t']).definition_cpp() params['output_t'] = node.get_output_variable().type.name if node.attributes['collapse'] in ['mean', 'max']: diff --git a/hls4ml/converters/keras/graph.py b/hls4ml/converters/keras/graph.py index 8f93239fc..fc21034a2 100644 --- a/hls4ml/converters/keras/graph.py +++ b/hls4ml/converters/keras/graph.py @@ -46,13 +46,17 @@ def parse_garnet_layer(keras_layer, input_names, input_shapes, data_reader): layer['n_sublayers'] = keras_layer['config']['n_sublayers'] layer['n_in_features'] = [input_shapes[0][2]] - for il in range(1, layer['n_sublayers']): - layer['n_in_features'].append(layer['n_out_features'][il - 1]) + for il in range(layer['n_sublayers']): + if il > 0: + layer['n_in_features'].append(layer['n_out_features'][il - 1]) weights_source = [ - f'S{il}_kernel', - f'S{il}_bias', - f'Fout{il}_bias', + f'FLR{il}_kernel', + f'FLR{il}_bias', + f'S{il}_kernel', + f'S{il}_bias', + f'Fout{il}_kernel', + f'Fout{il}_bias', ] for weight in weights_source: layer[weight + '_data'] = get_weights_data(data_reader, layer['name'], weight) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index c62998672..81109d8cc 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1182,10 +1182,7 @@ def _initialize_transforms(self): def _make_input_transform_weights(self, n_propagate, n_aggregators, n_out_features, quantize=False, sublayer=''): # Due to linearity of the input transform, input weights and biases can be contracted away at conversion time - - output_transform_kernel = self.get_attr( - f'Fout{sublayer}_kernel_data' - ) # [(n_aggregators, n_propagate), n_out_features] + output_transform_kernel = self.get_attr(f'Fout{sublayer}_kernel_data') # [(n_aggregators, n_propagate), n_out_features] output_transform_kernel = output_transform_kernel.reshape((n_aggregators, n_propagate, n_out_features)) if quantize: output_transform_kernel = self.get_attr('quantizer')(output_transform_kernel) From 8ba6a931385b9497336d716caef8edba79a47633 Mon Sep 17 00:00:00 2001 From: joshlerner Date: Fri, 30 Jun 2023 12:23:33 -0700 Subject: [PATCH 2/3] fixed output activation in stacked garnet, added test for stacked garnet (both keras and hls models) and garnet sublayer precision --- contrib/garnet.py | 28 ++++++++++------------ test/pytest/test_garnet.py | 49 
+++++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/contrib/garnet.py b/contrib/garnet.py index 5cd93f1a3..d8808b704 100644 --- a/contrib/garnet.py +++ b/contrib/garnet.py @@ -313,36 +313,34 @@ def _setup_transforms(self, n_aggregators, n_filters, n_propagate): name=('Fout%d' % it), ) - # Check for correctness. This commented out because pre-commit showed it was unused. - - # if self._output_activation is None or self._output_activation == "linear": - # output_activation_transform = (QActivation("quantized_bits(%i, %i)" - # % (self._total_bits, self._int_bits))) - # else: - # output_activation_transform = QActivation( - # "quantized_%s(%i, %i)" % (self._output_activation, self._total_bits, self._int_bits) - # ) + if self._output_activation is None or self._output_activation == "linear": + output_activation_transform = (QActivation("quantized_bits(%i, %i)" + % (self._total_bits, self._int_bits))) + else: + output_activation_transform = QActivation( + "quantized_%s(%i, %i)" % (self._output_activation, self._total_bits, self._int_bits) + ) else: input_feature_transform = NamedDense(p, name=('FLR%d' % it)) output_feature_transform = NamedDense(f, name=('Fout%d' % it)) - # output_activation_transform = keras.layers.Activation(self._output_activation) + output_activation_transform = keras.layers.Activation(self._output_activation) aggregator_distance = NamedDense(a, name=('S%d' % it)) - self._transform_layers.append((input_feature_transform, aggregator_distance, output_feature_transform)) + self._transform_layers.append((input_feature_transform, aggregator_distance, output_feature_transform, output_activation_transform)) self._sublayers = sum((list(layers) for layers in self._transform_layers), []) def _build_transforms(self, data_shape): - for in_transform, d_compute, out_transform in self._transform_layers: + for in_transform, d_compute, out_transform, act_transform in self._transform_layers: in_transform.build(data_shape) d_compute.build(data_shape) if self._simplified: - out_transform.build(data_shape[:2] + (d_compute.units * in_transform.units,)) + act_transform.build(out_transform.build(data_shape[:2] + (d_compute.units * in_transform.units,))) else: - out_transform.build( + act_transform.build(out_transform.build( data_shape[:2] + (data_shape[2] + d_compute.units * in_transform.units + d_compute.units,) - ) + )) data_shape = data_shape[:2] + (out_transform.units,) diff --git a/test/pytest/test_garnet.py b/test/pytest/test_garnet.py index 802505d5d..a24d03a08 100644 --- a/test/pytest/test_garnet.py +++ b/test/pytest/test_garnet.py @@ -1,3 +1,7 @@ +import os +os.environ['CUDA_VISIBLE_DEVICES'] = "1" +os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' + from pathlib import Path import numpy as np @@ -6,7 +10,7 @@ from tensorflow.keras.models import Model import hls4ml -from contrib.garnet import GarNet +from contrib.garnet import GarNet, GarNetStack test_root_path = Path(__file__).parent @@ -48,6 +52,39 @@ def garnet_models(): hls_model.compile() return model, hls_model +@pytest.fixture(scope='module') +def garnet_stack_models(): + x = Input(shape=(vmax, feat)) + n = Input(shape=(1,), dtype='uint16') + inputs = [x, n] + outputs = GarNetStack( + ([4, 4, 8]), + ([4, 4, 8]), + ([8, 8, 16]), + simplified=True, + collapse='mean', + input_format='xn', + output_activation=None, # added output_activation_transform back in contrib.garnet.py + name='gar_1', + quantize_transforms=None, # this should be false, not None...fix in contrib.garnet.py + 
)(inputs) + model = Model(inputs=inputs, outputs=outputs) + model.summary() + + config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config['Model'] = {} + config['Model']['ReuseFactor'] = 1 + config['Model']['Strategy'] = 'Latency' + config['Model']['Precision'] = 'ap_fixed<32,6>' + + cfg = hls4ml.converters.create_config(output_dir=str(test_root_path / 'hls4mlprj_garnet'), part='xc7z020clg400-1') + cfg['HLSConfig'] = config + cfg['KerasModel'] = model + + hls_model = hls4ml.converters.keras_to_hls(cfg) + hls_model.compile() + return model, hls_model + @pytest.mark.parametrize('batch', [1, 3]) def test_accuracy(garnet_models, batch): @@ -58,3 +95,13 @@ def test_accuracy(garnet_models, batch): y_hls = hls_model.predict(x_hls).reshape(y.shape) np.testing.assert_allclose(y_hls, y, rtol=0, atol=0.1) + +@pytest.mark.parametrize('batch', [1, 3]) +def test_accuracy_stack(garnet_stack_models, batch): + model, hls_model = garnet_stack_models + x = [np.random.rand(batch, vmax, feat), np.random.randint(0, vmax, size=(batch, 1))] + y = model.predict(x) + x_hls = [x[0], x[1].astype(np.float64)] + y_hls = hls_model.predict(x_hls).reshape(y.shape) + + np.testing.assert_allclose(y_hls, y, rtol=0, atol=0.1) From f07c112cef826bf380558c63c41b9a42cfaf0d50 Mon Sep 17 00:00:00 2001 From: joshlerner Date: Fri, 30 Jun 2023 14:14:58 -0700 Subject: [PATCH 3/3] fixed GarNet loading weights and internal array precisions --- contrib/garnet.py | 15 +++++++++------ hls4ml/converters/keras/graph.py | 12 ++++++------ hls4ml/model/layers.py | 4 +++- test/pytest/test_garnet.py | 14 ++++++-------- 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/contrib/garnet.py b/contrib/garnet.py index d8808b704..075819e9d 100644 --- a/contrib/garnet.py +++ b/contrib/garnet.py @@ -314,8 +314,7 @@ def _setup_transforms(self, n_aggregators, n_filters, n_propagate): ) if self._output_activation is None or self._output_activation == "linear": - output_activation_transform = (QActivation("quantized_bits(%i, %i)" - % (self._total_bits, self._int_bits))) + output_activation_transform = QActivation("quantized_bits(%i, %i)" % (self._total_bits, self._int_bits)) else: output_activation_transform = QActivation( "quantized_%s(%i, %i)" % (self._output_activation, self._total_bits, self._int_bits) @@ -327,7 +326,9 @@ def _setup_transforms(self, n_aggregators, n_filters, n_propagate): aggregator_distance = NamedDense(a, name=('S%d' % it)) - self._transform_layers.append((input_feature_transform, aggregator_distance, output_feature_transform, output_activation_transform)) + self._transform_layers.append( + (input_feature_transform, aggregator_distance, output_feature_transform, output_activation_transform) + ) self._sublayers = sum((list(layers) for layers in self._transform_layers), []) @@ -338,9 +339,11 @@ def _build_transforms(self, data_shape): if self._simplified: act_transform.build(out_transform.build(data_shape[:2] + (d_compute.units * in_transform.units,))) else: - act_transform.build(out_transform.build( - data_shape[:2] + (data_shape[2] + d_compute.units * in_transform.units + d_compute.units,) - )) + act_transform.build( + out_transform.build( + data_shape[:2] + (data_shape[2] + d_compute.units * in_transform.units + d_compute.units,) + ) + ) data_shape = data_shape[:2] + (out_transform.units,) diff --git a/hls4ml/converters/keras/graph.py b/hls4ml/converters/keras/graph.py index fc21034a2..5c5c2247c 100644 --- a/hls4ml/converters/keras/graph.py +++ b/hls4ml/converters/keras/graph.py @@ -51,12 +51,12 @@ 
def parse_garnet_layer(keras_layer, input_names, input_shapes, data_reader): layer['n_in_features'].append(layer['n_out_features'][il - 1]) weights_source = [ - f'FLR{il}_kernel', - f'FLR{il}_bias', - f'S{il}_kernel', - f'S{il}_bias', - f'Fout{il}_kernel', - f'Fout{il}_bias', + f'FLR{il}_kernel', + f'FLR{il}_bias', + f'S{il}_kernel', + f'S{il}_bias', + f'Fout{il}_kernel', + f'Fout{il}_bias', ] for weight in weights_source: layer[weight + '_data'] = get_weights_data(data_reader, layer['name'], weight) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 81109d8cc..d9da2cc74 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1182,7 +1182,9 @@ def _initialize_transforms(self): def _make_input_transform_weights(self, n_propagate, n_aggregators, n_out_features, quantize=False, sublayer=''): # Due to linearity of the input transform, input weights and biases can be contracted away at conversion time - output_transform_kernel = self.get_attr(f'Fout{sublayer}_kernel_data') # [(n_aggregators, n_propagate), n_out_features] + output_transform_kernel = self.get_attr( + f'Fout{sublayer}_kernel_data' + ) # [(n_aggregators, n_propagate), n_out_features] output_transform_kernel = output_transform_kernel.reshape((n_aggregators, n_propagate, n_out_features)) if quantize: output_transform_kernel = self.get_attr('quantizer')(output_transform_kernel) diff --git a/test/pytest/test_garnet.py b/test/pytest/test_garnet.py index a24d03a08..67ddf7718 100644 --- a/test/pytest/test_garnet.py +++ b/test/pytest/test_garnet.py @@ -1,7 +1,3 @@ -import os -os.environ['CUDA_VISIBLE_DEVICES'] = "1" -os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' - from pathlib import Path import numpy as np @@ -52,6 +48,7 @@ def garnet_models(): hls_model.compile() return model, hls_model + @pytest.fixture(scope='module') def garnet_stack_models(): x = Input(shape=(vmax, feat)) @@ -64,9 +61,9 @@ def garnet_stack_models(): simplified=True, collapse='mean', input_format='xn', - output_activation=None, # added output_activation_transform back in contrib.garnet.py + output_activation=None, # added output_activation_transform back in contrib.garnet.py name='gar_1', - quantize_transforms=None, # this should be false, not None...fix in contrib.garnet.py + quantize_transforms=None, # this should be false, not None...fix in contrib.garnet.py )(inputs) model = Model(inputs=inputs, outputs=outputs) model.summary() @@ -76,7 +73,7 @@ def garnet_stack_models(): config['Model']['ReuseFactor'] = 1 config['Model']['Strategy'] = 'Latency' config['Model']['Precision'] = 'ap_fixed<32,6>' - + # config should now have precisions specified for ['LayerName']['gar_1']['Precision']['norm', 'aggr', etc.] cfg = hls4ml.converters.create_config(output_dir=str(test_root_path / 'hls4mlprj_garnet'), part='xc7z020clg400-1') cfg['HLSConfig'] = config cfg['KerasModel'] = model @@ -95,7 +92,8 @@ def test_accuracy(garnet_models, batch): y_hls = hls_model.predict(x_hls).reshape(y.shape) np.testing.assert_allclose(y_hls, y, rtol=0, atol=0.1) - + + @pytest.mark.parametrize('batch', [1, 3]) def test_accuracy_stack(garnet_stack_models, batch): model, hls_model = garnet_stack_models
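
A minimal sketch (not taken from the patch itself) of how the non-default precision conversion added in garnet_templates.py could be exercised: named entries under a GarNet/GarNetStack layer's Precision config are looked up per internal variable, as hinted by the comment in test_garnet.py. The 'norm' and 'aggr' variable names follow that comment; the bit widths, the 'default' entry, and the output directory are illustrative assumptions, not values taken from the patch.

import hls4ml

# `model` is assumed to be a Keras model containing a GarNetStack layer named
# 'gar_1', as built in the garnet_stack_models fixture above.
config = hls4ml.utils.config_from_keras_model(model, granularity='name')
config['Model'] = {'ReuseFactor': 1, 'Strategy': 'Latency', 'Precision': 'ap_fixed<32,6>'}

# Named entries are picked up by the per-variable precision lookup in
# garnet_templates.py; variables not listed fall back through the default_t branch.
config['LayerName']['gar_1']['Precision'] = {
    'default': 'ap_fixed<32,6>',
    'norm': 'ap_fixed<14,4>',  # hypothetical width, for illustration only
    'aggr': 'ap_fixed<18,8>',  # hypothetical width, for illustration only
}

cfg = hls4ml.converters.create_config(output_dir='hls4mlprj_garnet_stack', part='xc7z020clg400-1')
cfg['HLSConfig'] = config
cfg['KerasModel'] = model

hls_model = hls4ml.converters.keras_to_hls(cfg)
hls_model.compile()

With no named entries, every internal array keeps the default_t path that the existing test_accuracy test already exercises.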