Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IE Python Speech Sample] Add --scale_factor and --performance_counter options #6663

Merged
merged 9 commits into from
Sep 3, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ Usage message:
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
[-o OUTPUT] [-r REFERENCE] [-d DEVICE]
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
[-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS]
[-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc]
[-a {CORE,ATOM}] [-iname INPUT_LAYERS]
[-oname OUTPUT_LAYERS]

optional arguments:
Expand All @@ -94,9 +95,10 @@ optional arguments:
Options:
-h, --help Show this help message and exit.
-i INPUT, --input INPUT
Required. Path to an input file (.ark or .npz).
Required. Path to an input file (.ark or .npz).
-o OUTPUT, --output OUTPUT
Optional. Output file name to save inference results (.ark or .npz).
Optional. Output file name to save inference results
(.ark or .npz).
-r REFERENCE, --reference REFERENCE
Optional. Read reference score file and compare
scores.
Expand All @@ -113,9 +115,18 @@ Options:
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
Optional. Weight bits for quantization: 8 or 16
(default 16).
  -sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
                        Optional. User-specified input scale factor for
                        quantization.
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
Optional. Write GNA model to file using path/filename
provided.
-pc, --performance_counter
Optional. Enables performance report (specify -a to
ensure arch accurate results).
  -a {CORE,ATOM}, --arch {CORE,ATOM}
                        Optional. Specify a architecture. CORE, ATOM with
                        combination of -pc.
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS
Optional. Layer names for input blobs. The names are
separated with ",". Allows to change the order of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace:
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
args.add_argument('-sf', '--scale_factor', type=float,
help='Optional. User-specified input scale factor for quantization.')
args.add_argument('-wg', '--export_gna_model', type=str,
help='Optional. Write GNA model to file using path/filename provided.')
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
args.add_argument('-pc', '--performance_counter', action='store_true',
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'],
help='Optional. Specify a architecture. CORE, ATOM with combination of -pc.')
args.add_argument('-iname', '--input_layers', type=str,
help='Optional. Layer names for input blobs. The names are separated with ",". '
'Allows to change the order of input layers for -i flag. Example: Input1,Input2')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
from file_options import read_utterance_file, write_utterance_file
from openvino.inference_engine import ExecutableNetwork, IECore

# Operating Frequency for GNA HW devices for Core and Atom architecture
GNA_CORE_FREQUENCY = 400
GNA_ATOM_FREQUENCY = 200


def get_scale_factor(matrix: np.ndarray) -> float:
"""Get scale factor for quantization using utterance matrix"""
Expand Down Expand Up @@ -131,21 +135,26 @@ def main():
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'

# Get a GNA scale factor
# Set a GNA scale factor
if args.import_gna_model:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
elif args.scale_factor:
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
else:
utterances = read_utterance_file(args.input.split(',')[0])
key = sorted(utterances)[0]
scale_factor = get_scale_factor(utterances[key])
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')

plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)

if args.export_embedded_gna_model:
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

if args.performance_counter:
plugin_config['PERF_COUNT'] = 'YES'

device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]

log.info('Loading the model to the plugin')
Expand Down Expand Up @@ -208,6 +217,7 @@ def main():
log.info('Starting inference in synchronous mode')
results = {blob_name: {} for blob_name in output_blobs}
infer_times = []
perf_counters = []

for key in sorted(input_data):
start_infer_time = default_timer()
Expand All @@ -223,6 +233,7 @@ def main():
results[blob_name][key] = result[blob_name]

infer_times.append(default_timer() - start_infer_time)
perf_counters.append(exec_net.requests[0].get_perf_counts())

# ---------------------------Step 8. Process output--------------------------------------------------------------------
for blob_name in output_blobs:
Expand All @@ -235,6 +246,26 @@ def main():
if args.reference:
compare_with_reference(results[blob_name][key], references[blob_name][key])

if args.performance_counter:
if 'GNA' in args.device:
pc = perf_counters[i]
total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
active_cycles = total_cycles - stall_cycles
frequency = 10**6
if args.arch == 'CORE':
frequency *= GNA_CORE_FREQUENCY
else:
frequency *= GNA_ATOM_FREQUENCY
total_inference_time = total_cycles / frequency
active_time = active_cycles / frequency
stall_time = stall_cycles / frequency
log.info('')
log.info('Performance Statistics of GNA Hardware')
log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
log.info(f' Active Time: {(active_time * 1000):.4f} ms')
log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')

log.info('')

log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')
Expand Down