diff --git a/inference-engine/ie_bridges/python/sample/speech_sample/README.md b/inference-engine/ie_bridges/python/sample/speech_sample/README.md index 2f7fd4323aa61d..54403416bc4ace 100644 --- a/inference-engine/ie_bridges/python/sample/speech_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/speech_sample/README.md @@ -80,7 +80,8 @@ Usage message: usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT [-o OUTPUT] [-r REFERENCE] [-d DEVICE] [-bs BATCH_SIZE] [-qb QUANTIZATION_BITS] - [-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS] + [-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc] + [-a {CORE,ATOM}] [-iname INPUT_LAYERS] [-oname OUTPUT_LAYERS] optional arguments: @@ -94,9 +95,10 @@ optional arguments: Options: -h, --help Show this help message and exit. -i INPUT, --input INPUT - Required. Path to an input file (.ark or .npz). + Required. Path to an input file (.ark or .npz). -o OUTPUT, --output OUTPUT - Optional. Output file name to save inference results (.ark or .npz). + Optional. Output file name to save inference results + (.ark or .npz). -r REFERENCE, --reference REFERENCE Optional. Read reference score file and compare scores. @@ -113,9 +115,18 @@ Options: -qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS Optional. Weight bits for quantization: 8 or 16 (default 16). + -sf SCALE_FACTOR, --scale_factor SCALE_FACTOR + Optional. The user-specified input scale factor for + quantization. -wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL Optional. Write GNA model to file using path/filename provided. + -pc, --performance_counter + Optional. Enables performance report (specify -a to + ensure arch accurate results). + -a {CORE,ATOM}, --arch {CORE,ATOM} + Optional. Specify architecture. CORE, ATOM with the + combination of -pc. -iname INPUT_LAYERS, --input_layers INPUT_LAYERS Optional. Layer names for input blobs. The names are separated with ",". Allows to change the order of diff --git a/inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py b/inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py index cfc20dfb42590c..1d2ad5c7d71a7f 100644 --- a/inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py +++ b/inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py @@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace: args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).') args.add_argument('-qb', '--quantization_bits', default=16, type=int, help='Optional. Weight bits for quantization: 8 or 16 (default 16).') + args.add_argument('-sf', '--scale_factor', type=float, + help='Optional. The user-specified input scale factor for quantization.') args.add_argument('-wg', '--export_gna_model', type=str, help='Optional. Write GNA model to file using path/filename provided.') args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS) args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS) + args.add_argument('-pc', '--performance_counter', action='store_true', + help='Optional. Enables performance report (specify -a to ensure arch accurate results).') + args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'], + help='Optional. Specify architecture. CORE, ATOM with the combination of -pc.') args.add_argument('-iname', '--input_layers', type=str, help='Optional. Layer names for input blobs. The names are separated with ",". ' 'Allows to change the order of input layers for -i flag. Example: Input1,Input2') diff --git a/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py b/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py index 8019746d177088..0a86d467742b9c 100755 --- a/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py +++ b/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py @@ -12,6 +12,10 @@ from file_options import read_utterance_file, write_utterance_file from openvino.inference_engine import ExecutableNetwork, IECore +# Operating Frequency for GNA HW devices for Core and Atom architecture +GNA_CORE_FREQUENCY = 400 +GNA_ATOM_FREQUENCY = 200 + def get_scale_factor(matrix: np.ndarray) -> float: """Get scale factor for quantization using utterance matrix""" @@ -131,21 +135,26 @@ def main(): plugin_config['GNA_DEVICE_MODE'] = gna_device_mode plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}' - # Get a GNA scale factor + # Set a GNA scale factor if args.import_gna_model: log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}') + elif args.scale_factor: + log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.') + plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor) else: utterances = read_utterance_file(args.input.split(',')[0]) key = sorted(utterances)[0] scale_factor = get_scale_factor(utterances[key]) log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.') - plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor) if args.export_embedded_gna_model: plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration + if args.performance_counter: + plugin_config['PERF_COUNT'] = 'YES' + device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0] log.info('Loading the model to the plugin') @@ -208,6 +217,7 @@ def main(): log.info('Starting inference in synchronous mode') results = {blob_name: {} for blob_name in output_blobs} infer_times = [] + perf_counters = [] for key in sorted(input_data): start_infer_time = default_timer() @@ -223,6 +233,7 @@ def main(): results[blob_name][key] = result[blob_name] infer_times.append(default_timer() - start_infer_time) + perf_counters.append(exec_net.requests[0].get_perf_counts()) # ---------------------------Step 8. Process output-------------------------------------------------------------------- for blob_name in output_blobs: @@ -235,6 +246,26 @@ def main(): if args.reference: compare_with_reference(results[blob_name][key], references[blob_name][key]) + if args.performance_counter: + if 'GNA' in args.device: + pc = perf_counters[i] + total_cycles = int(pc['1.1 Total scoring time in HW']['real_time']) + stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time']) + active_cycles = total_cycles - stall_cycles + frequency = 10**6 + if args.arch == 'CORE': + frequency *= GNA_CORE_FREQUENCY + else: + frequency *= GNA_ATOM_FREQUENCY + total_inference_time = total_cycles / frequency + active_time = active_cycles / frequency + stall_time = stall_cycles / frequency + log.info('') + log.info('Performance Statistics of GNA Hardware') + log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms') + log.info(f' Active Time: {(active_time * 1000):.4f} ms') + log.info(f' Stall Time: {(stall_time * 1000):.4f} ms') + log.info('') log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')