Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IE Python Speech Sample] Add --scale_factor and --performance_counter options #6663

Merged
merged 9 commits into from
Sep 3, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ Usage message:
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
[-o OUTPUT] [-r REFERENCE] [-d DEVICE]
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
[-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS]
[-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc]
[-a {CORE,ATOM}] [-iname INPUT_LAYERS]
[-oname OUTPUT_LAYERS]

optional arguments:
Expand All @@ -94,9 +95,10 @@ optional arguments:
Options:
-h, --help Show this help message and exit.
-i INPUT, --input INPUT
Required. Path to an input file (.ark or .npz).
Required. Path to an input file (.ark or .npz).
-o OUTPUT, --output OUTPUT
Optional. Output file name to save inference results (.ark or .npz).
Optional. Output file name to save inference results
(.ark or .npz).
-r REFERENCE, --reference REFERENCE
Optional. Read reference score file and compare
scores.
Expand All @@ -113,9 +115,18 @@ Options:
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
Optional. Weight bits for quantization: 8 or 16
(default 16).
  -sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
                        Optional. User-specified input scale factor for
                        quantization.
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
Optional. Write GNA model to file using path/filename
provided.
-pc, --performance_counter
Optional. Enables performance report (specify -a to
ensure arch accurate results).
  -a {CORE,ATOM}, --arch {CORE,ATOM}
                        Optional. Specify a architecture. CORE, ATOM with
                        combination of -pc.
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS
Optional. Layer names for input blobs. The names are
separated with ",". Allows to change the order of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace:
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
args.add_argument('-sf', '--scale_factor', type=float,
help='Optional. User-specified input scale factor for quantization.')
args.add_argument('-wg', '--export_gna_model', type=str,
help='Optional. Write GNA model to file using path/filename provided.')
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
args.add_argument('-pc', '--performance_counter', action='store_true',
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'],
help='Optional. Specify a architecture. CORE, ATOM with combination of -pc.')
args.add_argument('-iname', '--input_layers', type=str,
help='Optional. Layer names for input blobs. The names are separated with ",". '
'Allows to change the order of input layers for -i flag. Example: Input1,Input2')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
from file_options import read_utterance_file, write_utterance_file
from openvino.inference_engine import ExecutableNetwork, IECore

# Operating Frequency for GNA HW devices for Core and Atom architecture
GNA_CORE_FREQUENCY = 400
GNA_ATOM_FREQUENCY = 200


def get_scale_factor(matrix: np.ndarray) -> float:
"""Get scale factor for quantization using utterance matrix"""
Expand Down Expand Up @@ -131,21 +135,26 @@ def main():
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'

# Get a GNA scale factor
# Set a GNA scale factor
if args.import_gna_model:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
elif args.scale_factor:
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
else:
utterances = read_utterance_file(args.input.split(',')[0])
key = sorted(utterances)[0]
scale_factor = get_scale_factor(utterances[key])
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')

plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)

if args.export_embedded_gna_model:
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

if args.performance_counter:
plugin_config['PERF_COUNT'] = 'YES'

device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]

log.info('Loading the model to the plugin')
Expand Down Expand Up @@ -208,6 +217,7 @@ def main():
log.info('Starting inference in synchronous mode')
results = {blob_name: {} for blob_name in output_blobs}
infer_times = []
perf_counters = []

for key in sorted(input_data):
start_infer_time = default_timer()
Expand All @@ -223,6 +233,7 @@ def main():
results[blob_name][key] = result[blob_name]

infer_times.append(default_timer() - start_infer_time)
perf_counters.append(exec_net.requests[0].get_perf_counts())

# ---------------------------Step 8. Process output--------------------------------------------------------------------
for blob_name in output_blobs:
Expand All @@ -235,6 +246,26 @@ def main():
if args.reference:
compare_with_reference(results[blob_name][key], references[blob_name][key])

if args.performance_counter:
if 'GNA' in args.device:
pc = perf_counters[i]
total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
active_cycles = total_cycles - stall_cycles
frequency = 10**6
if args.arch == 'CORE':
frequency *= GNA_CORE_FREQUENCY
else:
frequency *= GNA_ATOM_FREQUENCY
total_inference_time = total_cycles / frequency
active_time = active_cycles / frequency
stall_time = stall_cycles / frequency
log.info('')
log.info('Performance Statistics of GNA Hardware')
log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
log.info(f' Active Time: {(active_time * 1000):.4f} ms')
log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')

log.info('')

log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')
Expand Down