diff --git a/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py b/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py index bb2fa27e43b017..67601e1f3794be 100755 --- a/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py +++ b/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py @@ -228,10 +228,9 @@ def main(): # ---------------------------Step 7. Do inference---------------------------------------------------------------------- log.info('Starting inference in synchronous mode') results = {blob_name: {} for blob_name in output_blobs} - infer_times = [] - perf_counters = [] + total_infer_time = 0 - for key in sorted(input_data): + for i, key in enumerate(sorted(input_data)): start_infer_time = default_timer() # Reset states between utterance inferences to remove a memory impact @@ -244,43 +243,49 @@ def main(): for blob_name in result.keys(): results[blob_name][key] = result[blob_name] - infer_times.append(default_timer() - start_infer_time) - perf_counters.append(exec_net.requests[0].get_perf_counts()) + infer_time = default_timer() - start_infer_time + total_infer_time += infer_time + num_of_frames = file_data[0][key].shape[0] + avg_infer_time_per_frame = infer_time / num_of_frames # ---------------------------Step 8. Process output-------------------------------------------------------------------- - for blob_name in output_blobs: - for i, key in enumerate(sorted(results[blob_name])): - log.info(f'Utterance {i} ({key})') + log.info('') + log.info(f'Utterance {i} ({key}):') + log.info(f'Total time in Infer (HW and SW): {infer_time * 1000:.2f}ms') + log.info(f'Frames in utterance: {num_of_frames}') + log.info(f'Average Infer time per frame: {avg_infer_time_per_frame * 1000:.2f}ms') + + for blob_name in output_blobs: + log.info('') log.info(f'Output blob name: {blob_name}') - log.info(f'Frames in utterance: {results[blob_name][key].shape[0]}') - log.info(f'Total time in Infer (HW and SW): {infer_times[i] * 1000:.2f}ms') + log.info(f'Number scores per frame: {results[blob_name][key].shape[1]}') if args.reference: + log.info('') compare_with_reference(results[blob_name][key], references[blob_name][key]) - if args.performance_counter: - if 'GNA' in args.device: - pc = perf_counters[i] - total_cycles = int(pc['1.1 Total scoring time in HW']['real_time']) - stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time']) - active_cycles = total_cycles - stall_cycles - frequency = 10**6 - if args.arch == 'CORE': - frequency *= GNA_CORE_FREQUENCY - else: - frequency *= GNA_ATOM_FREQUENCY - total_inference_time = total_cycles / frequency - active_time = active_cycles / frequency - stall_time = stall_cycles / frequency - log.info('') - log.info('Performance Statistics of GNA Hardware') - log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms') - log.info(f' Active Time: {(active_time * 1000):.4f} ms') - log.info(f' Stall Time: {(stall_time * 1000):.4f} ms') - - log.info('') - - log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms') + if args.performance_counter: + if 'GNA' in args.device: + pc = exec_net.requests[0].get_perf_counts() + total_cycles = int(pc['1.1 Total scoring time in HW']['real_time']) + stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time']) + active_cycles = total_cycles - stall_cycles + frequency = 10**6 + if args.arch == 'CORE': + frequency *= GNA_CORE_FREQUENCY + else: + frequency *= GNA_ATOM_FREQUENCY + total_inference_time = total_cycles / frequency + active_time = active_cycles / frequency + stall_time = stall_cycles / frequency + log.info('') + log.info('Performance Statistics of GNA Hardware') + log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms') + log.info(f' Active Time: {(active_time * 1000):.4f} ms') + log.info(f' Stall Time: {(stall_time * 1000):.4f} ms') + + log.info('') + log.info(f'Total sample time: {total_infer_time * 1000:.2f}ms') if args.output: for i, blob_name in enumerate(results):