From e648370f23719abd2b398621ba3ffc9457a55955 Mon Sep 17 00:00:00 2001
From: Connor Goggins
Date: Tue, 11 Feb 2020 14:37:02 -0800
Subject: [PATCH 1/4] Fixed ordering, added warmup & runs to argparse and individual benchmark function calls

---
 benchmark/opperf/opperf.py                | 55 ++++++++++++++---------
 benchmark/opperf/utils/benchmark_utils.py |  4 +-
 benchmark/opperf/utils/common_utils.py    | 11 ++++-
 benchmark/opperf/utils/profiler_utils.py  | 15 +++++--
 4 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/benchmark/opperf/opperf.py b/benchmark/opperf/opperf.py
index 5f4c8ee9cf0e..dc71190e8659 100755
--- a/benchmark/opperf/opperf.py
+++ b/benchmark/opperf/opperf.py
@@ -50,7 +50,7 @@
     get_current_runtime_features
 
 
-def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native'):
+def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
     """Run all the MXNet operators (NDArray) benchmarks.
 
     Returns
@@ -62,61 +62,61 @@ def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='n
     # *************************MXNET TENSOR OPERATOR BENCHMARKS*****************************
 
     # Run all Unary operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Binary Broadcast, element_wise, and miscellaneous operations benchmarks with default input values
     mxnet_operator_benchmark_results.append(run_mx_binary_broadcast_operators_benchmarks(ctx=ctx,
-                                                                                         dtype=dtype, profiler=profiler))
+                                                                                         dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     mxnet_operator_benchmark_results.append(run_mx_binary_element_wise_operators_benchmarks(ctx=ctx,
-                                                                                            dtype=dtype, profiler=profiler))
+                                                                                            dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     mxnet_operator_benchmark_results.append(run_mx_binary_misc_operators_benchmarks(ctx=ctx,
-                                                                                    dtype=dtype, profiler=profiler))
+                                                                                    dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all GEMM operations benchmarks with default input values
     mxnet_operator_benchmark_results.append(run_gemm_operators_benchmarks(ctx=ctx,
-                                                                          dtype=dtype, profiler=profiler))
+                                                                          dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Random sampling operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Reduction operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Sorting and Searching operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_sorting_searching_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_sorting_searching_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Array Rearrange operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_rearrange_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_rearrange_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Indexing routines benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_indexing_routines_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_indexing_routines_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # ************************ MXNET NN OPERATOR BENCHMARKS ****************************
 
     # Run all basic NN operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Activation operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Pooling operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Convolution operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Optimizer operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_optimizer_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_optimizer_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Transpose Convolution operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_transpose_convolution_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_transpose_convolution_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all NN loss operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_loss_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_loss_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # Run all Linear Algebra operations benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_linalg_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
+    mxnet_operator_benchmark_results.append(run_linalg_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # ****************************** PREPARE FINAL RESULTS ********************************
     final_benchmark_result_map = merge_map_list(mxnet_operator_benchmark_results)
@@ -159,6 +159,14 @@ def main():
                             'time module.'
                             'Valid Inputs - native, python')
 
+    parser.add_argument('-w', '--warmup', type=int, default=25,
+                        help='Number of times to run for warmup. '
+                             'Valid Inputs - positive integers')
+
+    parser.add_argument('-r', '--runs', type=int, default=100,
+                        help='Number of runs to capture benchmark results. '
+                             'Valid Inputs - positive integers')
+
     args = parser.parse_args()
     logging.info("Running MXNet operator benchmarks with the following options: {args}".format(args=args))
     assert not os.path.isfile(args.output_file),\
@@ -168,7 +176,14 @@ def main():
     ctx = _parse_mxnet_context(args.ctx)
     dtype = args.dtype
     profiler = args.profiler
-    final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler)
+    warmup = args.warmup
+    runs = args.runs
+    benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs)
+
+    # Sort benchmark results alphabetically by op name
+    final_benchmark_results = dict()
+    for key in sorted(benchmark_results.keys()):
+        final_benchmark_results[key] = benchmark_results[key]
 
     # 3. PREPARE OUTPUTS
     run_time_features = get_current_runtime_features()
diff --git a/benchmark/opperf/utils/benchmark_utils.py b/benchmark/opperf/utils/benchmark_utils.py
index 421fe314267e..da7e2b8910aa 100644
--- a/benchmark/opperf/utils/benchmark_utils.py
+++ b/benchmark/opperf/utils/benchmark_utils.py
@@ -80,14 +80,14 @@ def _run_nd_operator_performance_test(op, inputs, run_backward, warmup, runs, ar
             _, profiler_output = benchmark_helper_func(op, runs, [], **kwargs)
 
             # Add inputs used for profiling this operator into result
-            profiler_output["inputs"] = inputs[idx]
+            profiler_output = merge_map_list([{"inputs": inputs[idx]}] + [profiler_output])
             op_benchmark_result[op.__name__].append(profiler_output)
     else:
         for idx, (args, kwargs) in enumerate(zip(args_list, kwargs_list)):
             _, profiler_output = benchmark_helper_func(op, runs, args, **kwargs)
 
             # Add inputs used for profiling this operator into result
-            profiler_output["inputs"] = inputs[idx]
+            profiler_output = merge_map_list([{"inputs": inputs[idx]}] + [profiler_output])
             op_benchmark_result[op.__name__].append(profiler_output)
     logging.info("Complete Benchmark - {name}".format(name=op.__name__))
     return op_benchmark_result
diff --git a/benchmark/opperf/utils/common_utils.py b/benchmark/opperf/utils/common_utils.py
index 924d2fa9a23b..3eb8254e1132 100644
--- a/benchmark/opperf/utils/common_utils.py
+++ b/benchmark/opperf/utils/common_utils.py
@@ -41,7 +41,14 @@ def merge_map_list(map_list):
     map where all individual maps in the into map_list are merged
 
     """
-    return dict(ChainMap(*map_list))
+    # Preserve order of underlying maps and keys when converting to a single map
+    final_map = dict()
+
+    for current_map in map_list:
+        for key in current_map:
+            final_map[key] = current_map[key]
+
+    return final_map
 
 
 def save_to_file(inp_dict, out_filepath, out_format='json', runtime_features=None, profiler='native'):
@@ -65,7 +72,7 @@ def save_to_file(inp_dict, out_filepath, out_format='json', runtime_features=Non
     if out_format == 'json':
         # Save as JSON
         with open(out_filepath, "w") as result_file:
-            json.dump(inp_dict, result_file, indent=4, sort_keys=True)
+            json.dump(inp_dict, result_file, indent=4, sort_keys=False)
     elif out_format == 'md':
         # Save as md
         with open(out_filepath, "w") as result_file:
diff --git a/benchmark/opperf/utils/profiler_utils.py b/benchmark/opperf/utils/profiler_utils.py
index 45322c1066cf..874d27f069fb 100644
--- a/benchmark/opperf/utils/profiler_utils.py
+++ b/benchmark/opperf/utils/profiler_utils.py
@@ -58,14 +58,24 @@ def _get_operator_profile(operator_name, operator_profile_results):
     else:
         op_name = operator_name
 
+    # Variables to store forward/backward performance results
+    forward_res, backward_res = None, None
+
     for line in operator_profile_results:
         if op_name in line or op_name[:3] + " " in line:
             operation = line.split()[0]
             operation_avg_time = float(line.split()[-1])
             if "_backward" in operation:
-                operator_profile["avg_time_backward_" + operator_name] = operation_avg_time
+                backward_res = operation_avg_time
             else:
-                operator_profile["avg_time_forward_" + operator_name] = operation_avg_time
+                forward_res = operation_avg_time
+
+    # Add forward and backward performance results to the dict in the correct order
+    if forward_res is not None:
+        operator_profile["avg_time_forward_" + operator_name] = forward_res
+
+    if backward_res is not None:
+        operator_profile["avg_time_backward_" + operator_name] = backward_res
 
     return operator_profile
 
@@ -149,7 +159,6 @@ def parse_profiler_dump(operator_name, profiler_dump):
 
     # Prepare results
     memory_profile = _get_memory_profile(memory_profile_results)
     operator_profile = _get_operator_profile(operator_name, operator_profile_results)
-
     return merge_map_list([memory_profile, operator_profile])
 

From 5a6265323b1ca7405aeaa2344bd58cb62c56975a Mon Sep 17 00:00:00 2001
From: Connor Goggins
Date: Tue, 11 Feb 2020 15:04:56 -0800
Subject: [PATCH 2/4] Dropped unused ChainMap

---
 benchmark/opperf/utils/common_utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/benchmark/opperf/utils/common_utils.py b/benchmark/opperf/utils/common_utils.py
index 3eb8254e1132..9af135eed34e 100644
--- a/benchmark/opperf/utils/common_utils.py
+++ b/benchmark/opperf/utils/common_utils.py
@@ -19,8 +19,6 @@
 import json
 from operator import itemgetter
 
-from collections import ChainMap
-
 import logging
 logging.basicConfig(level=logging.INFO)
 

From 319775162c989717b95ff059decc2fdf7457c987 Mon Sep 17 00:00:00 2001
From: Connor Goggins
Date: Tue, 11 Feb 2020 15:31:20 -0800
Subject: [PATCH 3/4] Added newline for consistency with previous changes

---
 benchmark/opperf/utils/profiler_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/benchmark/opperf/utils/profiler_utils.py b/benchmark/opperf/utils/profiler_utils.py
index 874d27f069fb..fa959bf5a8b1 100644
--- a/benchmark/opperf/utils/profiler_utils.py
+++ b/benchmark/opperf/utils/profiler_utils.py
@@ -159,6 +159,7 @@ def parse_profiler_dump(operator_name, profiler_dump):
 
     # Prepare results
     memory_profile = _get_memory_profile(memory_profile_results)
     operator_profile = _get_operator_profile(operator_name, operator_profile_results)
+
     return merge_map_list([memory_profile, operator_profile])

From 0bce87975bb2dd575a2292656fad8276d01822e0 Mon Sep 17 00:00:00 2001
From: Connor Goggins
Date: Tue, 11 Feb 2020 16:57:42 -0800
Subject: [PATCH 4/4] Adjusted markdown output ordering

---
 benchmark/opperf/utils/common_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmark/opperf/utils/common_utils.py b/benchmark/opperf/utils/common_utils.py
index 9af135eed34e..fcf52d4377dd 100644
--- a/benchmark/opperf/utils/common_utils.py
+++ b/benchmark/opperf/utils/common_utils.py
@@ -127,7 +127,7 @@ def _prepare_op_benchmark_result(op, op_bench_result, profiler):
     result = ""
     if profiler == "native":
         result = "| {} | {} | {} | {} | {} |".format(operator_name,
-                                                     avg_forward_time, avg_backward_time, max_mem_usage, inputs)
+                                                     inputs, max_mem_usage, avg_forward_time, avg_backward_time)
     elif profiler == "python":
         result = "| {} | {} | {} | {} | {} | {} |".format(operator_name, avg_time, p50_time, p90_time, p99_time, inputs)
     return result
@@ -144,8 +144,8 @@ def _prepare_markdown(results, runtime_features=None, profiler='native'):
         results_markdown.append("# Benchmark Results")
     if profiler == 'native':
         results_markdown.append(
-            "| Operator | Avg Forward Time (ms) | Avg. Backward Time (ms) | Max Mem Usage (Storage) (Bytes)"
-            " | Inputs |")
+            "| Operator | Inputs | Max Mem Usage (Storage) (Bytes) | Avg Forward Time (ms)"
+            " | Avg. Backward Time (ms) |")
         results_markdown.append("| :---: | :---: | :---: | :---: | :---: |")
     elif profiler == 'python':
         results_markdown.append(
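Note: a minimal sketch of driving the full suite with the new warmup/runs knobs from Python, using only names that appear in the patches above (assumes an MXNet install and the repo root on PYTHONPATH):

import mxnet as mx

from benchmark.opperf.opperf import run_all_mxnet_operator_benchmarks

# Run every operator benchmark on CPU, overriding the new defaults
# (warmup=25, runs=100) introduced in patch 1/4.
results = run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32',
                                            profiler='native', warmup=10, runs=50)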
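The alphabetical sort added to main() in patch 1/4 relies on Python 3.7+ dicts preserving insertion order; an equivalent, terser form of the same loop would be:

# sorted() yields the (key, value) pairs in alphabetical key order,
# and dict() preserves that insertion order (Python 3.7+).
final_benchmark_results = dict(sorted(benchmark_results.items()))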
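For context on why merge_map_list dropped dict(ChainMap(*map_list)) (patches 1/4 and 2/4): ChainMap does not iterate its underlying maps first-to-last (CPython 3.7+ walks them in reverse), so the merged dict surfaced later maps' keys first, while the explicit update loop keeps first-to-last order. A sketch with hypothetical keys:

from collections import ChainMap

# Hypothetical entries; the real maps hold per-operator profiler output.
m1 = {"avg_time_forward_Convolution": 1.2}
m2 = {"avg_time_backward_Convolution": 3.4}

# Old behaviour on CPython 3.7+: keys of the last map come first.
print(list(dict(ChainMap(m1, m2))))  # ['avg_time_backward_Convolution', 'avg_time_forward_Convolution']

# New merge_map_list behaviour: first-to-last map order is preserved.
merged = {}
for m in (m1, m2):
    merged.update(m)
print(list(merged))  # ['avg_time_forward_Convolution', 'avg_time_backward_Convolution']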