Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
merge from upstream
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhliu committed Aug 23, 2019
2 parents 299010b + 9023256 commit 1da8878
Show file tree
Hide file tree
Showing 114 changed files with 4,732 additions and 1,651 deletions.
9 changes: 6 additions & 3 deletions benchmark/opperf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ Hence, in this utility, we will build the functionality to allow users and devel

## Prerequisites

Make sure to build the flavor of MXNet, for example - with/without MKL, with CUDA 9 or 10.1 etc., on which you would like to measure operator performance. Finally, you need to add path to your cloned MXNet repository to the PYTHONPATH.
Provided you have MXNet installed (any version >= 1.5.1), all you need to use opperf utility is to add path to your cloned MXNet repository to the PYTHONPATH.

Note:
To install MXNet, refer [Installing MXNet page](https://mxnet.incubator.apache.org/versions/master/install/index.html)

```
export PYTHONPATH=$PYTHONPATH:/path/to/incubator-mxnet/
Expand Down Expand Up @@ -75,7 +78,7 @@ For example, you want to run benchmarks for all NDArray Broadcast Binary Operato

```
#!/usr/bin/python
from benchmark.opperf.nd_operations.binary_broadcast_operators import run_mx_binary_broadcast_operators_benchmarks
from benchmark.opperf.nd_operations.binary_operators import run_mx_binary_broadcast_operators_benchmarks
# Run all Binary Broadcast operations benchmarks with default input values
print(run_mx_binary_broadcast_operators_benchmarks())
Expand Down Expand Up @@ -136,7 +139,7 @@ from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
add_res = run_performance_test([nd.add, nd.sub], run_backward=True, dtype='float32', ctx=mx.cpu(),
add_res = run_performance_test([nd.add, nd.subtract], run_backward=True, dtype='float32', ctx=mx.cpu(),
inputs=[{"lhs": (1024, 1024),
"rhs": (1024, 1024)}],
warmup=10, runs=25)
Expand Down
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/array_rearrange.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"""


def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the
rearrange operators in MXNet.
Expand All @@ -53,5 +53,5 @@ def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
mx_rearrange_ops = get_all_rearrange_operators()

# Run benchmarks
mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx, warmup, runs)
mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx, profiler, warmup, runs)
return mx_rearrange_op_results
8 changes: 4 additions & 4 deletions benchmark/opperf/nd_operations/binary_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
get_all_elemen_wise_binary_operators


def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the binary
broadcast operators in MXNet.
Expand All @@ -61,11 +61,11 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
# Fetch all Binary Broadcast Operators
mx_binary_broadcast_ops = get_all_broadcast_binary_operators()
# Run benchmarks
mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype, ctx, warmup, runs)
mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype, ctx, profiler, warmup, runs)
return mx_binary_op_results


def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the binary
element_wise operators in MXNet.
Expand All @@ -88,5 +88,5 @@ def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32
# Fetch all Binary Element_wise Operators
mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators()
# Run benchmarks
mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, warmup, runs)
mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, profiler, warmup, runs)
return mx_binary_op_results
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/gemm_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"""


def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the GEMM
operators (dot, batch_dot) in MXNet.
Expand Down Expand Up @@ -67,7 +67,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs
"rhs": (100, 1000),
"transpose_a": True,
"transpose_b": True}],
warmup=warmup, runs=runs)
warmup=warmup, runs=runs, profiler=profiler)

batch_dot_benchmark_res = run_performance_test(
[getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
Expand All @@ -81,7 +81,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs
"rhs": (32, 100, 1000),
"transpose_a": True,
"transpose_b": True}],
warmup=warmup, runs=runs)
warmup=warmup, runs=runs, profiler=profiler)

# Prepare combined results for GEMM operators
mx_gemm_op_results = merge_map_list(dot_benchmark_res + batch_dot_benchmark_res)
Expand Down
5 changes: 4 additions & 1 deletion benchmark/opperf/nd_operations/nn_activation_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
"""


def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the activation
operators (relu, sigmoid, softmax) in MXNet.
Expand All @@ -60,6 +60,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (1024, 1024), "act_type": "leaky", "slope": 0.1},
{"data": (10000, 1), "act_type": "leaky", "slope": 0.1},
{"data": (10000, 100), "act_type": "leaky", "slope": 0.1},
Expand All @@ -82,6 +83,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (1024, 1024), "alpha": 0.25, "beta": 0.5},
{"data": (10000, 1), "alpha": 0.25, "beta": 0.5},
{"data": (10000, 100), "alpha": 0.25, "beta": 0.5}
Expand All @@ -95,6 +97,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (1024, 1024), "axis": -1, "temperature": 0.5},
{"data": (10000, 1), "axis": -1, "temperature": 0.5},
{"data": (10000, 100), "axis": -1, "temperature": 0.5}
Expand Down
5 changes: 4 additions & 1 deletion benchmark/opperf/nd_operations/nn_basic_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@
"""


def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
# FullyConnnected operator benchmarks
fc_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "FullyConnected")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (32, 3, 256, 256),
"num_hidden": 64,
"weight": (64, 3 * 256 * 256),
Expand All @@ -53,6 +54,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (32, 3, 256, 256),
"p": 0.5,
"mode": "always"},
Expand All @@ -66,6 +68,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": (32, 3, 256, 256),
"gamma": (3,),
"beta": (3,),
Expand Down
12 changes: 9 additions & 3 deletions benchmark/opperf/nd_operations/nn_conv_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"""


def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
pool_types = ['avg', 'max', 'sum']
global_pool_types = [0, 1]

Expand All @@ -65,6 +65,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": pool1d_data,
"kernel": 3,
"pool_type": pool_type,
Expand All @@ -79,6 +80,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": pool2d_data,
"kernel": (3, 3),
"pool_type": pool_type,
Expand All @@ -93,14 +95,15 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, r
return mx_pooling_op_results


def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
# Conv1D Benchmarks
conv1d_benchmark_res = []
for conv_data in [(32, 3, 256), (32, 3, 64)]:
conv1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": conv_data,
"weight": (64, 3, 3),
"bias": (64,),
Expand All @@ -120,6 +123,7 @@ def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=2
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": conv_data,
"weight": (64, 3, 3, 3),
"bias": (64,),
Expand All @@ -137,14 +141,15 @@ def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=2
return mx_conv_op_results


def run_transpose_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_transpose_convolution_operators_benchmarks(ctx=mx.cpu(), profiler='native', dtype='float32', warmup=10, runs=50):
# Conv1DTranspose Benchmarks
conv1d_transpose_benchmark_res = []
for conv_data in [(32, 3, 256), (32, 3, 64)]:
conv1d_transpose_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Deconvolution")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": conv_data,
"weight": (3, 64, 3),
"bias": (64,),
Expand All @@ -166,6 +171,7 @@ def run_transpose_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32'
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data": conv_data,
"weight": (3, 64, 3, 3),
"bias": (64,),
Expand Down
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/nn_optimizer_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
"""


def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the neural network
optimizer update operators in MXNet.
Expand All @@ -60,5 +60,5 @@ def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
mx_optimizer_ops = get_all_optimizer_operators()

# Run benchmarks
mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx, warmup, runs)
mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx, profiler, warmup, runs)
return mx_optimizer_op_results
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/random_sampling_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from benchmark.opperf.utils.op_registry_utils import get_all_random_sampling_operators


def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the random sampling
operators in MXNet.
Expand All @@ -57,5 +57,5 @@ def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', w
# Fetch all Random Sampling Operators
mx_random_sample_ops = get_all_random_sampling_operators()
# Run benchmarks
mx_random_sample_op_results = run_op_benchmarks(mx_random_sample_ops, dtype, ctx, warmup, runs)
mx_random_sample_op_results = run_op_benchmarks(mx_random_sample_ops, dtype, ctx, profiler, warmup, runs)
return mx_random_sample_op_results
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/reduction_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks


def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the reduction
operators in MXNet.
Expand All @@ -54,5 +54,5 @@ def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=
# Fetch all Reduction Operators
mx_reduction_broadcast_ops = get_all_reduction_operators()
# Run benchmarks
mx_reduction_op_results = run_op_benchmarks(mx_reduction_broadcast_ops, dtype, ctx, warmup, runs)
mx_reduction_op_results = run_op_benchmarks(mx_reduction_broadcast_ops, dtype, ctx, profiler, warmup, runs)
return mx_reduction_op_results
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/sorting_searching_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"""


def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the sorting and searching
operators in MXNet.
Expand All @@ -52,5 +52,5 @@ def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(), dtype='float32', wa
# Fetch all Random Sampling Operators
mx_sort_search_ops = get_all_sorting_searching_operators()
# Run benchmarks
mx_sort_search_op_results = run_op_benchmarks(mx_sort_search_ops, dtype, ctx, warmup, runs)
mx_sort_search_op_results = run_op_benchmarks(mx_sort_search_ops, dtype, ctx, profiler, warmup, runs)
return mx_sort_search_op_results
4 changes: 2 additions & 2 deletions benchmark/opperf/nd_operations/unary_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks


def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the unary
operators in MXNet.
Expand All @@ -58,5 +58,5 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25,
# Fetch all Unary Operators
mx_unary_broadcast_ops = get_all_unary_operators()
# Run benchmarks
mx_unary_op_results = run_op_benchmarks(mx_unary_broadcast_ops, dtype, ctx, warmup, runs)
mx_unary_op_results = run_op_benchmarks(mx_unary_broadcast_ops, dtype, ctx, profiler, warmup, runs)
return mx_unary_op_results
Loading

0 comments on commit 1da8878

Please sign in to comment.