From 105fd9e8a81a910a107374e5de3d0bfd7911107e Mon Sep 17 00:00:00 2001
From: root <26priya11@gmail.com>
Date: Fri, 26 Jan 2024 14:41:23 -0800
Subject: [PATCH] combine eager and compile benchmarks

---
 python_benchmarks/conftest.py         | 37 +++++++++++++++++++++++++++
 python_benchmarks/test_softmax_bwd.py | 29 +++++++--------------
 python_benchmarks/test_softmax_fwd.py | 26 +++++++------------
 3 files changed, 55 insertions(+), 37 deletions(-)

diff --git a/python_benchmarks/conftest.py b/python_benchmarks/conftest.py
index 7748c22c077..db3e50b699c 100644
--- a/python_benchmarks/conftest.py
+++ b/python_benchmarks/conftest.py
@@ -15,6 +15,19 @@ def pytest_addoption(parser):
         default=False,
         help="Disable benchmarking.",
     )
+    parser.addoption(
+        "--benchmark-eager",
+        action="store_true",
+        default=False,
+        help="Benchmarks torch eager mode.",
+    )
+
+    parser.addoption(
+        "--benchmark-torchcompile",
+        action="store_true",
+        default=False,
+        help="Benchmarks torch.compile mode.",
+    )
 
 
 @pytest.fixture
@@ -33,3 +46,27 @@ def pytest_make_parametrize_id(val):
 
 def pytest_benchmark_update_machine_info(config, machine_info):
     machine_info.update(DEVICE_PROPERTIES)
+
+
+def pytest_collection_modifyitems(session, config, items):
+    run_eager = config.getoption("--benchmark-eager")
+    run_torchcompile = config.getoption("--benchmark-torchcompile")
+
+    if not run_eager:
+        skip_eager = pytest.mark.skip(reason="need --benchmark-eager option to run")
+        for item in items:
+            # If the benchmark has compile=False parameter (eager mode), skip it.
+            if (
+                "compile" in item.callspec.params
+                and not item.callspec.params["compile"]
+            ):
+                item.add_marker(skip_eager)
+
+    if not run_torchcompile:
+        skip_torchcompile = pytest.mark.skip(
+            reason="need --benchmark-torchcompile option to run"
+        )
+        for item in items:
+            # If the benchmark has compile=True parameter (torch.compile mode), skip it.
+            if "compile" in item.callspec.params and item.callspec.params["compile"]:
+                item.add_marker(skip_torchcompile)
diff --git a/python_benchmarks/test_softmax_bwd.py b/python_benchmarks/test_softmax_bwd.py
index 4bf566bb1d4..1f788d139a2 100644
--- a/python_benchmarks/test_softmax_bwd.py
+++ b/python_benchmarks/test_softmax_bwd.py
@@ -54,7 +54,7 @@ def softmax_bwd_fusion(
     fd.add_output(T19)
 
 
-def softmax_bwd_fn(inputs: list):  # [in_tensor, output, grads]
+def unary_bwd_torch(inputs: list):  # [in_tensor, output, grads]
     inputs[1].backward(inputs[2], retain_graph=True)
     return inputs[0].grad
 
@@ -89,34 +89,23 @@ def test_softmax_bwd_nvf_benchmark(
     run_benchmark(benchmark, fd.execute, inputs)
 
 
+@pytest.mark.parametrize("compile", [False, True], ids=["eager", "compile"])
 @pytest.mark.parametrize("size", generate_input_sizes(dims=2))
 @pytest.mark.parametrize("dtype", FLOAT_DTYPES)
 @pytest.mark.parametrize("reduction_axis", [0, 1])
-def test_softmax_bwd_eager_benchmark(
-    benchmark,
-    size: tuple,
-    dtype: torch.dtype,
-    reduction_axis: int,
-):
-    clear_cuda_cache()
-    input = torch.randn(*size, device="cuda", dtype=dtype, requires_grad=True)
-    grads = torch.randn(*size, device="cuda", dtype=dtype)
-
-    output = torch.nn.functional.softmax(input, dim=reduction_axis)
-    run_benchmark(benchmark, softmax_bwd_fn, [input, output, grads])
-
-
-@pytest.mark.parametrize("size", generate_input_sizes(dims=2))
-@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
-@pytest.mark.parametrize("reduction_axis", [0, 1])
-def test_softmax_bwd_compile_benchmark(
+def test_softmax_bwd_baseline_benchmark(
     benchmark,
     size: tuple,
     dtype: torch.dtype,
     reduction_axis: int,
+    compile: bool,
 ):
     clear_cuda_cache()
     input = torch.randn(*size, device="cuda", dtype=dtype, requires_grad=True)
     grads = torch.randn(*size, device="cuda", dtype=dtype)
     output = torch.nn.functional.softmax(input, dim=reduction_axis)
-    run_benchmark(benchmark, torch.compile(softmax_bwd_fn), [input, output, grads])
+    run_benchmark(
+        benchmark,
+        torch.compile(unary_bwd_torch) if compile else unary_bwd_torch,
+        [input, output, grads],
+    )
diff --git a/python_benchmarks/test_softmax_fwd.py b/python_benchmarks/test_softmax_fwd.py
index fc0bb50f7b8..fbdb4c2d1c3 100644
--- a/python_benchmarks/test_softmax_fwd.py
+++ b/python_benchmarks/test_softmax_fwd.py
@@ -74,29 +74,21 @@ def test_softmax_fwd_nvf_benchmark(
     run_benchmark(benchmark, fd.execute, inputs)
 
 
-@pytest.mark.parametrize("size", generate_input_sizes(dims=2))
-@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
-@pytest.mark.parametrize("reduction_axis", [0, 1])
-def test_softmax_fwd_eager_benchmark(
-    benchmark,
-    size: tuple,
-    dtype: torch.dtype,
-    reduction_axis: int,
-):
-    clear_cuda_cache()
-    input = torch.randn(*size, device="cuda", dtype=dtype)
-    run_benchmark(benchmark, softmax_fwd_fn, [input, reduction_axis])
-
-
-@pytest.mark.parametrize("size", generate_input_sizes(dims=2))
+@pytest.mark.parametrize("compile", [False, True], ids=["eager", "compile"])
+@pytest.mark.parametrize("size", [(128, 768)])
 @pytest.mark.parametrize("dtype", FLOAT_DTYPES)
 @pytest.mark.parametrize("reduction_axis", [0, 1])
-def test_softmax_fwd_compile_benchmark(
+def test_softmax_fwd_baseline_benchmark(
     benchmark,
     size: tuple,
     dtype: torch.dtype,
     reduction_axis: int,
+    compile: bool,
 ):
     clear_cuda_cache()
     input = torch.randn(*size, device="cuda", dtype=dtype)
-    run_benchmark(benchmark, torch.compile(softmax_fwd_fn), [input, reduction_axis])
+    run_benchmark(
+        benchmark,
+        torch.compile(softmax_fwd_fn) if compile else softmax_fwd_fn,
+        [input, reduction_axis],
+    )
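
Usage sketch (assuming the suite is invoked from the repository root; the file paths
below are only examples): with this patch, collection skips any test parametrized with
compile=False unless --benchmark-eager is passed, and any test parametrized with
compile=True unless --benchmark-torchcompile is passed, so the nvFuser benchmarks still
run by default while the eager and torch.compile baselines are opt-in:

    pytest python_benchmarks/test_softmax_fwd.py
    pytest python_benchmarks/test_softmax_fwd.py --benchmark-eager
    pytest python_benchmarks/test_softmax_bwd.py --benchmark-eager --benchmark-torchcompile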