Commit: combine eager and compile benchmarks
Priya2698 committed Jan 26, 2024
1 parent 480e3fc commit 105fd9e
Showing 3 changed files with 55 additions and 37 deletions.
37 changes: 37 additions & 0 deletions python_benchmarks/conftest.py
@@ -15,6 +15,19 @@ def pytest_addoption(parser):
         default=False,
         help="Disable benchmarking.",
     )
+    parser.addoption(
+        "--benchmark-eager",
+        action="store_true",
+        default=False,
+        help="Benchmarks torch eager mode.",
+    )
+
+    parser.addoption(
+        "--benchmark-torchcompile",
+        action="store_true",
+        default=False,
+        help="Benchmarks torch.compile mode.",
+    )


 @pytest.fixture
@@ -33,3 +46,27 @@ def pytest_make_parametrize_id(val):

 def pytest_benchmark_update_machine_info(config, machine_info):
     machine_info.update(DEVICE_PROPERTIES)
+
+
+def pytest_collection_modifyitems(session, config, items):
+    run_eager = config.getoption("--benchmark-eager")
+    run_torchcompile = config.getoption("--benchmark-torchcompile")
+
+    if not run_eager:
+        skip_eager = pytest.mark.skip(reason="need --benchmark-eager option to run")
+        for item in items:
+            # If the benchmark has compile=False parameter (eager mode), skip it.
+            if (
+                "compile" in item.callspec.params
+                and not item.callspec.params["compile"]
+            ):
+                item.add_marker(skip_eager)
+
+    if not run_torchcompile:
+        skip_torchcompile = pytest.mark.skip(
+            reason="need --benchmark-torchcompile option to run"
+        )
+        for item in items:
+            # If the benchmark has compile=True parameter (torch.compile mode), skip it.
+            if "compile" in item.callspec.params and item.callspec.params["compile"]:
+                item.add_marker(skip_torchcompile)
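
Note (not part of the commit): a minimal sketch of how a benchmark would hook into this collection logic. The hook inspects a parameter literally named "compile" via item.callspec.params, so a baseline test must parametrize over it; the test name, the relu workload, and the use of pytest-benchmark's benchmark fixture instead of this repo's run_benchmark helper are all hypothetical illustration. With this in place, pytest --benchmark-eager selects the compile=False variants and pytest --benchmark-torchcompile selects the compile=True variants.

import pytest
import torch


# Hypothetical example, not from the repository: the "compile" parameter is what
# pytest_collection_modifyitems looks up in item.callspec.params.
@pytest.mark.parametrize("compile", [False, True], ids=["eager", "compile"])
def test_my_op_baseline_benchmark(benchmark, compile: bool):
    def fn(x):
        return torch.nn.functional.relu(x)

    x = torch.randn(128, 768, device="cuda")
    # pytest-benchmark fixture: time either the eager function or its
    # torch.compile'd counterpart, depending on the parameter.
    benchmark(torch.compile(fn) if compile else fn, x)
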
29 changes: 9 additions & 20 deletions python_benchmarks/test_softmax_bwd.py
@@ -54,7 +54,7 @@ def softmax_bwd_fusion(
     fd.add_output(T19)


-def softmax_bwd_fn(inputs: list):  # [in_tensor, output, grads]
+def unary_bwd_torch(inputs: list):  # [in_tensor, output, grads]
     inputs[1].backward(inputs[2], retain_graph=True)
     return inputs[0].grad

@@ -89,34 +89,23 @@ def test_softmax_bwd_nvf_benchmark(
     run_benchmark(benchmark, fd.execute, inputs)


+@pytest.mark.parametrize("compile", [False, True], ids=["eager", "compile"])
 @pytest.mark.parametrize("size", generate_input_sizes(dims=2))
 @pytest.mark.parametrize("dtype", FLOAT_DTYPES)
 @pytest.mark.parametrize("reduction_axis", [0, 1])
-def test_softmax_bwd_eager_benchmark(
-    benchmark,
-    size: tuple,
-    dtype: torch.dtype,
-    reduction_axis: int,
-):
-    clear_cuda_cache()
-    input = torch.randn(*size, device="cuda", dtype=dtype, requires_grad=True)
-    grads = torch.randn(*size, device="cuda", dtype=dtype)
-
-    output = torch.nn.functional.softmax(input, dim=reduction_axis)
-    run_benchmark(benchmark, softmax_bwd_fn, [input, output, grads])
-
-
-@pytest.mark.parametrize("size", generate_input_sizes(dims=2))
-@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
-@pytest.mark.parametrize("reduction_axis", [0, 1])
-def test_softmax_bwd_compile_benchmark(
+def test_softmax_bwd_baseline_benchmark(
     benchmark,
     size: tuple,
     dtype: torch.dtype,
     reduction_axis: int,
+    compile: bool,
 ):
     clear_cuda_cache()
     input = torch.randn(*size, device="cuda", dtype=dtype, requires_grad=True)
     grads = torch.randn(*size, device="cuda", dtype=dtype)
     output = torch.nn.functional.softmax(input, dim=reduction_axis)
-    run_benchmark(benchmark, torch.compile(softmax_bwd_fn), [input, output, grads])
+    run_benchmark(
+        benchmark,
+        torch.compile(unary_bwd_torch) if compile else unary_bwd_torch,
+        [input, output, grads],
+    )
26 changes: 9 additions & 17 deletions python_benchmarks/test_softmax_fwd.py
@@ -74,29 +74,21 @@ def test_softmax_fwd_nvf_benchmark(
     run_benchmark(benchmark, fd.execute, inputs)


-@pytest.mark.parametrize("size", generate_input_sizes(dims=2))
-@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
-@pytest.mark.parametrize("reduction_axis", [0, 1])
-def test_softmax_fwd_eager_benchmark(
-    benchmark,
-    size: tuple,
-    dtype: torch.dtype,
-    reduction_axis: int,
-):
-    clear_cuda_cache()
-    input = torch.randn(*size, device="cuda", dtype=dtype)
-    run_benchmark(benchmark, softmax_fwd_fn, [input, reduction_axis])
-
-
-@pytest.mark.parametrize("size", generate_input_sizes(dims=2))
+@pytest.mark.parametrize("compile", [False, True], ids=["eager", "compile"])
+@pytest.mark.parametrize("size", [(128, 768)])
 @pytest.mark.parametrize("dtype", FLOAT_DTYPES)
 @pytest.mark.parametrize("reduction_axis", [0, 1])
-def test_softmax_fwd_compile_benchmark(
+def test_softmax_fwd_baseline_benchmark(
     benchmark,
     size: tuple,
     dtype: torch.dtype,
     reduction_axis: int,
+    compile: bool,
 ):
     clear_cuda_cache()
     input = torch.randn(*size, device="cuda", dtype=dtype)
-    run_benchmark(benchmark, torch.compile(softmax_fwd_fn), [input, reduction_axis])
+    run_benchmark(
+        benchmark,
+        torch.compile(softmax_fwd_fn) if compile else softmax_fwd_fn,
+        [input, reduction_axis],
+    )
