ROCProfV3 PC sampling tests: Initial multi-agents test (#72)

Testing multi-agent host-trap PC sampling support in ROCProfV3.
ROCm · Jan 4, 2025 · 00d4c17 · 00d4c17
1 parent 1ca2896
commit 00d4c17
Show file tree

Hide file tree

Showing 7 changed files with 319 additions and 0 deletions.
diff --git a/tests/rocprofv3/pc-sampling/host-trap/CMakeLists.txt b/tests/rocprofv3/pc-sampling/host-trap/CMakeLists.txt
@@ -3,3 +3,4 @@
 #
 
 add_subdirectory(exec-mask-manipulation)
+add_subdirectory(transpose-multiple-agents)
diff --git a/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/CMakeLists.txt b/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/CMakeLists.txt
@@ -0,0 +1,165 @@
+#
+# rocprofv3 tool test: host-trap PC sampling on multiple agents
+#
+cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
+
+project(rocprofiler-tests-pc-sampling LANGUAGES CXX VERSION 0.0.0)
+
+find_package(rocprofiler-sdk REQUIRED)
+
+# Hand the pytest configuration and the files used by the tests below to the SDK
+# helper. The tests look these files up in CMAKE_CURRENT_BINARY_DIR, so the helper
+# presumably configures/copies them there.
+rocprofiler_configure_pytest_files(
+    CONFIG pytest.ini
+    COPY validate.py conftest.py input.json input.yml)
+
+# Workload sizing. To make sure every agent gets sampled, the transpose application
+# is started with 16 threads, each assigned to one agent in a round-robin manner.
+# Each thread offloads 100 instances of the transpose kernel, which keeps the amount
+# of work per device reasonable.
+set(TRANSPOSE_NUM_THREADS 16)
+set(TRANSPOSE_NUM_ITERATIONS 100)
+
+# Profile the transpose workload with all rocprofv3 options given on the command
+# line: kernel tracing plus time-based host-trap PC sampling, CSV output named "out".
+add_test(
+    NAME rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-cmd-execute
+    COMMAND
+        $<TARGET_FILE:rocprofiler-sdk::rocprofv3>
+        --kernel-trace
+        --pc-sampling-unit time
+        --pc-sampling-method host_trap
+        --pc-sampling-interval 1
+        -d ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input
+        -o out
+        --output-format csv
+        --
+        $<TARGET_FILE:transpose> ${TRANSPOSE_NUM_THREADS} ${TRANSPOSE_NUM_ITERATIONS})
+
+# Re-key the memcheck preload setting from LD_PRELOAD= to ROCPROF_PRELOAD= before it
+# is placed in the test environment.
+string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
+               "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
+
+# Environment used by the execute tests in this file.
+set(pc-sampling-env-host-trap-transpose-multiple-agents "${PRELOAD_ENV}")
+
+# The test is reported as skipped when its output contains "PC sampling unavailable".
+set_tests_properties(
+    rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-cmd-execute
+    PROPERTIES
+        TIMEOUT 45
+        LABELS "integration-tests;pc-sampling"
+        ENVIRONMENT "${pc-sampling-env-host-trap-transpose-multiple-agents}"
+        FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}"
+        SKIP_REGULAR_EXPRESSION "PC sampling unavailable")
+
+# Profile the transpose workload with the rocprofv3 options read from the JSON input
+# file passed via -i; only the output directory is given on the command line.
+add_test(
+    NAME rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-json-execute
+    COMMAND
+        $<TARGET_FILE:rocprofiler-sdk::rocprofv3>
+        -i ${CMAKE_CURRENT_BINARY_DIR}/input.json
+        -d ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input
+        --
+        $<TARGET_FILE:transpose> ${TRANSPOSE_NUM_THREADS} ${TRANSPOSE_NUM_ITERATIONS})
+
+# The test is reported as skipped when its output contains "PC sampling unavailable".
+set_tests_properties(
+    rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-json-execute
+    PROPERTIES
+        TIMEOUT 45
+        LABELS "integration-tests;pc-sampling"
+        ENVIRONMENT "${pc-sampling-env-host-trap-transpose-multiple-agents}"
+        FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}"
+        SKIP_REGULAR_EXPRESSION "PC sampling unavailable")
+
+# Profile the transpose workload with the sampling options read from the YAML input
+# file passed via -i; output name and format are supplied on the command line.
+add_test(
+    NAME rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-yaml-execute
+    COMMAND
+        $<TARGET_FILE:rocprofiler-sdk::rocprofv3>
+        -i ${CMAKE_CURRENT_BINARY_DIR}/input.yml
+        -d ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input
+        -o out
+        --output-format csv
+        --
+        $<TARGET_FILE:transpose> ${TRANSPOSE_NUM_THREADS} ${TRANSPOSE_NUM_ITERATIONS})
+
+# The test is reported as skipped when its output contains "PC sampling unavailable".
+set_tests_properties(
+    rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-yaml-execute
+    PROPERTIES
+        TIMEOUT 45
+        LABELS "integration-tests;pc-sampling"
+        ENVIRONMENT "${pc-sampling-env-host-trap-transpose-multiple-agents}"
+        FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}"
+        SKIP_REGULAR_EXPRESSION "PC sampling unavailable")
+
+# ========================= Validation tests
+
+# Run the pytest check test_multi_agent_support on the CSV files found in the
+# pc_sampling_cmd_input output directory.
+add_test(
+    NAME rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-cmd-validate
+    COMMAND
+        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py
+        -k test_multi_agent_support
+        --input-samples-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_pc_sampling_host_trap.csv
+        --input-kernel-trace-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_kernel_trace.csv
+        --input-agent-info-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_agent_info.csv)
+
+# DEPENDS schedules this test after the matching execute test. The test is reported
+# as skipped when its output contains "PC sampling unavailable".
+set_tests_properties(
+    rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-cmd-validate
+    PROPERTIES
+        TIMEOUT 60
+        LABELS "integration-tests;pc-sampling"
+        DEPENDS
+        "rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-cmd-execute"
+        FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}"
+        SKIP_REGULAR_EXPRESSION "PC sampling unavailable")
+
+# Run the pytest check test_multi_agent_support on the CSV files found in the
+# pc_sampling_json_input output directory.
+add_test(
+    NAME rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-json-validate
+    COMMAND
+        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py
+        -k test_multi_agent_support
+        --input-samples-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_pc_sampling_host_trap.csv
+        --input-kernel-trace-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_kernel_trace.csv
+        --input-agent-info-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_agent_info.csv)
+
+# DEPENDS schedules this test after the matching execute test. The test is reported
+# as skipped when its output contains "PC sampling unavailable".
+set_tests_properties(
+    rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-json-validate
+    PROPERTIES
+        TIMEOUT 60
+        LABELS "integration-tests;pc-sampling"
+        DEPENDS
+        "rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-json-execute"
+        FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}"
+        SKIP_REGULAR_EXPRESSION "PC sampling unavailable")
+
+# Run the pytest check test_multi_agent_support on the CSV files found in the
+# pc_sampling_yaml_input output directory.
+add_test(
+    NAME rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-yaml-validate
+    COMMAND
+        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py
+        -k test_multi_agent_support
+        --input-samples-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_pc_sampling_host_trap.csv
+        --input-kernel-trace-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_kernel_trace.csv
+        --input-agent-info-csv
+        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_agent_info.csv)
+
+# DEPENDS schedules this test after the matching execute test. The test is reported
+# as skipped when its output contains "PC sampling unavailable".
+set_tests_properties(
+    rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-yaml-validate
+    PROPERTIES
+        TIMEOUT 60
+        LABELS "integration-tests;pc-sampling"
+        DEPENDS
+        "rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-yaml-execute"
+        FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}"
+        SKIP_REGULAR_EXPRESSION "PC sampling unavailable")
diff --git a/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/conftest.py b/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/conftest.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import pytest
+import pandas as pd
+
+from rocprofiler_sdk.pytest_utils.dotdict import dotdict
+from rocprofiler_sdk.pytest_utils import collapse_dict_list
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--input-samples-csv",
+        action="store",
+        help="Path to CSV file containing PC samples.",
+    )
+
+    parser.addoption(
+        "--input-kernel-trace-csv",
+        action="store",
+        help="Path to CSV file containing kernel trace.",
+    )
+
+    parser.addoption(
+        "--input-agent-info-csv",
+        action="store",
+        help="Path to CSV file containing agents information.",
+    )
+
+
+@pytest.fixture
+def input_samples_csv(request):
+    filename = request.config.getoption("--input-samples-csv")
+    if not os.path.isfile(filename):
+        # The CSV file is not generated, because the dependency test
+        # responsible to generate this file was skipped or failed.
+        # Thus emit the message to skip this test as well.
+        print("PC sampling unavailable")
+    else:
+        with open(filename, "r") as inp:
+            return pd.read_csv(
+                inp,
+                na_filter=False,  # parse empty fields as ""
+                keep_default_na=False,  # parse empty fields as ""
+                dtype={
+                    "Exec_Mask": "uint64",
+                    "Instruction": str,
+                    "Instruction_Comment": str,
+                },
+            )
+
+
+@pytest.fixture
+def input_kernel_trace_csv(request):
+    filename = request.config.getoption("--input-kernel-trace-csv")
+    with open(filename, "r") as inp:
+        return pd.read_csv(inp)
+
+
+@pytest.fixture
+def input_agent_info_csv(request):
+    filename = request.config.getoption("--input-agent-info-csv")
+    with open(filename, "r") as inp:
+        return pd.read_csv(inp)
diff --git a/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/input.json b/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/input.json
@@ -0,0 +1,14 @@
+{
+    "jobs": [
+        {
+            "kernel_trace": true,
+            "pc_sampling_unit": "time",
+            "pc_sampling_method": "host_trap",
+            "pc_sampling_interval": 1,
+            "output_file": "out",
+            "output_format": [
+                "csv"
+            ]
+        }
+    ]
+}
diff --git a/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/input.yml b/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/input.yml
@@ -0,0 +1,5 @@
+# rocprofv3 input: a single job enabling kernel tracing together with
+# time-based host-trap PC sampling.
+# No output file name or format is set here; the CTest command that consumes
+# this file passes "-o out --output-format csv" on the command line.
+jobs:
+  - kernel_trace: true
+    pc_sampling_unit: "time"
+    pc_sampling_method: "host_trap"
+    pc_sampling_interval: 1
diff --git a/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/pytest.ini b/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/pytest.ini
@@ -0,0 +1,5 @@
+
+[pytest]
+# --durations=20 reports the 20 slowest items, -rA prints a summary line for every
+# outcome, -s disables output capturing, and -vv selects very verbose output.
+addopts = --durations=20 -rA -s -vv
+# Collect tests from validate.py only.
+testpaths = validate.py
+# NOTE(review): the placeholder below is presumably substituted when this file is
+# configured by the build; confirm against rocprofiler_configure_pytest_files.
+pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
diff --git a/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/validate.py b/tests/rocprofv3/pc-sampling/host-trap/transpose-multiple-agents/validate.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+import itertools
+import sys
+import pytest
+import numpy as np
+import pandas as pd
+
+
+def test_multi_agent_support(
+    input_samples_csv: pd.DataFrame,
+    input_kernel_trace_csv: pd.DataFrame,
+    input_agent_info_csv: pd.DataFrame,
+):
+    transpose_kernel_source_line_start = 137
+    transpose_kernel_source_line_end = 145
+
+    mi2xx_mi3xx_agents_df = input_agent_info_csv[
+        input_agent_info_csv["Name"].apply(
+            lambda name: name == "gfx90a" or name.startswith("gfx94")
+        )
+    ]
+
+    # Extract samples that originates from know code object it
+    samples_df = input_samples_csv[input_samples_csv["Dispatch_Id"] != 0].copy()
+
+    # Determine the agent on which sample was generated
+    samples_df["Agent_Id"] = (
+        samples_df["Dispatch_Id"]
+        .map(input_kernel_trace_csv.set_index("Dispatch_Id")["Agent_Id"])
+        .astype(np.uint64)
+    )
+    sampled_agents = samples_df["Agent_Id"].unique()
+    sampled_agents_num = len(sampled_agents)
+    # all agents must be sampled
+    assert sampled_agents_num == len(mi2xx_mi3xx_agents_df)
+
+    # separate samples per agents
+    grouped_samples_per_agent = samples_df.groupby("Agent_Id")
+    for agent_id, agent_samples_df in grouped_samples_per_agent:
+        sampled_dispatches = agent_samples_df["Dispatch_Id"].unique()
+        # at least 1 sampled dispatch per agent
+        assert len(sampled_dispatches) >= 1
+
+    # extract decoded samples that are mapped to the transpose.cpp file
+    transpose_samples_df = samples_df[
+        samples_df["Instruction_Comment"].apply(
+            lambda comment: "transpose-all-agents.cpp" in comment
+        )
+    ].copy()
+    # determine the line number for each sample
+    transpose_samples_df["Source_Line_Num"] = transpose_samples_df[
+        "Instruction_Comment"
+    ].apply(lambda source_line: int(source_line.split(":")[-1]))
+    # assert that line belongs to a kernel range
+    assert (
+        (transpose_samples_df["Source_Line_Num"] >= transpose_kernel_source_line_start)
+        & (transpose_samples_df["Source_Line_Num"] <= transpose_kernel_source_line_end)
+    ).all()
+
+
+if __name__ == "__main__":
+    exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
+    sys.exit(exit_code)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3,3 +3,4 @@
		#

		add_subdirectory(exec-mask-manipulation)
		add_subdirectory(transpose-multiple-agents)