Skip to content

Commit

Permalink
Test GPU metrics API in the ROCm SMI library
Browse files Browse the repository at this point in the history
Fetch the GPU metrics using the rsmi_dev_gpu_metrics_info_get API
and test the values.

Signed-off-by: Sajina P Kandy <[email protected]>
  • Loading branch information
Sajina Kandy authored and sputhala-amd committed Dec 17, 2024
1 parent 88aa2d3 commit 44919c9
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 15 deletions.
2 changes: 2 additions & 0 deletions source/lib/core/categories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_busy, ROCPROFSYS_CATEGORY_ROCM_SMI
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_temp, ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP, "device_temp", "Temperature of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_power, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, "device_power", "Power consumption of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_memory_usage, ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_vcn_activity, ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, "rccl", "ROCm Communication Collectives Library (RCCL) regions")
ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX threading functions")
ROCPROFSYS_DEFINE_CATEGORY(category, kokkos, ROCPROFSYS_CATEGORY_KOKKOS, "kokkos", "KokkosTools regions")
Expand Down Expand Up @@ -167,6 +168,7 @@ using name = perfetto_category<Tp...>;
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_temp), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_power), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_memory_usage), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_vcn_activity), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl), \
ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \
ROCPROFSYS_PERFETTO_CATEGORY(category::kokkos), \
Expand Down
13 changes: 13 additions & 0 deletions source/lib/core/components/fwd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,16 @@ struct backtrace_gpu_power
{};
struct backtrace_gpu_memory
{};
struct backtrace_gpu_vcn
{};
using sampling_wall_clock = data_tracker<double, backtrace_wall_clock>;
using sampling_cpu_clock = data_tracker<double, backtrace_cpu_clock>;
using sampling_percent = data_tracker<double, backtrace_fraction>;
using sampling_gpu_busy = data_tracker<double, backtrace_gpu_busy>;
using sampling_gpu_temp = data_tracker<double, backtrace_gpu_temp>;
using sampling_gpu_power = data_tracker<double, backtrace_gpu_power>;
using sampling_gpu_memory = data_tracker<double, backtrace_gpu_memory>;
using sampling_gpu_vcn = data_tracker<double, backtrace_gpu_vcn>;

template <typename ApiT, typename StartFuncT = default_functor_t,
typename StopFuncT = default_functor_t>
Expand Down Expand Up @@ -121,6 +124,7 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_busy, fal
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_temp, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_power, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_vcn, false_type)
#endif

TIMEMORY_SET_COMPONENT_API(rocprofsys::component::roctracer, project::rocprofsys,
Expand Down Expand Up @@ -152,6 +156,9 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_temp, project::ro
tpls::rocm, device::gpu, os::supports_linux,
category::temperature, category::sampling,
category::process_sampling)
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_vcn, project::rocprofsys,
tpls::rocm, device::gpu, os::supports_linux,
category::sampling, category::process_sampling)

TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::roctracer, "roctracer",
"High-precision ROCm API and kernel tracing", "")
Expand Down Expand Up @@ -180,6 +187,10 @@ TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_power,
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_temp,
"sampling_gpu_temp", "GPU Temperature via ROCm-SMI",
"Derived from sampling")
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_vcn,
"sampling_gpu_vcn",
"VCN Activity (% activity) via ROCm-SMI",
"Derived from sampling")

// statistics type
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_wall_clock, double)
Expand All @@ -188,6 +199,7 @@ TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_busy, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_temp, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_power, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_memory, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_vcn, double)
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::comm_data_tracker_t, float)

// enable timing units
Expand Down Expand Up @@ -219,6 +231,7 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_busy, false
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_temp, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_power, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_memory, false_type)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_vcn, false_type)

// reporting categories (mean)
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_mean, component::sampling_percent, false_type)
Expand Down
9 changes: 5 additions & 4 deletions source/lib/core/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -626,10 +626,11 @@ configure_settings(bool _init)

rocprofiler_sdk::config_settings(_config);

ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_SMI_METRICS",
"rocm-smi metrics to collect: busy, temp, power, mem_usage",
"busy,temp,power,mem_usage", "backend", "rocm_smi", "rocm",
"process_sampling", "advanced");
ROCPROFSYS_CONFIG_SETTING(
std::string, "ROCPROFSYS_ROCM_SMI_METRICS",
"rocm-smi metrics to collect: busy, temp, power, mem_usage,vcn_activity",
"busy,temp,power,mem_usage,vcn_activity", "backend", "rocm_smi", "rocm",
"process_sampling", "advanced");

ROCPROFSYS_CONFIG_SETTING(size_t, "ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB",
"Hint for shared-memory buffer size in perfetto (in KB)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ extern "C"
ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP,
ROCPROFSYS_CATEGORY_ROCM_SMI_POWER,
ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE,
ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY,
ROCPROFSYS_CATEGORY_ROCM_RCCL,
ROCPROFSYS_CATEGORY_SAMPLING,
ROCPROFSYS_CATEGORY_PTHREAD,
Expand Down
3 changes: 3 additions & 0 deletions source/lib/rocprof-sys/library/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ set(library_headers
target_sources(rocprofiler-systems-object-library PRIVATE ${library_sources}
${library_headers})

target_include_directories(rocprofiler-systems-core-library BEFORE
PRIVATE ${CMAKE_CURRENT_LIST_DIR})

if(ROCPROFSYS_USE_RCCL)
target_sources(rocprofiler-systems-object-library
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/rcclp.cpp)
Expand Down
30 changes: 29 additions & 1 deletion source/lib/rocprof-sys/library/rocm_smi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ data::sample(uint32_t _dev_id)
&m_power, &power_type)
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).mem_usage, rsmi_dev_memory_usage_get,
_dev_id, RSMI_MEM_TYPE_VRAM, &m_mem_usage);
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).vcn_activity,
rsmi_dev_gpu_metrics_info_get, _dev_id, &m_gpu_metrics);

#undef ROCPROFSYS_RSMI_GET
}
Expand Down Expand Up @@ -257,6 +259,7 @@ data::post_process(uint32_t _dev_id)
using component::sampling_gpu_memory;
using component::sampling_gpu_power;
using component::sampling_gpu_temp;
using component::sampling_gpu_vcn;

if(device_count < _dev_id) return;

Expand All @@ -273,14 +276,15 @@ data::post_process(uint32_t _dev_id)
auto _settings = get_settings(_dev_id);

auto _process_perfetto = [&]() {
auto _idx = std::array<uint64_t, 4>{};
auto _idx = std::array<uint64_t, 5>{};
{
_idx.fill(_idx.size());
uint64_t nidx = 0;
if(_settings.busy) _idx.at(0) = nidx++;
if(_settings.temp) _idx.at(1) = nidx++;
if(_settings.power) _idx.at(2) = nidx++;
if(_settings.mem_usage) _idx.at(3) = nidx++;
if(_settings.vcn_activity) _idx.at(4) = nidx++;
}

for(auto& itr : _rocm_smi)
Expand All @@ -301,6 +305,15 @@ data::post_process(uint32_t _dev_id)
if(_settings.mem_usage)
counter_track::emplace(_dev_id, addendum("Memory Usage"),
"megabytes");
if(_settings.vcn_activity)
{
for(std::size_t i = 0; i < std::size(itr.m_gpu_metrics.vcn_activity);
++i)
counter_track::emplace(
_dev_id,
addendum(("VCN Activity on " + std::to_string(i)).c_str()),
"%");
}
}
uint64_t _ts = itr.m_ts;
if(!_thread_info->is_valid_time(_ts)) continue;
Expand All @@ -322,6 +335,16 @@ data::post_process(uint32_t _dev_id)
if(_settings.mem_usage)
TRACE_COUNTER("device_memory_usage",
counter_track::at(_dev_id, _idx.at(3)), _ts, _usage);
if(_settings.vcn_activity)
{
uint64_t idx = _idx.at(4);
for(const auto& temp : itr.m_gpu_metrics.vcn_activity)
{
TRACE_COUNTER("device_vcn_activity", counter_track::at(_dev_id, idx),
_ts, temp);
++idx;
}
}
}
};

Expand Down Expand Up @@ -411,6 +434,7 @@ setup()
key_pair_t{ "temp", get_settings(dev_id).temp },
key_pair_t{ "power", get_settings(dev_id).power },
key_pair_t{ "mem_usage", get_settings(dev_id).mem_usage },
key_pair_t{ "vcn_activity", get_settings(dev_id).vcn_activity },
};

get_settings(dev_id) = { false, false, false, false };
Expand Down Expand Up @@ -491,3 +515,7 @@ ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_memory>), true,
double)

ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_vcn>), true,
double)
30 changes: 20 additions & 10 deletions source/lib/rocprof-sys/library/rocm_smi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include "core/state.hpp"
#include "library/thread_data.hpp"

#include <rocm_smi/rocm_smi.h>

#include <chrono>
#include <cstdint>
#include <deque>
Expand All @@ -45,6 +47,8 @@
#include <tuple>
#include <type_traits>

//struct rsmi_gpu_metrics_t;

namespace rocprofsys
{
namespace rocm_smi
Expand All @@ -71,10 +75,11 @@ device_count();

struct settings
{
bool busy = true;
bool temp = true;
bool power = true;
bool mem_usage = true;
bool busy = true;
bool temp = true;
bool power = true;
bool mem_usage = true;
bool vcn_activity = true;
};

struct data
Expand All @@ -99,12 +104,13 @@ struct data

static void post_process(uint32_t _dev_id);

uint32_t m_dev_id = std::numeric_limits<uint32_t>::max();
timestamp_t m_ts = 0;
busy_perc_t m_busy_perc = 0;
temp_t m_temp = 0;
power_t m_power = 0;
mem_usage_t m_mem_usage = 0;
uint32_t m_dev_id = std::numeric_limits<uint32_t>::max();
timestamp_t m_ts = 0;
busy_perc_t m_busy_perc = 0;
temp_t m_temp = 0;
power_t m_power = 0;
mem_usage_t m_mem_usage = 0;
rsmi_gpu_metrics_t m_gpu_metrics;

friend std::ostream& operator<<(std::ostream& _os, const data& _v)
{
Expand Down Expand Up @@ -179,5 +185,9 @@ ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_memory>), true,
double)

ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_vcn>), true,
double)

# endif
#endif
7 changes: 7 additions & 0 deletions source/lib/rocprof-sys/library/sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ using component::sampling_gpu_busy;
using component::sampling_gpu_memory;
using component::sampling_gpu_power;
using component::sampling_gpu_temp;
using component::sampling_gpu_vcn;
using component::sampling_percent;
using component::sampling_wall_clock;
} // namespace sampling
Expand Down Expand Up @@ -1572,6 +1573,12 @@ struct sampling_initialization
sampling_gpu_temp::display_unit() = "degC";
sampling_gpu_temp::set_precision(1);
sampling_gpu_temp::set_format_flags(sampling_gpu_temp::get_format_flags());

sampling_gpu_vcn::label() = "sampling_gpu_vcn_percent";
sampling_gpu_vcn::description() = "Utilization of VCN(s)";
sampling_gpu_vcn::set_precision(0);
sampling_gpu_vcn::set_format_flags(sampling_gpu_vcn::get_format_flags() &
std::ios_base::showpoint);
}
};
} // namespace
Expand Down

0 comments on commit 44919c9

Please sign in to comment.