Skip to content

Commit

Permalink
SWDEV-484742+SWDEV-489158: Dynamic record size for counter collection…
Browse files Browse the repository at this point in the history
… (#1208)

* Rebased optimizations for rocprofv3 tool

* Fixing merge conflicts

* Formatting

* Open from within mutex

* Small name changes

* Added operator
  • Loading branch information
ApoKalipse-V authored Nov 20, 2024
1 parent bc52c17 commit 6ae441f
Show file tree
Hide file tree
Showing 10 changed files with 143 additions and 38 deletions.
1 change: 1 addition & 0 deletions source/lib/output/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ set(TOOL_OUTPUT_HEADERS

set(TOOL_OUTPUT_SOURCES
csv_output_file.cpp
counter_info.cpp
domain_type.cpp
format_path.cpp
generateCSV.cpp
Expand Down
3 changes: 3 additions & 0 deletions source/lib/output/buffered_output.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,5 +156,8 @@ using scratch_memory_buffered_output_t =
using memory_allocation_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_memory_allocation_record_t,
domain_type::MEMORY_ALLOCATION>;
// Buffered output over serialized_counter_record_t entries (an offset/count pair),
// tagged with the COUNTER_COLLECTION domain.
using counter_records_buffered_output_t =
::rocprofiler::tool::buffered_output<rocprofiler::tool::serialized_counter_record_t,
domain_type::COUNTER_COLLECTION>;
} // namespace tool
} // namespace rocprofiler
60 changes: 60 additions & 0 deletions source/lib/output/counter_info.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#include "counter_info.hpp"
#include "buffered_output.hpp"
#include "tmp_file_buffer.hpp"

#include "lib/common/container/ring_buffer.hpp"
#include "lib/common/filesystem.hpp"
#include "lib/common/logging.hpp"

#include <fmt/core.h>
#include <fmt/format.h>

#include <string_view>
#include <unordered_set>

namespace rocprofiler
{
namespace tool
{
// Domain tag handed to get_tmp_file_buffer<tool_counter_value_t>() by getRecords() and
// writeRecord() in this file.
constexpr auto type = domain_type::COUNTER_VALUES;

/// Loads the counter values belonging to this record from the tmp file buffer
/// registered for the COUNTER_VALUES domain.
///
/// @return `records.count` values read starting at element index `records.offset`.
std::vector<tool_counter_value_t>
tool_counter_record_t::getRecords() const
{
    auto& backing_file = get_tmp_file_buffer<tool_counter_value_t>(type)->file;

    const size_t first_element = records.offset;
    const size_t element_count = records.count;
    return backing_file.read<tool_counter_value_t>(first_element, element_count);
}

/// Stores `num_records` counter values in the tmp file buffer registered for the
/// COUNTER_VALUES domain and remembers where they were placed.
///
/// @param ptr          first counter value to store
/// @param num_records  number of counter values starting at `ptr`
void
tool_counter_record_t::writeRecord(const tool_counter_value_t* ptr, size_t num_records)
{
    auto& backing_file = get_tmp_file_buffer<tool_counter_value_t>(type)->file;

    // write() reports the element index at which the values were placed
    const size_t placed_at = backing_file.write<tool_counter_value_t>(ptr, num_records);

    records.offset = placed_at;
    records.count  = num_records;
}
} // namespace tool
} // namespace rocprofiler
21 changes: 13 additions & 8 deletions source/lib/output/counter_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,26 +84,31 @@ struct tool_counter_value_t
}
};

struct tool_counter_record_t
struct serialized_counter_record_t
{
static constexpr size_t max_capacity = 512;
size_t offset = 0;
size_t count = 0;
};

uint64_t thread_id = 0;
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
std::array<tool_counter_value_t, max_capacity> records = {};
uint64_t counter_count = 0;
struct tool_counter_record_t
{
uint64_t thread_id = 0;
rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
serialized_counter_record_t records = {};

template <typename ArchiveT>
void save(ArchiveT& ar) const
{
// should be removed when moving to buffered tracing
auto tmp =
std::vector<tool_counter_value_t>{records.begin(), records.begin() + counter_count};
auto tmp = getRecords();

ar(cereal::make_nvp("thread_id", thread_id));
ar(cereal::make_nvp("dispatch_data", dispatch_data));
ar(cereal::make_nvp("records", tmp));
}

std::vector<tool_counter_value_t> getRecords() const;
void writeRecord(const tool_counter_value_t* ptr, size_t num_records);
};
} // namespace tool
} // namespace rocprofiler
Expand Down
1 change: 1 addition & 0 deletions source/lib/output/domain_type.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ DEFINE_BUFFER_TYPE_NAME(MEMORY_ALLOCATION,
"MEMORY_ALLOCATION",
"memory_allocation",
"memory_allocation_stats")
DEFINE_BUFFER_TYPE_NAME(COUNTER_VALUES, "COUNTER_VALUES", "counter_values", "no_filename")

#undef DEFINE_BUFFER_TYPE_NAME

Expand Down
1 change: 1 addition & 0 deletions source/lib/output/domain_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ enum class domain_type
COUNTER_COLLECTION,
RCCL,
MEMORY_ALLOCATION,
COUNTER_VALUES,
LAST,
};

Expand Down
35 changes: 21 additions & 14 deletions source/lib/output/generateCSV.cpp
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/marker/api_id.h>
#include <rocprofiler-sdk/cxx/operators.hpp>

#include <unistd.h>
#include <cstdint>
Expand Down Expand Up @@ -567,22 +568,28 @@ generate_csv(const output_config& cfg,
"Counter_Value",
"Start_Timestamp",
"End_Timestamp"}};

auto counter_id_to_name = std::map<rocprofiler_counter_id_t, std::string>{};

for(auto ditr : data)
{
for(auto record : data.get(ditr))
{
auto kernel_id = record.dispatch_data.dispatch_info.kernel_id;
auto counter_name_value = std::map<std::string_view, double>{};
for(uint64_t i = 0; i < record.counter_count; i++)
auto kernel_id = record.dispatch_data.dispatch_info.kernel_id;
auto counter_id_value = std::map<rocprofiler_counter_id_t, double>{};
auto record_vector = record.getRecords();

// Accumulate counters based on ID
for(auto& count : record_vector)
{
counter_id_value[count.id] += count.value;
}

// Query counter names for all IDs
for(auto& [id, _] : counter_id_value)
{
const auto& rec = record.records.at(i);
std::string_view counter_name = tool_metadata.get_counter_info(rec.id)->name;
auto search = counter_name_value.find(counter_name);
if(search == counter_name_value.end())
counter_name_value.emplace(
std::pair<std::string_view, double>{counter_name, rec.value});
else
search->second += rec.value;
if(counter_id_to_name.find(id) == counter_id_to_name.end())
counter_id_to_name[id] = tool_metadata.get_counter_info(id)->name;
}

const auto& correlation_id = record.dispatch_data.correlation_id;
Expand All @@ -592,7 +599,7 @@ generate_csv(const output_config& cfg,

auto magnitude = [](rocprofiler_dim3_t dims) { return (dims.x * dims.y * dims.z); };
auto row_ss = std::stringstream{};
for(auto& itr : counter_name_value)
for(auto& [counter_id, counter_value] : counter_id_value)
{
tool::csv::counter_collection_csv_encoder::write_row(
row_ss,
Expand All @@ -610,8 +617,8 @@ generate_csv(const output_config& cfg,
record.dispatch_data.dispatch_info.private_segment_size,
kernel_info->arch_vgpr_count,
kernel_info->sgpr_count,
itr.first,
itr.second,
counter_id_to_name.at(counter_id),
counter_value,
record.dispatch_data.start_timestamp,
record.dispatch_data.end_timestamp);
}
Expand Down
35 changes: 35 additions & 0 deletions source/lib/output/tmp_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@

#pragma once

#include <atomic>
#include <fstream>
#include <ios>
#include <mutex>
#include <set>
#include <string>
#include <vector>

struct tmp_file
{
Expand All @@ -41,6 +43,39 @@ struct tmp_file

explicit operator bool() const;

/// Writes `num_records` elements of `Type` to the file.
///
/// A range of `num_records` elements is reserved by atomically advancing `offset`; the data
/// is then written at that element index while `file_mutex` is held.
///
/// @param data         pointer to the first element to write
/// @param num_records  number of elements starting at `data`
/// @return element index (not a byte offset) at which the data was placed; this is the
///         `seekpos` to hand to read()
template <typename Type>
size_t write(const Type* data, size_t num_records)
{
    // Assert we are not mixing types with tool_counter_value_t
    static_assert(sizeof(Type) == 16);
    // fetch_add hands every caller a distinct element range, so the reservation itself
    // does not need the lock
    const size_t allocated = offset.fetch_add(num_records);

    std::lock_guard<std::mutex> lk(file_mutex);
    if(!stream.is_open()) open();
    // failbit/eofbit are sticky: without clearing them, a single earlier short read on this
    // shared stream would turn the seekp/write below into silent no-ops
    stream.clear();
    stream.seekp(allocated * sizeof(Type));
    stream.write(reinterpret_cast<const char*>(data), num_records * sizeof(Type));
    return allocated;
}

/// Reads `num_elements` elements of `Type` from the file.
///
/// @param seekpos       element index (not a byte offset) of the first element, as returned
///                      by write()
/// @param num_elements  number of elements to read
/// @return vector of exactly `num_elements` elements; elements the stream could not supply
///         keep their value-initialized state
template <typename Type>
std::vector<Type> read(size_t seekpos, size_t num_elements)
{
    // Assert we are not mixing types with tool_counter_value_t
    static_assert(sizeof(Type) == 16);

    std::vector<Type> ret(num_elements);

    std::lock_guard<std::mutex> lk(file_mutex);
    if(!stream.is_open()) open();

    // failbit/eofbit are sticky: without clearing them, one short read would make every
    // later seekg/read on this shared stream fail and return only default elements
    stream.clear();
    stream.seekg(seekpos * sizeof(Type));
    stream.read(reinterpret_cast<char*>(ret.data()), num_elements * sizeof(Type));
    return ret;
}

std::atomic<size_t> offset{0};

std::string filename = {};
std::string subdirectory = {};
std::fstream stream = {};
Expand Down
22 changes: 7 additions & 15 deletions source/lib/rocprofiler-sdk-tool/tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -843,33 +843,22 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
static const auto gpu_agents_counter_info = get_agent_counter_info();

auto counter_record = tool::tool_counter_record_t{};
auto kernel_id = dispatch_data.dispatch_info.kernel_id;

counter_record.dispatch_data = dispatch_data;
counter_record.thread_id = user_data.value;

const kernel_symbol_info* kernel_info =
CHECK_NOTNULL(tool_metadata)->get_kernel_symbol(kernel_id);
ROCP_ERROR_IF(record_count == 0) << "zero record count for kernel_id=" << kernel_id
<< " (name=" << kernel_info->kernel_name << ")";
std::vector<rocprofiler::tool::tool_counter_value_t> serialized_records;
serialized_records.resize(record_count);

for(size_t count = 0; count < record_count; count++)
{
// Unlikely to trigger, temporary until we move to buffered callbacks
if(count >= counter_record.records.size())
{
ROCP_WARNING << "Exceeded maximum counter capacity, skipping remaining";
break;
}

auto _counter_id = rocprofiler_counter_id_t{};
ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id),
"query record counter id");
counter_record.records[count] =
tool::tool_counter_value_t{_counter_id, record_data[count].counter_value};
counter_record.counter_count++;
serialized_records[count] = {_counter_id, record_data[count].counter_value};
}

counter_record.writeRecord(serialized_records.data(), serialized_records.size());
tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
}

Expand Down Expand Up @@ -1388,6 +1377,8 @@ tool_fini(void* /*tool_data*/)
auto rccl_output = tool::rccl_buffered_output_t{tool::get_config().rccl_api_trace};
auto memory_allocation_output =
tool::memory_allocation_buffered_output_t{tool::get_config().memory_allocation_trace};
auto counters_records_output =
tool::counter_records_buffered_output_t{tool::get_config().counter_collection};

auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; };

Expand Down Expand Up @@ -1496,6 +1487,7 @@ tool_fini(void* /*tool_data*/)
destroy_output(counters_output);
destroy_output(scratch_memory_output);
destroy_output(rccl_output);
destroy_output(counters_records_output);

if(destructors)
{
Expand Down
2 changes: 1 addition & 1 deletion source/lib/rocprofiler-sdk/hsa/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ WriteInterceptor(const void* packets,
if(get_balanced_signal_slots().fetch_sub(1) <= 0)
{
sched_yield();
std::this_thread::sleep_for(std::chrono::microseconds(1));
std::this_thread::sleep_for(std::chrono::nanoseconds(100));
}

// Stores the instrumentation pkt (i.e. AQL packets for counter collection)
Expand Down

0 comments on commit 6ae441f

Please sign in to comment.