diff --git a/source/lib/output/CMakeLists.txt b/source/lib/output/CMakeLists.txt
index f2048aba..93c37158 100644
--- a/source/lib/output/CMakeLists.txt
+++ b/source/lib/output/CMakeLists.txt
@@ -30,6 +30,7 @@ set(TOOL_OUTPUT_HEADERS
 
 set(TOOL_OUTPUT_SOURCES
     csv_output_file.cpp
+    counter_info.cpp
     domain_type.cpp
     format_path.cpp
     generateCSV.cpp
diff --git a/source/lib/output/buffered_output.hpp b/source/lib/output/buffered_output.hpp
index 8a495f5f..e045c63d 100644
--- a/source/lib/output/buffered_output.hpp
+++ b/source/lib/output/buffered_output.hpp
@@ -156,5 +156,8 @@ using scratch_memory_buffered_output_t =
 using memory_allocation_buffered_output_t =
     buffered_output;
+using counter_records_buffered_output_t =
+    ::rocprofiler::tool::buffered_output;
 } // namespace tool
 } // namespace rocprofiler
diff --git a/source/lib/output/counter_info.cpp b/source/lib/output/counter_info.cpp
new file mode 100644
index 00000000..6fb949e0
--- /dev/null
+++ b/source/lib/output/counter_info.cpp
@@ -0,0 +1,65 @@
+// MIT License
+//
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "counter_info.hpp"
+#include "buffered_output.hpp"
+#include "tmp_file_buffer.hpp"
+
+#include "lib/common/container/ring_buffer.hpp"
+#include "lib/common/filesystem.hpp"
+#include "lib/common/logging.hpp"
+
+#include
+#include
+
+#include
+#include
+
+namespace rocprofiler
+{
+namespace tool
+{
+// Domain used to look up the temporary file that holds the serialized counter values.
+constexpr auto type = domain_type::COUNTER_VALUES;
+
+// Reads back the `records.count` counter values that start at `records.offset` in the
+// COUNTER_VALUES temporary file.
+std::vector
+tool_counter_record_t::getRecords() const
+{
+    auto& _tmp_file = get_tmp_file_buffer(type)->file;
+
+    return _tmp_file.read(records.offset, records.count);
+}
+
+// Writes `num_records` values starting at `ptr` to the COUNTER_VALUES temporary file and
+// stores the returned offset together with the count so getRecords() can load them again.
+void
+tool_counter_record_t::writeRecord(const tool_counter_value_t* ptr, size_t num_records)
+{
+    auto& _tmp_file = get_tmp_file_buffer(type)->file;
+
+    records.offset = _tmp_file.write(ptr, num_records);
+    records.count  = num_records;
+}
+} // namespace tool
+} // namespace rocprofiler
diff --git a/source/lib/output/counter_info.hpp b/source/lib/output/counter_info.hpp
index 556abc40..8af62ef3 100644
--- a/source/lib/output/counter_info.hpp
+++ b/source/lib/output/counter_info.hpp
@@ -84,26 +84,35 @@ struct tool_counter_value_t
     }
 };
 
-struct tool_counter_record_t
+// Location of a run of counter values inside the counter-values temporary file.
+struct serialized_counter_record_t
 {
-    static constexpr size_t max_capacity = 512;
+    size_t offset = 0;  // element index of the first value, as returned by tmp_file::write
+    size_t count  = 0;  // number of values stored at that offset
+};
 
-    uint64_t                                     thread_id     = 0;
-    rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
-    std::array                                   records       = {};
-    uint64_t                                     counter_count = 0;
+// Counter-collection record: thread id, dispatch data, and the location of its counter values.
+struct tool_counter_record_t
+{
+    uint64_t                                     thread_id     = 0;
+    rocprofiler_dispatch_counting_service_data_t dispatch_data = {};
+    serialized_counter_record_t                  records       = {};
 
     template
     void save(ArchiveT& ar) const
     {
         // should be removed when moving to buffered tracing
-        auto tmp =
-            std::vector{records.begin(), records.begin() + counter_count};
+        auto tmp = getRecords();
 
         ar(cereal::make_nvp("thread_id", thread_id));
         ar(cereal::make_nvp("dispatch_data", dispatch_data));
         ar(cereal::make_nvp("records", tmp));
     }
+
+    // Loads the counter values described by `records` back from the temporary file.
+    std::vector getRecords() const;
+    // Stores `num_records` values from `ptr` in the temporary file and updates `records`.
+    void        writeRecord(const tool_counter_value_t* ptr, size_t num_records);
 };
 } // namespace tool
 } // namespace rocprofiler
diff --git a/source/lib/output/domain_type.cpp b/source/lib/output/domain_type.cpp
index e903e156..8d281a86 100644
--- a/source/lib/output/domain_type.cpp
+++ b/source/lib/output/domain_type.cpp
@@ -56,6 +56,7 @@ DEFINE_BUFFER_TYPE_NAME(MEMORY_ALLOCATION,
                         "MEMORY_ALLOCATION",
                         "memory_allocation",
                         "memory_allocation_stats")
+DEFINE_BUFFER_TYPE_NAME(COUNTER_VALUES, "COUNTER_VALUES", "counter_values", "no_filename")
 
 #undef DEFINE_BUFFER_TYPE_NAME
 
diff --git a/source/lib/output/domain_type.hpp b/source/lib/output/domain_type.hpp
index 7c23eed4..8fc8ced3 100644
--- a/source/lib/output/domain_type.hpp
+++ b/source/lib/output/domain_type.hpp
@@ -35,6 +35,7 @@ enum class domain_type
     COUNTER_COLLECTION,
     RCCL,
     MEMORY_ALLOCATION,
+    COUNTER_VALUES,
     LAST,
 };
 
diff --git a/source/lib/output/generateCSV.cpp b/source/lib/output/generateCSV.cpp
index d1216783..a95965d3 100644
--- a/source/lib/output/generateCSV.cpp
+++ b/source/lib/output/generateCSV.cpp
@@ -32,6 +32,7 @@
 
 #include
 #include
+#include
 #include
 #include
 
@@ -567,22 +568,28 @@ generate_csv(const output_config& cfg,
                                                  "Counter_Value",
                                                  "Start_Timestamp",
                                                  "End_Timestamp"}};
+
+    auto counter_id_to_name = std::map{};  // counter id -> name, reused across all records
+
     for(auto ditr : data)
     {
         for(auto record : data.get(ditr))
         {
-            auto kernel_id          = record.dispatch_data.dispatch_info.kernel_id;
-            auto counter_name_value = std::map{};
-            for(uint64_t i = 0; i < record.counter_count; i++)
+            auto kernel_id        = record.dispatch_data.dispatch_info.kernel_id;
+            auto counter_id_value = std::map{};
+            auto record_vector    = record.getRecords();
+
+            // Accumulate counters based on ID
+            for(const auto& count : record_vector)
+            {
+                counter_id_value[count.id] += count.value;
+            }
+
+            // Query counter names for all IDs
+            for(const auto& [id, _] : counter_id_value)
             {
-                const auto&      rec          = record.records.at(i);
-                std::string_view counter_name = tool_metadata.get_counter_info(rec.id)->name;
-                auto             search       = counter_name_value.find(counter_name);
-                if(search == counter_name_value.end())
-                    counter_name_value.emplace(
-                        std::pair{counter_name, rec.value});
-                else
-                    search->second += rec.value;
+                if(counter_id_to_name.find(id) == counter_id_to_name.end())
+                    counter_id_to_name[id] = tool_metadata.get_counter_info(id)->name;
             }
 
             const auto& correlation_id = record.dispatch_data.correlation_id;
@@ -592,7 +599,7 @@ generate_csv(const output_config& cfg,
 
             auto magnitude = [](rocprofiler_dim3_t dims) { return (dims.x * dims.y * dims.z); };
             auto row_ss    = std::stringstream{};
-            for(auto& itr : counter_name_value)
+            for(const auto& [counter_id, counter_value] : counter_id_value)
             {
                 tool::csv::counter_collection_csv_encoder::write_row(
                     row_ss,
@@ -610,8 +617,8 @@ generate_csv(const output_config& cfg,
                     record.dispatch_data.dispatch_info.private_segment_size,
                     kernel_info->arch_vgpr_count,
                     kernel_info->sgpr_count,
-                    itr.first,
-                    itr.second,
+                    counter_id_to_name.at(counter_id),
+                    counter_value,
                     record.dispatch_data.start_timestamp,
                     record.dispatch_data.end_timestamp);
             }
diff --git a/source/lib/output/tmp_file.hpp b/source/lib/output/tmp_file.hpp
index 04348fde..748f8dfc 100644
--- a/source/lib/output/tmp_file.hpp
+++ b/source/lib/output/tmp_file.hpp
@@ -22,11 +22,13 @@
 
 #pragma once
 
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 
 struct tmp_file
 {
@@ -41,6 +43,43 @@ struct tmp_file
 
     explicit operator bool() const;
 
+    // Writes `num_records` elements from `data` and returns the element index at which they
+    // were stored. The index range is reserved atomically before the file lock is taken.
+    template
+    size_t write(const Type* data, size_t num_records)
+    {
+        // Assert we are not mixing types with tool_counter_value_t
+        static_assert(sizeof(Type) == 16);
+        size_t allocated = offset.fetch_add(num_records);
+
+        std::unique_lock lk(file_mutex);
+        if(!stream.is_open()) open();
+        stream.seekp(allocated * sizeof(Type));
+        stream.write(reinterpret_cast<const char*>(data), num_records * sizeof(Type));
+        return allocated;
+    }
+
+    // Reads `num_elements` elements starting at element index `seekpos` (a value previously
+    // returned by write()) and returns them in a vector.
+    template
+    std::vector read(size_t seekpos, size_t num_elements)
+    {
+        // Assert we are not mixing types with tool_counter_value_t
+        static_assert(sizeof(Type) == 16);
+
+        std::vector ret;
+        ret.resize(num_elements);
+
+        std::unique_lock lk(file_mutex);
+        if(!stream.is_open()) open();
+
+        stream.seekg(seekpos * sizeof(Type));
+        stream.read(reinterpret_cast<char*>(ret.data()), num_elements * sizeof(Type));
+        return ret;
+    }
+
+    std::atomic offset{0};  // next free element index handed out by write()
+
     std::string  filename     = {};
     std::string  subdirectory = {};
     std::fstream stream       = {};
diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp
index 582b52ae..31ceb260 100644
--- a/source/lib/rocprofiler-sdk-tool/tool.cpp
+++ b/source/lib/rocprofiler-sdk-tool/tool.cpp
@@ -843,33 +843,22 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
     static const auto gpu_agents_counter_info = get_agent_counter_info();
 
     auto counter_record = tool::tool_counter_record_t{};
-    auto kernel_id      = dispatch_data.dispatch_info.kernel_id;
 
     counter_record.dispatch_data = dispatch_data;
     counter_record.thread_id     = user_data.value;
 
-    const kernel_symbol_info* kernel_info =
-        CHECK_NOTNULL(tool_metadata)->get_kernel_symbol(kernel_id);
-    ROCP_ERROR_IF(record_count == 0) << "zero record count for kernel_id=" << kernel_id
-                                     << " (name=" << kernel_info->kernel_name << ")";
+    std::vector serialized_records;  // staging buffer, one entry per incoming record
+    serialized_records.resize(record_count);
 
     for(size_t count = 0; count < record_count; count++)
     {
-        // Unlikely to trigger, temporary until we move to buffered callbacks
-        if(count >= counter_record.records.size())
-        {
-            ROCP_WARNING << "Exceeded maximum counter capacity, skipping remaining";
-            break;
-        }
-
         auto _counter_id = rocprofiler_counter_id_t{};
         ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id),
                          "query record counter id");
 
-        counter_record.records[count] =
-            tool::tool_counter_value_t{_counter_id, record_data[count].counter_value};
-        counter_record.counter_count++;
+        serialized_records[count] = {_counter_id, record_data[count].counter_value};
     }
+    counter_record.writeRecord(serialized_records.data(), serialized_records.size());
     tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
 }
 
@@ -1388,6 +1377,8 @@ tool_fini(void* /*tool_data*/)
     auto rccl_output = tool::rccl_buffered_output_t{tool::get_config().rccl_api_trace};
     auto memory_allocation_output =
         tool::memory_allocation_buffered_output_t{tool::get_config().memory_allocation_trace};
+    auto counters_records_output =
+        tool::counter_records_buffered_output_t{tool::get_config().counter_collection};
 
     auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; };
 
@@ -1496,6 +1487,7 @@ tool_fini(void* /*tool_data*/)
     destroy_output(counters_output);
     destroy_output(scratch_memory_output);
     destroy_output(rccl_output);
+    destroy_output(counters_records_output);
 
     if(destructors)
     {
diff --git a/source/lib/rocprofiler-sdk/hsa/queue.cpp b/source/lib/rocprofiler-sdk/hsa/queue.cpp
index 027ba407..d5bd040c 100644
--- a/source/lib/rocprofiler-sdk/hsa/queue.cpp
+++ b/source/lib/rocprofiler-sdk/hsa/queue.cpp
@@ -356,7 +356,7 @@ WriteInterceptor(const void* packets,
         if(get_balanced_signal_slots().fetch_sub(1) <= 0)
         {
             sched_yield();
-            std::this_thread::sleep_for(std::chrono::microseconds(1));
+            std::this_thread::sleep_for(std::chrono::nanoseconds(100));
         }
 
         // Stores the instrumentation pkt (i.e. AQL packets for counter collection)