From 1bd38fed4e83ce77a66b0a8cc07f1bb32892eda3 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 17 Jun 2022 12:18:44 +0800 Subject: [PATCH] add more field to memory record (#43578) --- .../platform/profiler/chrometracing_logger.cc | 16 ++++++--- .../profiler/dump/deserialization_reader.cc | 2 ++ .../platform/profiler/dump/nodetree.proto | 12 +++++-- .../profiler/dump/serialization_logger.cc | 2 ++ .../dump/test_serialization_logger.cc | 17 ++++++++-- paddle/fluid/platform/profiler/event_node.h | 2 ++ .../fluid/platform/profiler/event_python.cc | 2 ++ paddle/fluid/platform/profiler/event_python.h | 4 +++ .../platform/profiler/test_event_node.cc | 34 +++++++++++++++---- paddle/fluid/platform/profiler/trace_event.h | 20 ++++++++--- paddle/fluid/platform/profiler/utils.cc | 3 +- 11 files changed, 94 insertions(+), 20 deletions(-) diff --git a/paddle/fluid/platform/profiler/chrometracing_logger.cc b/paddle/fluid/platform/profiler/chrometracing_logger.cc index 1e26c0a94408c..e8fe541272137 100644 --- a/paddle/fluid/platform/profiler/chrometracing_logger.cc +++ b/paddle/fluid/platform/profiler/chrometracing_logger.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include +#include #include "glog/logging.h" @@ -128,27 +129,32 @@ void ChromeTracingLogger::LogMemTraceEventNode( std::string( R"JSON( { - "name": "[memory]", "pid": %lld, "tid": "%lld", + "name": "[memory]", "pid": %lld, "tid": "%lld(C++)", "ts": %lld, "ph": "i", "cat": "%s", "args": { "place": "%s", "addr": "%llu", + "increase_bytes": %lld, "current_allocated": %llu, "current_reserved": %llu, - "increase_bytes": %lld + "peak_allocated": %llu, + "peak_reserved": %llu } }, )JSON"), mem_node.ProcessId(), mem_node.ThreadId(), - mem_node.TimeStampNs(), + nsToUs(mem_node.TimeStampNs()), StringTracerMemEventType(mem_node.Type()), mem_node.Place().c_str(), mem_node.Addr(), + mem_node.IncreaseBytes(), mem_node.CurrentAllocated(), mem_node.CurrentReserved(), - mem_node.IncreaseBytes()); + mem_node.PeakAllocated(), + mem_node.PeakReserved()); + pid_tid_set_.insert({mem_node.ProcessId(), mem_node.ThreadId()}); } void ChromeTracingLogger::LogHostTraceEventNode( @@ -172,6 +178,8 @@ void ChromeTracingLogger::LogHostTraceEventNode( input_shapes = op_supplement_node->InputShapes(); input_dtypes = op_supplement_node->Dtypes(); callstack = op_supplement_node->CallStack(); + callstack = std::regex_replace(callstack, std::regex("\""), "\'"); + callstack = std::regex_replace(callstack, std::regex("\n"), "\\n"); } switch (host_node.Type()) { case TracerEventType::ProfileStep: diff --git a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc index 1fa70d794a81e..d17aa9e9ce2aa 100644 --- a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc +++ b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc @@ -208,6 +208,8 @@ MemTraceEventNode* DeserializationReader::RestoreMemTraceEventNode( mem_event.place = mem_event_proto.place(); mem_event.current_allocated = mem_event_proto.current_allocated(); mem_event.current_reserved = mem_event_proto.current_reserved(); + mem_event.peak_allocated = mem_event_proto.peak_allocated(); + mem_event.peak_reserved = mem_event_proto.peak_reserved(); return new MemTraceEventNode(mem_event); } diff --git a/paddle/fluid/platform/profiler/dump/nodetree.proto b/paddle/fluid/platform/profiler/dump/nodetree.proto index 0f0c9c92c9c93..4ebfb6e73b331 100644 --- a/paddle/fluid/platform/profiler/dump/nodetree.proto +++ b/paddle/fluid/platform/profiler/dump/nodetree.proto @@ -51,10 +51,14 @@ enum TracerEventTypeProto { }; enum TracerMemEventTypeProto { - // Used to mark memory allocation + // Used to mark memory allocation which is managed by paddle Allocate = 0; - // Used to mark memory free + // Used to mark memory free which is managed by paddle Free = 1; + // Used to mark reserved memory allocation which is applied from device. + ReservedAllocate = 2; + // Used to mark reserved memory free which is released to device. + ReservedFree = 3; }; message KernelEventInfoProto { @@ -150,6 +154,10 @@ message MemTraceEventProto { required uint64 current_allocated = 8; // current total reserved memory required uint64 current_reserved = 9; + // current peak allocated memory + required uint64 peak_allocated = 10; + // current peak reserved memory + required uint64 peak_reserved = 11; } message OperatorSupplementEventProto { diff --git a/paddle/fluid/platform/profiler/dump/serialization_logger.cc b/paddle/fluid/platform/profiler/dump/serialization_logger.cc index 4c1ab34896a79..cbb86e76d3a1e 100644 --- a/paddle/fluid/platform/profiler/dump/serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/serialization_logger.cc @@ -133,6 +133,8 @@ void SerializationLogger::LogMemTraceEventNode( mem_trace_event->set_place(mem_node.Place()); mem_trace_event->set_current_allocated(mem_node.CurrentAllocated()); mem_trace_event->set_current_reserved(mem_node.CurrentReserved()); + mem_trace_event->set_peak_allocated(mem_node.PeakAllocated()); + mem_trace_event->set_peak_reserved(mem_node.PeakReserved()); current_mem_trace_event_node_proto_->set_allocated_mem_event(mem_trace_event); } diff --git a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc index 368a1b3b10225..a49d799c78521 100644 --- a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc @@ -63,9 +63,20 @@ TEST(SerializationLoggerTest, dump_case0) { 50, "GPU:0", 50, - 50)); - mem_events.push_back(MemTraceEvent( - 11900, 0x1000, TracerMemEventType::Free, 10, 10, -50, "GPU:0", 0, 50)); + 50, + 100, + 100)); + mem_events.push_back(MemTraceEvent(11900, + 0x1000, + TracerMemEventType::Free, + 10, + 10, + -50, + "GPU:0", + 0, + 50, + 100, + 100)); std::map>> input_shapes; std::map> dtypes; input_shapes[std::string("X")].push_back(std::vector{1, 2, 3}); diff --git a/paddle/fluid/platform/profiler/event_node.h b/paddle/fluid/platform/profiler/event_node.h index 13ec115100505..34e6556f7f47a 100644 --- a/paddle/fluid/platform/profiler/event_node.h +++ b/paddle/fluid/platform/profiler/event_node.h @@ -47,6 +47,8 @@ class MemTraceEventNode { std::string Place() const { return mem_event_.place; } uint64_t CurrentAllocated() const { return mem_event_.current_allocated; } uint64_t CurrentReserved() const { return mem_event_.current_reserved; } + uint64_t PeakAllocated() const { return mem_event_.peak_allocated; } + uint64_t PeakReserved() const { return mem_event_.peak_reserved; } // member function void LogMe(BaseLogger* logger) { logger->LogMemTraceEventNode(*this); } diff --git a/paddle/fluid/platform/profiler/event_python.cc b/paddle/fluid/platform/profiler/event_python.cc index 7dd49b58eb0ad..028d666f35537 100644 --- a/paddle/fluid/platform/profiler/event_python.cc +++ b/paddle/fluid/platform/profiler/event_python.cc @@ -94,6 +94,8 @@ HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) { mem_python_node->place = (*memnode)->Place(); mem_python_node->current_allocated = (*memnode)->CurrentAllocated(); mem_python_node->current_reserved = (*memnode)->CurrentReserved(); + mem_python_node->peak_allocated = (*memnode)->PeakAllocated(); + mem_python_node->peak_reserved = (*memnode)->PeakReserved(); host_python_node->mem_node_ptrs.push_back(mem_python_node); } // copy OperatorSupplementEventNode's information if exists diff --git a/paddle/fluid/platform/profiler/event_python.h b/paddle/fluid/platform/profiler/event_python.h index 45c84c4a94f21..44f6e61fd3737 100644 --- a/paddle/fluid/platform/profiler/event_python.h +++ b/paddle/fluid/platform/profiler/event_python.h @@ -66,6 +66,10 @@ struct MemPythonNode { uint64_t current_allocated; // current total reserved memory uint64_t current_reserved; + // peak allocated memory + uint64_t peak_allocated; + // peak reserved memory + uint64_t peak_reserved; }; struct HostPythonNode { diff --git a/paddle/fluid/platform/profiler/test_event_node.cc b/paddle/fluid/platform/profiler/test_event_node.cc index 41a5ebce023a0..dcf6dd56d74af 100644 --- a/paddle/fluid/platform/profiler/test_event_node.cc +++ b/paddle/fluid/platform/profiler/test_event_node.cc @@ -60,9 +60,20 @@ TEST(NodeTreesTest, LogMe_case0) { 50, "GPU:0", 50, - 50)); - mem_events.push_back(MemTraceEvent( - 11900, 0x1000, TracerMemEventType::Free, 10, 10, -50, "GPU:0", 0, 50)); + 50, + 100, + 100)); + mem_events.push_back(MemTraceEvent(11900, + 0x1000, + TracerMemEventType::Free, + 10, + 10, + -50, + "GPU:0", + 0, + 50, + 100, + 100)); std::map>> input_shapes; std::map> dtypes; input_shapes[std::string("X")].push_back(std::vector{1, 2, 3}); @@ -267,9 +278,20 @@ TEST(NodeTreesTest, HandleTrees_case0) { 50, "GPU:0", 50, - 50)); - mem_events.push_back(MemTraceEvent( - 11900, 0x1000, TracerMemEventType::Free, 10, 10, -50, "GPU:0", 0, 50)); + 50, + 100, + 100)); + mem_events.push_back(MemTraceEvent(11900, + 0x1000, + TracerMemEventType::Free, + 10, + 10, + -50, + "GPU:0", + 0, + 50, + 100, + 100)); op_supplement_events.push_back(OperatorSupplementEvent( 11600, "op1", diff --git a/paddle/fluid/platform/profiler/trace_event.h b/paddle/fluid/platform/profiler/trace_event.h index d50c5584f5c4b..62d82c19d1796 100644 --- a/paddle/fluid/platform/profiler/trace_event.h +++ b/paddle/fluid/platform/profiler/trace_event.h @@ -59,10 +59,14 @@ enum class TracerEventType { }; enum class TracerMemEventType { - // Used to mark memory allocation + // Used to mark memory allocation which is managed by paddle Allocate = 0, - // Used to mark memory free + // Used to mark memory free which is managed by paddle Free = 1, + // Used to mark reserved memory allocation which is applied from device. + ReservedAllocate = 2, + // Used to mark reserved memory free which is released to device. + ReservedFree = 3, // A flag to denote the number of current types NumTypes }; @@ -318,7 +322,9 @@ struct MemTraceEvent { int64_t increase_bytes, const std::string& place, uint64_t current_allocated, - uint64_t current_reserved) + uint64_t current_reserved, + uint64_t peak_allocated, + uint64_t peak_reserved) : timestamp_ns(timestamp_ns), addr(addr), type(type), @@ -327,7 +333,9 @@ struct MemTraceEvent { increase_bytes(increase_bytes), place(place), current_allocated(current_allocated), - current_reserved(current_reserved) {} + current_reserved(current_reserved), + peak_allocated(peak_allocated), + peak_reserved(peak_reserved) {} // timestamp of the record uint64_t timestamp_ns; @@ -348,6 +356,10 @@ struct MemTraceEvent { uint64_t current_allocated; // current total reserved memory uint64_t current_reserved; + // current peak allocated memory + uint64_t peak_allocated; + // current peak reserved memory + uint64_t peak_reserved; }; } // namespace platform diff --git a/paddle/fluid/platform/profiler/utils.cc b/paddle/fluid/platform/profiler/utils.cc index bbfc687738dd9..11035867416b8 100644 --- a/paddle/fluid/platform/profiler/utils.cc +++ b/paddle/fluid/platform/profiler/utils.cc @@ -91,7 +91,8 @@ float CalculateEstOccupancy(uint32_t DeviceId, #endif const char* StringTracerMemEventType(TracerMemEventType type) { - static const char* categary_name_[] = {"Allocate", "Free"}; + static const char* categary_name_[] = { + "Allocate", "Free", "ReservedAllocate", "ReservedFree"}; return categary_name_[static_cast(type)]; }