From 2c43347e88b8e5190051a541ddf2c035dd9620cf Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Mon, 6 Feb 2023 13:45:56 -0800 Subject: [PATCH 1/5] Fixing portability bug without HIP/ROCm --- src/apex/proc_read.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/apex/proc_read.cpp b/src/apex/proc_read.cpp index d9e64042..9a36c6bb 100644 --- a/src/apex/proc_read.cpp +++ b/src/apex/proc_read.cpp @@ -1020,9 +1020,11 @@ std::array getAvailableMemory() { } fclose(f); } +#ifdef APEX_WITH_HIP if (global_rsmi_reader != nullptr) { values[1] = global_rsmi_reader->getAvailableMemory(); } +#endif return values; } From 3c2abffab7090b2dafd5476c01eadea0c642f38c Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Mon, 6 Feb 2023 13:47:33 -0800 Subject: [PATCH 2/5] Fixing sorting of csv tasktree output, I think --- src/apex/dependency_tree.cpp | 10 +++++----- src/apex/dependency_tree.hpp | 13 ++++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/apex/dependency_tree.cpp b/src/apex/dependency_tree.cpp index 427148b1..7e2a9476 100644 --- a/src/apex/dependency_tree.cpp +++ b/src/apex/dependency_tree.cpp @@ -398,18 +398,18 @@ double Node::writeNodeCSV(std::stringstream& outfile, double total, int node_id) // end the line outfile << std::endl; - // sort the children by accumulated time - std::vector > sorted; + // sort the children by name to make tree merging easier (I hope) + std::vector sorted; for (auto& it : children) { - sorted.push_back(it); + sorted.push_back(it.second); } - sort(sorted.begin(), sorted.end(), cmp); + sort(sorted.begin(), sorted.end(), Node::compareNodeByParentName); // do all the children double remainder = acc; depth++; for (auto c : sorted) { - double tmp = c.second->writeNodeCSV(outfile, total, node_id); + double tmp = c->writeNodeCSV(outfile, total, node_id); remainder = remainder - tmp; } depth--; diff --git a/src/apex/dependency_tree.hpp b/src/apex/dependency_tree.hpp index 86e8e75d..b9edbdbc 100644 --- 
a/src/apex/dependency_tree.hpp +++ b/src/apex/dependency_tree.hpp @@ -94,7 +94,7 @@ class Node { inline double& getSumSquares() { return prof.sum_squares; } void addAccumulated(double value, double incl, bool is_resume, uint64_t thread_id); size_t getIndex() { return index; }; - std::string getName() { return data->get_name(); }; + std::string getName() const { return data->get_name(); }; void writeNode(std::ofstream& outfile, double total); double writeNodeASCII(std::ofstream& outfile, double total, size_t indent); double writeNodeCSV(std::stringstream& outfile, double total, int node_id); @@ -107,6 +107,17 @@ class Node { static std::set& getKnownMetrics() { return known_metrics; } + // strict-weak-ordering comparator for std::sort: orders by parent pointer first, then by node name + static bool compareNodeByParentName (const Node* lhs, const Node* rhs) { + if (lhs->parent != rhs->parent) { + return lhs->parent < rhs->parent; + } + if (lhs->getName().compare(rhs->getName()) < 0) { + return true; + } + return false; + } + }; } // dependency_tree From 84a46e61b9930019c9e7e31a8d0367d489472056 Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Mon, 6 Feb 2023 14:02:56 -0800 Subject: [PATCH 3/5] Adding reduction support for HPX+MPI. Still need support for other parcels. 
--- src/apex/profile_reducer.cpp | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/apex/profile_reducer.cpp b/src/apex/profile_reducer.cpp index a47ac415..53292a42 100644 --- a/src/apex/profile_reducer.cpp +++ b/src/apex/profile_reducer.cpp @@ -21,7 +21,8 @@ * 8 values (up to) when PAPI enabled */ constexpr size_t num_fields{23}; -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) #include "mpi.h" #endif @@ -46,7 +47,8 @@ namespace apex { std::map reduce_profiles_for_screen() { int commrank = 0; int commsize = 1; -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) int mpi_initialized = 0; MPI_CALL(MPI_Initialized( &mpi_initialized )); if (mpi_initialized) { @@ -94,7 +96,8 @@ std::map reduce_profiles_for_screen() { length[1] = length[1] + 1; /* AllReduce all profile name counts */ -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) if (mpi_initialized && commsize > 1) { MPI_CALL(PMPI_Allreduce(&length, &max_length, 2, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD)); @@ -119,7 +122,8 @@ std::map reduce_profiles_for_screen() { strncpy(ptr, name.c_str(), max_length[1]); ptr = ptr + max_length[1]; } -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) if (mpi_initialized && commsize > 1) { MPI_CALL(PMPI_Allgather(sbuf, sbuf_length, MPI_CHAR, rbuf, sbuf_length, MPI_CHAR, MPI_COMM_WORLD)); @@ -192,7 +196,8 @@ std::map reduce_profiles_for_screen() { } /* Reduce the data */ -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + 
(defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) if (mpi_initialized && commsize > 1) { MPI_CALL(PMPI_Gather(s_pdata, sbuf_length, MPI_DOUBLE, r_pdata, sbuf_length, MPI_DOUBLE, 0, MPI_COMM_WORLD)); @@ -256,7 +261,8 @@ std::map reduce_profiles_for_screen() { } } -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) if (mpi_initialized && commsize > 1) { MPI_CALL(PMPI_Barrier(MPI_COMM_WORLD)); } @@ -267,7 +273,8 @@ std::map reduce_profiles_for_screen() { void reduce_profiles(std::stringstream& csv_output, std::string filename) { int commrank = 0; int commsize = 1; -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) int mpi_initialized = 0; MPI_CALL(MPI_Initialized( &mpi_initialized )); if (mpi_initialized) { @@ -291,7 +298,8 @@ std::map reduce_profiles_for_screen() { size_t length{csv_output.str().size()}; size_t max_length{length}; // get the longest string from all ranks -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) if (mpi_initialized && commsize > 1) { MPI_CALL(PMPI_Allreduce(&length, &max_length, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD)); @@ -307,14 +315,16 @@ std::map reduce_profiles_for_screen() { // allocate the memory to hold all output char * rbuf = nullptr; if (commrank == 0) { -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) rbuf = (char*)calloc(max_length * commsize, sizeof(char)); #else rbuf = sbuf; #endif } -#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI) +#if defined(APEX_HAVE_MPI) || \ + (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI)) MPI_Gather(sbuf, 
max_length, MPI_CHAR, rbuf, max_length, MPI_CHAR, 0, MPI_COMM_WORLD); #endif From e4c7c28e37a82a55236a72c72295709dbe229839 Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Mon, 6 Feb 2023 14:18:29 -0800 Subject: [PATCH 4/5] Adding inclusive time to tasktree output. --- src/apex/dependency_tree.cpp | 3 ++- src/apex/profiler_listener.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/apex/dependency_tree.cpp b/src/apex/dependency_tree.cpp index 7e2a9476..95eea3c0 100644 --- a/src/apex/dependency_tree.cpp +++ b/src/apex/dependency_tree.cpp @@ -339,7 +339,7 @@ double Node::writeNodeCSV(std::stringstream& outfile, double total, int node_id) outfile << ((parent == nullptr) ? 0 : parent->index) << ","; outfile << depth << ",\""; outfile << data->get_tree_name() << "\","; - // write out the inclusive + // write out the accumulated double acc = (data == task_identifier::get_main_task_id() || getAccumulated() == 0.0) ? total : getAccumulated(); // write the number of calls @@ -350,6 +350,7 @@ double Node::writeNodeCSV(std::stringstream& outfile, double total, int node_id) double mean = acc / ncalls; outfile << std::setprecision(9); outfile << acc << ","; + outfile << inclusive << ","; outfile << getMinimum() << ","; outfile << mean << ","; outfile << getMaximum() << ","; diff --git a/src/apex/profiler_listener.cpp b/src/apex/profiler_listener.cpp index 6952cf9c..70fd85c8 100644 --- a/src/apex/profiler_listener.cpp +++ b/src/apex/profiler_listener.cpp @@ -1175,7 +1175,7 @@ std::unordered_set free_profiles; stringstream tree_stream; if (node_id == 0) { tree_stream << "\"process rank\",\"node index\",\"parent index\",\"depth\","; - tree_stream << "\"name\",\"calls\",\"threads\",\"total time(ns)\","; + tree_stream << "\"name\",\"calls\",\"threads\",\"total time(ns)\",\"inclusive time(ns)\","; tree_stream << "\"minimum time(ns)\",\"mean time(ns)\",\"maximum time(ns)\","; tree_stream << "\"stddev time(ns)\""; for (auto& x : 
dependency::Node::getKnownMetrics()) { From 5b5e134e86bfce5f232f6e01344020ab1b62f3df Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Mon, 6 Feb 2023 14:23:24 -0800 Subject: [PATCH 5/5] Fixing units in tasktree output --- src/apex/profiler_listener.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/apex/profiler_listener.cpp b/src/apex/profiler_listener.cpp index 70fd85c8..16d76e60 100644 --- a/src/apex/profiler_listener.cpp +++ b/src/apex/profiler_listener.cpp @@ -1175,9 +1175,9 @@ std::unordered_set free_profiles; stringstream tree_stream; if (node_id == 0) { tree_stream << "\"process rank\",\"node index\",\"parent index\",\"depth\","; - tree_stream << "\"name\",\"calls\",\"threads\",\"total time(ns)\",\"inclusive time(ns)\","; - tree_stream << "\"minimum time(ns)\",\"mean time(ns)\",\"maximum time(ns)\","; - tree_stream << "\"stddev time(ns)\""; + tree_stream << "\"name\",\"calls\",\"threads\",\"total time(s)\",\"inclusive time(s)\","; + tree_stream << "\"minimum time(s)\",\"mean time(s)\",\"maximum time(s)\","; + tree_stream << "\"stddev time(s)\""; for (auto& x : dependency::Node::getKnownMetrics()) { tree_stream << ",\"total " << x << "\""; tree_stream << ",\"minimum " << x << "\"";