diff --git a/src/apex/dependency_tree.cpp b/src/apex/dependency_tree.cpp
index 427148b1..95eea3c0 100644
--- a/src/apex/dependency_tree.cpp
+++ b/src/apex/dependency_tree.cpp
@@ -339,7 +339,7 @@ double Node::writeNodeCSV(std::stringstream& outfile, double total, int node_id)
     outfile << ((parent == nullptr) ? 0 : parent->index) << ",";
     outfile << depth << ",\"";
     outfile << data->get_tree_name() << "\",";
-    // write out the inclusive
+    // write out the accumulated
     double acc = (data == task_identifier::get_main_task_id() || getAccumulated() == 0.0) ?
         total : getAccumulated();
     // write the number of calls
@@ -350,6 +350,7 @@ double Node::writeNodeCSV(std::stringstream& outfile, double total, int node_id)
     double mean = acc / ncalls;
     outfile << std::setprecision(9);
     outfile << acc << ",";
+    outfile << inclusive << ",";
     outfile << getMinimum() << ",";
     outfile << mean << ",";
     outfile << getMaximum() << ",";
@@ -398,18 +399,18 @@ double Node::writeNodeCSV(std::stringstream& outfile, double total, int node_id)
     // end the line
     outfile << std::endl;
 
-    // sort the children by accumulated time
-    std::vector<std::pair<task_identifier, Node*> > sorted;
+    // sort the children by name to make tree merging easier (I hope)
+    std::vector<Node*> sorted;
     for (auto& it : children) {
-        sorted.push_back(it);
+        sorted.push_back(it.second);
     }
-    sort(sorted.begin(), sorted.end(), cmp);
+    sort(sorted.begin(), sorted.end(), Node::compareNodeByParentName);
 
     // do all the children
     double remainder = acc;
     depth++;
     for (auto c : sorted) {
-        double tmp = c.second->writeNodeCSV(outfile, total, node_id);
+        double tmp = c->writeNodeCSV(outfile, total, node_id);
         remainder = remainder - tmp;
     }
     depth--;
diff --git a/src/apex/dependency_tree.hpp b/src/apex/dependency_tree.hpp
index 86e8e75d..b9edbdbc 100644
--- a/src/apex/dependency_tree.hpp
+++ b/src/apex/dependency_tree.hpp
@@ -94,7 +94,7 @@ class Node {
     inline double& getSumSquares() { return prof.sum_squares; }
     void addAccumulated(double value, double incl, bool is_resume, uint64_t thread_id);
     size_t getIndex() { return index; };
-    std::string getName() { return data->get_name(); };
+    std::string getName() const { return data->get_name(); };
     void writeNode(std::ofstream& outfile, double total);
     double writeNodeASCII(std::ofstream& outfile, double total, size_t indent);
     double writeNodeCSV(std::stringstream& outfile, double total, int node_id);
@@ -107,6 +107,17 @@ class Node {
     static std::set<std::string>& getKnownMetrics() {
         return known_metrics;
     }
+    // required for using this class as a key in a map, vector, etc.
+    static bool compareNodeByParentName (const Node* lhs, const Node* rhs) {
+        if (lhs->parent != rhs->parent) {
+            return lhs->parent < rhs->parent;
+        }
+        if (lhs->getName().compare(rhs->getName()) < 0) {
+            return true;
+        }
+        return false;
+    }
+
 };
 
 } // dependency_tree
diff --git a/src/apex/proc_read.cpp b/src/apex/proc_read.cpp
index d9e64042..9a36c6bb 100644
--- a/src/apex/proc_read.cpp
+++ b/src/apex/proc_read.cpp
@@ -1020,9 +1020,11 @@ std::array<double,2> getAvailableMemory() {
         }
         fclose(f);
     }
 
+#ifdef APEX_WITH_HIP
     if (global_rsmi_reader != nullptr) {
         values[1] = global_rsmi_reader->getAvailableMemory();
     }
+#endif
     return values;
 }
diff --git a/src/apex/profile_reducer.cpp b/src/apex/profile_reducer.cpp
index a47ac415..53292a42 100644
--- a/src/apex/profile_reducer.cpp
+++ b/src/apex/profile_reducer.cpp
@@ -21,7 +21,8 @@
  * 8 values (up to) when PAPI enabled */
 constexpr size_t num_fields{23};
 
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
 #include "mpi.h"
 #endif
 
@@ -46,7 +47,8 @@ namespace apex {
 std::map<std::string, apex_profile*> reduce_profiles_for_screen() {
     int commrank = 0;
     int commsize = 1;
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
     int mpi_initialized = 0;
     MPI_CALL(MPI_Initialized( &mpi_initialized ));
     if (mpi_initialized) {
@@ -94,7 +96,8 @@ std::map<std::string, apex_profile*> reduce_profiles_for_screen() {
     length[1] = length[1] + 1;
 
     /* AllReduce all profile name counts */
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
     if (mpi_initialized && commsize > 1) {
         MPI_CALL(PMPI_Allreduce(&length, &max_length, 2, MPI_UINT64_T,
             MPI_MAX, MPI_COMM_WORLD));
@@ -119,7 +122,8 @@ std::map<std::string, apex_profile*> reduce_profiles_for_screen() {
         strncpy(ptr, name.c_str(), max_length[1]);
         ptr = ptr + max_length[1];
     }
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
     if (mpi_initialized && commsize > 1) {
         MPI_CALL(PMPI_Allgather(sbuf, sbuf_length, MPI_CHAR, rbuf,
             sbuf_length, MPI_CHAR, MPI_COMM_WORLD));
@@ -192,7 +196,8 @@ std::map<std::string, apex_profile*> reduce_profiles_for_screen() {
     }
 
     /* Reduce the data */
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
     if (mpi_initialized && commsize > 1) {
         MPI_CALL(PMPI_Gather(s_pdata, sbuf_length, MPI_DOUBLE, r_pdata,
             sbuf_length, MPI_DOUBLE, 0, MPI_COMM_WORLD));
@@ -256,7 +261,8 @@ std::map<std::string, apex_profile*> reduce_profiles_for_screen() {
         }
     }
 
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
     if (mpi_initialized && commsize > 1) {
         MPI_CALL(PMPI_Barrier(MPI_COMM_WORLD));
     }
@@ -267,7 +273,8 @@ std::map<std::string, apex_profile*> reduce_profiles_for_screen() {
 void reduce_profiles(std::stringstream& csv_output, std::string filename) {
     int commrank = 0;
     int commsize = 1;
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
     int mpi_initialized = 0;
     MPI_CALL(MPI_Initialized( &mpi_initialized ));
     if (mpi_initialized) {
@@ -291,7 +298,8 @@ std::map<std::string, apex_profile*> reduce_profiles_for_screen() {
     size_t length{csv_output.str().size()};
     size_t max_length{length};
     // get the longest string from all ranks
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
     if (mpi_initialized && commsize > 1) {
         MPI_CALL(PMPI_Allreduce(&length, &max_length, 1, MPI_UINT64_T,
             MPI_MAX, MPI_COMM_WORLD));
@@ -307,14 +315,16 @@ std::map<std::string, apex_profile*> reduce_profiles_for_screen() {
     // allocate the memory to hold all output
     char * rbuf = nullptr;
     if (commrank == 0) {
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
         rbuf = (char*)calloc(max_length * commsize, sizeof(char));
 #else
         rbuf = sbuf;
 #endif
     }
 
-#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
+#if defined(APEX_HAVE_MPI) || \
+    (defined(HPX_HAVE_NETWORKING) && defined(HPX_HAVE_PARCELPORT_MPI))
     MPI_Gather(sbuf, max_length, MPI_CHAR, rbuf, max_length, MPI_CHAR,
         0, MPI_COMM_WORLD);
 #endif
diff --git a/src/apex/profiler_listener.cpp b/src/apex/profiler_listener.cpp
index 6952cf9c..16d76e60 100644
--- a/src/apex/profiler_listener.cpp
+++ b/src/apex/profiler_listener.cpp
@@ -1175,9 +1175,9 @@ std::unordered_set<profile*> free_profiles;
     stringstream tree_stream;
     if (node_id == 0) {
         tree_stream << "\"process rank\",\"node index\",\"parent index\",\"depth\",";
-        tree_stream << "\"name\",\"calls\",\"threads\",\"total time(ns)\",";
-        tree_stream << "\"minimum time(ns)\",\"mean time(ns)\",\"maximum time(ns)\",";
-        tree_stream << "\"stddev time(ns)\"";
+        tree_stream << "\"name\",\"calls\",\"threads\",\"total time(s)\",\"inclusive time(s)\",";
+        tree_stream << "\"minimum time(s)\",\"mean time(s)\",\"maximum time(s)\",";
+        tree_stream << "\"stddev time(s)\"";
         for (auto& x : dependency::Node::getKnownMetrics()) {
             tree_stream << ",\"total " << x << "\"";
             tree_stream << ",\"minimum " << x << "\"";