Skip to content

Commit

Permalink
Merge pull request #2352 from JasonRuonanWang/mpihandshake
Browse files Browse the repository at this point in the history
Optimize MpiHandshake and add more debugging information
  • Loading branch information
JasonRuonanWang authored Jun 30, 2020
2 parents 48dd42a + 217114f commit 8852b3e
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 26 deletions.
56 changes: 36 additions & 20 deletions source/adios2/helper/adiosMpiHandshake.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,8 @@ int MpiHandshake::m_WorldRank;
int MpiHandshake::m_LocalSize;
int MpiHandshake::m_LocalRank;
int MpiHandshake::m_LocalMasterRank;
std::map<std::string, std::map<int, std::vector<int>>>
MpiHandshake::m_WritersMap;
std::map<std::string, std::map<int, std::vector<int>>>
MpiHandshake::m_ReadersMap;
std::map<std::string, std::map<int, std::set<int>>> MpiHandshake::m_WritersMap;
std::map<std::string, std::map<int, std::set<int>>> MpiHandshake::m_ReadersMap;
std::map<int, int> MpiHandshake::m_AppsSize;

size_t MpiHandshake::PlaceInBuffer(size_t stream, int rank)
Expand All @@ -46,7 +44,7 @@ size_t MpiHandshake::PlaceInBuffer(size_t stream, int rank)

void MpiHandshake::Test()
{
int success;
int success = 0;
MPI_Status status;

for (int rank = 0; rank < m_WorldSize; ++rank)
Expand All @@ -72,20 +70,12 @@ void MpiHandshake::Test()
if (mode == 'w')
{
auto &ranks = m_WritersMap[filename][appMasterRank];
if (std::find(ranks.begin(), ranks.end(), rank) ==
ranks.end())
{
ranks.push_back(rank);
}
ranks.insert(rank);
}
else if (mode == 'r')
{
auto &ranks = m_ReadersMap[filename][appMasterRank];
if (std::find(ranks.begin(), ranks.end(), rank) ==
ranks.end())
{
ranks.push_back(rank);
}
ranks.insert(rank);
}
}
}
Expand Down Expand Up @@ -119,6 +109,19 @@ bool MpiHandshake::Check(const std::string &filename, const bool verbose)
{
if (app.second.size() != m_AppsSize[app.first])
{
if (verbose)
{
std::cout << "MpiHandshake Rank " << m_WorldRank << " Stream "
<< filename << ": "
<< " App master rank " << app.first << ", Expected "
<< m_AppsSize[app.first] << " ranks "
<< ", Received " << app.second.size() << " ranks: ";
for (const auto r : app.second)
{
std::cout << r << ", ";
}
std::cout << std::endl;
}
return false;
}
}
Expand All @@ -127,6 +130,19 @@ bool MpiHandshake::Check(const std::string &filename, const bool verbose)
{
if (app.second.size() != m_AppsSize[app.first])
{
if (verbose)
{
std::cout << "MpiHandshake Rank " << m_WorldRank << " Stream "
<< filename << ": "
<< " App master rank " << app.first << ", Expected "
<< m_AppsSize[app.first] << " ranks "
<< ", Received " << app.second.size() << " ranks: ";
for (const auto r : app.second)
{
std::cout << r << ", ";
}
std::cout << std::endl;
}
return false;
}
}
Expand Down Expand Up @@ -166,7 +182,7 @@ void MpiHandshake::Handshake(const std::string &filename, const char mode,
}

m_ItemSize = maxFilenameLength + sizeof(char) + sizeof(int) * 2;
m_Buffer.resize(m_WorldSize * maxStreamsPerApp * m_ItemSize);
m_Buffer.resize(m_WorldSize * maxStreamsPerApp * m_ItemSize, '\0');

// broadcast local master rank's world rank to use as app ID

Expand Down Expand Up @@ -212,14 +228,14 @@ void MpiHandshake::Handshake(const std::string &filename, const char mode,
auto startTime = std::chrono::system_clock::now();
while (!Check(filename, false))
{
std::this_thread::sleep_for(std::chrono::microseconds(100));
std::this_thread::sleep_for(std::chrono::milliseconds(5));
auto nowTime = std::chrono::system_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::seconds>(
nowTime - startTime);
if (duration.count() > timeoutSeconds)
{
Check(filename, true);
throw(std::runtime_error("Mpi handshake timeout on Rank" +
throw(std::runtime_error("Mpi handshake timeout on Rank " +
std::to_string(m_WorldRank) +
" for Stream " + filename));
}
Expand All @@ -245,12 +261,12 @@ void MpiHandshake::Handshake(const std::string &filename, const char mode,
++m_StreamID;
}

const std::map<int, std::vector<int>> &
const std::map<int, std::set<int>> &
MpiHandshake::GetWriterMap(const std::string &filename)
{
return m_WritersMap[filename];
}
const std::map<int, std::vector<int>> &
const std::map<int, std::set<int>> &
MpiHandshake::GetReaderMap(const std::string &filename)
{
return m_ReadersMap[filename];
Expand Down
17 changes: 11 additions & 6 deletions source/adios2/helper/adiosMpiHandshake.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <map>
#include <mpi.h>
#include <set>
#include <string>
#include <vector>

Expand Down Expand Up @@ -78,9 +79,9 @@ class MpiHandshake
*
* @return map of all writer apps participating the stream. Key is the world
* rank of the master rank of the local communicator of a participating
* writer app. Value is a vector of all world ranks of this writer app
* writer app. Value is a set of all world ranks of this writer app
*/
static const std::map<int, std::vector<int>> &
static const std::map<int, std::set<int>> &
GetWriterMap(const std::string &filename);

/**
Expand All @@ -90,9 +91,9 @@ class MpiHandshake
*
* @return map of all reader apps participating the stream. Key is the world
* rank of the master rank of the local communicator of a participating
* reader app. Value is a vector of all world ranks of this reader app
* reader app. Value is a set of all world ranks of this reader app
*/
static const std::map<int, std::vector<int>> &
static const std::map<int, std::set<int>> &
GetReaderMap(const std::string &filename);

private:
Expand All @@ -115,8 +116,12 @@ class MpiHandshake
static int m_LocalSize;
static int m_LocalRank;
static int m_LocalMasterRank;
static std::map<std::string, std::map<int, std::vector<int>>> m_WritersMap;
static std::map<std::string, std::map<int, std::vector<int>>> m_ReadersMap;

// <StreamName, <AppMasterRank, AppAllRankSet>>
static std::map<std::string, std::map<int, std::set<int>>> m_WritersMap;
static std::map<std::string, std::map<int, std::set<int>>> m_ReadersMap;

// <AppMasterRank, AppTotalRanks>
static std::map<int, int> m_AppsSize;
};

Expand Down

0 comments on commit 8852b3e

Please sign in to comment.