From f5a58721679a1949dfb11aaf1a0ffdb9653059cb Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: Fri, 27 Aug 2021 14:22:16 +0100 Subject: [PATCH 01/10] Add new configuration section 'modes', and field 'single_app_mode', to discriminate between when we are supposed to support hardware idle, and when we are not. --- .gitmodules | 4 ---- Config/Orchestrator.ocfg | 13 +++++++++---- Source/Root/OrchConfig.cpp | 18 ++++++++++++++++-- Source/Root/OrchConfig.h | 5 +++++ 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/.gitmodules b/.gitmodules index ab22aa56..e69de29b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +0,0 @@ -[submodule "Tinsel"] - path = Tinsel - url = https://github.com/poetsii/tinsel - branch = master diff --git a/Config/Orchestrator.ocfg b/Config/Orchestrator.ocfg index f878e144..1a8d08f3 100644 --- a/Config/Orchestrator.ocfg +++ b/Config/Orchestrator.ocfg @@ -8,10 +8,10 @@ [Orchestrator_header] // All pretty arbitrary; just copied in and stored -name = OrchestratorConfiguration -author = "MLV and GMB" -date = "2021-01-17" -version = "0.0.6" +name = "OrchestratorConfiguration" +author = "MLV" +date = "2021-08-27" +version = "0.1.0" // All these may be overridden by the console "path" command [default_paths] @@ -58,6 +58,11 @@ hardware = "../Config/POETSHardwareOneBox.ocfg" // Default flags for the cross-compiler // build = "\oink -plop ++wheeee !. " +// Alter the behaviour of the Orchestrator +[modes] +single_app_mode = "true" // Either 'true' or 'false'. Must be true to support + // hardware idle, for now. 
+ // Elaboration messages passed out to the author for errors in processing THIS // file They are all classed as "Unrecoverable", not because they are, but // because: If you're not a grown-up you shouldn't be mucking about with it diff --git a/Source/Root/OrchConfig.cpp b/Source/Root/OrchConfig.cpp index 0b93f499..05c41203 100644 --- a/Source/Root/OrchConfig.cpp +++ b/Source/Root/OrchConfig.cpp @@ -27,8 +27,8 @@ ecnt = 0; // So far, so good, then. No errors JNJ P(WhereAmI); // Parse defining file ecnt = P.ErrCnt(); // Syntax cockups? if (ecnt!=0) { // If so, bail - if (P.Td.t==Lex::S_0) IncErr(0,6); - else IncErr(0,7); + if (P.Td.t==Lex::S_0) IncErr(0,7); + else IncErr(0,8); return; } vH sects; @@ -99,6 +99,20 @@ WALKVECTOR(UIF::Node *,sects,i) { // Walk the sections if ((*k)->str=="build" ) flags.build = s; } } + if (sn=="modes") { + P.GetVari(*i,varis); + WALKVECTOR(UIF::Node *,varis,k) { + P.LocValu(*k,valus); + if (valus.size()>1) IncErr(P.FndRecd(*k),6); + if (valus.empty()) s.clear(); + else s = valus[0]->str; + if ((*k)->str=="single_app_mode") { + if (s=="false") modes.single_app_mode = false; + else if (s=="true") modes.single_app_mode = true; + else IncErr(P.FndRecd(*k),6); + } + } + } } if (ecnt!=0) Init(); // Any errors, kill the lot Force2Linux(); // Force all the separators to linux-land diff --git a/Source/Root/OrchConfig.h b/Source/Root/OrchConfig.h index ebbac36b..7a8344cd 100644 --- a/Source/Root/OrchConfig.h +++ b/Source/Root/OrchConfig.h @@ -41,6 +41,7 @@ string Placement() { return setup_files.placement; } string RemoteMshp() { return default_paths.remote_mship; } string RemoteOut() { return default_paths.remote_outdir; } string Trace() { return default_paths.trace; } +bool SingleApp() { return modes.single_app_mode; } string Stage() { return default_paths.stage; } string Supervisors() { return default_paths.supervisors; } string Ulog() { return default_paths.ulog; } @@ -86,6 +87,10 @@ struct flags_t { string build; // Default x-compiler flags 
} flags; +struct modes_t { + bool single_app_mode; // Hardware-idle friendly? +} modes; + }; //============================================================================== From 28c8c099582c8df626a6be5e3229782ebe9c1c97 Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: Fri, 27 Aug 2021 14:58:03 +0100 Subject: [PATCH 02/10] Deployment logic reads from the aforementioned config field, and lets the Mothership know. The Mothership will use global backend 'start' and 'go' methods instead. --- Config/OrchestratorMessages.ocfg | 3 + Source/Common/Pglobals.h | 2 + Source/Mothership/AppDB.cpp | 14 +++-- Source/Mothership/AppDB.h | 2 +- Source/Mothership/AppInfo.cpp | 5 +- Source/Mothership/AppInfo.h | 3 +- Source/Mothership/AppTransitions.cpp | 84 +++++++++++++++++----------- Source/Mothership/MPIHandlers.cpp | 10 ++-- Source/Mothership/MessageUtils.cpp | 22 +++++++- Source/Mothership/Mothership.h | 4 +- Source/OrchBase/Handlers/CmDepl.cpp | 9 ++- 11 files changed, 106 insertions(+), 52 deletions(-) diff --git a/Config/OrchestratorMessages.ocfg b/Config/OrchestratorMessages.ocfg index 93973a3b..fbca9cf0 100644 --- a/Config/OrchestratorMessages.ocfg +++ b/Config/OrchestratorMessages.ocfg @@ -234,7 +234,10 @@ 534(I) : "Mothership (rank %s): All devices on this Mothership for application '%s' have stopped." 535(I) : "Mothership (rank %s): Recalling application '%s'." 536(I) : "Mothership (rank %s): Application '%s' recalled. This Mothership has forgotten everything about this application." +537(E) : "Mothership: Error decoding MPI message with key '0x%s': Expected bool in field %s. Ignoring message." +578(I) : "Mothership: Calling backend->go." +579(I) : "Mothership: Calling backend->startAll." 580(E) : "Mothership: Received a log packet with an invalid device index 0x%s." 581(I) : "Mothership: Consuming log packet from device address 0x%s with name %s." 
582(I) : "Mothership: Received a message containing packets for a supervisor device for application '%s' that is not running (it may be in the process of stopping). Ignoring these packets." diff --git a/Source/Common/Pglobals.h b/Source/Common/Pglobals.h index d26e57b5..0651ccd5 100644 --- a/Source/Common/Pglobals.h +++ b/Source/Common/Pglobals.h @@ -82,6 +82,8 @@ EXIT |- |- |- | (None) SYST |KILL |- |- | (None) APP |SPEC |- |- | (0:string)Application name (1:uint32_t)Number of expected distribution messages + (2:uint8_t)Application number + (3:bool)Hardware-idle application? APP |DIST |- |- | (0:string)Application name (1:string)Code path for this core (2:string)Data path for this core diff --git a/Source/Mothership/AppDB.cpp b/Source/Mothership/AppDB.cpp index c09d72dd..c3947072 100644 --- a/Source/Mothership/AppDB.cpp +++ b/Source/Mothership/AppDB.cpp @@ -1,10 +1,12 @@ #include "AppDB.h" /* Checks appInfos for an application of a given name. If it doesn't exist, - * AppDB creates it and returns a pointer to it (passing distCountExpected to - * it as an argument). If it already exists, returns a pointer to the existing - * application, and does not use distCountExpected. */ -AppInfo* AppDB::check_create_app(std::string name, uint32_t distCountExpected) + * AppDB creates it and returns a pointer to it (passing distCountExpected and + * soloApp to it as an argument). If it already exists, returns a pointer to + * the existing application, does not use distCountExpected, but sets + * soloApp. 
*/ +AppInfo* AppDB::check_create_app(std::string name, uint32_t distCountExpected, + bool soloApp) { AppInfoIt appFinder = appInfos.find(name); @@ -21,7 +23,7 @@ AppInfo* AppDB::check_create_app(std::string name, uint32_t distCountExpected) else { appInfos.insert(std::pair - (name, AppInfo(name, distCountExpected))); + (name, AppInfo(name, distCountExpected, soloApp))); } return &(appInfos.find(name)->second); @@ -37,7 +39,7 @@ AppInfo* AppDB::check_create_app(std::string name, uint32_t distCountExpected) /* Sub-synonym. */ AppInfo* AppDB::check_create_app(std::string name) { - return check_create_app(name, 0); + return check_create_app(name, 0, false); /* soloApp argument not used. */ } /* Checks appInfos for an application of a given name, returning true if such diff --git a/Source/Mothership/AppDB.h b/Source/Mothership/AppDB.h index 32667778..4a067f26 100644 --- a/Source/Mothership/AppDB.h +++ b/Source/Mothership/AppDB.h @@ -22,7 +22,7 @@ class AppDB std::map threadToCoreAddr; std::map numberToApp; - AppInfo* check_create_app(std::string, uint32_t); + AppInfo* check_create_app(std::string, uint32_t, bool); AppInfo* check_create_app(std::string); bool check_defined_app(std::string); void recall_app(AppInfo*); diff --git a/Source/Mothership/AppInfo.cpp b/Source/Mothership/AppInfo.cpp index 9b27395e..832d1545 100644 --- a/Source/Mothership/AppInfo.cpp +++ b/Source/Mothership/AppInfo.cpp @@ -1,9 +1,10 @@ #include "AppInfo.h" /* This constructor is used by SPEC messages. 
*/ -AppInfo::AppInfo(std::string name, uint32_t distCountExpected): +AppInfo::AppInfo(std::string name, uint32_t distCountExpected, bool soloApp): name(name), - distCountExpected(distCountExpected) + distCountExpected(distCountExpected), + soloApp(soloApp) { pendingCommands = 0; distCountCurrent = 0; diff --git a/Source/Mothership/AppInfo.h b/Source/Mothership/AppInfo.h index 408462fa..98073c88 100644 --- a/Source/Mothership/AppInfo.h +++ b/Source/Mothership/AppInfo.h @@ -39,11 +39,12 @@ enum AppState{UNDERDEFINED, /* We're still receiving DIST messages. */ class AppInfo { public: - AppInfo(std::string nameArg, uint32_t distCountExpected); + AppInfo(std::string nameArg, uint32_t distCountExpected, bool soloApp); AppInfo(std::string nameArg); std::string name; uint32_t distCountExpected; + bool soloApp; AppState state; std::map coreInfos; std::set coresLoaded; diff --git a/Source/Mothership/AppTransitions.cpp b/Source/Mothership/AppTransitions.cpp index 81320820..22ef2158 100644 --- a/Source/Mothership/AppTransitions.cpp +++ b/Source/Mothership/AppTransitions.cpp @@ -58,44 +58,62 @@ void Mothership::initialise_application(AppInfo* app) meshX, meshY, coreId); } - /* 2: For each core, kick off the threads (mode=false). - * 3: For each core, start execution (mode=true). */ - mode = false; - do + /* 2: For each core, kick off the threads. + * 3: For each core, start execution. */ + + /* In solo-app mode, use backend's available global start and go + * methods. 
*/ + if (app->soloApp) { - for (coreIt = app->coreInfos.begin(); coreIt != app->coreInfos.end(); - coreIt++) - { - backend->fromAddr(coreIt->first, &meshX, &meshY, &coreId, - &threadId); + debug_post(579, 0); + backend->startAll(); /* 2 */ + debug_post(578, 0); + backend->go(); /* 3 */ + } - if (!mode) /* 2 */ - { - debug_post( - 587, 4, hex2str(meshX).c_str(), hex2str(meshY).c_str(), - hex2str(coreId).c_str(), - uint2str(coreIt->second.threadsExpected.size()).c_str()); - /* Note that startOne can hang for the Tinsel backend if the - * number of threads expected is greater than the number of - * threads that the core will start - this is because startOne - * waits for an acknowledgement message from the core that - * varies as a function of the number of threads. If you find - * the above 587 being the last message you see from the MPI - * CNC Resolver thread (for example), this is most likely your - * issue. */ - backend->startOne(meshX, meshY, coreId, - coreIt->second.threadsExpected.size()); - } - else /* 3 */ + /* Otherwise, things get more complicated... */ + else + { + mode = false; /* 2 when false, 3 when true. */ + do /* Simple loop to reduce code duplication (see comment accompanying + * this function definition). 
*/ + { + for (coreIt = app->coreInfos.begin(); + coreIt != app->coreInfos.end(); coreIt++) { - debug_post(586, 3, hex2str(meshX).c_str(), - hex2str(meshY).c_str(), hex2str(coreId).c_str()); - backend->goOne(meshX, meshY, coreId); + backend->fromAddr(coreIt->first, &meshX, &meshY, &coreId, + &threadId); + + if (!mode) /* 2 */ + { + debug_post( + 587, 4, hex2str(meshX).c_str(), hex2str(meshY).c_str(), + hex2str(coreId).c_str(), + uint2str(coreIt->second.threadsExpected.size()) + .c_str()); + /* Note that startOne can hang for the Tinsel backend if + * the number of threads expected is greater than the + * number of threads that the core will start - this is + * because startOne waits for an acknowledgement message + * from the core that varies as a function of the number of + * threads. If you find the above 587 being the last + * message you see from the MPI CNC Resolver thread (for + * example), this is most likely your issue. */ + backend->startOne(meshX, meshY, coreId, + coreIt->second.threadsExpected.size()); + } + else /* 3 */ + { + debug_post(586, 3, hex2str(meshX).c_str(), + hex2str(meshY).c_str(), + hex2str(coreId).c_str()); + backend->goOne(meshX, meshY, coreId); + } } - } - mode = !mode; - } while (mode); + mode = !mode; + } while (mode); + } /* Good stuff. Now the cores will spin up and send BARRIER messages to the * Mothership. 
*/ diff --git a/Source/Mothership/MPIHandlers.cpp b/Source/Mothership/MPIHandlers.cpp index fec38f8b..f25a22aa 100644 --- a/Source/Mothership/MPIHandlers.cpp +++ b/Source/Mothership/MPIHandlers.cpp @@ -87,8 +87,9 @@ unsigned Mothership::handle_msg_app_spec(PMsg_p* message) std::string appName; uint32_t distCount; uint8_t appNumber; + bool soloApp; if (!decode_app_spec_message(message, &appName, &distCount, - &appNumber)) + &appNumber, &soloApp)) { debug_post(597, 3, "Q::APP,Q::SPEC", hex2str(message->Key()).c_str(), "Failed to decode."); @@ -96,12 +97,13 @@ unsigned Mothership::handle_msg_app_spec(PMsg_p* message) } debug_post(597, 3, "Q::APP,Q::SPEC", hex2str(message->Key()).c_str(), - dformat("appName=%s, distCount=%u, appNumber=%u", - appName.c_str(), distCount, appNumber).c_str()); + dformat("appName=%s, distCount=%u, appNumber=%u, soloApp=%s", + appName.c_str(), distCount, appNumber, + soloApp ? "true" : "false").c_str()); /* Ensure application existence idempotently (it might have been created by * an AppDist message). */ - appInfo = appdb.check_create_app(appName, distCount); + appInfo = appdb.check_create_app(appName, distCount, soloApp); /* If the application is not in the UNDERDEFINED state, post bossily and do * nothing else. 
*/ diff --git a/Source/Mothership/MessageUtils.cpp b/Source/Mothership/MessageUtils.cpp index 814a3c43..2c85f141 100644 --- a/Source/Mothership/MessageUtils.cpp +++ b/Source/Mothership/MessageUtils.cpp @@ -38,12 +38,14 @@ bool Mothership::decode_app_supd_message(PMsg_p* message, std::string* appName, bool Mothership::decode_app_spec_message(PMsg_p* message, std::string* appName, uint32_t* distCount, - uint8_t* appNumber) + uint8_t* appNumber, + bool* soloApp) { *distCount = 0; if(!decode_string_message(message, appName)) return false; if(!decode_unsigned_message(message, distCount, 1)) return false; if(!decode_char_message(message, appNumber, 2)) return false; + if(!decode_bool_message(message, soloApp, 3)) return false; return true; } @@ -89,6 +91,24 @@ bool Mothership::decode_addressed_packets_message(PMsg_p* message, return true; } +bool Mothership::decode_bool_message(PMsg_p* message, bool* result, + unsigned index) +{ + int countBuffer; + bool* resultBuffer; + + /* Get and check for errors. 
*/ + resultBuffer = message->Get(index, countBuffer); + if (resultBuffer == PNULL) + { + *result = 0; + Post(537, hex2str(message->Key()), uint2str(index)); + return false; + } + *result = *resultBuffer; + return true; +} + bool Mothership::decode_char_message(PMsg_p* message, unsigned char* result, unsigned index) { diff --git a/Source/Mothership/Mothership.h b/Source/Mothership/Mothership.h index b3fc99ea..3fafa457 100644 --- a/Source/Mothership/Mothership.h +++ b/Source/Mothership/Mothership.h @@ -102,12 +102,14 @@ class Mothership: public CommonBase bool decode_app_supd_message(PMsg_p* message, std::string* appName, std::string* soPath); bool decode_app_spec_message(PMsg_p* message, std::string* appName, - uint32_t* distCount, uint8_t* appNumber); + uint32_t* distCount, uint8_t* appNumber, + bool* soloApp); bool decode_addresses_message(PMsg_p* message, std::vector* addresses, unsigned index=0); bool decode_addressed_packets_message(PMsg_p* message, std::vector* packets, unsigned index=0); + bool decode_bool_message(PMsg_p* message, bool* result, unsigned index=0); bool decode_char_message(PMsg_p* message, unsigned char* result, unsigned index=0); bool decode_packets_message(PMsg_p* message, diff --git a/Source/OrchBase/Handlers/CmDepl.cpp b/Source/OrchBase/Handlers/CmDepl.cpp index a4ab09fe..5c2c1758 100644 --- a/Source/OrchBase/Handlers/CmDepl.cpp +++ b/Source/OrchBase/Handlers/CmDepl.cpp @@ -1,8 +1,8 @@ //------------------------------------------------------------------------------ #include "CmDepl.h" -#include "OrchBase.h" #include "Pglobals.h" +#include "Root.h" /* Grabs Orchestrator config, and OrchBase. 
*/ #include "SupervisorModes.h" //============================================================================== @@ -376,9 +376,12 @@ int CmDepl::DeployGraph(GraphI_t* gi) specMessage.Put(1, &distCount); specMessage.Put(2, static_cast(&appNumber)); + bool soloApp = dynamic_cast(par)->pOC->SingleApp(); + specMessage.Put(3,&soloApp); fprintf(par->fd, "Sending SPEC message to Mothership rank %d, with " - "appNumber=%u and distCount=%u...", - mothershipPayloadsIt->first, appNumber, distCount); + "appNumber=%u, distCount=%u, and soloApp=%s...", + mothershipPayloadsIt->first, appNumber, distCount, + soloApp ? "true" : "false"); specMessage.Send(); fprintf(par->fd, " message sent.\n"); From 026550e9e79542d865ae0870aab92fd633f4515b Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: Fri, 27 Aug 2021 14:58:17 +0100 Subject: [PATCH 03/10] Whitespace fixes. --- Source/Mothership/MessageUtils.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Mothership/MessageUtils.cpp b/Source/Mothership/MessageUtils.cpp index 2c85f141..8c58fa2b 100644 --- a/Source/Mothership/MessageUtils.cpp +++ b/Source/Mothership/MessageUtils.cpp @@ -79,9 +79,9 @@ bool Mothership::decode_addressed_packets_message(PMsg_p* message, message->Put(); // Tell the message its type packets->clear(); - + message->Get(index, *packets); - + if (packets->empty()) { Post(516, hex2str(message->Key()), uint2str(index)); @@ -140,7 +140,7 @@ bool Mothership::decode_packets_message(PMsg_p* message, packets->clear(); message->Get(index, *packets); - + // If the packet vector has come back empty, there is an error. if (packets->empty()) { From 10850e9e297adaedccc2a2cf3c1132281325ef8e Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: Fri, 27 Aug 2021 17:46:49 +0100 Subject: [PATCH 04/10] Define an `APP,EMPT` message, which is sent by root in single-app mode at deploy time. It causes a Mothership to call backend's loadAll method. 
--- Source/Common/Pglobals.cpp | 3 ++- Source/Common/Pglobals.h | 3 +++ Source/Mothership/MPIHandlers.cpp | 23 ++++++++++++++++++++++- Source/Mothership/MessageUtils.cpp | 10 ++++++++++ Source/Mothership/Mothership.h | 3 +++ Source/Mothership/ThreadLogic.cpp | 4 +++- Source/OrchBase/Handlers/CmDepl.cpp | 28 +++++++++++++++++++++++++++- 7 files changed, 70 insertions(+), 4 deletions(-) diff --git a/Source/Common/Pglobals.cpp b/Source/Common/Pglobals.cpp index e986fcf7..9738b7e5 100644 --- a/Source/Common/Pglobals.cpp +++ b/Source/Common/Pglobals.cpp @@ -40,8 +40,9 @@ const byte Q::SPEC = 0x50; const byte Q::SUPD = 0x51; const byte Q::INIT = 0x52; const byte Q::CNC = 0x53; -const byte Q::ACKt = 0x54; +const byte Q::ACKt = 0x54; const byte Q::SUPR = 0x55; +const byte Q::EMPT = 0x56; // temporary use: for MPI testing ------------------------------------------ const byte Q::M0 = 0x60; const byte Q::M1 = 0x61; diff --git a/Source/Common/Pglobals.h b/Source/Common/Pglobals.h index 0651ccd5..f4b36525 100644 --- a/Source/Common/Pglobals.h +++ b/Source/Common/Pglobals.h @@ -80,6 +80,8 @@ Mothership ---------- EXIT |- |- |- | (None) SYST |KILL |- |- | (None) +APP |EMPT |- |- | (0:string)Code path to broadcast + | (1:string)Data path to broadcast APP |SPEC |- |- | (0:string)Application name (1:uint32_t)Number of expected distribution messages (2:uint8_t)Application number @@ -154,6 +156,7 @@ static const byte INIT; static const byte CNC; static const byte ACK; static const byte SUPR; +static const byte EMPT; // temporary use: for MPI testing ------------------------------------------ static const byte M0; static const byte M1; diff --git a/Source/Mothership/MPIHandlers.cpp b/Source/Mothership/MPIHandlers.cpp index f25a22aa..61ab28cd 100644 --- a/Source/Mothership/MPIHandlers.cpp +++ b/Source/Mothership/MPIHandlers.cpp @@ -55,7 +55,7 @@ unsigned Mothership::handle_msg_cnc(PMsg_p* message) #if ORCHESTRATOR_DEBUG std::string key = "Unknown"; if (message->Key() == 
PMsg_p::KEY(Q::APP,Q::SPEC)) - key = "Q::APP,Q::SPEC"; + key = "Q::APP,Q::EMPT"; else if (message->Key() == PMsg_p::KEY(Q::APP,Q::SPEC)) key = "Q::APP,Q::SPEC"; else if (message->Key() == PMsg_p::KEY(Q::APP,Q::DIST)) @@ -79,6 +79,27 @@ unsigned Mothership::handle_msg_cnc(PMsg_p* message) return 0; } +unsigned Mothership::handle_msg_app_empt(PMsg_p* message) +{ + /* Pull message contents. */ + std::string codePath; + std::string dataPath; + if (!decode_app_empt_message(message, &codePath, &dataPath)) + { + debug_post(597, 3, "Q::APP,Q::EMPT", hex2str(message->Key()).c_str(), + "Failed to decode."); + return 0; + } + + debug_post(597, 3, "Q::APP,Q::EMPT", hex2str(message->Key()).c_str(), + dformat("codePath=%s, dataPath=%s", + codePath.c_str(), dataPath.c_str())); + + /* gogogo */ + backend->loadAll(codePath.c_str(), dataPath.c_str()); + return 0; +} + unsigned Mothership::handle_msg_app_spec(PMsg_p* message) { AppInfo* appInfo; diff --git a/Source/Mothership/MessageUtils.cpp b/Source/Mothership/MessageUtils.cpp index 8c58fa2b..75909343 100644 --- a/Source/Mothership/MessageUtils.cpp +++ b/Source/Mothership/MessageUtils.cpp @@ -10,6 +10,16 @@ #include "Mothership.h" +bool Mothership::decode_app_empt_message( + PMsg_p* message, std::string* codePath, std::string* dataPath) +{ + codePath->clear(); + dataPath->clear(); + if(!decode_string_message(message, codePath)) return false; + if(!decode_string_message(message, dataPath, 1)) return false; + return true; +} + bool Mothership::decode_app_dist_message( PMsg_p* message, std::string* appName, std::string* codePath, std::string* dataPath, uint32_t* coreAddr, diff --git a/Source/Mothership/Mothership.h b/Source/Mothership/Mothership.h index 3fafa457..bc72ca59 100644 --- a/Source/Mothership/Mothership.h +++ b/Source/Mothership/Mothership.h @@ -51,6 +51,7 @@ class Mothership: public CommonBase void recall_application(AppInfo*); /* Methods for handling MPI messages (called by consumer threads). 
*/ + unsigned handle_msg_app_empt(PMsg_p* message); unsigned handle_msg_app_spec(PMsg_p* message); unsigned handle_msg_app_dist(PMsg_p* message); unsigned handle_msg_app_supd(PMsg_p* message); @@ -95,6 +96,8 @@ class Mothership: public CommonBase /* Methods for safely decoding MPI messages with certain field * configurations. */ + bool decode_app_empt_message(PMsg_p* message, std::string* codePath, + std::string* dataPath); bool decode_app_dist_message(PMsg_p* message, std::string* appName, std::string* codePath, std::string* dataPath, uint32_t* coreAddr, diff --git a/Source/Mothership/ThreadLogic.cpp b/Source/Mothership/ThreadLogic.cpp index f69d910b..bc9ee9ab 100644 --- a/Source/Mothership/ThreadLogic.cpp +++ b/Source/Mothership/ThreadLogic.cpp @@ -55,7 +55,9 @@ void* ThreadComms::mpi_cnc_resolver(void* mothershipArg) messageIt++) { key = messageIt->Key(); - if (key == PMsg_p::KEY(Q::APP, Q::SPEC)) + if (key == PMsg_p::KEY(Q::APP, Q::EMPT)) + mothership->handle_msg_app_empt(&*messageIt); + else if (key == PMsg_p::KEY(Q::APP, Q::SPEC)) mothership->handle_msg_app_spec(&*messageIt); else if (key == PMsg_p::KEY(Q::APP, Q::DIST)) mothership->handle_msg_app_dist(&*messageIt); diff --git a/Source/OrchBase/Handlers/CmDepl.cpp b/Source/OrchBase/Handlers/CmDepl.cpp index 5c2c1758..2dd0a68a 100644 --- a/Source/OrchBase/Handlers/CmDepl.cpp +++ b/Source/OrchBase/Handlers/CmDepl.cpp @@ -145,6 +145,7 @@ int CmDepl::DeployGraph(GraphI_t* gi) PMsg_p specMessage; PMsg_p distMessage; PMsg_p supdMessage; + PMsg_p emptMessage; std::vector messages; std::vector::iterator messageIt; @@ -347,6 +348,7 @@ int CmDepl::DeployGraph(GraphI_t* gi) messages.push_back(&specMessage); messages.push_back(&distMessage); messages.push_back(&supdMessage); + messages.push_back(&emptMessage); for (messageIt = messages.begin(); messageIt != messages.end(); messageIt++) { @@ -356,6 +358,7 @@ int CmDepl::DeployGraph(GraphI_t* gi) specMessage.Key(Q::APP, Q::SPEC); distMessage.Key(Q::APP, Q::DIST); 
supdMessage.Key(Q::APP, Q::SUPD); + emptMessage.Key(Q::APP, Q::EMPT); /* Iterate through participating Motherships. */ for (mothershipPayloadsIt = mothershipPayloads.begin(); @@ -369,6 +372,30 @@ int CmDepl::DeployGraph(GraphI_t* gi) (*messageIt)->Tgt(mothershipPayloadsIt->first); } + /* Customise and send the EMPT message, if configured to use them. */ + bool soloApp = dynamic_cast(par)->pOC->SingleApp(); + if (soloApp) + { + /* TODO: GMB to plonk binary paths here (or at least some way to + * grab them). */ + std::string codePath; + std::string dataPath; + + emptMessage.Put(0, &codePath); + emptMessage.Put(1, &dataPath); + fprintf(par->fd, "Sending EMPT message to Mothership rank %d, " + "with codepath=%s and dataPath=%s...", + mothershipPayloadsIt->first, codePath.c_str(), + dataPath.c_str()); + emptMessage.Send(); + fprintf(par->fd, " message sent.\n"); + } + else + { + fprintf(par->fd, "Not sending EMPT message, as Orchestrator is " + "not configured to operate in single-application mode.\n"); + } + /* Customise and send the SPEC message. */ appNumber = 0; /* This is terrible - only one graph instance can be * loaded at a time! TODO */ @@ -376,7 +403,6 @@ int CmDepl::DeployGraph(GraphI_t* gi) specMessage.Put(1, &distCount); specMessage.Put(2, static_cast(&appNumber)); - bool soloApp = dynamic_cast(par)->pOC->SingleApp(); specMessage.Put(3,&soloApp); fprintf(par->fd, "Sending SPEC message to Mothership rank %d, with " "appNumber=%u, distCount=%u, and soloApp=%s...", From 8ef318269c9b88fb80f8e7ff39b2ce057224db67 Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: Fri, 27 Aug 2021 17:50:55 +0100 Subject: [PATCH 05/10] Add a preprocessor warning to make sure GMB defines these paths in future. 
--- Source/OrchBase/Handlers/CmDepl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/OrchBase/Handlers/CmDepl.cpp b/Source/OrchBase/Handlers/CmDepl.cpp index 2dd0a68a..9f0cb1eb 100644 --- a/Source/OrchBase/Handlers/CmDepl.cpp +++ b/Source/OrchBase/Handlers/CmDepl.cpp @@ -378,6 +378,7 @@ int CmDepl::DeployGraph(GraphI_t* gi) { /* TODO: GMB to plonk binary paths here (or at least some way to * grab them). */ + #warning GMB to define these paths somewhere. std::string codePath; std::string dataPath; From 02c286df7d2c916ccec1f97bd709d6bcdd925bc4 Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: Fri, 27 Aug 2021 18:50:08 +0100 Subject: [PATCH 06/10] Put the Tinsel submodule back (whoops). --- .gitmodules | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitmodules b/.gitmodules index e69de29b..ab22aa56 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "Tinsel"] + path = Tinsel + url = https://github.com/poetsii/tinsel + branch = master From e9c983a6bdba2d661bbdd0ca593d7f7bc68ea497 Mon Sep 17 00:00:00 2001 From: Graeme Bragg Date: Mon, 25 Oct 2021 18:17:10 +0100 Subject: [PATCH 07/10] add logic to get dummy binaries --- Source/OrchBase/Composer.cpp | 34 +++++++++++++++++++++++++++++ Source/OrchBase/Composer.h | 2 ++ Source/OrchBase/Handlers/CmDepl.cpp | 6 ++--- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/Source/OrchBase/Composer.cpp b/Source/OrchBase/Composer.cpp index 665c0958..901a0f84 100644 --- a/Source/OrchBase/Composer.cpp +++ b/Source/OrchBase/Composer.cpp @@ -38,6 +38,9 @@ ComposerGraphI_t::ComposerGraphI_t() compilationFlags = ""; provenanceCache = ""; + + idleInstructionBinary = ""; + idleDataBinary = ""; } ComposerGraphI_t::ComposerGraphI_t(GraphI_t* graphIIn, std::string& outputPath) @@ -59,6 +62,9 @@ ComposerGraphI_t::ComposerGraphI_t(GraphI_t* graphIIn, std::string& outputPath) compilationFlags = ""; provenanceCache = ""; + + idleInstructionBinary = ""; + idleDataBinary = ""; } 
ComposerGraphI_t::~ComposerGraphI_t() @@ -1017,6 +1023,34 @@ bool Composer::isCompiled(GraphI_t* graphI) return builderGraphI->compiled; } +/****************************************************************************** + * Public method to get the paths for the HW Idle binaries + *****************************************************************************/ +bool Composer::getDummyPaths(GraphI_t* graphI, std::string& instrBin, + std::string& dataBin) +{ + ComposerGraphI_t* builderGraphI; + + ComposerGraphIMap_t::iterator srch = graphIMap.find(graphI); + if (srch == graphIMap.end()) + { // The Graph Instance has not been seen before, so not compiled. + return false; + + } else { + builderGraphI = srch->second; + } + + if(builderGraphI->idleInstructionBinary == "" || + builderGraphI->idleDataBinary == "") + { // We are missing a binary path, return false. + return false; + } + + instrBin = builderGraphI->idleInstructionBinary; + dataBin = builderGraphI->idleDataBinary; + return true; +} + /****************************************************************************** * Invoke a clean and then a degenerate *****************************************************************************/ diff --git a/Source/OrchBase/Composer.h b/Source/OrchBase/Composer.h index f7293064..69c62aba 100644 --- a/Source/OrchBase/Composer.h +++ b/Source/OrchBase/Composer.h @@ -143,6 +143,8 @@ int addFlags(GraphI_t*, std::string&); bool isGenerated(GraphI_t*); bool isCompiled(GraphI_t*); +bool getDummyPaths(GraphI_t*, std::string&, std::string&); // Get the hardware idle binary names + void Show(FILE * = stdout); void Dump(unsigned = 0,FILE * = stdout); diff --git a/Source/OrchBase/Handlers/CmDepl.cpp b/Source/OrchBase/Handlers/CmDepl.cpp index 9f0cb1eb..688d8742 100644 --- a/Source/OrchBase/Handlers/CmDepl.cpp +++ b/Source/OrchBase/Handlers/CmDepl.cpp @@ -376,11 +376,11 @@ int CmDepl::DeployGraph(GraphI_t* gi) bool soloApp = dynamic_cast(par)->pOC->SingleApp(); if (soloApp) { - /* TODO: GMB 
to plonk binary paths here (or at least some way to - * grab them). */ - #warning GMB to define these paths somewhere. std::string codePath; std::string dataPath; + + // Get the dummy binary paths + par->pComposer->getDummyPaths(gi, codePath, dataPath); emptMessage.Put(0, &codePath); emptMessage.Put(1, &dataPath); From c4450feca850fc03cc249d7fcbceaa5d8c310140 Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: Wed, 27 Oct 2021 13:47:45 +0100 Subject: [PATCH 08/10] Add missing entry for APP,EMPT messages in the Mothership's MPI message map. --- Source/Mothership/MPIHandlers.cpp | 2 +- Source/Mothership/Mothership.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/Mothership/MPIHandlers.cpp b/Source/Mothership/MPIHandlers.cpp index 61ab28cd..2f2d784b 100644 --- a/Source/Mothership/MPIHandlers.cpp +++ b/Source/Mothership/MPIHandlers.cpp @@ -54,7 +54,7 @@ unsigned Mothership::handle_msg_cnc(PMsg_p* message) { #if ORCHESTRATOR_DEBUG std::string key = "Unknown"; - if (message->Key() == PMsg_p::KEY(Q::APP,Q::SPEC)) + if (message->Key() == PMsg_p::KEY(Q::APP,Q::EMPT)) key = "Q::APP,Q::EMPT"; else if (message->Key() == PMsg_p::KEY(Q::APP,Q::SPEC)) key = "Q::APP,Q::SPEC"; diff --git a/Source/Mothership/Mothership.cpp b/Source/Mothership/Mothership.cpp index 35f7a471..7052ee70 100644 --- a/Source/Mothership/Mothership.cpp +++ b/Source/Mothership/Mothership.cpp @@ -196,6 +196,7 @@ void Mothership::setup_mpi_hooks() DebugPrint("[MOTHERSHIP] Setting up MPI hooks.\n"); FnMap[PMsg_p::KEY(Q::EXIT)] = &Mothership::handle_msg_exit; FnMap[PMsg_p::KEY(Q::SYST,Q::KILL)] = &Mothership::handle_msg_syst_kill; + FnMap[PMsg_p::KEY(Q::APP,Q::EMPT)] = &Mothership::handle_msg_cnc; FnMap[PMsg_p::KEY(Q::APP,Q::SPEC)] = &Mothership::handle_msg_cnc; FnMap[PMsg_p::KEY(Q::APP,Q::DIST)] = &Mothership::handle_msg_cnc; FnMap[PMsg_p::KEY(Q::APP,Q::SUPD)] = &Mothership::handle_msg_cnc; From 7fb7f9ecdabf272f91a33336604faca7a9b5436c Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: 
Wed, 27 Oct 2021 13:47:59 +0100 Subject: [PATCH 09/10] Fix bad decode message format. --- Config/OrchestratorMessages.ocfg | 2 +- Source/Common/Decode.cpp | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Config/OrchestratorMessages.ocfg b/Config/OrchestratorMessages.ocfg index 9d5de844..27fb0530 100644 --- a/Config/OrchestratorMessages.ocfg +++ b/Config/OrchestratorMessages.ocfg @@ -133,7 +133,7 @@ // Loading, typelinking 100(U) : "MLV: Allocated but as yet unused" -101(S) : "Decoder in %s (PID %s) has dropped a packet from %s(rank %s) to %s(rank %s) with key 0x%s" +101(S) : "Decoder in %s (rank %s) has dropped a packet from %s (rank %s) with key 0x%s." 102(I) : "Task graph default file path is ||%s||" 103(I) : "New path is ||%s||" 104(W) : "Filename %s does not parse correctly" diff --git a/Source/Common/Decode.cpp b/Source/Common/Decode.cpp index 262d1058..f738e7c5 100644 --- a/Source/Common/Decode.cpp +++ b/Source/Common/Decode.cpp @@ -32,12 +32,9 @@ if (CommonBase::FnMap.find(pPkt->Key())!=CommonBase::FnMap.end()) { // Nope. Kick. // Pull out the unknown key and post what // little we know to the LogServer -Post(101,Sderived,int2str(pPkt->Src()),pPmap->vPmap[pPkt->Src()].P_class,int2str(pPkt->Tgt()), - pPmap->vPmap[pPkt->Tgt()].P_class,hex2str(pPkt->Key())); +Post(101,Sderived,int2str(pPkt->Tgt()),pPmap->vPmap[pPkt->Src()].P_class, + int2str(pPkt->Src()),hex2str(pPkt->Key())); return 0; // Return "keep going" value } //============================================================================== - - - From 09e91f32e8ace2693d2cfc24126c92154c8f9b94 Mon Sep 17 00:00:00 2001 From: Mark Vousden Date: Wed, 27 Oct 2021 13:30:56 +0000 Subject: [PATCH 10/10] Fix some misleading reporting in debug mode. 
--- Config/OrchestratorMessages.ocfg | 2 +- Source/Mothership/MPIHandlers.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Config/OrchestratorMessages.ocfg b/Config/OrchestratorMessages.ocfg index 27fb0530..73058130 100644 --- a/Config/OrchestratorMessages.ocfg +++ b/Config/OrchestratorMessages.ocfg @@ -490,7 +490,7 @@ 574(U) : "MLV: Allocated but as yet unused" 575(U) : "MLV: Allocated but as yet unused" 576(U) : "MLV: Allocated but as yet unused" -577(U) : "MLV: Allocated but as yet unused" +577(I) : "Mothership: Calling backend->loadAll." 578(I) : "Mothership: Calling backend->go." 579(I) : "Mothership: Calling backend->startAll." 580(E) : "Mothership: Received a log packet with an invalid device index 0x%s." diff --git a/Source/Mothership/MPIHandlers.cpp b/Source/Mothership/MPIHandlers.cpp index 2f2d784b..58f77b1b 100644 --- a/Source/Mothership/MPIHandlers.cpp +++ b/Source/Mothership/MPIHandlers.cpp @@ -93,9 +93,10 @@ unsigned Mothership::handle_msg_app_empt(PMsg_p* message) debug_post(597, 3, "Q::APP,Q::EMPT", hex2str(message->Key()).c_str(), dformat("codePath=%s, dataPath=%s", - codePath.c_str(), dataPath.c_str())); + codePath.c_str(), dataPath.c_str()).c_str()); /* gogogo */ + debug_post(577, 0); backend->loadAll(codePath.c_str(), dataPath.c_str()); return 0; }