From 831739b5effff471560f8395722141a90d2d3127 Mon Sep 17 00:00:00 2001
From: Sergio
Date: Fri, 22 Sep 2023 13:03:41 +0200
Subject: [PATCH] Improve CoreNEURON file transfer mode

---
 src/nrniv/nrncore_write.cpp               | 49 +++++++++++++++--
 src/nrniv/nrncore_write/io/nrncore_io.cpp | 64 ++++++++++++++++-------
 src/nrniv/nrncore_write/io/nrncore_io.h   |  5 +-
 src/nrniv/partrans.cpp                    | 41 ++++++++++++---
 4 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/src/nrniv/nrncore_write.cpp b/src/nrniv/nrncore_write.cpp
index a3b5d8d94c..f86e3eb22e 100644
--- a/src/nrniv/nrncore_write.cpp
+++ b/src/nrniv/nrncore_write.cpp
@@ -218,17 +218,56 @@ static part1_ret part1() {
     return {rankbytes, std::move(sorted_token)};
 }
 
+// TODO: move to a common header; the same helper is duplicated in nrncore_io.cpp and partrans.cpp
+static std::string get_rank_fname(const char* basepath, bool create_folder = true) {
+    // TODO: Change this for equivalent MPI functions to get the node ID
+    std::string nodepath;
+    if (const char* slurm_node = std::getenv("SLURM_NODEID")) {
+        // Nodes are bucketed by node_id / factor into a parent folder
+        const int factor = 20;
+        const int node_id = std::atoi(slurm_node);
+        nodepath = std::to_string(node_id / factor) + "/" + slurm_node;
+    } else if (const char* hostname = std::getenv("HOSTNAME")) {
+        nodepath = hostname;
+    }
+
+    // Built outside the `if` so that it is still in scope for the return statement
+    const std::string path = std::string(basepath) + "/" + nodepath;
+    if (create_folder) {
+        mkdir_p(path.c_str());
+    }
+
+    // nrnmpi_myid is numeric and has to be converted before concatenation
+    return path + "/" + std::to_string(nrnmpi_myid) + ".dat";
+}
+
+// Current size of `fname`; opening with "ab" leaves existing content untouched
+size_t get_filesize(const char* fname) {
+    FILE* f = fopen(fname, "ab");
+    if (!f) {
+        hoc_execerror("get_filesize could not open:", fname);
+    }
+    fseek(f, 0, SEEK_END);  // initial position of an append stream is implementation-defined
+    const size_t offset = ftell(f);
+    fclose(f);
+    return offset;
+}
+
 static void part2(const char* path) {
+    // Value-initialised because some entries are only assigned conditionally below
+    std::array<size_t, 4> offsets{};
     CellGroup* cgs = cellgroups_;
     for (int i = 0; i < nrn_nthread; ++i) {
         chkpnt = 0;
-        write_nrnthread(path, nrn_threads[i], cgs[i]);
+        const auto& nrnthread_offsets = write_nrnthread(path, nrn_threads[i], cgs[i]);
+        offsets[0] = nrnthread_offsets[0];  // NOTE(review): overwritten on every iteration;
+        offsets[1] = nrnthread_offsets[1];  // confirm the intent when nrn_nthread > 1
     }
 
     /** write mapping information */
     if (mapinfo.size()) {
         int gid = cgs[0].group_id;
-        nrn_write_mapping_info(path, gid, mapinfo);
+        offsets[2] = nrn_write_mapping_info(path, gid, mapinfo);
         mapinfo.clear();
     }
 
@@ -238,10 +277,12 @@ static void part2(const char* path) {
         for (int i = 0; i < nrn_nthread; ++i) {
             group_ids[i] = cgs[i].group_id;
         }
-        nrnbbcore_gap_write(path, group_ids);
+        offsets[3] = nrnbbcore_gap_write(path, group_ids);
         delete[] group_ids;
     }
 
+    // TODO: get_filesize(get_rank_fname(path).c_str()) (gap junction may or may not be there, and we still need the last size of the file)
+
     // filename data might have to be collected at hoc level since
     // pc.nrncore_write might be called
     // many times per rank since model may be built as series of submodels.
@@ -262,7 +303,7 @@ static void part2(const char* path) {
                 hoc_execerror("Second arg must be Vector or double.", NULL);
             }
         }
-        write_nrnthread_task(path, cgs, append);
+        write_nrnthread_task(path, cgs, append);  // TODO: forward `offsets` once the callee accepts them
     }
 
     part2_clean();
diff --git a/src/nrniv/nrncore_write/io/nrncore_io.cpp b/src/nrniv/nrncore_write/io/nrncore_io.cpp
index ef41bc3f48..70dfe87247 100644
--- a/src/nrniv/nrncore_write/io/nrncore_io.cpp
+++ b/src/nrniv/nrncore_write/io/nrncore_io.cpp
@@ -54,6 +54,28 @@ std::string get_filename(const std::string& path, std::string file_name) {
     return fname;
 }
 
+static std::string get_rank_fname(const char* basepath, bool create_folder = true) {
+    // TODO: Change this for equivalent MPI functions to get the node ID
+    std::string nodepath;
+    if (const char* slurm_node = std::getenv("SLURM_NODEID")) {
+        // Nodes are bucketed by node_id / factor into a parent folder
+        const int factor = 20;
+        const int node_id = std::atoi(slurm_node);
+        nodepath = std::to_string(node_id / factor) + "/" + slurm_node;
+    } else if (const char* hostname = std::getenv("HOSTNAME")) {
+        nodepath = hostname;
+    }
+
+    // Built outside the `if` so that it is still in scope for the return statement
+    const std::string path = std::string(basepath) + "/" + nodepath;
+    if (create_folder) {
+        mkdir_p(path.c_str());
+    }
+
+    // nrnmpi_myid is numeric and has to be converted before concatenation
+    return path + "/" + std::to_string(nrnmpi_myid) + ".dat";
+}
+
 
 void write_memb_mech_types(const char* fname) {
     if (nrnmpi_myid > 0) {
@@ -113,18 +135,22 @@ void write_globals(const char* fname) {
     fclose(f);
 }
 
+std::array<size_t, 2> write_nrnthread(const char* path, NrnThread& nt, CellGroup& cg) {
+    std::array<size_t, 2> offsets = {0, 0};
 
-void write_nrnthread(const char* path, NrnThread& nt, CellGroup& cg) {
-    char fname[1000];
     if (cg.n_output <= 0) {
-        return;
+        return offsets;
     }
     assert(cg.group_id >= 0);
-    nrn_assert(snprintf(fname, 1000, "%s/%d_1.dat", path, cg.group_id) < 1000);
-    FILE* f = fopen(fname, "wb");
+    const std::string fname = get_rank_fname(path);
+    FILE* f = fopen(fname.c_str(), "ab");
     if (!f) {
-        hoc_execerror("nrncore_write write_nrnthread could not open for writing:", fname);
+        hoc_execerror("nrncore_write write_nrnthread could not open for writing:", fname.c_str());
     }
+    // First offset: seek explicitly, an append stream's initial position is implementation-defined
+    fseek(f, 0, SEEK_END);
+    offsets[0] = ftell(f);
+
     fprintf(f, "%s\n", bbcore_write_version);
 
     // nrnthread_dat1(int tid, int& n_presyn, int& n_netcon, int*& output_gid, int*& netcon_srcgid);
@@ -138,13 +164,9 @@ void write_nrnthread(const char* path, NrnThread& nt, CellGroup& cg) {
         delete[] cg.netcon_srcgid;
         cg.netcon_srcgid = NULL;
     }
-    fclose(f);
 
-    nrn_assert(snprintf(fname, 1000, "%s/%d_2.dat", path, cg.group_id) < 1000);
-    f = fopen(fname, "w");
-    if (!f) {
-        hoc_execerror("nrncore_write write_nrnthread could not open for writing:", fname);
-    }
+    // Set the second offset inside the file
+    offsets[1] = ftell(f);
 
     fprintf(f, "%s\n", bbcore_write_version);
 
@@ -286,6 +308,8 @@ void write_nrnthread(const char* path, NrnThread& nt, CellGroup& cg) {
     nrnbbcore_vecplay_write(f, nt);
 
     fclose(f);
+
+    return offsets;
 }
 
 
@@ -516,17 +540,15 @@ void write_nrnthread_task(const char* path, CellGroup* cgs, bool append) {
 }
 
 /** @brief dump mapping information to gid_3.dat file */
-void nrn_write_mapping_info(const char* path, int gid, NrnMappingInfo& minfo) {
-    /** full path of mapping file */
-    std::stringstream ss;
-    ss << path << "/" << gid << "_3.dat";
-
-    std::string fname(ss.str());
-    FILE* f = fopen(fname.c_str(), "w");
-
+size_t nrn_write_mapping_info(const char* path, int gid, NrnMappingInfo& minfo) {
+    const std::string fname = get_rank_fname(path);
+    FILE* f = fopen(fname.c_str(), "ab");
     if (!f) {
         hoc_execerror("nrnbbcore_write could not open for writing:", fname.c_str());
     }
+    // Offset of this section: seek first, an append stream's initial position is implementation-defined
+    fseek(f, 0, SEEK_END);
+    const size_t offset = ftell(f);
 
     fprintf(f, "%s\n", bbcore_write_version);
 
@@ -563,4 +585,6 @@ void nrn_write_mapping_info(const char* path, int gid, NrnMappingInfo& minfo) {
         }
     }
     fclose(f);
+
+    return offset;
 }
diff --git a/src/nrniv/nrncore_write/io/nrncore_io.h b/src/nrniv/nrncore_write/io/nrncore_io.h
index ee690b694d..4c247f755e 100644
--- a/src/nrniv/nrncore_write/io/nrncore_io.h
+++ b/src/nrniv/nrncore_write/io/nrncore_io.h
@@ -2,6 +2,7 @@
 #define NRN_NRNCORE_IO_H
 
 #include "hocdec.h"
+#include <array>
 #include <string>
 #include <vector>
 
@@ -27,7 +28,7 @@ extern int chkpnt;
 
 void write_memb_mech_types(const char* fname);
 void write_globals(const char* fname);
-void write_nrnthread(const char* fname, NrnThread& nt, CellGroup& cg);
+std::array<size_t, 2> write_nrnthread(const char* fname, NrnThread& nt, CellGroup& cg);
 void writeint_(int* p, size_t size, FILE* f);
 void writedbl_(double* p, size_t size, FILE* f);
 
@@ -41,7 +42,7 @@ using bbcore_write_t =
 void write_nrnthread_task(const char*, CellGroup* cgs, bool append);
 void nrnbbcore_vecplay_write(FILE* f, NrnThread& nt);
 
-void nrn_write_mapping_info(const char* path, int gid, NrnMappingInfo& minfo);
+size_t nrn_write_mapping_info(const char* path, int gid, NrnMappingInfo& minfo);
 
 
 #endif // NRN_NRNCORE_IO_H
diff --git a/src/nrniv/partrans.cpp b/src/nrniv/partrans.cpp
index c632b2a845..b28c951e03 100644
--- a/src/nrniv/partrans.cpp
+++ b/src/nrniv/partrans.cpp
@@ -964,10 +964,35 @@ SetupTransferInfo* nrn_get_partrans_setup_info(int ngroup, int cn_nthread, size_
     return nrncore_transfer_info(cn_nthread);
 }
 
+// TODO: move to a common header; the same helper is duplicated in nrncore_write.cpp and nrncore_io.cpp
+static std::string get_rank_fname(const char* basepath, bool create_folder = true) {
+    // TODO: Change this for equivalent MPI functions to get the node ID
+    std::string nodepath;
+    if (const char* slurm_node = std::getenv("SLURM_NODEID")) {
+        // Nodes are bucketed by node_id / factor into a parent folder
+        const int factor = 20;
+        const int node_id = std::atoi(slurm_node);
+        nodepath = std::to_string(node_id / factor) + "/" + slurm_node;
+    } else if (const char* hostname = std::getenv("HOSTNAME")) {
+        nodepath = hostname;
+    }
+
+    // Built outside the `if` so that it is still in scope for the return statement
+    const std::string path = std::string(basepath) + "/" + nodepath;
+    if (create_folder) {
+        mkdir_p(path.c_str());
+    }
+
+    // nrnmpi_myid is numeric and has to be converted before concatenation
+    return path + "/" + std::to_string(nrnmpi_myid) + ".dat";
+}
+
 size_t nrnbbcore_gap_write(const char* path, int* group_ids) {
+    size_t offset = 0;
+
     auto gi = nrncore_transfer_info(nrn_nthread);  // gi stood for gapinfo
     if (gi == nullptr) {
-        return 0;
+        return offset;
     }
 
     // print the files
@@ -978,10 +1003,14 @@ size_t nrnbbcore_gap_write(const char* path, int* group_ids) {
             continue;
         }
 
-        char fname[1000];
-        Sprintf(fname, "%s/%d_gap.dat", path, group_ids[tid]);
-        FILE* f = fopen(fname, "wb");
-        assert(f);
+        const std::string fname = get_rank_fname(path);
+        FILE* f = fopen(fname.c_str(), "ab");
+        if (!f) {
+            hoc_execerror("nrnbbcore_write could not open for writing:", fname.c_str());
+        }
+        // Offset of this section: seek first, an append stream's initial position is implementation-defined
+        fseek(f, 0, SEEK_END);
+        offset = ftell(f);
         fprintf(f, "%s\n", bbcore_write_version);
         fprintf(f, "%d sizeof_sid_t\n", int(sizeof(sgid_t)));
 
@@ -1010,7 +1039,7 @@ size_t nrnbbcore_gap_write(const char* path, int* group_ids) {
 
     // cleanup
     delete[] gi;
-    return 0;
+    return offset;
 }
 
 static SetupTransferInfo* nrncore_transfer_info(int cn_nthread) {