
Add support for fully local HDF5 files and shared dumping of meep::structure #1715

Merged: 6 commits, Aug 19, 2021
12 changes: 4 additions & 8 deletions doc/docs/Python_User_Interface.md
@@ -114,7 +114,6 @@ def __init__(self,
filename_prefix=None,
output_volume=None,
output_single_precision=False,
load_structure='',
geometry_center=Vector3<0.0, 0.0, 0.0>,
force_all_components=False,
split_chunks_evenly=True,
@@ -314,11 +313,6 @@ Python. `Vector3` is a `meep` class.
that are specified by geometric objects. You should list any materials other
than scalar dielectrics that are returned by `material_function` here.

+ **`load_structure` [`string`]** — If not empty, Meep will load the structure
file specified by this string. The file must have been created by
`mp.dump_structure`. Defaults to an empty string. See [Load and Dump
Structure](#load-and-dump-structure) for more information.

+ **`chunk_layout` [`string` or `Simulation` instance or `BinaryPartition` class]** —
This will cause the `Simulation` to use the chunk layout described by either
(1) an `.h5` file (created using `Simulation.dump_chunk_layout`), (2) another
@@ -2305,13 +2299,15 @@ is a 1d array of `nfreq` dimensions.

These functions dump the raw ε and μ data to disk and load it back, enabling multiple simulations with the same materials but different sources, etc. The only prerequisite is that the dump/load simulations have the same [chunks](Chunks_and_Symmetry.md) (i.e. the same grid, number of processors, symmetries, and PML). When using `split_chunks_evenly=False`, you must also dump the original chunk layout using `dump_chunk_layout` and load it into the new `Simulation` using the `chunk_layout` parameter. Currently this stores only the dispersive and non-dispersive $\varepsilon$ and $\mu$, not nonlinearities. Note that loading data from a file in this way overwrites any `geometry` data passed to the `Simulation` constructor.
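As a concrete illustration of this workflow, here is a minimal sketch following the pattern of `python/tests/test_simulation.py` in this PR (the cell, geometry, and file names are illustrative):

```python
import meep as mp

cell = mp.Vector3(5, 5)
geometry = [mp.Block(size=mp.Vector3(1, 1), material=mp.Medium(epsilon=12))]

# Dump run: the chunk layout must be saved when split_chunks_evenly=False.
sim1 = mp.Simulation(resolution=50, cell_size=cell, geometry=geometry,
                     split_chunks_evenly=False)
sim1.init_sim()
sim1.dump_structure('structure.h5')
sim1.dump_chunk_layout('chunk_layout.h5')

# Load run: reproduce the same chunk layout, then load; the loaded
# structure overwrites any geometry passed to the constructor.
sim2 = mp.Simulation(resolution=50, cell_size=cell,
                     chunk_layout='chunk_layout.h5')
sim2.init_sim()
sim2.load_structure('structure.h5')
```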

For `dump_structure` and `load_structure`, when `single_parallel_file=True` (the default), all chunks write to the same single file after computing their respective offsets into it. When set to `False`, each chunk writes its data to a separate file named by its chunk index.
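When dumping sharded files, the per-rank naming is handled by the new directory-level `Simulation.dump`/`Simulation.load` methods (added in `python/simulation.py` below), which give each rank its own `rankNN` subdirectory. A minimal sketch, intended to be run under MPI with the same number of processes for both runs; the directory name is illustrative:

```python
import meep as mp

cell = mp.Vector3(5, 5)
geometry = [mp.Block(size=mp.Vector3(1, 1), material=mp.Medium(epsilon=12))]

sim1 = mp.Simulation(resolution=50, cell_size=cell, geometry=geometry)
sim1.init_sim()
# Each rank writes its own my_dump_dir/rankNN/structure.h5.
sim1.dump('my_dump_dir', structure=True, single_parallel_file=False)

sim2 = mp.Simulation(resolution=50, cell_size=cell)
# Record what to load; the file is actually read inside init_sim().
sim2.load('my_dump_dir', structure=True, single_parallel_file=False)
sim2.init_sim()
```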


<a id="Simulation.dump_structure"></a>

<div class="class_members" markdown="1">

```python
def dump_structure(self, fname):
def dump_structure(self, fname, single_parallel_file=True):
```

<div class="method_docstring" markdown="1">
Expand All @@ -2327,7 +2323,7 @@ Dumps the structure to the file `fname`.
<div class="class_members" markdown="1">

```python
def load_structure(self, fname):
def load_structure(self, fname, single_parallel_file=True):
```

<div class="method_docstring" markdown="1">
Expand Down
2 changes: 2 additions & 0 deletions doc/docs/Python_User_Interface.md.in
@@ -351,6 +351,8 @@ And this [`DftNear2Far`](#DftNear2Far) method:

These functions dump the raw ε and μ data to disk and load it back, enabling multiple simulations with the same materials but different sources, etc. The only prerequisite is that the dump/load simulations have the same [chunks](Chunks_and_Symmetry.md) (i.e. the same grid, number of processors, symmetries, and PML). When using `split_chunks_evenly=False`, you must also dump the original chunk layout using `dump_chunk_layout` and load it into the new `Simulation` using the `chunk_layout` parameter. Currently this stores only the dispersive and non-dispersive $\varepsilon$ and $\mu$, not nonlinearities. Note that loading data from a file in this way overwrites any `geometry` data passed to the `Simulation` constructor.

For `dump_structure` and `load_structure`, when `single_parallel_file=True` (the default), all chunks write to the same single file after computing their respective offsets into it. When set to `False`, each chunk writes its data to a separate file named by its chunk index.

@@ Simulation.dump_structure @@
@@ Simulation.load_structure @@
@@ Simulation.dump_chunk_layout @@
56 changes: 44 additions & 12 deletions python/simulation.py
@@ -961,7 +961,6 @@ def __init__(self,
filename_prefix=None,
output_volume=None,
output_single_precision=False,
load_structure='',
geometry_center=mp.Vector3(),
force_all_components=False,
split_chunks_evenly=True,
@@ -1158,11 +1157,6 @@ def __init__(self,
that are specified by geometric objects. You should list any materials other
than scalar dielectrics that are returned by `material_function` here.

+ **`load_structure` [`string`]** — If not empty, Meep will load the structure
file specified by this string. The file must have been created by
`mp.dump_structure`. Defaults to an empty string. See [Load and Dump
Structure](#load-and-dump-structure) for more information.

+ **`chunk_layout` [`string` or `Simulation` instance or `BinaryPartition` class]** —
This will cause the `Simulation` to use the chunk layout described by either
(1) an `.h5` file (created using `Simulation.dump_chunk_layout`), (2) another
@@ -1235,7 +1229,6 @@ def __init__(self,
self.is_cylindrical = False
self.material_function = material_function
self.epsilon_func = epsilon_func
self.load_structure_file = load_structure
self.dft_objects = []
self._is_initialized = False
self.force_all_components = force_all_components
@@ -1246,6 +1239,9 @@ def __init__(self,
self.fragment_stats = None
self._output_stats = os.environ.get('MEEP_STATS', None)

self.load_single_parallel_file = True
self.load_structure_file = None

self.special_kz = False
if self.cell_size.z == 0 and self.k_point and self.k_point.z != 0:
if kz_2d == "complex":
@@ -1717,7 +1713,8 @@ def _init_structure(self, k=False):
self.num_chunks = self.chunk_layout.numchunks()

if self.load_structure_file:
self.load_structure(self.load_structure_file)
self.load_structure(
self.load_structure_file, self.load_single_parallel_file)

def _is_outer_boundary(self, vol, direction, side):

@@ -1861,22 +1858,22 @@ def set_materials(self, geometry=None, default_material=None):
None
)

def dump_structure(self, fname):
def dump_structure(self, fname, single_parallel_file=True):
"""
Dumps the structure to the file `fname`.
"""
if self.structure is None:
raise ValueError("Fields must be initialized before calling dump_structure")
self.structure.dump(fname)
self.structure.dump(fname, single_parallel_file)

def load_structure(self, fname):
def load_structure(self, fname, single_parallel_file=True):
"""
Loads a structure from the file `fname`.
"""
if self.structure is None:
raise ValueError("Fields must be initialized before calling load_structure")
self.structure.load(fname)
self.structure.load(fname, single_parallel_file)

def dump_chunk_layout(self, fname):
"""
@@ -1898,6 +1895,41 @@ def load_chunk_layout(self, br, source):
## source is either filename (string)
self.structure.load_chunk_layout(source, br)

def _get_load_dump_dirname(self, dirname, single_parallel_file):
"""
Get the dirname to dump simulation state to.
"""
if single_parallel_file:
dump_dirname = dirname
else:
# When doing a sharded dump (each process to its own file), use
# the process rank to get a unique name.
dump_dirname = os.path.join(dirname, 'rank%02d' % mp.my_rank())
return dump_dirname

def dump(self, dirname, structure=True, single_parallel_file=True):
"""
Dumps simulation state.
"""
dump_dirname = self._get_load_dump_dirname(dirname, single_parallel_file)
os.makedirs(dump_dirname, exist_ok=True)

if structure:
structure_dump_filename = os.path.join(dump_dirname, 'structure.h5')
self.dump_structure(structure_dump_filename, single_parallel_file)

def load(self, dirname, structure=True, single_parallel_file=True):
"""
Loads simulation state.

This should be called right after the Simulation object has been created
but before 'init_sim' is called.
"""
dump_dirname = self._get_load_dump_dirname(dirname, single_parallel_file)
self.load_single_parallel_file = single_parallel_file
if structure:
self.load_structure_file = os.path.join(dump_dirname, 'structure.h5')

def init_sim(self):
if self._is_initialized:
return
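The docstrings above pin down the call order: `dump` needs an initialized structure, while `load` must run after construction but before `init_sim`. A hedged sketch of the intended reuse, assuming `my_dump_dir` was produced earlier by `Simulation.dump` from a run with the same cell, resolution, boundary layers, and symmetries:

```python
import meep as mp

cell = mp.Vector3(5, 5)
pml_layers = [mp.PML(0.5)]
sources = [mp.Source(mp.GaussianSource(1.0, fwidth=0.1),
                     component=mp.Ez, center=mp.Vector3())]

# Reuse the dumped materials with a different source; geometry is
# omitted since the loaded structure would overwrite it anyway.
sim = mp.Simulation(resolution=50, cell_size=cell,
                    boundary_layers=pml_layers, sources=sources)
sim.load('my_dump_dir', structure=True)  # before init_sim()/run()
sim.run(until=50)  # init_sim() runs first and loads structure.h5
```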
19 changes: 12 additions & 7 deletions python/tests/test_simulation.py
@@ -302,7 +302,7 @@ def test_in_box_volumes(self):
sim.field_energy_in_box(tv)
sim.field_energy_in_box(v)

def _load_dump_structure(self, chunk_file=False, chunk_sim=False):
def _load_dump_structure(self, chunk_file=False, chunk_sim=False, single_parallel_file=True):
from meep.materials import Al
resolution = 50
cell = mp.Vector3(5, 5)
@@ -329,12 +329,14 @@ def get_ref_field_point(sim):
ref_field_points.append(p.real)

sim1.run(mp.at_every(5, get_ref_field_point), until=50)
dump_fn = os.path.join(self.temp_dir, 'test_load_dump_structure.h5')

dump_dirname = os.path.join(self.temp_dir, 'test_load_dump')
sim1.dump(dump_dirname, structure=True, single_parallel_file=single_parallel_file)

dump_chunk_fname = None
chunk_layout = None
sim1.dump_structure(dump_fn)
if chunk_file:
dump_chunk_fname = os.path.join(self.temp_dir, 'test_load_dump_structure_chunks.h5')
dump_chunk_fname = os.path.join(dump_dirname, 'chunk_layout.h5')
sim1.dump_chunk_layout(dump_chunk_fname)
chunk_layout = dump_chunk_fname
if chunk_sim:
@@ -345,9 +347,8 @@ def get_ref_field_point(sim):
boundary_layers=pml_layers,
sources=[sources],
symmetries=symmetries,
chunk_layout=chunk_layout,
load_structure=dump_fn)

chunk_layout=chunk_layout)
sim.load(dump_dirname, structure=True, single_parallel_file=single_parallel_file)
field_points = []

def get_field_point(sim):
@@ -362,6 +363,10 @@ def get_field_point(sim):
def test_load_dump_structure(self):
self._load_dump_structure()

@unittest.skipIf(not mp.with_mpi(), "MPI specific test")
def test_load_dump_structure_sharded(self):
self._load_dump_structure(single_parallel_file=False)

def test_load_dump_chunk_layout_file(self):
self._load_dump_structure(chunk_file=True)

33 changes: 20 additions & 13 deletions src/h5file.cpp
@@ -150,7 +150,7 @@ void h5file::close_id() {

/* note: if parallel is true, then *all* processes must call this,
and all processes will use I/O. */
h5file::h5file(const char *filename_, access_mode m, bool parallel_) {
h5file::h5file(const char *filename_, access_mode m, bool parallel_, bool local_) {
cur_dataname = NULL;
id = (void *)malloc(sizeof(hid_t));
cur_id = (void *)malloc(sizeof(hid_t));
@@ -160,7 +160,12 @@ h5file::h5file(const char *filename_, access_mode m, bool parallel_) {
filename = new char[strlen(filename_) + 1];
strcpy(filename, filename_);
mode = m;

if (parallel_ && local_) {
meep::abort("Can not open h5file (%s) in both parallel and local mode.", filename);
}
parallel = parallel_;
local = local_;
}
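A local (non-parallel) file is opened by every process independently through the serial HDF5 driver, which is what makes the per-rank sharded dump possible. As a hedged post-processing sketch, the per-rank files can be inspected from plain Python (assumes `h5py`, which is not a Meep dependency, and a prior `sim.dump('my_dump_dir', single_parallel_file=False)`):

```python
import glob
import os

import h5py  # assumed available; not required by Meep itself

# Simulation._get_load_dump_dirname() names the shards rank00, rank01, ...
for rank_dir in sorted(glob.glob(os.path.join('my_dump_dir', 'rank*'))):
    with h5py.File(os.path.join(rank_dir, 'structure.h5'), 'r') as f:
        print(rank_dir, sorted(f.keys()))
```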

h5file::~h5file() {
@@ -191,7 +196,9 @@ void h5file::remove() {
extending = 0;

IF_EXCLUSIVE(if (parallel) all_wait(), (void)0);
if (am_master() && std::remove(filename)) meep::abort("error removing file %s", filename);
if (am_master() || local) {
if (std::remove(filename)) meep::abort("error removing file %s", filename);
}
}

h5file::extending_s *h5file::get_extending(const char *dataname) const {
Expand Down Expand Up @@ -226,7 +233,7 @@ void h5file::set_cur(const char *dataname, void *data_id) {

void h5file::read_size(const char *dataname, int *rank, size_t *dims, int maxrank) {
#ifdef HAVE_HDF5
if (parallel || am_master()) {
if (parallel || am_master() || local) {
hid_t file_id = HID(get_id()), space_id, data_id;

CHECK(file_id >= 0, "error opening HDF5 input file");
@@ -253,7 +260,7 @@ void h5file::read_size(const char *dataname, int *rank, size_t *dims, int maxran
H5Sclose(space_id);
}

if (!parallel) {
if (!parallel && !local) {
*rank = broadcast(0, *rank);
broadcast(0, dims, *rank);

@@ -291,7 +298,7 @@ void *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank,
bool single_precision) {
#ifdef HAVE_HDF5
void *data = 0;
if (parallel || am_master()) {
if (parallel || am_master() || local) {
int i, N;
hid_t file_id = HID(get_id()), space_id, data_id;

@@ -345,7 +352,7 @@ void *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank,
if (close_data_id) H5Dclose(data_id);
}

if (!parallel) {
if (!parallel && !local) {
*rank = broadcast(0, *rank);
broadcast(0, dims, *rank);
size_t N = 1;
@@ -375,7 +382,7 @@ char *h5file::read(const char *dataname) {
#ifdef HAVE_HDF5
char *data = 0;
int len = 0;
if (parallel || am_master()) {
if (parallel || am_master() || local) {
hid_t file_id = HID(get_id()), space_id, data_id, type_id;

CHECK(file_id >= 0, "error opening HDF5 input file");
@@ -404,7 +411,7 @@ char *h5file::read(const char *dataname) {
H5Dclose(data_id);
}

if (!parallel) {
if (!parallel && !local) {
len = broadcast(0, len);
if (!am_master()) data = new char[len];
broadcast(0, data, len);
@@ -442,7 +449,7 @@ void h5file::remove_data(const char *dataname) {
if (dataset_exists(dataname)) {
/* this is hackish ...need to pester HDF5 developers to make
H5Gunlink a collective operation for parallel mode */
if (!parallel || am_master()) {
if (!parallel || am_master() || local) {
H5Gunlink(file_id, dataname); /* delete it */
H5Fflush(file_id, H5F_SCOPE_GLOBAL);
}
@@ -471,7 +478,7 @@ void h5file::create_data(const char *dataname, int rank, const size_t *dims, boo
unset_cur();
remove_data(dataname); // HDF5 gives error if we H5Dcreate existing dataset

if (IF_EXCLUSIVE(!parallel || am_master(), 1)) {
if (local || IF_EXCLUSIVE(!parallel || am_master(), 1)) {
hsize_t *dims_copy = new hsize_t[rank1 + append_data];
hsize_t *maxdims = new hsize_t[rank1 + append_data];
hsize_t N = 1;
@@ -708,12 +715,12 @@ void h5file::done_writing_chunks() {
I'm assuming(?) that non-extensible datasets will use different
files, etcetera, for different timesteps. All of this hackery
goes away if we just use an MPI-compiled version of HDF5. */
if (parallel && cur_dataname && get_extending(cur_dataname)) prevent_deadlock(); // closes id
if (parallel && !local && cur_dataname && get_extending(cur_dataname)) prevent_deadlock(); // closes id
}

void h5file::write(const char *dataname, int rank, const size_t *dims, void *data,
bool single_precision) {
if (parallel || am_master()) {
if (parallel || am_master() || local) {
size_t *start = new size_t[rank + 1];
for (int i = 0; i < rank; i++)
start[i] = 0;
@@ -732,7 +739,7 @@

void h5file::write(const char *dataname, const char *data) {
#ifdef HAVE_HDF5
if (IF_EXCLUSIVE(am_master(), parallel || am_master())) {
if (local || IF_EXCLUSIVE(am_master(), parallel || am_master())) {
hid_t file_id = HID(get_id()), type_id, data_id, space_id;

CHECK(file_id >= 0, "error opening HDF5 output file");