diff --git a/python/adjoint/objective.py b/python/adjoint/objective.py index c09d835e1..6a0316931 100644 --- a/python/adjoint/objective.py +++ b/python/adjoint/objective.py @@ -273,12 +273,12 @@ def place_adjoint_source(self, dJ): scale = amp_arr * self._adj_src_scale(include_resolution=False) if self.num_freq == 1: - sources += [mp.IndexedSource(time_src, fourier_data, scale[:,0])] + sources += [mp.IndexedSource(time_src, fourier_data, scale[:,0], not self.yee_grid)] else: src = FilteredSource(time_src.frequency,self._frequencies,scale,self.sim.fields.dt) (num_basis, num_pts) = src.nodes.shape for basis_i in range(num_basis): - sources += [mp.IndexedSource(src.time_src_bf[basis_i], fourier_data, src.nodes[basis_i])] + sources += [mp.IndexedSource(src.time_src_bf[basis_i], fourier_data, src.nodes[basis_i], not self.yee_grid)] return sources def __call__(self): diff --git a/python/simulation.py b/python/simulation.py index 4a622fff5..e72b79c34 100644 --- a/python/simulation.py +++ b/python/simulation.py @@ -2454,7 +2454,8 @@ def add_source(self, src): self.init_sim() if isinstance(src, IndexedSource): - self.fields.add_srcdata(src.srcdata, src.src.swigobj, src.num_pts, src.amp_arr) + self.fields.register_src_time(src.src.swigobj) + self.fields.add_srcdata(src.srcdata, src.src.swigobj, src.num_pts, src.amp_arr, src.needs_boundary_fix) return where = Volume(src.center, src.size, dims=self.dimensions, diff --git a/python/source.py b/python/source.py index 7df7b34cf..1107d671b 100644 --- a/python/source.py +++ b/python/source.py @@ -597,8 +597,9 @@ class IndexedSource(Source): """ created a source object using (SWIG-wrapped mp::srcdata*) srcdata. """ - def __init__(self, src, srcdata, amp_arr): + def __init__(self, src, srcdata, amp_arr, needs_boundary_fix=False): self.src = src self.num_pts = len(amp_arr) self.srcdata = srcdata self.amp_arr = amp_arr + self.needs_boundary_fix = needs_boundary_fix \ No newline at end of file diff --git a/src/Makefile.am b/src/Makefile.am index bf2111a57..3196a5e79 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,7 @@ bicgstab.hpp meepgeom.hpp material_data.hpp adjust_verbosity.hpp libmeep_la_SOURCES = array_slice.cpp anisotropic_averaging.cpp \ bands.cpp boundaries.cpp bicgstab.cpp casimir.cpp \ cw_fields.cpp dft.cpp dft_ldos.cpp energy_and_flux.cpp \ -fields.cpp fields_dump.cpp loop_in_chunks.cpp h5fields.cpp h5file.cpp \ +fields.cpp fields_dump.cpp fix_boundary_sources.cpp loop_in_chunks.cpp h5fields.cpp h5file.cpp \ initialize.cpp integrate.cpp integrate2.cpp material_data.cpp monitor.cpp mympi.cpp \ multilevel-atom.cpp near2far.cpp output_directory.cpp random.cpp \ sources.cpp step.cpp step_db.cpp stress.cpp structure.cpp structure_dump.cpp \ diff --git a/src/fields.cpp b/src/fields.cpp index 608aeebb7..e1b9f9318 100644 --- a/src/fields.cpp +++ b/src/fields.cpp @@ -495,6 +495,8 @@ bool fields_chunk::alloc_f(component c) { // allocate fields for components required by any source on any process // ... this is needed after calling the low-level fields::add_srcdata void fields::require_source_components() { + fix_boundary_sources(); // needed if add_srcdata put sources on non-owned points + int needed[NUM_FIELD_COMPONENTS]; memset(needed, 0, sizeof(needed)); for (int i = 0; i < num_chunks; i++) { diff --git a/src/fix_boundary_sources.cpp b/src/fix_boundary_sources.cpp new file mode 100644 index 000000000..f31ad4b28 --- /dev/null +++ b/src/fix_boundary_sources.cpp @@ -0,0 +1,177 @@ +#include "meep_internals.hpp" +#include "config.h" + +#ifdef HAVE_MPI +#ifdef NEED_UNDEF_SEEK_FOR_MPI +// undef'ing SEEK_* is needed for MPICH, possibly other MPI versions +#undef SEEK_SET +#undef SEEK_END +#undef SEEK_CUR +#endif +#include +#endif + +#include + +namespace meep { + +#ifdef HAVE_MPI +static MPI_Comm mycomm = MPI_COMM_WORLD; +#endif + +// data structure for sending source information from one chunk to another +struct srcpt_info { + std::complex A; // amplitude + ptrdiff_t index; // index in chunk's fields array + size_t src_time_id; + int chunk_idx; + int c; // component +}; + +// comparison function for sorting srcpt_info lexicographically +// by (processor, src_time_id, chunk_idx, c) +struct srcpt_info_compare { + fields_chunk **chunks; + bool operator() (srcpt_info a, srcpt_info b) { + int aproc = chunks[a.chunk_idx]->n_proc(); + int bproc = chunks[b.chunk_idx]->n_proc(); + return (aproc != bproc ? aproc < bproc : + (a.src_time_id != b.src_time_id ? a.src_time_id < b.src_time_id : + (a.chunk_idx != b.chunk_idx ? a.chunk_idx < b.chunk_idx : + a.c < b.c))); + } +}; + +void fields::fix_boundary_sources() { + am_now_working_on(Connecting); + + std::vector boundarysources; + + // find all not-owned source points and figure out in which chunk + // they are actually supposed to be located, storing info in boundarysources. + for (int i = 0; i < num_chunks; i++) { + FOR_FIELD_TYPES(ft) { + for (src_vol &src : chunks[i]->sources[ft]) + if (src.needs_boundary_fix) { + for (size_t ipt = 0; ipt < src.num_points(); ++ipt) { + component c = src.c; + ivec here = chunks[i]->gv.iloc(c, src.index_at(ipt)); + if (!chunks[i]->gv.owns(here) && src.amplitude(ipt) != 0.0) { + if (src.t()->id == 0) abort("bug: fix_boundary_sources called for non-registered source"); + + // find the chunk that owns this point, similar to logic in boundaries.cpp + std::complex thephase; + if (locate_component_point(&c, &here, &thephase) && !on_metal_boundary(here)) { + for (int j = 0; j < num_chunks; j++) + if (chunks[j]->gv.owns(here)) { + srcpt_info s = { src.amplitude(ipt)*conj(thephase), chunks[j]->gv.index(c, here), src.t()->id, chunks[j]->chunk_idx, c }; + boundarysources.push_back(s); + break; + } + } + src.set_amplitude(ipt, 0.0); // will no longer be needed + } + } + src.needs_boundary_fix = false; + } + } + } + + // we need each process's data to be contiguous + srcpt_info_compare compare = {chunks}; + std::sort(boundarysources.begin(), boundarysources.end(), compare); + + // collect 2d (row-major) arrays offsets and numcomm, + // where numcomm[i,j] is the number of srcpt_info items + // to be set from process i to process j, and offsets[i,j] + // is the corresponding offset in the boundarysources input. + int p = my_rank(); + int P = count_processors(); + std::vector offsets(P * P, size_t(0)); + std::vector numcomm_(P * P, size_t(0)); + size_t idx0 = 0; + int p0 = 0; + for (size_t idx = 0; idx < boundarysources.size(); ++idx) { + int pidx = chunks[boundarysources[idx].chunk_idx]->n_proc(); + if (pidx != p0) { + offsets[p*P + p0] = idx0; + numcomm_[p*P + p0] = idx - idx0; + p0 = pidx; + idx0 = idx; + } + } + offsets[p*P + p0] = idx0; + numcomm_[p*P + p0] = boundarysources.size() - idx0; + + // collect the numcomm data from all processes + std::vector numcomm(P * P, size_t(0)); + sum_to_all(numcomm_.data(), numcomm.data(), P*P); + +#ifdef HAVE_MPI + // declare an MPI datatype mirroring srcpt_info, so that we can send/receive srcpt_info arrays + int srcpt_info_blocklengths[5] = {2,1,1,1,1}; + MPI_Datatype srcpt_info_types[5] = {MPI_DOUBLE, sizeof(ptrdiff_t) == sizeof(int) ? MPI_INT : MPI_LONG_LONG, sizeof(size_t) == sizeof(unsigned) ? MPI_UNSIGNED : MPI_UNSIGNED_LONG_LONG, MPI_INT, MPI_INT}; + MPI_Aint srcpt_info_offsets[5] = { offsetof(srcpt_info,A), offsetof(srcpt_info,index), offsetof(srcpt_info,src_time_id), offsetof(srcpt_info,chunk_idx), offsetof(srcpt_info,c) }; + MPI_Datatype mpi_srcpt_info; + MPI_Type_create_struct(5, srcpt_info_blocklengths, srcpt_info_offsets, srcpt_info_types, &mpi_srcpt_info); + MPI_Type_commit(&mpi_srcpt_info); +#endif + +for (int psrc = 0; psrc < P; ++psrc) + for (int pdest = 0; pdest < P; ++pdest) { + size_t N = numcomm[psrc*P + pdest]; + if (N == 0) continue; + if (pdest == p) { + srcpt_info *srcpts; +#ifdef HAVE_MPI + if (psrc != p) { + srcpts = new srcpt_info[N]; + MPI_Status status; + MPI_Recv(srcpts, N, mpi_srcpt_info, psrc, psrc*P + pdest, mycomm, &status); + } + else +#endif + srcpts = boundarysources.data() + offsets[psrc*P + pdest]; + int chunk_idx = srcpts[0].chunk_idx; + size_t src_time_id = srcpts[0].src_time_id; + int c = srcpts[0].c; + size_t idx0 = 0; + for (size_t idx = 0; idx <= N; ++idx) { + if (idx == N || srcpts[idx].chunk_idx != chunk_idx || srcpts[idx].src_time_id != src_time_id || srcpts[idx].c != c) { + std::vector idx_arr(idx - idx0); + std::vector > amp_arr(idx - idx0); + for (size_t i = idx0; i < idx; ++i) { + idx_arr[i-idx0] = srcpts[i].index; + amp_arr[i-idx0] = srcpts[i].A; + } + sourcedata srcdata = { (component) c, idx_arr, chunk_idx, amp_arr }; + src_time *srctime = lookup_src_time(src_time_id); + if (srctime == NULL) abort("bug: unknown src_time_id (missing registration?)"); + add_srcdata(srcdata, srctime, size_t(0), NULL, false); + if (idx < N) { + chunk_idx = srcpts[idx].chunk_idx; + src_time_id = srcpts[idx].src_time_id; + c = srcpts[idx].c; + idx0 = idx; + } + } + } + + if (psrc != p) delete[] srcpts; + } +#ifdef HAVE_MPI + else if (psrc == p) { + srcpt_info *srcpts = boundarysources.data() + offsets[psrc*P + pdest]; + MPI_Send(srcpts, N, mpi_srcpt_info, pdest, psrc*P + pdest, mycomm); + } +#endif + } + +#ifdef HAVE_MPI + MPI_Type_free(&mpi_srcpt_info); +#endif + + finished_working(); +} + +} // namespace meep \ No newline at end of file diff --git a/src/meep.hpp b/src/meep.hpp index e9047695b..54873e28b 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -939,8 +939,13 @@ class src_time { // sources are not, but this may change. bool is_integrated; + // a unique ID > 0 can be assigned to a src_time object by fields::register_src_time, + // in order to communicate it from one process to another; otherwise defaults to 0. + size_t id; + src_time() { is_integrated = true; + id = 0; current_time = nan; current_current = 0.0; next = NULL; @@ -951,6 +956,7 @@ class src_time { current_time = t.current_time; current_current = t.current_current; current_dipole = t.current_dipole; + id = t.id; if (t.next) next = t.next->clone(); else @@ -1892,7 +1898,9 @@ class fields { void require_source_components(); void _require_component(component c, bool aniso2d); void require_component(component c) { _require_component(c, is_aniso2d()); sync_chunk_connections(); } - void add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, std::complex* amp_arr); + void add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, std::complex* amp_arr, bool needs_boundary_fix); + void register_src_time(src_time *src); + src_time *lookup_src_time(size_t id); // mpb.cpp @@ -2233,6 +2241,8 @@ class fields { bool locate_point_in_user_volume(ivec *, std::complex *phase) const; void locate_volume_source_in_user_volume(const vec p1, const vec p2, vec newp1[8], vec newp2[8], std::complex kphase[8], int &ncopies) const; + // fix_boundary_sources.cpp + void fix_boundary_sources(); // step.cpp void phase_material(); void step_db(field_type ft); diff --git a/src/meep_internals.hpp b/src/meep_internals.hpp index 8a559a872..c3461b3b1 100644 --- a/src/meep_internals.hpp +++ b/src/meep_internals.hpp @@ -55,13 +55,16 @@ class src_vol { // Constructs a new source volume. Takes ownership of `ind` and `amps`. // Requirement: ind.size() == amps.size() src_vol(component cc, src_time *st, std::vector &&ind, - std::vector > &&s); + std::vector > &&s, bool fix_boundaries=false); // Checks whether `a` and `b` are combinable, i.e. have the same indices and point to the same // `src_time` instance, but have potentially different amplitudes. static bool combinable(const src_vol &a, const src_vol &b); ptrdiff_t index_at(size_t pos) const { return index[pos]; } + std::complex amplitude(size_t j) const { return amp[j]; }; + void set_amplitude(size_t j, std::complex a) { amp[j] = a; }; + void set_amplitude(size_t j, double a) { amp[j] = a; }; const std::complex &litude_at(size_t pos) const { return amp[pos]; } size_t num_points() const { return index.size(); }; const src_time *t() const { return src_t; }; @@ -75,6 +78,7 @@ class src_vol { void add_amplitudes_from(const src_vol &other); const component c; // field component the source applies to + bool needs_boundary_fix; // whether fix_boundary_sources needs calling private: src_time *src_t; // Not owned by us. std::vector index; // locations of sources in grid (indices) diff --git a/src/sources.cpp b/src/sources.cpp index 4f58e99c3..cc1dc91d3 100644 --- a/src/sources.cpp +++ b/src/sources.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "meep.hpp" #include "meep_internals.hpp" @@ -165,13 +166,13 @@ bool custom_src_time::is_equal(const src_time &t) const { /*********************************************************************/ src_vol::src_vol(component cc, src_time *st, std::vector &&ind, - std::vector > &&s) + std::vector > &&s, bool fix_boundaries) : c([](component c) -> component { if (is_D(c)) c = direction_component(Ex, component_direction(c)); if (is_B(c)) c = direction_component(Hx, component_direction(c)); return c; }(cc)), - src_t(st), index(std::move(ind)), amp(std::move(amps)) { + src_t(st), index(std::move(ind)), amp(std::move(amps)), needs_boundary_fix(fix_boundaries) { assert(index.size() == amp.size()); } @@ -288,18 +289,23 @@ static void src_vol_chunkloop(fields_chunk *fc, int ichunk, component c, ivec is fc->add_source(ft, src_vol(c, data->src, std::move(index_array), std::move(amps_array))); } -void fields::add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, std::complex* amp_arr){ +void fields::add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, std::complex* amp_arr, bool needs_boundary_fix){ + if (n == 0) { + n = cur_data.idx_arr.size(); + assert(amp_arr == NULL); + amp_arr = cur_data.amp_arr.data(); + } + assert(n == cur_data.idx_arr.size()); sources = src->add_to(sources, &src); std::vector index_arr(cur_data.idx_arr); std::vector> amplitudes(amp_arr, amp_arr+n); component c = cur_data.near_fd_comp; - field_type ft = is_magnetic(c) ? B_stuff : D_stuff; if (0 > cur_data.fc_idx or cur_data.fc_idx >= num_chunks) meep::abort("fields chunk index out of range"); fields_chunk *fc = chunks[cur_data.fc_idx]; if (!fc->is_mine()) meep::abort("wrong fields chunk"); - fc->add_source(ft, src_vol(c, src, std::move(index_arr), std::move(amplitudes))); + fc->add_source(ft, src_vol(c, src, std::move(index_arr), std::move(amplitudes), needs_boundary_fix)); // We can't do require_component(c) since that only works if all processes are adding // srcdata for the same components in the same order, which may not be true. // ... instead, the caller should call fields::require_source_components() @@ -343,6 +349,23 @@ complex amp_file_func(const vec &p) { return res; } +void fields::register_src_time(src_time *src) { + sources = src->add_to(sources, &src); + if (src->id == 0) { // doesn't have an ID yet + size_t max_id = 0; + for (src_time *s = sources; s; s = s->next) + max_id = s->id > max_id ? s->id : max_id; + src->id = max_id + 1; + } +} + +src_time *fields::lookup_src_time(size_t id) { + if (id == 0) abort("bug: cannot lookup unregistered source"); + for (src_time *s = sources; s; s = s->next) + if (s->id == id) return s; + return NULL; +} + void fields::add_volume_source(component c, const src_time &src, const volume &where_, complex *arr, size_t dim1, size_t dim2, size_t dim3, complex amp) {