diff --git a/src/anisotropic_averaging.cpp b/src/anisotropic_averaging.cpp index 4f4eb8a0f..7f3065da2 100644 --- a/src/anisotropic_averaging.cpp +++ b/src/anisotropic_averaging.cpp @@ -39,7 +39,7 @@ static vec sphere_pt(const vec ¢, double R, int n, double &weight) { case Dcyl: { weight = sphere_quad[1][n][3]; - return cent + return cent + veccyl(sphere_quad[1][n][0], sphere_quad[1][n][1]) * R; } default: @@ -53,7 +53,7 @@ vec material_function::normal_vector(field_type ft, const volume &v) { vec gradient(zero_vec(v.dim)); vec p(v.center()); - double R = v.diameter(); + double R = v.diameter(); for (int i = 0; i < num_sphere_quad[number_of_directions(v.dim)-1]; ++i) { double weight; vec pt = sphere_pt(p, R, i, weight); @@ -82,7 +82,7 @@ void material_function::eff_chi1inv_row(component c, double chi1inv_row[3], double meps=1, minveps=1; vec d = v.get_max_corner() - v.get_min_corner(); - int ms = 10; + int ms = 10; double old_meps=0, old_minveps=0; int iter = 0; switch(v.dim) { @@ -91,12 +91,12 @@ void material_function::eff_chi1inv_row(component c, double chi1inv_row[3], old_meps=meps; old_minveps=minveps; meps = minveps = 0; for (int k=0; k < ms; k++) - for (int j=0; j < ms; j++) - for (int i=0; i < ms; i++) { - double ep = chi1p1(ft,v.get_min_corner() + vec(i*d.x()/ms, j*d.y()/ms, k*d.z()/ms)); - if (ep < 0) goto trivial; - meps += ep; minveps += 1/ep; - } + for (int j=0; j < ms; j++) + for (int i=0; i < ms; i++) { + double ep = chi1p1(ft,v.get_min_corner() + vec(i*d.x()/ms, j*d.y()/ms, k*d.z()/ms)); + if (ep < 0) goto trivial; + meps += ep; minveps += 1/ep; + } meps /= ms*ms*ms; minveps /= ms*ms*ms; ms *= 2; @@ -108,14 +108,14 @@ void material_function::eff_chi1inv_row(component c, double chi1inv_row[3], old_meps=meps; old_minveps=minveps; meps = minveps = 0; for (int j=0; j < ms; j++) - for (int i=0; i < ms; i++) { - double ep = chi1p1(ft,v.get_min_corner() + vec(i*d.x()/ms, j*d.y()/ms)); - if (ep < 0) goto trivial; - meps += ep; minveps += 1/ep; - } + for (int i=0; i < ms; i++) { + double ep = chi1p1(ft,v.get_min_corner() + vec(i*d.x()/ms, j*d.y()/ms)); + if (ep < 0) goto trivial; + meps += ep; minveps += 1/ep; + } meps /= ms*ms; minveps /= ms*ms; - ms *= 2; + ms *= 2; if (maxeval && (iter += ms*ms) >= maxeval) goto done; } break; @@ -125,16 +125,16 @@ void material_function::eff_chi1inv_row(component c, double chi1inv_row[3], meps = minveps = 0; double sumvol = 0; for (int j=0; j < ms; j++) - for (int i=0; i < ms; i++) { - double r = v.get_min_corner().r() + i*d.r()/ms; - double ep = chi1p1(ft,v.get_min_corner() + veccyl(i*d.r()/ms, j*d.z()/ms)); - if (ep < 0) goto trivial; - sumvol += r; - meps += ep * r; minveps += r/ep; - } + for (int i=0; i < ms; i++) { + double r = v.get_min_corner().r() + i*d.r()/ms; + double ep = chi1p1(ft,v.get_min_corner() + veccyl(i*d.r()/ms, j*d.z()/ms)); + if (ep < 0) goto trivial; + sumvol += r; + meps += ep * r; minveps += r/ep; + } meps /= sumvol; minveps /= sumvol; - ms *= 2; + ms *= 2; if (maxeval && (iter += ms*ms) >= maxeval) goto done; } break; @@ -143,17 +143,17 @@ void material_function::eff_chi1inv_row(component c, double chi1inv_row[3], old_meps=meps; old_minveps=minveps; meps = minveps = 0; for (int i=0; i < ms; i++) { - double ep = chi1p1(ft,v.get_min_corner() + vec(i*d.z()/ms)); - if (ep < 0) { - meps = chi1p1(ft,v.center()); - minveps = 1/meps; - goto done; - } - meps += ep; minveps += 1/ep; + double ep = chi1p1(ft,v.get_min_corner() + vec(i*d.z()/ms)); + if (ep < 0) { + meps = chi1p1(ft,v.center()); + minveps = 1/meps; + goto done; + } + meps += ep; minveps += 1/ep; } meps /= ms; minveps /= ms; - ms *= 2; + ms *= 2; if (maxeval && (iter += ms*ms) >= maxeval) goto done; } break; @@ -165,13 +165,13 @@ void material_function::eff_chi1inv_row(component c, double chi1inv_row[3], double nabsinv = 1.0/abs(gradient); LOOP_OVER_DIRECTIONS(gradient.dim, k) n[k%3] = gradient.in_direction(k) * nabsinv; - + /* get rownum'th row of effective tensor P * minveps + (I-P) * 1/meps = P * (minveps-1/meps) + I * 1/meps where I is the identity and P is the projection matrix P_{ij} = n[i] * n[j]. */ int rownum = component_direction(c) % 3; - for (int i=0; i<3; ++i) + for (int i=0; i<3; ++i) chi1inv_row[i] = n[rownum] * n[i] * (minveps - 1/meps); chi1inv_row[rownum] += 1/meps; } @@ -189,10 +189,10 @@ void structure_chunk::set_chi1inv(component c, if (!use_anisotropic_averaging) maxeval = 0; const double smoothing_diameter = 1.0; // FIXME: make user-changable? - + // may take a long time in 3d, so prepare to print status messages - int npixels = 0, ipixel = 0; - int loop_npixels = 0; + size_t npixels = 0, ipixel = 0; + size_t loop_npixels = 0; LOOP_OVER_VOL(gv, c, i) { loop_npixels = loop_n1 * loop_n2 * loop_n3; goto breakout; // hack to use loop-size computation from LOOP_OVER_VOL @@ -233,10 +233,10 @@ void structure_chunk::set_chi1inv(component c, chi1inv[c][d2][i] = (d2 == dc) ? chi1invrow[2] : chi1invrow_offdiag[2]; trivial[2] = trivial[2] && (chi1inv[c][d2][i] == trivial_val[2]); } - + if (!quiet && (ipixel+1) % 1000 == 0 && wall_time() > last_output_time + MIN_OUTPUT_TIME) { - master_printf("subpixel-averaging is %g%% done, %g s remaining\n", + master_printf("subpixel-averaging is %g%% done, %g s remaining\n", ipixel * 100.0 / npixels, (npixels - ipixel) * (wall_time() - last_output_time) / ipixel); @@ -248,8 +248,8 @@ void structure_chunk::set_chi1inv(component c, for (int i = 0; i < 3; ++i) { trivial_chi1inv[c][ds[i]] = trivial[i]; if (i != idiag && trivial[i]) { // deallocate trivial offdiag - delete[] chi1inv[c][ds[i]]; - chi1inv[c][ds[i]] = 0; + delete[] chi1inv[c][ds[i]]; + chi1inv[c][ds[i]] = 0; } } // only deallocate trivial diag if entire tensor is trivial @@ -260,7 +260,7 @@ void structure_chunk::set_chi1inv(component c, medium.unset_volume(); } -void structure_chunk::add_susceptibility(material_function &sigma, +void structure_chunk::add_susceptibility(material_function &sigma, field_type ft, const susceptibility &sus) { @@ -278,7 +278,7 @@ void structure_chunk::add_susceptibility(material_function &sigma, newsus->sigma[c][d] = NULL; newsus->trivial_sigma[c][d] = true; } - + // if we own this chunk, set up the sigma array(s): if (is_mine()) FOR_FT_COMPONENTS(ft,c) if (gv.has_field(c)) { FOR_FT_COMPONENTS(ft,c2) if (gv.has_field(c2)) { @@ -312,20 +312,20 @@ void structure_chunk::add_susceptibility(material_function &sigma, for (int i = 0; i < 3; ++i) { newsus->trivial_sigma[c][ds[i]] = trivial[i]; if (i != idiag && trivial[i]) { // deallocate trivial offdiag - delete[] newsus->sigma[c][ds[i]]; - newsus->sigma[c][ds[i]] = 0; + delete[] newsus->sigma[c][ds[i]]; + newsus->sigma[c][ds[i]] = 0; } } // only deallocate trivial diag if entire tensor is trivial if (trivial[0] && trivial[1] && trivial[2]) { - delete[] newsus->sigma[c][dc]; - newsus->sigma[c][dc] = 0; + delete[] newsus->sigma[c][dc]; + newsus->sigma[c][dc] = 0; } } // finally, add to the beginning of the chiP list: newsus->next = chiP[ft]; - chiP[ft] = newsus; + chiP[ft] = newsus; sigma.unset_volume(); } diff --git a/src/array_slice.cpp b/src/array_slice.cpp index ed48599f2..1f2a82642 100644 --- a/src/array_slice.cpp +++ b/src/array_slice.cpp @@ -16,7 +16,7 @@ */ /* create and return arrays of field components on user-specified - spatial slices. Uses fields::loop_in_chunks analogous to + spatial slices. Uses fields::loop_in_chunks analogous to h5fields.cpp */ @@ -38,14 +38,14 @@ namespace meep { typedef struct { // information related to the volume covered by the - // array slice (its size, etcetera) + // array slice (its size, etcetera) // these fields are filled in by get_array_slice_dimensions // if the data parameter is non-null ivec min_corner, max_corner; int num_chunks; int rank; direction ds[3]; - int slice_size; + size_t slice_size; // the function to output and related info (offsets for averaging, etc.) // note: either fun *or* rfun should be non-NULL (not both) @@ -60,7 +60,7 @@ typedef struct { component *cS; cdouble *ph; cdouble *fields; - int *offsets; + ptrdiff_t *offsets; int ninveps; component inveps_cs[3]; @@ -130,20 +130,21 @@ static void get_array_slice_chunkloop(fields_chunk *fc, int ichnk, component cgr //-----------------------------------------------------------------------// // Find output chunk dimensions and strides, etc. - int start[3]={0,0,0}, count[3]={1,1,1}, offset[3]={0,0,0}; + int start[3]={0,0,0}, count[3]={1,1,1}; + ptrdiff_t offset[3]={0,0,0}; ivec isS = S.transform(is, sn) + shift; ivec ieS = S.transform(ie, sn) + shift; - + // figure out what yucky_directions (in LOOP_OVER_IVECS) // correspond to what directions in the transformed vectors (in output). ivec permute(zero_ivec(fc->gv.dim)); - for (int i = 0; i < 3; ++i) + for (int i = 0; i < 3; ++i) permute.set_direction(fc->gv.yucky_direction(i), i); permute = S.transform_unshifted(permute, sn); LOOP_OVER_DIRECTIONS(permute.dim, d) permute.set_direction(d, abs(permute.in_direction(d))); - + // compute the size of the chunk to output, and its strides etc. for (int i = 0; i < data->rank; ++i) { direction d = data->ds[i]; @@ -163,7 +164,7 @@ static void get_array_slice_chunkloop(fields_chunk *fc, int ichnk, component cgr - data->min_corner.in_direction(d)) / 2 + 1; }; - int stride[3]={1,1,1}; + ptrdiff_t stride[3]={1,1,1}; for (int i = 0; i < data->rank; ++i) { direction d = data->ds[i]; int j = permute.in_direction(d); @@ -173,21 +174,21 @@ static void get_array_slice_chunkloop(fields_chunk *fc, int ichnk, component cgr }; // sco="slice chunk offset" - int sco=start[0]*dims[1]*dims[2] + start[1]*dims[2] + start[2]; + ptrdiff_t sco=start[0]*dims[1]*dims[2] + start[1]*dims[2] + start[2]; //-----------------------------------------------------------------------// // Compute the function to output, exactly as in fields::integrate. - int *off = data->offsets; + ptrdiff_t *off = data->offsets; component *cS = data->cS; complex *fields = data->fields, *ph = data->ph; const component *iecs = data->inveps_cs; const direction *ieds = data->inveps_ds; - int ieos[6]; + ptrdiff_t ieos[6]; const component *imcs = data->invmu_cs; const direction *imds = data->invmu_ds; - int imos[6]; + ptrdiff_t imos[6]; int num_components=data->components.size(); - + double *slice=0; cdouble *zslice=0; bool complex_data = (data->rfun==0); @@ -298,7 +299,8 @@ int fields::get_array_slice_dimensions(const volume &where, int dims[3], void *c if (data->num_chunks == 0 || !(data->min_corner <= data->max_corner)) return 0; // no data to write; - int rank=0, slice_size=1; + int rank=0; + size_t slice_size=1; LOOP_OVER_DIRECTIONS(gv.dim, d) { if (rank >= 3) abort("too many dimensions in array_slice"); int n = (data->max_corner.in_direction(d) @@ -335,7 +337,7 @@ void *fields::do_get_array_slice(const volume &where, int dims[3]; array_slice_data data; int rank=get_array_slice_dimensions(where, dims, &data); - int slice_size=data.slice_size; + size_t slice_size=data.slice_size; if (rank==0 || slice_size==0) return 0; // no data to write bool complex_data = (rfun==0); @@ -345,13 +347,13 @@ void *fields::do_get_array_slice(const volume &where, { if (complex_data) { zslice = new cdouble[slice_size]; vslice = (void *)zslice; - } + } else { slice = new double[slice_size]; vslice = (void *)slice; }; }; - + data.vslice = vslice; data.fun = fun; data.rfun = rfun; @@ -363,8 +365,8 @@ void *fields::do_get_array_slice(const volume &where, data.cS = new component[num_components]; data.ph = new cdouble[num_components]; data.fields = new cdouble[num_components]; - - data.offsets = new int[2 * num_components]; + + data.offsets = new ptrdiff_t[2 * num_components]; for (int i = 0; i < 2 * num_components; ++i) data.offsets[i] = 0; @@ -373,27 +375,27 @@ void *fields::do_get_array_slice(const volume &where, bool needs_dielectric = false; for (int i = 0; i < num_components; ++i) if (components[i] == Dielectric) { needs_dielectric = true; break; } - if (needs_dielectric) + if (needs_dielectric) FOR_ELECTRIC_COMPONENTS(c) if (gv.has_field(c)) { if (data.ninveps == 3) abort("more than 3 field components??"); data.inveps_cs[data.ninveps] = c; data.inveps_ds[data.ninveps] = component_direction(c); ++data.ninveps; } - + /* compute inverse-mu directions for computing Permeability fields */ data.ninvmu = 0; bool needs_permeability = false; for (int i = 0; i < num_components; ++i) if (components[i] == Permeability) { needs_permeability = true; break; } - if (needs_permeability) + if (needs_permeability) FOR_MAGNETIC_COMPONENTS(c) if (gv.has_field(c)) { if (data.ninvmu == 3) abort("more than 3 field components??"); data.invmu_cs[data.ninvmu] = c; data.invmu_ds[data.ninvmu] = component_direction(c); ++data.ninvmu; } - + loop_in_chunks(get_array_slice_chunkloop, (void *) &data, where, Centered, true, true); @@ -405,10 +407,11 @@ void *fields::do_get_array_slice(const volume &where, if (complex_data) { cdouble *buffer = new cdouble[BUFSIZE]; cdouble *slice = (cdouble *)vslice; - int offset=0, remaining=slice_size; + ptrdiff_t offset=0; + size_t remaining=slice_size; while(remaining!=0) - { - int size = (remaining > BUFSIZE ? BUFSIZE : remaining); + { + size_t size = (remaining > BUFSIZE ? BUFSIZE : remaining); sum_to_all(slice + offset, buffer, size); memcpy(slice+offset, buffer, size*sizeof(cdouble)); remaining-=size; @@ -419,9 +422,10 @@ void *fields::do_get_array_slice(const volume &where, else { double *buffer = new double[BUFSIZE]; double *slice = (double *)vslice; - int offset=0, remaining=slice_size; + ptrdiff_t offset=0; + size_t remaining=slice_size; while(remaining!=0) - { int size = (remaining > BUFSIZE ? BUFSIZE : remaining); + { size_t size = (remaining > BUFSIZE ? BUFSIZE : remaining); sum_to_all(slice + offset, buffer, size); memcpy(slice+offset, buffer, size*sizeof(double)); remaining-=size; @@ -450,7 +454,7 @@ double *fields::get_array_slice(const volume &where, return (double *)do_get_array_slice(where, components, 0, rfun, fun_data, (void *)slice); -} +} cdouble *fields::get_complex_array_slice(const volume &where, std::vector components, diff --git a/src/bands.cpp b/src/bands.cpp index 08d57b4f4..f33965db9 100644 --- a/src/bands.cpp +++ b/src/bands.cpp @@ -105,10 +105,10 @@ int do_harminv(complex *data, int n, double dt, for (int i = 0; i < nf; ++i) // sort in increasing order of error for (int j = i + 1; j < nf; ++j) if (harminv_get_freq_error(hd, fsort[i]) > - harminv_get_freq_error(hd, fsort[j])) { - int k = fsort[i]; - fsort[i] = fsort[j]; - fsort[j] = k; + harminv_get_freq_error(hd, fsort[j])) { + int k = fsort[i]; + fsort[i] = fsort[j]; + fsort[j] = k; } double min_err = harminv_get_freq_error(hd, fsort[0]); @@ -129,12 +129,12 @@ int do_harminv(complex *data, int n, double dt, harminv_get_amplitude(&aa, hd, fsort[i]); double amp = abs(aa); if (f >= fmin && f <= fmax - && abs(harminv_get_Q(hd, fsort[i])) > Q_thresh - && err < err_thresh - && err < rel_err_thresh * min_err - && amp > amp_thresh - && amp > rel_amp_thresh * max_amp) { - fsort[j++] = fsort[i]; + && abs(harminv_get_Q(hd, fsort[i])) > Q_thresh + && err < err_thresh + && err < rel_err_thresh * min_err + && amp > amp_thresh + && amp > rel_amp_thresh * max_amp) { + fsort[j++] = fsort[i]; } } nf = j; @@ -143,16 +143,16 @@ int do_harminv(complex *data, int n, double dt, // set indices to -1 for frequencies to be eliminated for (int i = 0; i < nf; ++i) if (fsort[i] != -1) { // i hasn't been eliminated yet - double f = harminv_get_freq(hd, fsort[i]); - if (f < 0.0) { - double kdiff = -2 * f; - int kpos = i; - for (int k = 0; k < nf; ++k) // search for closest positive freq. - if (fsort[k] != -1) { // k hasn't been eliminated yet - double fdiff = abs(harminv_get_freq(hd, fsort[k]) + f); - if (fdiff < kdiff) { - kpos = k; - kdiff = fdiff; + double f = harminv_get_freq(hd, fsort[i]); + if (f < 0.0) { + double kdiff = -2 * f; + int kpos = i; + for (int k = 0; k < nf; ++k) // search for closest positive freq. + if (fsort[k] != -1) { // k hasn't been eliminated yet + double fdiff = abs(harminv_get_freq(hd, fsort[k]) + f); + if (fdiff < kdiff) { + kpos = k; + kdiff = fdiff; } } if (kpos != i && kdiff < 2.0 / n) { // consider them the same @@ -168,7 +168,7 @@ int do_harminv(complex *data, int n, double dt, int j = 0; for (int i = 0; i < nf; ++i) // remove the eliminated indices if (fsort[i] != -1) - fsort[j++] = fsort[i]; + fsort[j++] = fsort[i]; nf = j; } @@ -179,10 +179,10 @@ int do_harminv(complex *data, int n, double dt, for (int i = 0; i < nf; ++i) // simple O(nf^2) sort for (int j = i + 1; j < nf; ++j) if (abs(harminv_get_freq(hd, fsort[i])) > - abs(harminv_get_freq(hd, fsort[j]))) { - int k = fsort[i]; - fsort[i] = fsort[j]; - fsort[j] = k; + abs(harminv_get_freq(hd, fsort[j]))) { + int k = fsort[i]; + fsort[i] = fsort[j]; + fsort[j] = k; } for (int i = 0; i < nf; ++i) { diff --git a/src/bicgstab.cpp b/src/bicgstab.cpp index f6739dd62..e1f8c8ca0 100644 --- a/src/bicgstab.cpp +++ b/src/bicgstab.cpp @@ -73,16 +73,16 @@ using namespace std; namespace meep { -static double dot(int n, const realnum *x, const realnum *y) +static double dot(size_t n, const realnum *x, const realnum *y) { double sum = 0; - for (int i = 0; i < n; ++i) sum += x[i] * y[i]; + for (size_t i = 0; i < n; ++i) sum += x[i] * y[i]; return sum_to_all(sum); } -static double norm2(int n, const realnum *x) { +static double norm2(size_t n, const realnum *x) { // note: we don't just do sqrt(dot(n, x, x)) in order to avoid overflow - int i; + size_t i; double xmax = 0, scale; long double sum = 0; for (i = 0; i < n; ++i) { @@ -99,8 +99,8 @@ static double norm2(int n, const realnum *x) { return xmax * sqrt(sum_to_all(sum)); } -static void xpay(int n, realnum *x, double a, const realnum *y) { - for (int m = 0; m < n; ++m) x[m] += a * y[m]; +static void xpay(size_t n, realnum *x, double a, const realnum *y) { + for (size_t m = 0; m < n; ++m) x[m] += a * y[m]; } #define MIN_OUTPUT_TIME 4.0 // output no more often than this many seconds @@ -108,7 +108,7 @@ static void xpay(int n, realnum *x, double a, const realnum *y) { typedef realnum *prealnum; // grr, ISO C++ forbids new (double*)[...] /* BiCGSTAB(L) algorithm for the n-by-n problem Ax = b */ -int bicgstabL(const int L, const int n, realnum *x, +ptrdiff_t bicgstabL(const int L, const size_t n, realnum *x, bicgstab_op A, void *Adata, const realnum *b, const double tol, int *iters, @@ -126,7 +126,7 @@ int bicgstabL(const int L, const int n, realnum *x, double bnrm = norm2(n, b); if (bnrm == 0.0) bnrm = 1.0; - + int iter = 0; double last_output_wall_time = wall_time(); @@ -145,11 +145,11 @@ int bicgstabL(const int L, const int n, realnum *x, // rtilde = r[0] = b - Ax realnum *rtilde = work + (2*L+2) * n; A(x, r[0], Adata); - for (int m = 0; m < n; ++m) rtilde[m] = r[0][m] = b[m] - r[0][m]; + for (size_t m = 0; m < n; ++m) rtilde[m] = r[0][m] = b[m] - r[0][m]; { /* Sleipjen normalizes rtilde in his code; it seems to help slightly */ double s = 1.0 / norm2(n, rtilde); - for (int m = 0; m < n; ++m) rtilde[m] *= s; + for (size_t m = 0; m < n; ++m) rtilde[m] *= s; } memset(u[0], 0, sizeof(realnum) * n); // u[0] = 0 @@ -171,20 +171,20 @@ int bicgstabL(const int L, const int n, realnum *x, double beta = alpha * rho1 / rho; rho = rho1; for (int i = 0; i <= j; ++i) - for (int m = 0; m < n; ++m) u[i][m] = r[i][m] - beta * u[i][m]; + for (size_t m = 0; m < n; ++m) u[i][m] = r[i][m] - beta * u[i][m]; A(u[j], u[j+1], Adata); alpha = rho / dot(n, u[j+1], rtilde); for (int i = 0; i <= j; ++i) - xpay(n, r[i], -alpha, u[i+1]); + xpay(n, r[i], -alpha, u[i+1]); A(r[j], r[j+1], Adata); xpay(n, x, alpha, u[0]); } for (int j = 1; j <= L; ++j) { for (int i = 1; i < j; ++i) { - int ij = (j-1)*L + (i-1); - tau[ij] = dot(n, r[j], r[i]) / sigma[i]; - xpay(n, r[j], -tau[ij], r[i]); + int ij = (j-1)*L + (i-1); + tau[ij] = dot(n, r[j], r[i]) / sigma[i]; + xpay(n, r[j], -tau[ij], r[i]); } sigma[j] = dot(n, r[j],r[j]); gamma_p[j] = dot(n, r[0], r[j]) / sigma[j]; @@ -194,14 +194,14 @@ int bicgstabL(const int L, const int n, realnum *x, for (int j = L-1; j >= 1; --j) { gamma[j] = gamma_p[j]; for (int i = j+1; i <= L; ++i) - gamma[j] -= tau[(i-1)*L + (j-1)] * gamma[i]; + gamma[j] -= tau[(i-1)*L + (j-1)] * gamma[i]; } for (int j = 1; j < L; ++j) { gamma_pp[j] = gamma[j+1]; for (int i = j+1; i < L; ++i) - gamma_pp[j] += tau[(i-1)*L + (j-1)] * gamma[i+1]; + gamma_pp[j] += tau[(i-1)*L + (j-1)] * gamma[i+1]; } - + xpay(n, x, gamma[1], r[0]); xpay(n, r[0], -gamma_p[L], r[L]); xpay(n, u[0], -gamma[L], u[L]); diff --git a/src/bicgstab.hpp b/src/bicgstab.hpp index dd4729528..e0f8b6a8d 100644 --- a/src/bicgstab.hpp +++ b/src/bicgstab.hpp @@ -24,13 +24,13 @@ namespace meep { typedef void (*bicgstab_op)(const realnum *x, realnum *y, void *data); -int bicgstabL(const int L, - const int n, realnum *x, - bicgstab_op A, void *Adata, const realnum *b, - const double tol, - int *iters, // input *iters = max iters, output = actual iters - realnum *work, // if you pass work=NULL, bicgstab returns nwork - const bool quiet); +ptrdiff_t bicgstabL(const int L, + const size_t n, realnum *x, + bicgstab_op A, void *Adata, const realnum *b, + const double tol, + int *iters, // input *iters = max iters, output = actual iters + realnum *work, // if you pass work=NULL, bicgstab returns nwork + const bool quiet); } // namespace meep diff --git a/src/boundaries.cpp b/src/boundaries.cpp index 2b7e1ce30..ca71e679d 100644 --- a/src/boundaries.cpp +++ b/src/boundaries.cpp @@ -91,18 +91,18 @@ void fields::disconnect_chunks() { for (int i=0;iconnections[f][ip][io]; - chunks[i]->connections[f][ip][io] = NULL; - } + for (int ip=0;ip<3;++ip) + for (int io=0;io<2;io++) { + delete[] chunks[i]->connections[f][ip][io]; + chunks[i]->connections[f][ip][io] = NULL; + } } FOR_FIELD_TYPES(f) { delete[] chunks[i]->connection_phases[f]; chunks[i]->connection_phases[f] = NULL; for (int ip=0;ip<3;++ip) - for (int io=0;io<2;io++) - chunks[i]->num_connections[f][ip][io] = 0; + for (int io=0;io<2;io++) + chunks[i]->num_connections[f][ip][io] = 0; } } FOR_FIELD_TYPES(ft) @@ -110,7 +110,7 @@ void fields::disconnect_chunks() { delete[] comm_blocks[ft][i]; comm_blocks[ft][i] = 0; for (int ip=0;ip<3;++ip) - comm_sizes[ft][ip][i] = 0; + comm_sizes[ft][ip][i] = 0; } } @@ -169,7 +169,7 @@ bool fields::locate_point_in_user_volume(ivec *there, complex *phase) co void fields::locate_volume_source_in_user_volume(const vec p1, const vec p2, vec newp1[8], vec newp2[8], complex kphase[8], int &ncopies) const { - // For periodic boundary conditions, + // For periodic boundary conditions, // this function locates up to 8 translated copies of the initial grid_volume specified by (p1,p2) // First bring center of grid_volume inside ncopies = 1; @@ -177,7 +177,7 @@ void fields::locate_volume_source_in_user_volume(const vec p1, const vec p2, vec newp2[0] = p2; kphase[0] = 1; vec cen = (newp1[0] + newp2[0]) * 0.5; - LOOP_OVER_DIRECTIONS(gv.dim, d) + LOOP_OVER_DIRECTIONS(gv.dim, d) if (boundaries[High][d] == Periodic) { while (cen.in_direction(d) < gv.boundary_location(Low, d)) { newp1[0] += lattice_vector(d); @@ -192,9 +192,9 @@ void fields::locate_volume_source_in_user_volume(const vec p1, const vec p2, vec cen = (newp1[0] + newp2[0]) * 0.5; } } - + // if grid_volume extends outside user_volume in any direction, we need to duplicate already existing copies - LOOP_OVER_DIRECTIONS(gv.dim, d) + LOOP_OVER_DIRECTIONS(gv.dim, d) if (boundaries[High][d] == Periodic) { if (newp1[0].in_direction(d) < gv.boundary_location(Low, d) || newp2[0].in_direction(d) < gv.boundary_location(Low, d)) { @@ -244,7 +244,7 @@ bool fields::locate_component_point(component *c, ivec *there, } void fields_chunk::zero_metal(field_type ft) { - for (int i=0;inum_zeroes[ft] = 0; DOCMP FOR_COMPONENTS(c) if (type(c) == ft && chunks[i]->f[c][cmp]) - LOOP_OVER_VOL_OWNED(vi, c, n) + LOOP_OVER_VOL_OWNED(vi, c, n) if (IVEC_LOOP_AT_BOUNDARY) { // todo: just loop over boundaries IVEC_LOOP_ILOC(vi, here); if (on_metal_boundary(here)) @@ -265,15 +265,15 @@ void fields::find_metals() { } typedef realnum *realnum_ptr; chunks[i]->zeroes[ft] = new realnum_ptr[chunks[i]->num_zeroes[ft]]; - int num = 0; + size_t num = 0; DOCMP FOR_COMPONENTS(c) if (type(c) == ft && chunks[i]->f[c][cmp]) - LOOP_OVER_VOL_OWNED(vi, c, n) - if (IVEC_LOOP_AT_BOUNDARY) { // todo: just loop over boundaries - IVEC_LOOP_ILOC(vi, here); - if (on_metal_boundary(here)) - chunks[i]->zeroes[ft][num++] = chunks[i]->f[c][cmp] + n; - } + LOOP_OVER_VOL_OWNED(vi, c, n) + if (IVEC_LOOP_AT_BOUNDARY) { // todo: just loop over boundaries + IVEC_LOOP_ILOC(vi, here); + if (on_metal_boundary(here)) + chunks[i]->zeroes[ft][num++] = chunks[i]->f[c][cmp] + n; + } } } } @@ -285,12 +285,12 @@ bool fields_chunk::needs_W_notowned(component c) { } void fields::connect_the_chunks() { - int *nc[NUM_FIELD_TYPES][3][2]; + size_t *nc[NUM_FIELD_TYPES][3][2]; FOR_FIELD_TYPES(f) for (int ip=0;ip<3;ip++) for (int io=0;io<2;io++) { - nc[f][ip][io] = new int[num_chunks]; - for (int i=0;if[hc][0] == chunks[i]->f[bc][0]; and_to_all(B_redundant + 5*num_chunks, B_redundant, 5*num_chunks); @@ -320,7 +320,7 @@ void fields::connect_the_chunks() { array) and a non-PML region (no separate W). */ bool needs_W_notowned[NUM_FIELD_COMPONENTS]; FOR_COMPONENTS(c) needs_W_notowned[c] = false; - FOR_E_AND_H(c) for (int i=0;ineeds_W_notowned(c); FOR_E_AND_H(c) needs_W_notowned[c] = or_to_all(needs_W_notowned[c]); @@ -329,71 +329,71 @@ void fields::connect_the_chunks() { const grid_volume vi = chunks[i]->gv; FOR_FIELD_TYPES(ft) for (int ip=0;ip<3;ip++) - for (int j=0;j thephase; - if (locate_component_point(&c,&here,&thephase) - && !on_metal_boundary(here)) - for (int j=0;jis_mine() || chunks[j]->is_mine()) - && chunks[j]->gv.owns(here) - && !(is_B(corig) && is_B(c) && - B_redundant[5*i+corig-Bx] && B_redundant[5*j+c-Bx])) { - const int pair = j+i*num_chunks; - const connect_phase ip = thephase == 1.0 ? CONNECT_COPY - : (thephase == -1.0 ? CONNECT_NEGATE : CONNECT_PHASE); - { - field_type f = type(c); - const int nn = is_real?1:2; - nc[f][ip][Incoming][i] += nn; - nc[f][ip][Outgoing][j] += nn; - comm_sizes[f][ip][pair] += nn; - } - if (needs_W_notowned[corig]) { - field_type f = is_electric(corig) ? WE_stuff : WH_stuff; - const int nn = is_real?1:2; - nc[f][ip][Incoming][i] += nn; - nc[f][ip][Outgoing][j] += nn; - comm_sizes[f][ip][pair] += nn; - } - if (is_electric(corig) || is_magnetic(corig)) { - field_type f = is_electric(corig) ? PE_stuff : PH_stuff; - int ni = 0, cni = 0; - for (polarization_state *pi=chunks[i]->pol[type(corig)]; pi; - pi = pi->next) - for (polarization_state *pj=chunks[j]->pol[type(c)]; pj; - pj = pj->next) - if (*pi->s == *pj->s) { - if (pi->data && chunks[i]->is_mine()) { - ni += pi->s->num_internal_notowned_needed(corig, - pi->data); - cni += pi->s->num_cinternal_notowned_needed(corig, - pi->data); - } - else if (pj->data && chunks[j]->is_mine()) { - ni += pj->s->num_internal_notowned_needed(c, - pj->data); - cni += pj->s->num_cinternal_notowned_needed(c, - pj->data); - } - } - const int nn = (is_real?1:2) * (cni); - nc[f][ip][Incoming][i] += nn; - nc[f][ip][Outgoing][j] += nn; - comm_sizes[f][ip][pair] += nn; - const connect_phase iip = CONNECT_COPY; - nc[f][iip][Incoming][i] += ni; - nc[f][iip][Outgoing][j] += ni; - comm_sizes[f][iip][pair] += ni; - } - } // if is_mine and owns... - } // loop over j chunks + LOOP_OVER_VOL_NOTOWNED(vi, corig, n) { + IVEC_LOOP_ILOC(vi, here); + component c = corig; + // We're looking at a border element... + complex thephase; + if (locate_component_point(&c,&here,&thephase) + && !on_metal_boundary(here)) + for (int j=0;jis_mine() || chunks[j]->is_mine()) + && chunks[j]->gv.owns(here) + && !(is_B(corig) && is_B(c) && + B_redundant[5*i+corig-Bx] && B_redundant[5*j+c-Bx])) { + const int pair = j+i*num_chunks; + const connect_phase ip = thephase == 1.0 ? CONNECT_COPY + : (thephase == -1.0 ? CONNECT_NEGATE : CONNECT_PHASE); + { + field_type f = type(c); + const int nn = is_real?1:2; + nc[f][ip][Incoming][i] += nn; + nc[f][ip][Outgoing][j] += nn; + comm_sizes[f][ip][pair] += nn; + } + if (needs_W_notowned[corig]) { + field_type f = is_electric(corig) ? WE_stuff : WH_stuff; + const int nn = is_real?1:2; + nc[f][ip][Incoming][i] += nn; + nc[f][ip][Outgoing][j] += nn; + comm_sizes[f][ip][pair] += nn; + } + if (is_electric(corig) || is_magnetic(corig)) { + field_type f = is_electric(corig) ? PE_stuff : PH_stuff; + size_t ni = 0, cni = 0; + for (polarization_state *pi=chunks[i]->pol[type(corig)]; pi; + pi = pi->next) + for (polarization_state *pj=chunks[j]->pol[type(c)]; pj; + pj = pj->next) + if (*pi->s == *pj->s) { + if (pi->data && chunks[i]->is_mine()) { + ni += pi->s->num_internal_notowned_needed(corig, + pi->data); + cni += pi->s->num_cinternal_notowned_needed(corig, + pi->data); + } + else if (pj->data && chunks[j]->is_mine()) { + ni += pj->s->num_internal_notowned_needed(c, + pj->data); + cni += pj->s->num_cinternal_notowned_needed(c, + pj->data); + } + } + const size_t nn = (is_real?1:2) * (cni); + nc[f][ip][Incoming][i] += nn; + nc[f][ip][Outgoing][j] += nn; + comm_sizes[f][ip][pair] += nn; + const connect_phase iip = CONNECT_COPY; + nc[f][iip][Incoming][i] += ni; + nc[f][iip][Outgoing][j] += ni; + comm_sizes[f][iip][pair] += ni; + } + } // if is_mine and owns... + } // loop over j chunks } // LOOP_OVER_VOL_NOTOWNED } // FOR_COMPONENTS @@ -413,7 +413,7 @@ void fields::connect_the_chunks() { for i < i' */ // wh stores the current indices in the connections array(s) - int *wh[NUM_FIELD_TYPES][3][2]; + size_t *wh[NUM_FIELD_TYPES][3][2]; /* Now allocate the connection arrays... this is still slightly wasteful (by a factor of 2) because we allocate for chunks we @@ -422,142 +422,140 @@ void fields::connect_the_chunks() { FOR_FIELD_TYPES(f) for (int ip=0;ip<3;ip++) { for (int io=0;io<2;io++) { - for (int i=0;ialloc_extra_connections(field_type(f), - connect_phase(ip), - in_or_out(io), - nc[f][ip][io][i]); - delete[] nc[f][ip][io]; - wh[f][ip][io] = new int[num_chunks]; + for (int i=0;ialloc_extra_connections(field_type(f), + connect_phase(ip), + in_or_out(io), + nc[f][ip][io][i]); + delete[] nc[f][ip][io]; + wh[f][ip][io] = new size_t[num_chunks]; } for (int i=0;igv; - + // initialize wh[f][ip][Incoming][j] to sum of comm_sizes for jj < j FOR_FIELD_TYPES(f) for (int ip=0;ip<3;ip++) { - wh[f][ip][Incoming][0] = 0; - for (int j = 1; j < num_chunks; ++j) - wh[f][ip][Incoming][j] = wh[f][ip][Incoming][j-1] - + comm_sizes[f][ip][(j-1)+i*num_chunks]; + wh[f][ip][Incoming][0] = 0; + for (int j = 1; j < num_chunks; ++j) + wh[f][ip][Incoming][j] = wh[f][ip][Incoming][j-1] + + comm_sizes[f][ip][(j-1)+i*num_chunks]; } FOR_COMPONENTS(corig) if (have_component(corig)) - LOOP_OVER_VOL_NOTOWNED(vi, corig, n) { - IVEC_LOOP_ILOC(vi, here); - component c = corig; - // We're looking at a border element... - complex thephase; - if (locate_component_point(&c,&here,&thephase) - && !on_metal_boundary(here)) - for (int j=0;jis_mine() || chunks[j]->is_mine()) - && chunks[j]->gv.owns(here) - && !(is_B(corig) && is_B(c) && - B_redundant[5*i+corig-Bx] && B_redundant[5*j+c-Bx])) { - const connect_phase ip = thephase == 1.0 ? CONNECT_COPY - : (thephase == -1.0 ? CONNECT_NEGATE : CONNECT_PHASE); - const int m = chunks[j]->gv.index(c, here); - - { - field_type f = type(c); - if (ip == CONNECT_PHASE) - chunks[i]->connection_phases[f][wh[f][ip][Incoming][j]/2] = - thephase; - DOCMP { - chunks[i]->connections[f][ip][Incoming] - [wh[f][ip][Incoming][j]++] = - chunks[i]->f[corig][cmp] + n; - chunks[j]->connections[f][ip][Outgoing] - [wh[f][ip][Outgoing][j]++] = - chunks[j]->f[c][cmp] + m; - } - } - - if (needs_W_notowned[corig]) { - field_type f = is_electric(corig) ? WE_stuff : WH_stuff; - if (ip == CONNECT_PHASE) - chunks[i]->connection_phases[f][wh[f][ip][Incoming][j]/2] = - thephase; - DOCMP { - chunks[i]->connections[f][ip][Incoming] - [wh[f][ip][Incoming][j]++] = - (chunks[i]->f_w[corig][cmp] ? chunks[i]->f_w[corig][cmp] - : chunks[i]->f[corig][cmp]) + n; - chunks[j]->connections[f][ip][Outgoing] - [wh[f][ip][Outgoing][j]++] = - (chunks[j]->f_w[c][cmp] ? chunks[j]->f_w[c][cmp] - : chunks[j]->f[c][cmp]) + m; - } - } - - if (is_electric(corig) || is_magnetic(corig)) { - field_type f = is_electric(corig) ? PE_stuff : PH_stuff; - for (polarization_state *pi=chunks[i]->pol[type(corig)]; pi; - pi = pi->next) - for (polarization_state *pj=chunks[j]->pol[type(c)]; pj; - pj = pj->next) - if (*pi->s == *pj->s) { - polarization_state *po = NULL; - if (pi->data && chunks[i]->is_mine()) - po = pi; - else if (pj->data && chunks[j]->is_mine()) - po = pj; - if (po) { - const connect_phase iip = CONNECT_COPY; - const int ni = po->s-> - num_internal_notowned_needed(corig, po->data); - for (int k = 0; k < ni; ++k) { - chunks[i]->connections[f][iip][Incoming] - [wh[f][iip][Incoming][j]++] = - po->s->internal_notowned_ptr(k, corig, n, - pi->data); - chunks[j]->connections[f][iip][Outgoing] - [wh[f][iip][Outgoing][j]++] = - po->s->internal_notowned_ptr(k, c, m, - pj->data); - } - const int cni = po->s-> - num_cinternal_notowned_needed(corig, po->data); - for (int k = 0; k < cni; ++k) { - if (ip == CONNECT_PHASE) - chunks[i]->connection_phases[f] - [wh[f][ip][Incoming][j]/2] = thephase; - DOCMP { - chunks[i]->connections[f][ip][Incoming] - [wh[f][ip][Incoming][j]++] = - po->s->cinternal_notowned_ptr(k, corig,cmp, n, - pi->data); - chunks[j]->connections[f][ip][Outgoing] - [wh[f][ip][Outgoing][j]++] = - po->s->cinternal_notowned_ptr(k, c,cmp, m, - pj->data); - } - } - } - } - } // is_electric(corig) - } // if is_mine and owns... - } // loop over j chunks + LOOP_OVER_VOL_NOTOWNED(vi, corig, n) { + IVEC_LOOP_ILOC(vi, here); + component c = corig; + // We're looking at a border element... + complex thephase; + if (locate_component_point(&c,&here,&thephase) + && !on_metal_boundary(here)) + for (int j=0;jis_mine() || chunks[j]->is_mine()) + && chunks[j]->gv.owns(here) + && !(is_B(corig) && is_B(c) + && B_redundant[5*i+corig-Bx] && B_redundant[5*j+c-Bx])) { + const connect_phase ip = thephase == 1.0 ? CONNECT_COPY + : (thephase == -1.0 ? CONNECT_NEGATE : CONNECT_PHASE); + const ptrdiff_t m = chunks[j]->gv.index(c, here); + + { + field_type f = type(c); + if (ip == CONNECT_PHASE) + chunks[i]->connection_phases[f][wh[f][ip][Incoming][j]/2] = + thephase; + DOCMP { + chunks[i]->connections[f][ip][Incoming] + [wh[f][ip][Incoming][j]++] = + chunks[i]->f[corig][cmp] + n; + chunks[j]->connections[f][ip][Outgoing] + [wh[f][ip][Outgoing][j]++] = + chunks[j]->f[c][cmp] + m; + } + } + + if (needs_W_notowned[corig]) { + field_type f = is_electric(corig) ? WE_stuff : WH_stuff; + if (ip == CONNECT_PHASE) + chunks[i]->connection_phases[f][wh[f][ip][Incoming][j]/2] = + thephase; + DOCMP { + chunks[i]->connections[f][ip][Incoming] + [wh[f][ip][Incoming][j]++] = + (chunks[i]->f_w[corig][cmp] ? chunks[i]->f_w[corig][cmp] + : chunks[i]->f[corig][cmp]) + n; + chunks[j]->connections[f][ip][Outgoing] + [wh[f][ip][Outgoing][j]++] = + (chunks[j]->f_w[c][cmp] ? chunks[j]->f_w[c][cmp] + : chunks[j]->f[c][cmp]) + m; + } + } + + if (is_electric(corig) || is_magnetic(corig)) { + field_type f = is_electric(corig) ? PE_stuff : PH_stuff; + for (polarization_state *pi=chunks[i]->pol[type(corig)]; pi; + pi = pi->next) + for (polarization_state *pj=chunks[j]->pol[type(c)]; pj; + pj = pj->next) + if (*pi->s == *pj->s) { + polarization_state *po = NULL; + if (pi->data && chunks[i]->is_mine()) + po = pi; + else if (pj->data && chunks[j]->is_mine()) + po = pj; + if (po) { + const connect_phase iip = CONNECT_COPY; + const size_t ni = po->s-> + num_internal_notowned_needed(corig, po->data); + for (size_t k = 0; k < ni; ++k) { + chunks[i]->connections[f][iip][Incoming] + [wh[f][iip][Incoming][j]++] = + po->s->internal_notowned_ptr(k, corig, n, + pi->data); + chunks[j]->connections[f][iip][Outgoing] + [wh[f][iip][Outgoing][j]++] = + po->s->internal_notowned_ptr(k, c, m, + pj->data); + } + const size_t cni = po->s-> + num_cinternal_notowned_needed(corig, po->data); + for (size_t k = 0; k < cni; ++k) { + if (ip == CONNECT_PHASE) + chunks[i]->connection_phases[f] + [wh[f][ip][Incoming][j]/2] = thephase; + DOCMP { + chunks[i]->connections[f][ip][Incoming] + [wh[f][ip][Incoming][j]++] = + po->s->cinternal_notowned_ptr(k, corig,cmp, n, pi->data); + chunks[j]->connections[f][ip][Outgoing] + [wh[f][ip][Outgoing][j]++] = + po->s->cinternal_notowned_ptr(k, c,cmp, m, pj->data); + } + } + } + } + } // is_electric(corig) + } // if is_mine and owns... + } // loop over j chunks } // LOOP_OVER_VOL_NOTOWNED } // loop over i chunks FOR_FIELD_TYPES(f) for (int ip=0;ip<3;ip++) for (int io=0;io<2;io++) - delete[] wh[f][ip][io]; + delete[] wh[f][ip][io]; delete[] B_redundant; } void fields_chunk::alloc_extra_connections(field_type f, connect_phase ip, - in_or_out io, int num) { + in_or_out io, size_t num) { if (num == 0) return; // No need to go to any bother... - const int tot = num_connections[f][ip][io] + num; + const size_t tot = num_connections[f][ip][io] + num; if (io == Incoming && ip == CONNECT_PHASE) { delete[] connection_phases[f]; connection_phases[f] = new complex[tot]; diff --git a/src/cw_fields.cpp b/src/cw_fields.cpp index d3813a664..63f0f5953 100644 --- a/src/cw_fields.cpp +++ b/src/cw_fields.cpp @@ -24,7 +24,7 @@ namespace meep { static void fields_to_array(const fields &f, complex *x) { - int ix = 0; + size_t ix = 0; for (int i=0;iis_mine()) FOR_COMPONENTS(c) @@ -44,10 +44,10 @@ static void fields_to_array(const fields &f, complex *x) #undef COPY_FROM_FIELD } } - + static void array_to_fields(const complex *x, fields &f) { - int ix = 0; + size_t ix = 0; for (int i=0;iis_mine()) FOR_COMPONENTS(c) @@ -80,7 +80,7 @@ static void array_to_fields(const complex *x, fields &f) } typedef struct { - int n; + size_t n; fields *f; complex iomega; int iters; @@ -94,11 +94,11 @@ static void fieldop(const realnum *xr, realnum *yr, void *data_) array_to_fields(x, *data->f); data->f->step(); fields_to_array(*data->f, y); - int n = data->n; + size_t n = data->n; realnum dt_inv = 1.0 / data->f->dt; complex iomega = complex(real(data->iomega), imag(data->iomega)); - for (int i = 0; i < n; ++i) y[i] = (y[i] - x[i]) * dt_inv + iomega * x[i]; + for (size_t i = 0; i < n; ++i) y[i] = (y[i] - x[i]) * dt_inv + iomega * x[i]; data->iters++; } @@ -122,7 +122,7 @@ bool fields::solve_cw(double tol, int maxiters, complex frequency, step(); // step once to make sure everything is allocated - int N = 0; // size of linear system (on this processor, at least) + size_t N = 0; // size of linear system (on this processor, at least) for (int i=0;iis_mine()) { FOR_COMPONENTS(c) @@ -141,7 +141,7 @@ bool fields::solve_cw(double tol, int maxiters, complex frequency, } } - int nwork = bicgstabL(L, N, 0, 0, 0, 0, tol, &maxiters, 0, true); + size_t nwork = (size_t) bicgstabL(L, N, 0, 0, 0, 0, tol, &maxiters, 0, true); realnum *work = new realnum[nwork + 2*N]; complex *x = reinterpret_cast*>(work + nwork); complex *b = reinterpret_cast*>(work + nwork + N); @@ -160,10 +160,10 @@ bool fields::solve_cw(double tol, int maxiters, complex frequency, update_eh(E_stuff); fields_to_array(*this, b); double mdt_inv = -1.0 / dt; - for (int i = 0; i < N/2; ++i) b[i] *= mdt_inv; + for (size_t i = 0; i < N/2; ++i) b[i] *= mdt_inv; { double bmax = 0; - for (int i = 0; i < N/2; ++i) { + for (size_t i = 0; i < N/2; ++i) { double babs = abs(b[i]); if (babs > bmax) bmax = babs; } @@ -177,12 +177,12 @@ bool fields::solve_cw(double tol, int maxiters, complex frequency, * (1.0 / dt)); data.iters = 0; - int ierr = bicgstabL(L, N, reinterpret_cast(x), + int ierr = (int) bicgstabL(L, N, reinterpret_cast(x), fieldop, &data, reinterpret_cast(b), tol, &maxiters, work, quiet); if (!quiet) { - master_printf("Finished solve_cw after %d steps and %d CG iters.\n", + master_printf("Finished solve_cw after %d steps and %d CG iters.\n", data.iters, maxiters); if (ierr) master_printf(" -- CONVERGENCE FAILURE (%d) in solve_cw!\n", ierr); diff --git a/src/dft.cpp b/src/dft.cpp index cff036015..20377eb00 100644 --- a/src/dft.cpp +++ b/src/dft.cpp @@ -90,7 +90,7 @@ dft_chunk::dft_chunk(fields_chunk *fc_, LOOP_OVER_DIRECTIONS(is.dim, d) N *= (ie.in_direction(d) - is.in_direction(d)) / 2 + 1; dft = new complex[N * Nomega]; - for (int i = 0; i < N * Nomega; ++i) + for (size_t i = 0; i < N * Nomega; ++i) dft[i] = 0.0; next_in_chunk = fc->dft_chunks; @@ -229,7 +229,7 @@ void dft_chunk::update_dft(double time) { int numcmp = fc->f[c][1] ? 2 : 1; - int idx_dft = 0; + size_t idx_dft = 0; LOOP_OVER_IVECS(fc->gv, is, ie, idx) { double w; if (include_dV_and_interp_weights) { @@ -266,7 +266,7 @@ void dft_chunk::update_dft(double time) { } void dft_chunk::scale_dft(complex scale) { - for (int i = 0; i < N * Nomega; ++i) + for (size_t i = 0; i < N * Nomega; ++i) dft[i] *= scale; if (next_in_dft) next_in_dft->scale_dft(scale); @@ -275,7 +275,7 @@ void dft_chunk::scale_dft(complex scale) { void dft_chunk::operator-=(const dft_chunk &chunk) { if (c != chunk.c || N * Nomega != chunk.N * chunk.Nomega) abort("Mismatched chunks in dft_chunk::operator-="); - for (int i = 0; i < N * Nomega; ++i) + for (size_t i = 0; i < N * Nomega; ++i) dft[i] -= chunk.dft[i]; if (next_in_dft) { @@ -284,8 +284,8 @@ void dft_chunk::operator-=(const dft_chunk &chunk) { } } -static int dft_chunks_Ntotal(dft_chunk *dft_chunks, int *my_start) { - int n = 0; +static size_t dft_chunks_Ntotal(dft_chunk *dft_chunks, size_t *my_start) { + size_t n = 0; for (dft_chunk *cur = dft_chunks; cur; cur = cur->next_in_dft) n += cur->N * cur->Nomega * 2; *my_start = partial_sum_to_all(n) - n; // sum(n) for processes before this @@ -295,17 +295,19 @@ static int dft_chunks_Ntotal(dft_chunk *dft_chunks, int *my_start) { // Note: the file must have been created in parallel mode, typically via fields::open_h5file. void save_dft_hdf5(dft_chunk *dft_chunks, const char *name, h5file *file, const char *dprefix) { - int istart; - int n = dft_chunks_Ntotal(dft_chunks, &istart); + size_t istart; + size_t n = dft_chunks_Ntotal(dft_chunks, &istart); char dataname[1024]; snprintf(dataname, 1024, "%s%s" "%s_dft", dprefix ? dprefix : "", dprefix && dprefix[0] ? "_" : "", name); - file->create_data(dataname, 1, &n); + int n_ = int(n); // fixme: handle size_t in hdf5 + file->create_data(dataname, 1, &n_); + int istart_ = int(istart); for (dft_chunk *cur = dft_chunks; cur; cur = cur->next_in_dft) { - int Nchunk = cur->N * cur->Nomega * 2; - file->write_chunk(1, &istart, &Nchunk, (realnum *) cur->dft); + int Nchunk = int(cur->N * cur->Nomega * 2); + file->write_chunk(1, &istart_, &Nchunk, (realnum *) cur->dft); istart += Nchunk; } file->done_writing_chunks(); @@ -318,20 +320,21 @@ void save_dft_hdf5(dft_chunk *dft_chunks, component c, h5file *file, void load_dft_hdf5(dft_chunk *dft_chunks, const char *name, h5file *file, const char *dprefix) { - int istart; - int n = dft_chunks_Ntotal(dft_chunks, &istart); + size_t istart; + size_t n = dft_chunks_Ntotal(dft_chunks, &istart); char dataname[1024]; snprintf(dataname, 1024, "%s%s" "%s_dft", dprefix ? dprefix : "", dprefix && dprefix[0] ? "_" : "", name); int file_rank, file_dims; file->read_size(dataname, &file_rank, &file_dims, 1); - if (file_rank != 1 || file_dims != n) - abort("incorrect dataset size (%d vs. %d) in load_dft_hdf5 %s:%s", file_dims, n, file->file_name(), dataname); + if (file_rank != 1 || file_dims != int(n)) + abort("incorrect dataset size (%d vs. %zd) in load_dft_hdf5 %s:%s", file_dims, n, file->file_name(), dataname); + int istart_int = int(istart); for (dft_chunk *cur = dft_chunks; cur; cur = cur->next_in_dft) { - int Nchunk = cur->N * cur->Nomega * 2; - file->read_chunk(1, &istart, &Nchunk, (realnum *) cur->dft); + int Nchunk = int(cur->N * cur->Nomega * 2); // fixme: handle size_t in read_chunk + file->read_chunk(1, &istart_int, &Nchunk, (realnum *) cur->dft); istart += Nchunk; } } @@ -364,7 +367,7 @@ double *dft_flux::flux() { for (int i = 0; i < Nfreq; ++i) F[i] = 0; for (dft_chunk *curE = E, *curH = H; curE && curH; curE = curE->next_in_dft, curH = curH->next_in_dft) - for (int k = 0; k < curE->N; ++k) + for (size_t k = 0; k < curE->N; ++k) for (int i = 0; i < Nfreq; ++i) F[i] += real(curE->dft[k*Nfreq + i] * conj(curH->dft[k*Nfreq + i])); diff --git a/src/dft_ldos.cpp b/src/dft_ldos.cpp index 31a229673..d3d76005d 100644 --- a/src/dft_ldos.cpp +++ b/src/dft_ldos.cpp @@ -85,7 +85,7 @@ void dft_ldos::update(fields &f) // compute Jsum for LDOS normalization purposes // ...don't worry about the tiny inefficiency of recomputing this repeatedly - Jsum = 0.0; + Jsum = 0.0; for (int ic=0;icis_mine()) { for (src_vol *sv = f.chunks[ic]->sources[D_stuff]; sv; sv = sv->next) { @@ -93,15 +93,15 @@ void dft_ldos::update(fields &f) realnum *fr = f.chunks[ic]->f[c][0]; realnum *fi = f.chunks[ic]->f[c][1]; if (fr && fi) // complex E - for (int j=0; jnpts; j++) { - const int idx = sv->index[j]; + for (size_t j=0; jnpts; j++) { + const ptrdiff_t idx = sv->index[j]; const complex A = sv->A[j]; EJ += complex(fr[idx],fi[idx]) * conj(A); Jsum += abs(A); } else if (fr) { // E is purely real - for (int j=0; jnpts; j++) { - const int idx = sv->index[j]; + for (size_t j=0; jnpts; j++) { + const ptrdiff_t idx = sv->index[j]; const complex A = sv->A[j]; EJ += double(fr[idx]) * conj(A); Jsum += abs(A); @@ -113,15 +113,15 @@ void dft_ldos::update(fields &f) realnum *fr = f.chunks[ic]->f[c][0]; realnum *fi = f.chunks[ic]->f[c][1]; if (fr && fi) // complex H - for (int j=0; jnpts; j++) { - const int idx = sv->index[j]; + for (size_t j=0; jnpts; j++) { + const ptrdiff_t idx = sv->index[j]; const complex A = sv->A[j]; HJ += complex(fr[idx],fi[idx]) * conj(A); Jsum += abs(A); } else if (fr) { // H is purely real - for (int j=0; jnpts; j++) { - const int idx = sv->index[j]; + for (size_t j=0; jnpts; j++) { + const ptrdiff_t idx = sv->index[j]; const complex A = sv->A[j]; HJ += double(fr[idx]) * conj(A); Jsum += abs(A); @@ -145,7 +145,7 @@ void dft_ldos::update(fields &f) // correct for dV factors Jsum *= sqrt(f.gv.dV(f.gv.icenter(),1).computational_volume()); - + } } diff --git a/src/energy_and_flux.cpp b/src/energy_and_flux.cpp index 28deae7e0..02f4b2e6c 100644 --- a/src/energy_and_flux.cpp +++ b/src/energy_and_flux.cpp @@ -39,7 +39,7 @@ double fields::count_volume(component c) { double fields_chunk::count_volume(component c) { double vol = 0; - for (int i=0;iis_mine()) { FOR_B_COMPONENTS(c) chunks[i]->average_with_backup(c); FOR_MAGNETIC_COMPONENTS(c) chunks[i]->average_with_backup(c); @@ -177,7 +177,7 @@ void fields::synchronize_magnetic_fields() { void fields::restore_magnetic_fields() { if (!synchronized_magnetic_fields // already restored || --synchronized_magnetic_fields) // not ready to restore yet - return; + return; for (int i=0;iis_mine()) { FOR_B_COMPONENTS(c) chunks[i]->restore_component(c); @@ -208,11 +208,11 @@ double fields::flux_in_box_wrongH(direction d, const volume &where) { if (gv.dim == Dcyl) cE[0] = Er, cE[1] = Ep, cH[0] = Hp, cH[1] = Hr; else - cE[0] = Ex, cE[1] = Ey, cH[0] = Hy, cH[1] = Hx; + cE[0] = Ex, cE[1] = Ey, cH[0] = Hy, cH[1] = Hx; break; case NO_DIRECTION: abort("cannot get flux in NO_DIRECTION"); } - + long double sum = 0.0; for (int i = 0; i < 2; ++i) { component cs[2]; @@ -252,7 +252,7 @@ flux_vol *fields::add_flux_plane(const vec &p1, const vec &p2) { max|D*E|, which requires averaging discontinuous functions. Hence, except for the special case of 2d TM polarization, the computed value tends to have a large error bar if the maximum lies on a - dielectric boundary as it commonly does. + dielectric boundary as it commonly does. A better method would be to average only continuous quantities in order to compute the fields on the Centered grid, but this @@ -278,7 +278,7 @@ double fields::electric_energy_max_in_box(const volume &where) { cs[0] = Ex; cs[1] = Ey; cs[2] = Ez; cs[3+0] = Dx; cs[3+1] = Dy; cs[3+2] = Dz; } - + return max_abs(6, cs, dot3_max_integrand, 0, where) * 0.5; } @@ -307,7 +307,7 @@ static complex dot_fx_integrand(const complex *fields, double fields::electric_sqr_weighted_integral(double (*f)(const vec &), const volume &where) { double sum = 0.0; - FOR_ELECTRIC_COMPONENTS(c) + FOR_ELECTRIC_COMPONENTS(c) if (!coordinate_mismatch(gv.dim, component_direction(c))) { component cs[2]; cs[0] = cs[1] = direction_component(Ex, component_direction(c)); @@ -320,7 +320,7 @@ double fields::electric_sqr_weighted_integral(double (*f)(const vec &), double fields::electric_energy_weighted_integral(double (*f)(const vec &), const volume &where) { double sum = 0.0; - FOR_ELECTRIC_COMPONENTS(c) + FOR_ELECTRIC_COMPONENTS(c) if (!coordinate_mismatch(gv.dim, component_direction(c))) { component cs[2]; cs[0] = direction_component(Ex, component_direction(c)); @@ -331,4 +331,3 @@ double fields::electric_energy_weighted_integral(double (*f)(const vec &), } } // namespace meep - diff --git a/src/fields.cpp b/src/fields.cpp index 24d660578..b77662212 100644 --- a/src/fields.cpp +++ b/src/fields.cpp @@ -60,7 +60,7 @@ fields::fields(structure *s, double m, double beta, beta, zero_fields_near_cylorigin); FOR_FIELD_TYPES(ft) { for (int ip=0;ip<3;ip++) { - comm_sizes[ft][ip] = new int[num_chunks*num_chunks]; + comm_sizes[ft][ip] = new size_t[num_chunks*num_chunks]; for (int i=0;ialloc_f(c_alloc)) - need_to_reconnect++; + if (chunks[i]->alloc_f(c_alloc)) + need_to_reconnect++; if (need_to_reconnect) figure_out_step_plan(); if (sum_to_all(need_to_reconnect)) chunk_connections_valid = false; diff --git a/src/h5fields.cpp b/src/h5fields.cpp index 6462fcc18..6096aa729 100644 --- a/src/h5fields.cpp +++ b/src/h5fields.cpp @@ -36,7 +36,7 @@ typedef struct { ivec min_corner, max_corner; int num_chunks; realnum *buf; - int bufsz; + size_t bufsz; int rank; direction ds[3]; @@ -48,7 +48,7 @@ typedef struct { component *cS; complex *ph; complex *fields; - int *offsets; + ptrdiff_t *offsets; int ninveps; component inveps_cs[3]; direction inveps_ds[3]; @@ -77,7 +77,7 @@ static void h5_findsize_chunkloop(fields_chunk *fc, int ichnk, component cgrid, data->min_corner = min(data->min_corner, min(isS, ieS)); data->max_corner = max(data->max_corner, max(isS, ieS)); data->num_chunks++; - int bufsz = 1; + size_t bufsz = 1; LOOP_OVER_DIRECTIONS(fc->gv.dim, d) bufsz *= (ie.in_direction(d) - is.in_direction(d)) / 2 + 1; data->bufsz = max(data->bufsz, bufsz); @@ -99,20 +99,20 @@ static void h5_output_chunkloop(fields_chunk *fc, int ichnk, component cgrid, // Find output chunk dimensions and strides, etc. int start[3]={0,0,0}, count[3]={1,1,1}; - int offset[3]={0,0,0}, stride[3]={1,1,1}; + ptrdiff_t offset[3]={0,0,0}, stride[3]={1,1,1}; ivec isS = S.transform(is, sn) + shift; ivec ieS = S.transform(ie, sn) + shift; - + // figure out what yucky_directions (in LOOP_OVER_IVECS) // correspond to what directions in the transformed vectors (in output). ivec permute(zero_ivec(fc->gv.dim)); - for (int i = 0; i < 3; ++i) + for (int i = 0; i < 3; ++i) permute.set_direction(fc->gv.yucky_direction(i), i); permute = S.transform_unshifted(permute, sn); LOOP_OVER_DIRECTIONS(permute.dim, d) permute.set_direction(d, abs(permute.in_direction(d))); - + // compute the size of the chunk to output, and its strides etc. for (int i = 0; i < data->rank; ++i) { direction d = data->ds[i]; @@ -128,20 +128,20 @@ static void h5_output_chunkloop(fields_chunk *fc, int ichnk, component cgrid, offset[j] *= stride[j]; if (offset[j]) stride[j] *= -1; } - + //-----------------------------------------------------------------------// // Compute the function to output, exactly as in fields::integrate, // except that here we store its values in a buffer instead of integrating. - int *off = data->offsets; + ptrdiff_t *off = data->offsets; component *cS = data->cS; complex *fields = data->fields, *ph = data->ph; const component *iecs = data->inveps_cs; const direction *ieds = data->inveps_ds; - int ieos[6]; + ptrdiff_t ieos[6]; const component *imcs = data->invmu_cs; const direction *imds = data->invmu_ds; - int imos[6]; + ptrdiff_t imos[6]; for (int i = 0; i < data->num_fields; ++i) { cS[i] = S.transform(data->components[i], -sn); @@ -198,8 +198,8 @@ static void h5_output_chunkloop(fields_chunk *fc, int ichnk, component cgrid, } complex fun = data->fun(fields, loc, data->fun_data_); - int idx2 = ((((offset[0] + offset[1] + offset[2]) - + loop_i1 * stride[0]) + ptrdiff_t idx2 = ((((offset[0] + offset[1] + offset[2]) + + loop_i1 * stride[0]) + loop_i2 * stride[1]) + loop_i3 * stride[2]); data->buf[idx2] = data->reim ? imag(fun) : real(fun); } @@ -225,7 +225,7 @@ void fields::output_hdf5(h5file *file, const char *dataname, data.bufsz = 0; data.reim = reim; - loop_in_chunks(h5_findsize_chunkloop, (void *) &data, + loop_in_chunks(h5_findsize_chunkloop, (void *) &data, where, Centered, true, true); file->prevent_deadlock(); // can't hold a lock since *_to_all is collective @@ -265,32 +265,32 @@ void fields::output_hdf5(h5file *file, const char *dataname, bool needs_dielectric = false; for (int i = 0; i < num_fields; ++i) if (components[i] == Dielectric) { needs_dielectric = true; break; } - if (needs_dielectric) + if (needs_dielectric) FOR_ELECTRIC_COMPONENTS(c) if (gv.has_field(c)) { if (data.ninveps == 3) abort("more than 3 field components??"); data.inveps_cs[data.ninveps] = c; data.inveps_ds[data.ninveps] = component_direction(c); ++data.ninveps; } - + /* compute inverse-mu directions for computing Permeability fields */ data.ninvmu = 0; bool needs_permeability = false; for (int i = 0; i < num_fields; ++i) if (components[i] == Permeability) { needs_permeability = true; break; } - if (needs_permeability) + if (needs_permeability) FOR_MAGNETIC_COMPONENTS(c) if (gv.has_field(c)) { if (data.ninvmu == 3) abort("more than 3 field components??"); data.invmu_cs[data.ninvmu] = c; data.invmu_ds[data.ninvmu] = component_direction(c); ++data.ninvmu; } - - data.offsets = new int[2 * num_fields]; + + data.offsets = new ptrdiff_t[2 * num_fields]; for (int i = 0; i < 2 * num_fields; ++i) data.offsets[i] = 0; - - loop_in_chunks(h5_output_chunkloop, (void *) &data, + + loop_in_chunks(h5_output_chunkloop, (void *) &data, where, Centered, true, true); delete[] data.offsets; @@ -319,17 +319,17 @@ void fields::output_hdf5(const char *dataname, file = open_h5file(dataname, h5file::WRITE, prefix, true); if (real_part_only) { - output_hdf5(file, dataname, num_fields, components, fun, fun_data_, + output_hdf5(file, dataname, num_fields, components, fun, fun_data_, 0, where, append_data, single_precision); } else { int len = strlen(dataname) + 5; char *dataname2 = new char[len]; snprintf(dataname2, len, "%s%s", dataname, ".r"); - output_hdf5(file, dataname2, num_fields, components, fun, fun_data_, + output_hdf5(file, dataname2, num_fields, components, fun, fun_data_, 0, where, append_data, single_precision); snprintf(dataname2, len, "%s%s", dataname, ".i"); - output_hdf5(file, dataname2, num_fields, components, fun, fun_data_, + output_hdf5(file, dataname2, num_fields, components, fun, fun_data_, 1, where, append_data, single_precision); delete[] dataname2; } @@ -389,7 +389,7 @@ void fields::output_hdf5(component c, bool single_precision, const char *prefix) { if (is_derived(int(c))) { - output_hdf5(derived_component(c), + output_hdf5(derived_component(c), where, file, append_data, single_precision, prefix); return; } @@ -424,7 +424,7 @@ void fields::output_hdf5(derived_component c, bool single_precision, const char *prefix) { if (!is_derived(int(c))) { - output_hdf5(component(c), + output_hdf5(component(c), where, file, append_data, single_precision, prefix); return; } @@ -441,7 +441,7 @@ void fields::output_hdf5(derived_component c, /***************************************************************************/ -const char *fields::h5file_name(const char *name, +const char *fields::h5file_name(const char *name, const char *prefix, bool timestamp) { const int buflen = 1024; @@ -453,7 +453,7 @@ const char *fields::h5file_name(const char *name, snprintf(time_step_string, 32, "-%09.2f", time()); else snprintf(time_step_string, 32, "-%09d", t); - } + } snprintf(filename, buflen, "%s/" "%s%s" "%s" "%s" ".h5", outdir, @@ -462,7 +462,7 @@ const char *fields::h5file_name(const char *name, return filename; } -h5file *fields::open_h5file(const char *name, h5file::access_mode mode, +h5file *fields::open_h5file(const char *name, h5file::access_mode mode, const char *prefix, bool timestamp) { const char *filename = h5file_name(name, prefix, timestamp); diff --git a/src/h5file.cpp b/src/h5file.cpp index 5f7b89b9d..e5083c9d4 100644 --- a/src/h5file.cpp +++ b/src/h5file.cpp @@ -369,7 +369,7 @@ realnum *h5file::read(const char *dataname, if (!parallel) { *rank = broadcast(0, *rank); broadcast(0, dims, *rank); - int N = 1; + size_t N = 1; for (int i = 0; i < *rank; ++i) N *= dims[i]; if (!am_master()) @@ -672,7 +672,7 @@ void h5file::write_chunk(int rank, start_t *start = new start_t[rank1 + append_data]; hsize_t *count = new hsize_t[rank1 + append_data]; - int count_prod = 1; + size_t count_prod = 1; for (i = 0; i < rank; ++i) { start[i] = chunk_start[i]; count[i] = chunk_dims[i]; @@ -806,7 +806,7 @@ void h5file::read_chunk(int rank, start_t *start = new start_t[rank1]; hsize_t *count = new hsize_t[rank1]; - int count_prod = 1; + size_t count_prod = 1; for (int i = 0; i < rank; ++i) { start[i] = chunk_start[i]; count[i] = chunk_dims[i]; diff --git a/src/integrate.cpp b/src/integrate.cpp index 5c6c219e8..860c0b238 100644 --- a/src/integrate.cpp +++ b/src/integrate.cpp @@ -30,7 +30,7 @@ struct integrate_data { component *cS; complex *ph; complex *fvals; - int *offsets; + ptrdiff_t *offsets; int ninveps; component inveps_cs[3]; direction inveps_ds[3]; @@ -53,17 +53,17 @@ static void integrate_chunkloop(fields_chunk *fc, int ichunk, component cgrid, { (void) ichunk; // unused integrate_data *data = (integrate_data *) data_; - int *off = data->offsets; + ptrdiff_t *off = data->offsets; component *cS = data->cS; complex *fvals = data->fvals, *ph = data->ph; complex sum = 0.0; double maxabs = 0; const component *iecs = data->inveps_cs; const direction *ieds = data->inveps_ds; - int ieos[6]; + ptrdiff_t ieos[6]; const component *imcs = data->invmu_cs; const direction *imds = data->invmu_ds; - int imos[6]; + ptrdiff_t imos[6]; for (int i = 0; i < data->num_fvals; ++i) { cS[i] = S.transform(data->components[i], -sn); @@ -120,7 +120,7 @@ static void integrate_chunkloop(fields_chunk *fc, int ichunk, component cgrid, } } - complex integrand = + complex integrand = data->integrand(fvals, loc, data->integrand_data_); maxabs = max(maxabs, abs(integrand)); sum += integrand * IVEC_LOOP_WEIGHT(s0, s1, e0, e1, dV0 + dV1 * loop_i2); @@ -164,28 +164,28 @@ complex fields::integrate(int num_fvals, const component *components, bool needs_dielectric = false; for (int i = 0; i < num_fvals; ++i) if (components[i] == Dielectric) { needs_dielectric = true; break; } - if (needs_dielectric) + if (needs_dielectric) FOR_ELECTRIC_COMPONENTS(c) if (gv.has_field(c)) { if (data.ninveps == 3) abort("more than 3 field components??"); data.inveps_cs[data.ninveps] = c; data.inveps_ds[data.ninveps] = component_direction(c); ++data.ninveps; } - + /* compute inverse-mu directions for computing Permeability fields */ data.ninvmu = 0; bool needs_permeability = false; for (int i = 0; i < num_fvals; ++i) if (components[i] == Permeability) { needs_permeability = true; break; } - if (needs_permeability) + if (needs_permeability) FOR_MAGNETIC_COMPONENTS(c) if (gv.has_field(c)) { if (data.ninvmu == 3) abort("more than 3 field components??"); data.invmu_cs[data.ninvmu] = c; data.invmu_ds[data.ninvmu] = component_direction(c); ++data.ninvmu; } - - data.offsets = new int[2 * num_fvals]; + + data.offsets = new ptrdiff_t[2 * num_fvals]; for (int i = 0; i < 2 * num_fvals; ++i) data.offsets[i] = 0; @@ -203,8 +203,8 @@ complex fields::integrate(int num_fvals, const component *components, return complex(real(data.sum), imag(data.sum)); } -typedef struct { - field_rfunction integrand; void *integrand_data; +typedef struct { + field_rfunction integrand; void *integrand_data; } rfun_wrap_data; static complex rfun_wrap(const complex *fvals, const vec &loc, void *data_) { diff --git a/src/integrate2.cpp b/src/integrate2.cpp index ccf1171b5..571f39f70 100644 --- a/src/integrate2.cpp +++ b/src/integrate2.cpp @@ -36,7 +36,7 @@ struct integrate_data { component *cS; complex *ph; complex *fvals; - int *offsets; + ptrdiff_t *offsets; int ninveps; component inveps_cs[3]; direction inveps_ds[3]; @@ -59,19 +59,19 @@ static void integrate_chunkloop(fields_chunk *fc, int ichunk, component cgrid, { (void) ichunk; // unused integrate_data *data = (integrate_data *) data_; - int *off = data->offsets; + ptrdiff_t *off = data->offsets; component *cS = data->cS; complex *fvals = data->fvals, *ph = data->ph; complex sum = 0.0; double maxabs = 0; const component *iecs = data->inveps_cs; const direction *ieds = data->inveps_ds; - int ieos[6]; + ptrdiff_t ieos[6]; const component *imcs = data->invmu_cs; const direction *imds = data->invmu_ds; int num_fvals1 = data->num_fvals; int num_fvals2 = data->num_fvals2; - int imos[6]; + ptrdiff_t imos[6]; const fields_chunk *fc2 = data->fields2->chunks[ichunk]; for (int i = 0; i < num_fvals1; ++i) { @@ -176,7 +176,7 @@ static void integrate_chunkloop(fields_chunk *fc, int ichunk, component cgrid, } } - complex integrand = + complex integrand = data->integrand(fvals, loc, data->integrand_data_); maxabs = max(maxabs, abs(integrand)); sum += integrand * IVEC_LOOP_WEIGHT(s0, s1, e0, e1, dV0 + dV1 * loop_i2); @@ -189,7 +189,7 @@ static void integrate_chunkloop(fields_chunk *fc, int ichunk, component cgrid, complex fields::integrate2(const fields &fields2, int num_fvals1, const component *components1, - int num_fvals2, + int num_fvals2, const component *components2, field_function integrand, void *integrand_data_, @@ -203,9 +203,9 @@ complex fields::integrate2(const fields &fields2, return integrate(num_fvals1, components1, integrand, integrand_data_, where, maxabs); if (num_fvals1 == 0) - return const_cast(fields2).integrate(num_fvals2, components2, + return const_cast(fields2).integrate(num_fvals2, components2, integrand, integrand_data_, where, maxabs); - + // check if components are all on the same grid: bool same_grid = true; for (int i = 1; i < num_fvals1; ++i) @@ -245,14 +245,14 @@ complex fields::integrate2(const fields &fields2, if (components1[i] == Dielectric) { needs_dielectric = true; break; } if (!needs_dielectric) for (int i = 0; i < num_fvals2; ++i) if (components2[i] == Dielectric) { needs_dielectric = true; break; } - if (needs_dielectric) + if (needs_dielectric) FOR_ELECTRIC_COMPONENTS(c) if (gv.has_field(c)) { if (data.ninveps == 3) abort("more than 3 field components??"); data.inveps_cs[data.ninveps] = c; data.inveps_ds[data.ninveps] = component_direction(c); ++data.ninveps; } - + /* compute inverse-mu directions for computing Permeability fields */ data.ninvmu = 0; bool needs_permeability = false; @@ -260,15 +260,15 @@ complex fields::integrate2(const fields &fields2, if (components1[i] == Permeability) { needs_permeability = true; break; } if (!needs_permeability) for (int i = 0; i < num_fvals2; ++i) if (components2[i] == Permeability) { needs_permeability = true; break; } - if (needs_permeability) + if (needs_permeability) FOR_MAGNETIC_COMPONENTS(c) if (gv.has_field(c)) { if (data.ninvmu == 3) abort("more than 3 field components??"); data.invmu_cs[data.ninvmu] = c; data.invmu_ds[data.ninvmu] = component_direction(c); ++data.ninvmu; } - - data.offsets = new int[2 * (num_fvals1 + num_fvals2)]; + + data.offsets = new ptrdiff_t[2 * (num_fvals1 + num_fvals2)]; for (int i = 0; i < 2 * (num_fvals1 + num_fvals2); ++i) data.offsets[i] = 0; @@ -286,8 +286,8 @@ complex fields::integrate2(const fields &fields2, return complex(real(data.sum), imag(data.sum)); } -typedef struct { - field_rfunction integrand; void *integrand_data; +typedef struct { + field_rfunction integrand; void *integrand_data; } rfun_wrap_data; static complex rfun_wrap(const complex *fields, const vec &loc, void *data_) { @@ -306,7 +306,7 @@ double fields::integrate2(const fields &fields2, rfun_wrap_data data; data.integrand = integrand; data.integrand_data = integrand_data_; - return real(integrate2(fields2, num_fvals1, components1, + return real(integrate2(fields2, num_fvals1, components1, num_fvals2, components2, rfun_wrap, &data, where, maxabs)); diff --git a/src/meep.hpp b/src/meep.hpp index d3ce73be6..2b1fd7811 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -18,6 +18,7 @@ #define MEEP_H #include +#include #include #include "meep/vec.hpp" @@ -151,7 +152,7 @@ class susceptibility { return 0; } susceptibility *next; - int ntot; + size_t ntot; realnum *sigma[NUM_FIELD_COMPONENTS][5]; /* trivial_sigma[c][d] is true only if *none* of the processes has a @@ -649,7 +650,7 @@ class structure { void add_to_effort_volumes(const grid_volume &new_effort_volume, double extra_effort); void choose_chunkdivision(const grid_volume &gv, int num_chunks, - const boundary_region &br, const symmetry &s); + const boundary_region &br, const symmetry &s); void check_chunks(); void changing_chunks(); }; @@ -843,7 +844,7 @@ class dft_chunk { component c; // component to DFT (possibly transformed by symmetry) - int N; // number of spatial points (on epsilon grid) + size_t N; // number of spatial points (on epsilon grid) std::complex *dft; // N x Nomega array of DFT values. class dft_chunk *next_in_chunk; // per-fields_chunk list of DFT chunks @@ -872,7 +873,7 @@ class dft_chunk { // cache of exp(iwt) * scale, of length Nomega std::complex *dft_phase; - int avg1, avg2; // index offsets for average to get epsilon grid + ptrdiff_t avg1, avg2; // index offsets for average to get epsilon grid int vc; // component descriptor from the original volume }; @@ -1049,9 +1050,9 @@ class fields_chunk { dft_chunk *dft_chunks; realnum **zeroes[NUM_FIELD_TYPES]; // Holds pointers to metal points. - int num_zeroes[NUM_FIELD_TYPES]; + size_t num_zeroes[NUM_FIELD_TYPES]; realnum **connections[NUM_FIELD_TYPES][CONNECT_COPY+1][Outgoing+1]; - int num_connections[NUM_FIELD_TYPES][CONNECT_COPY+1][Outgoing+1]; + size_t num_connections[NUM_FIELD_TYPES][CONNECT_COPY+1][Outgoing+1]; std::complex *connection_phases[NUM_FIELD_TYPES]; int npol[NUM_FIELD_TYPES]; // only E_stuff and H_stuff are used @@ -1159,7 +1160,7 @@ class fields_chunk { void initialize_with_nth_te(int n, double kz); void initialize_with_nth_tm(int n, double kz); // boundaries.cpp - void alloc_extra_connections(field_type, connect_phase, in_or_out, int); + void alloc_extra_connections(field_type, connect_phase, in_or_out, size_t); // dft.cpp void update_dfts(double timeE, double timeH); @@ -1200,9 +1201,9 @@ class fields { realnum **comm_blocks[NUM_FIELD_TYPES]; // This is the same size as each comm_blocks array, and store the sizes // of the comm blocks themselves for each connection-phase type - int *comm_sizes[NUM_FIELD_TYPES][CONNECT_COPY+1]; - int comm_size_tot(int f, int pair) const { - int sum = 0; for (int ip=0; ip<3; ++ip) sum+=comm_sizes[f][ip][pair]; + size_t *comm_sizes[NUM_FIELD_TYPES][CONNECT_COPY+1]; + size_t comm_size_tot(int f, int pair) const { + size_t sum = 0; for (int ip=0; ip<3; ++ip) sum+=comm_sizes[f][ip][pair]; return sum; } @@ -1377,8 +1378,8 @@ class fields { void initialize_field(component, std::complex f(const vec &)); void initialize_with_nth_te(int n); void initialize_with_nth_tm(int n); - void initialize_with_n_te(int n); - void initialize_with_n_tm(int n); + void initialize_with_n_te(int ntot); + void initialize_with_n_tm(int ntot); int phase_in_material(const structure *s, double time); int is_phasing(); diff --git a/src/meep/mympi.hpp b/src/meep/mympi.hpp index 2924b57a6..9cc9f79fe 100644 --- a/src/meep/mympi.hpp +++ b/src/meep/mympi.hpp @@ -19,6 +19,7 @@ #define MEEP_MY_MPI_H #include +#include namespace meep { @@ -79,6 +80,8 @@ std::complex sum_to_all(std::complex in); std::complex sum_to_all(std::complex in); int sum_to_all(int); int partial_sum_to_all(int in); +size_t sum_to_all(size_t); +size_t partial_sum_to_all(size_t in); bool or_to_all(bool in); void or_to_all(const int *in, int *out, int size); bool and_to_all(bool in); diff --git a/src/meep/vec.hpp b/src/meep/vec.hpp index 1e1e91779..3194eaae7 100644 --- a/src/meep/vec.hpp +++ b/src/meep/vec.hpp @@ -20,6 +20,7 @@ #define MEEP_VEC_H #include +#include namespace meep { @@ -119,7 +120,7 @@ component first_field_component(field_type ft); // loop over indices idx from is to ie (inclusive) in gv #define LOOP_OVER_IVECS(gv, is, ie, idx) \ - for (int loop_is1 = (is).yucky_val(0), \ + for (ptrdiff_t loop_is1 = (is).yucky_val(0), \ loop_is2 = (is).yucky_val(1), \ loop_is3 = (is).yucky_val(2), \ loop_n1 = ((ie).yucky_val(0) - loop_is1) / 2 + 1, \ @@ -136,7 +137,7 @@ component first_field_component(field_type ft); + (is - (gv).little_corner()).yucky_val(2) / 2 * loop_s3,\ loop_i1 = 0; loop_i1 < loop_n1; loop_i1++) \ for (int loop_i2 = 0; loop_i2 < loop_n2; loop_i2++) \ - for (int idx = idx0 + loop_i1*loop_s1 + loop_i2*loop_s2, \ + for (ptrdiff_t idx = idx0 + loop_i1*loop_s1 + loop_i2*loop_s2, \ loop_i3 = 0; loop_i3 < loop_n3; loop_i3++, idx+=loop_s3) #define LOOP_OVER_VOL(gv, c, idx) \ @@ -172,7 +173,7 @@ component first_field_component(field_type ft); // loop over indices idx from is to ie (inclusive) in gv #define S1LOOP_OVER_IVECS(gv, is, ie, idx) \ - for (int loop_is1 = (is).yucky_val(0), \ + for (ptrdiff_t loop_is1 = (is).yucky_val(0), \ loop_is2 = (is).yucky_val(1), \ loop_is3 = (is).yucky_val(2), \ loop_n1 = ((ie).yucky_val(0) - loop_is1) / 2 + 1, \ @@ -188,7 +189,7 @@ component first_field_component(field_type ft); + (is - (gv).little_corner()).yucky_val(2) / 2 * loop_s3,\ loop_i1 = 0; loop_i1 < loop_n1; loop_i1++) \ for (int loop_i2 = 0; loop_i2 < loop_n2; loop_i2++) _Pragma("ivdep") \ - for (int idx = idx0 + loop_i1*loop_s1 + loop_i2*loop_s2, \ + for (ptrdiff_t idx = idx0 + loop_i1*loop_s1 + loop_i2*loop_s2, \ loop_i3 = 0; loop_i3 < loop_n3; loop_i3++, idx++) #define S1LOOP_OVER_VOL(gv, c, idx) \ @@ -247,7 +248,7 @@ inline bool has_field_direction(ndim dim, direction d) { return false; } -// true if d is polar while dim is cartesian, or vice versa +// true if d is polar while dim is cartesian, or vice versa inline bool coordinate_mismatch(ndim dim, direction d) { return (d != NO_DIRECTION && ((dim >= D1 && dim <= D3 && d != X && d != Y && d != Z) || @@ -280,7 +281,7 @@ const char *direction_name(direction); const char *dimension_name(ndim); inline int component_index(component c) { - switch (c) { + switch (c) { case Ex: case Hx: case Dx: case Bx: return 0; case Ey: case Hy: case Dy: case By: return 1; case Ez: case Hz: case Dz: case Bz: return 2; @@ -295,7 +296,7 @@ inline int component_index(component c) { direction component_direction(int c); int direction_component(int c, direction d); inline direction component_direction(component c) { - switch (c) { + switch (c) { case Ex: case Hx: case Dx: case Bx: return X; case Ey: case Hy: case Dy: case By: return Y; case Ez: case Hz: case Dz: case Bz: return Z; @@ -609,10 +610,10 @@ class ivec { int in_direction(direction d) const { return t[d]; }; void set_direction(direction d, int val) { t[d] = val; }; - ivec round_up_to_even(void) const { + ivec round_up_to_even(void) const { ivec result(dim); - LOOP_OVER_DIRECTIONS(dim, d) - result.t[d] = t[d] + (t[d] >= 0 ? t[d] : -t[d]) % 2; + LOOP_OVER_DIRECTIONS(dim, d) + result.t[d] = t[d] + (t[d] >= 0 ? t[d] : -t[d]) % 2; return result; } @@ -659,7 +660,7 @@ class volume { double in_direction_min(direction d) const { return min_corner.in_direction(d); }; double in_direction_max(direction d) const { return max_corner.in_direction(d); }; double in_direction(direction d) const { return in_direction_max(d) - in_direction_min(d); } - double computational_volume() const; + double computational_volume() const; double integral_volume() const; double full_volume() const; vec center() const { return (min_corner + max_corner) * 0.5; } @@ -722,7 +723,7 @@ class grid_volume { double a, inva /* = 1/a */; void print() const; - int stride(direction d) const { return the_stride[d]; }; + ptrdiff_t stride(direction d) const { return the_stride[d]; }; int num_direction(direction d) const { return num[((int) d) % 3]; }; @@ -747,16 +748,16 @@ class grid_volume { vec dy() const; vec dz() const; - int ntot() const { return the_ntot; } - int nowned_min() const { int n = 1; LOOP_OVER_DIRECTIONS(dim,d) n *= num_direction(d); return n; } - int nowned(component c) const; + size_t ntot() const { return the_ntot; } + size_t nowned_min() const { size_t n = 1; LOOP_OVER_DIRECTIONS(dim,d) n *= (size_t)(num_direction(d)); return n; } + size_t nowned(component c) const; vec operator[](const ivec &p) const { return p*(0.5*inva); }; - int index(component, const ivec &) const; + ptrdiff_t index(component, const ivec &) const; ivec round_vec(const vec &) const; - void interpolate(component, const vec &, int indices[8], double weights[8]) const; + void interpolate(component, const vec &, ptrdiff_t indices[8], double weights[8]) const; void interpolate(component, const vec &, ivec locs[8], double weights[8]) const; - volume dV(component c, int index) const; + volume dV(component c, ptrdiff_t index) const; volume dV(const ivec &, double diameter = 1.0) const; bool intersect_with(const grid_volume &vol_in, grid_volume *intersection = NULL, grid_volume *others = NULL, int *num_others = NULL) const; double rmin() const; @@ -769,21 +770,21 @@ class grid_volume { double zmax() const; vec center() const; ivec icenter() const; - vec loc(component, int index) const; - vec loc_at_resolution(int index, double res) const; - int ntot_at_resolution(double res) const; - ivec iloc(component, int index) const; + vec loc(component, ptrdiff_t index) const; + vec loc_at_resolution(ptrdiff_t index, double res) const; + size_t ntot_at_resolution(double res) const; + ivec iloc(component, ptrdiff_t index) const; - int yee_index(component c) const { - int idx = 0; + ptrdiff_t yee_index(component c) const { + ptrdiff_t idx = 0; LOOP_OVER_DIRECTIONS(dim,d) idx += (1-iyee_shift(c).in_direction(d))*stride(d); return idx; } vec yee_shift(component) const; component eps_component() const; - void yee2cent_offsets(component c, int &offset1, int &offset2) const; - void cent2yee_offsets(component c, int &offset1, int &offset2) const; + void yee2cent_offsets(component c, ptrdiff_t &offset1, ptrdiff_t &offset2) const; + void cent2yee_offsets(component c, ptrdiff_t &offset1, ptrdiff_t &offset2) const; double boundary_location(boundary_side, direction) const; ivec big_corner() const; @@ -813,7 +814,7 @@ class grid_volume { friend grid_volume vol2d(double xsize, double ysize, double a); friend grid_volume vol3d(double xsize, double ysize, double zsize, double a); - grid_volume split(int num, int which) const; + grid_volume split(size_t num, int which) const; grid_volume split_by_effort(int num, int which, int Ngv = 0, const grid_volume *v = NULL, double *effort = NULL) const; grid_volume split_at_fraction(bool want_high, int numer) const; grid_volume halve(direction d) const; @@ -828,7 +829,7 @@ class grid_volume { ivec iyee_shift(component c) const { ivec out = zero_ivec(dim); LOOP_OVER_DIRECTIONS(dim,d) - if (c == Dielectric || c == Permeability || + if (c == Dielectric || c == Permeability || ((is_electric(c) || is_D(c)) && d == component_direction(c)) || ((is_magnetic(c) || is_B(c)) && d != component_direction(c))) out.set_direction(d,1); @@ -858,8 +859,8 @@ class grid_volume { void set_strides(); void num_changed() { update_ntot(); set_strides(); } int num[3]; - int the_stride[5]; - int the_ntot; + ptrdiff_t the_stride[5]; + size_t the_ntot; }; class volume_list; @@ -926,7 +927,7 @@ class volume_list { p = p->next; } } - + volume v; int c; // component or derived component associated with v (e.g. for flux) std::complex weight; diff --git a/src/meep_internals.hpp b/src/meep_internals.hpp index 87c514397..5053621a3 100644 --- a/src/meep_internals.hpp +++ b/src/meep_internals.hpp @@ -25,6 +25,7 @@ namespace meep { inline double max(double a, double b) { return (a > b) ? a : b; } inline double min(double a, double b) { return (a < b) ? a : b; } inline int max(int a, int b) { return (a > b) ? a : b; } +inline size_t max(size_t a, size_t b) { return (a > b) ? a : b; } inline int min(int a, int b) { return (a < b) ? a : b; } static inline int abs(int a) { return a < 0 ? -a : a; } static inline double abs(double a) { return fabs(a); } @@ -46,18 +47,18 @@ inline int rmin_bulk(int m) { class src_vol { public: - src_vol(component cc, src_time *st, int n, int *ind, std::complex *amps); + src_vol(component cc, src_time *st, size_t n, ptrdiff_t *ind, std::complex *amps); src_vol(const src_vol &sv); ~src_vol() { delete next; delete[] index; delete[] A;} src_time *t; - int *index; // list of locations of sources in grid (indices) - int npts; // number of points in list + ptrdiff_t *index; // list of locations of sources in grid (indices) + size_t npts; // number of points in list component c; // field component the source applies to std::complex *A; // list of amplitudes - std::complex dipole(int j) { return A[j] * t->dipole(); } - std::complex current(int j) { return A[j] * t->current(); } + std::complex dipole(size_t j) { return A[j] * t->dipole(); } + std::complex current(size_t j) { return A[j] * t->current(); } void update(double time, double dt) { t->update(time, dt); } bool operator==(const src_vol &sv) const { @@ -78,7 +79,7 @@ symmetry r_to_minus_r_symmetry(int m); // functions in step_generic.cpp: void step_curl(realnum *f, component c, const realnum *g1, const realnum *g2, - int s1, int s2, // strides for g1/g2 shift + ptrdiff_t s1, ptrdiff_t s2, // strides for g1/g2 shift const grid_volume &gv, double dtdx, direction dsig, const double *sig, const double *kap, const double *siginv, realnum *fu, direction dsigu, const double *sigu, const double *kapu, const double *siginvu, @@ -88,7 +89,7 @@ void step_curl(realnum *f, component c, const realnum *g1, const realnum *g2, void step_update_EDHB(realnum *f, component fc, const grid_volume &gv, const realnum *g, const realnum *g1, const realnum *g2, const realnum *u, const realnum *u1, const realnum *u2, - int s, int s1, int s2, + ptrdiff_t s, ptrdiff_t s1, ptrdiff_t s2, const realnum *chi2, const realnum *chi3, realnum *fw, direction dsigw, const double *sigw, const double *kapw); @@ -101,7 +102,7 @@ void step_beta(realnum *f, component c, const realnum *g, // functions in step_generic_stride1.cpp, generated from step_generic.cpp: void step_curl_stride1(realnum *f, component c, const realnum *g1, const realnum *g2, - int s1, int s2, // strides for g1/g2 shift + ptrdiff_t s1, ptrdiff_t s2, // strides for g1/g2 shift const grid_volume &gv, double dtdx, direction dsig, const double *sig, const double *kap, const double *siginv, realnum *fu, direction dsigu, const double *sigu, const double *kapu, const double *siginvu, @@ -111,7 +112,7 @@ void step_curl_stride1(realnum *f, component c, const realnum *g1, const realnum void step_update_EDHB_stride1(realnum *f, component fc, const grid_volume &gv, const realnum *g, const realnum *g1, const realnum *g2, const realnum *u, const realnum *u1, const realnum *u2, - int s, int s1, int s2, + ptrdiff_t s, ptrdiff_t s1, ptrdiff_t s2, const realnum *chi2, const realnum *chi3, realnum *fw, direction dsigw, const double *sigw, const double *kapw); diff --git a/src/monitor.cpp b/src/monitor.cpp index 5f4fc81e5..ed7ea200e 100644 --- a/src/monitor.cpp +++ b/src/monitor.cpp @@ -46,7 +46,7 @@ monitor_point::~monitor_point() { if (next) delete next; } -inline complex getcm(const realnum * const f[2], int i) { +inline complex getcm(const realnum * const f[2], size_t i) { return complex(f[0][i],f[1][i]); } @@ -182,7 +182,7 @@ complex fields_chunk::get_field(component c, const vec &loc) const { complex res = 0.0; for (int i = 0; i < 8 && w[i] != 0.0; ++i) { if (!gv.contains(ilocs[i])) - abort("invalid loc in chunk get_field, weight = %g", w[i]); + abort("invalid loc in chunk get_field, weight = %g", w[i]); if (f[c][0] && f[c][1]) res += getcm(f[c], gv.index(c, ilocs[i])) * w[i]; else if (f[c][0]) res += f[c][0][gv.index(c,ilocs[i])] * w[i]; } @@ -199,11 +199,11 @@ double fields::get_chi1inv(component c, direction d, for (int sn=0;sngv.contains(S.transform(iloc,sn))) { - signed_direction ds = S.transform(d,sn); + signed_direction ds = S.transform(d,sn); return chunks[i]->get_chi1inv(S.transform(c,sn), ds.d, - S.transform(iloc,sn)) - * (ds.flipped ^ S.transform(component_direction(c),sn).flipped - ? -1 : 1); + S.transform(iloc,sn)) + * (ds.flipped ^ S.transform(component_direction(c),sn).flipped + ? -1 : 1); } return 0.0; } @@ -271,7 +271,7 @@ double structure::get_chi1inv(component c, direction d, double structure_chunk::get_chi1inv(component c, direction d, const ivec &iloc) const { double res = 0.0; - if (is_mine()) res = chi1inv[c][d] ? chi1inv[c][d][gv.index(c, iloc)] + if (is_mine()) res = chi1inv[c][d] ? chi1inv[c][d][gv.index(c, iloc)] : (d == component_direction(c) ? 1.0 : 0); return broadcast(n_proc(), res); } @@ -323,7 +323,7 @@ monitor_point *fields::get_new_point(const vec &loc, monitor_point *the_list) co complex monitor_point::get_component(component w) { return f[w]; } - + double monitor_point::poynting_in_direction(direction d) { direction d1 = cycle_direction(loc.dim, d, 1); direction d2 = cycle_direction(loc.dim, d, 2); diff --git a/src/multilevel-atom.cpp b/src/multilevel-atom.cpp index b994ca356..9326849c4 100644 --- a/src/multilevel-atom.cpp +++ b/src/multilevel-atom.cpp @@ -79,7 +79,7 @@ multilevel_susceptibility::~multilevel_susceptibility() { #if MEEP_SINGLE # define DGETRF F77_FUNC(sgetrf,SGETRF) # define DGETRI F77_FUNC(sgetri,SGETRI) -#else +#else # define DGETRF F77_FUNC(dgetrf,DGETRF) # define DGETRI F77_FUNC(dgetri,DGETRI) #endif @@ -95,7 +95,7 @@ static bool invert(realnum *S, int p) DGETRF(&p, &p, S, &p, ipiv, &info); if (info < 0) abort("invalid argument %d in DGETRF", -info); if (info > 0) { delete[] ipiv; return false; } // singular - + int lwork = -1; realnum work1; DGETRI(&p, S, &p, ipiv, &work1, &lwork, &info); @@ -117,7 +117,7 @@ static bool invert(realnum *S, int p) typedef realnum *realnumP; typedef struct { size_t sz_data; - int ntot; + size_t ntot; realnum *GammaInv; // inv(1 + Gamma * dt / 2) realnumP *P[NUM_FIELD_COMPONENTS][2]; // P[c][cmp][transition][i] realnumP *P_prev[NUM_FIELD_COMPONENTS][2]; @@ -129,7 +129,7 @@ typedef struct { void *multilevel_susceptibility::new_internal_data( realnum *W[NUM_FIELD_COMPONENTS][2], const grid_volume &gv) const { - int num = 0; // number of P components + size_t num = 0; // number of P components FOR_COMPONENTS(c) DOCMP2 if (needs_P(c, cmp, W)) num += 2 * gv.ntot(); size_t sz = sizeof(multilevel_data) + sizeof(realnum) * (L*L + L + gv.ntot()*L + num*T - 1); @@ -146,7 +146,7 @@ void multilevel_susceptibility::init_internal_data( size_t sz_data = d->sz_data; memset(d, 0, sz_data); d->sz_data = sz_data; - int ntot = d->ntot = gv.ntot(); + size_t ntot = d->ntot = gv.ntot(); /* d->data points to a big block of data that holds GammaInv, P, P_prev, Ntmp, and N. We also initialize a bunch of convenience @@ -158,7 +158,7 @@ void multilevel_susceptibility::init_internal_data( for (int i = 0; i < L; ++i) for (int j = 0; j < L; ++j) d->GammaInv[i*L + j] = (i == j) + Gamma[i*L + j] * dt/2; - if (!invert(d->GammaInv, L)) + if (!invert(d->GammaInv, L)) abort("multilevel_susceptibility: I + Gamma*dt/2 matrix singular"); realnum *P = d->data + L*L; @@ -178,7 +178,7 @@ void multilevel_susceptibility::init_internal_data( d->N = P + L; // the last L*ntot block of the data // initial populations - for (int i = 0; i < ntot; ++i) + for (size_t i = 0; i < ntot; ++i) for (int l = 0; l < L; ++l) d->N[i*L + l] = N0[l]; } @@ -199,7 +199,7 @@ void *multilevel_susceptibility::copy_internal_data(void *data) const { if (!d) return 0; multilevel_data *dnew = (multilevel_data *) malloc(d->sz_data); memcpy(dnew, d, d->sz_data); - int ntot = d->ntot; + size_t ntot = d->ntot; dnew->GammaInv = dnew->data; realnum *P = dnew->data + L*L; realnum *P_prev = P + ntot; @@ -225,8 +225,8 @@ int multilevel_susceptibility::num_cinternal_notowned_needed(component c, } realnum *multilevel_susceptibility::cinternal_notowned_ptr( - int inotowned, component c, int cmp, - int n, + int inotowned, component c, int cmp, + int n, void *P_internal_data) const { multilevel_data *d = (multilevel_data *) P_internal_data; if (!d->P[c][cmp] || inotowned < 0 || inotowned >= T) // never true @@ -236,14 +236,14 @@ realnum *multilevel_susceptibility::cinternal_notowned_ptr( void multilevel_susceptibility::update_P (realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], + realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, const grid_volume &gv, void *P_internal_data) const { multilevel_data *d = (multilevel_data *) P_internal_data; double dt2 = 0.5 * dt; // field directions and offsets for E * dP dot product. component cdot[3] = {Dielectric,Dielectric,Dielectric}; - int o1[3], o2[3]; + ptrdiff_t o1[3], o2[3]; int idot = 0; FOR_COMPONENTS(c) if (d->P[c][0]) { if (idot == 3) abort("bug in meep: too many polarization components"); @@ -256,7 +256,7 @@ void multilevel_susceptibility::update_P realnum *Ntmp = d->Ntmp; LOOP_OVER_VOL_OWNED(gv, Centered, i) { realnum *N = d->N + i*L; // N at current point, to update - + // Ntmp = (I - Gamma * dt/2) * N for (int l1 = 0; l1 < L; ++l1) { Ntmp[l1] = (1.0 - Gamma[l1*L + l1]*dt2) * N[l1]; // diagonal term @@ -326,11 +326,11 @@ void multilevel_susceptibility::update_P if (w && s) { realnum *p = d->P[c][cmp][t], *pp = d->P_prev[c][cmp][t]; - int o1, o2; + ptrdiff_t o1, o2; gv.cent2yee_offsets(c, o1, o2); o1 *= L; o2 *= L; const realnum *N = d->N; - + // directions/strides for offdiagonal terms, similar to update_eh const direction d = component_direction(c); direction d1 = cycle_direction(gv.dim, d, 1); @@ -341,7 +341,7 @@ void multilevel_susceptibility::update_P component c2 = direction_component(c, d2); const realnum *w2 = W[c2][cmp]; const realnum *s2 = w2 ? sigma[c][d2] : NULL; - + if (s1 || s2) { abort("nondiagonal saturable gain is not yet supported"); } @@ -352,8 +352,8 @@ void multilevel_susceptibility::update_P // dNi is population inversion for this transition double dNi = -0.25 * (Ni[lp]+Ni[lp+o1]+Ni[lp+o2]+Ni[lp+o1+o2] -Ni[lm]-Ni[lm+o1]-Ni[lm+o2]-Ni[lm+o1+o2]); - p[i] = gamma1inv * (pcur * (2 - omega0dtsqr) - - gamma1 * pp[i] + p[i] = gamma1inv * (pcur * (2 - omega0dtsqr) + - gamma1 * pp[i] + omega0dtsqr * (st * s[i] * w[i])) * dNi; pp[i] = pcur; } @@ -364,18 +364,18 @@ void multilevel_susceptibility::update_P } void multilevel_susceptibility::subtract_P(field_type ft, - realnum *f_minus_p[NUM_FIELD_COMPONENTS][2], + realnum *f_minus_p[NUM_FIELD_COMPONENTS][2], void *P_internal_data) const { multilevel_data *d = (multilevel_data *) P_internal_data; field_type ft2 = ft == E_stuff ? D_stuff : B_stuff; // for sources etc. - int ntot = d->ntot; - for (int t = 0; t < T; ++t) { + size_t ntot = d->ntot; + for (int t = 0; t < T; ++t) { FOR_FT_COMPONENTS(ft, ec) DOCMP2 if (d->P[ec][cmp]) { component dc = field_type_component(ft2, ec); if (f_minus_p[dc][cmp]) { realnum *p = d->P[ec][cmp][t]; realnum *fmp = f_minus_p[dc][cmp]; - for (int i = 0; i < ntot; ++i) fmp[i] -= p[i]; + for (size_t i = 0; i < ntot; ++i) fmp[i] -= p[i]; } } } diff --git a/src/mympi.cpp b/src/mympi.cpp index 9c58b55cb..6874f053c 100644 --- a/src/mympi.cpp +++ b/src/mympi.cpp @@ -368,6 +368,22 @@ int partial_sum_to_all(int in) { return out; } +size_t sum_to_all(size_t in) { + size_t out = in; +#ifdef HAVE_MPI + MPI_Allreduce(&in,&out,1, sizeof(size_t)==4?MPI_UNSIGNED:MPI_UNSIGNED_LONG_LONG, MPI_SUM,mycomm); +#endif + return out; +} + +size_t partial_sum_to_all(size_t in) { + size_t out = in; +#ifdef HAVE_MPI + MPI_Scan(&in,&out,1, sizeof(size_t)==4?MPI_UNSIGNED:MPI_UNSIGNED_LONG_LONG, MPI_SUM,mycomm); +#endif + return out; +} + complex sum_to_all(complex in) { complex out = in; #ifdef HAVE_MPI @@ -484,18 +500,20 @@ void fields::boundary_communications(field_type ft) { for (int j=0;j 0) { - if (chunks[j]->is_mine() && !chunks[i]->is_mine()) - MPI_Isend(comm_blocks[ft][pair], comm_size, - MPI_REALNUM, chunks[i]->n_proc(), - tagto[chunks[i]->n_proc()]++, - mycomm, &reqs[reqnum++]); - if (chunks[i]->is_mine() && !chunks[j]->is_mine()) - MPI_Irecv(comm_blocks[ft][pair], comm_size, - MPI_REALNUM, chunks[j]->n_proc(), - tagto[chunks[j]->n_proc()]++, - mycomm, &reqs[reqnum++]); + if (comm_size > 2147483647) // MPI uses int for size to send/recv + abort("communications size too big for MPI"); + if (chunks[j]->is_mine() && !chunks[i]->is_mine()) + MPI_Isend(comm_blocks[ft][pair], (int) comm_size, + MPI_REALNUM, chunks[i]->n_proc(), + tagto[chunks[i]->n_proc()]++, + mycomm, &reqs[reqnum++]); + if (chunks[i]->is_mine() && !chunks[j]->is_mine()) + MPI_Irecv(comm_blocks[ft][pair], (int) comm_size, + MPI_REALNUM, chunks[j]->n_proc(), + tagto[chunks[j]->n_proc()]++, + mycomm, &reqs[reqnum++]); } } delete[] tagto; diff --git a/src/near2far.cpp b/src/near2far.cpp index 896f14dcc..54f5e9cb0 100644 --- a/src/near2far.cpp +++ b/src/near2far.cpp @@ -55,7 +55,7 @@ void dft_near2far::remove() } } -void dft_near2far::operator-=(const dft_near2far &st) { +void dft_near2far::operator-=(const dft_near2far &st) { if (F && st.F) *F -= *st.F; } @@ -91,10 +91,10 @@ typedef void (*greenfunc)(std::complex *EH, const vec &x, /* Given the field f0 correponding to current-source component c0 at x0, compute the E/H fields EH[6] (6 components) at x for a frequency - freq in the homogeneous 3d medium eps and mu. + freq in the homogeneous 3d medium eps and mu. Adapted from code by M. T. Homer Reid in his SCUFF-EM package - (file scuff-em/src/libs/libIncField/PointSource.cc), which is GPL v2+. */ + (file scuff-em/src/libs/libIncField/PointSource.cc), which is GPL v2+. */ void green3d(std::complex *EH, const vec &x, double freq, double eps, double mu, const vec &x0, component c0, std::complex f0) @@ -124,12 +124,12 @@ void green3d(std::complex *EH, const vec &x, rhat.x() * p.z(), rhat.x() * p.y() - rhat.y() * p.x()); - + /* compute the various scalar quantities in the point source formulae */ std::complex term1 = 1.0 - 1.0/ikr + 1.0/ikr2; std::complex term2 = (-1.0 + 3.0/ikr - 3.0/ikr2) * pdotrhat; std::complex term3 = (1.0 - 1.0/ikr); - + /* now assemble everything based on source type */ if (is_electric(c0)) { expfac /= eps; @@ -137,7 +137,7 @@ void green3d(std::complex *EH, const vec &x, EH[0] = expfac * (term1*p.x() + term2*rhat.x()); EH[1] = expfac * (term1*p.y() + term2*rhat.y()); EH[2] = expfac * (term1*p.z() + term2*rhat.z()); - + EH[3] = expfac*term3*rhatcrossp.x() / Z; EH[4] = expfac*term3*rhatcrossp.y() / Z; EH[5] = expfac*term3*rhatcrossp.z() / Z; @@ -148,7 +148,7 @@ void green3d(std::complex *EH, const vec &x, EH[0] = -expfac*term3*rhatcrossp.x() * Z; EH[1] = -expfac*term3*rhatcrossp.y() * Z; EH[2] = -expfac*term3*rhatcrossp.z() * Z; - + EH[3] = expfac * (term1*p.x() + term2*rhat.x()); EH[4] = expfac * (term1*p.y() + term2*rhat.y()); EH[5] = expfac * (term1*p.z() + term2*rhat.z()); @@ -231,7 +231,7 @@ void green2d(std::complex *EH, const vec &x, else /* (is_magnetic(c0)) */ { // Hxy source EH[0] = EH[1] = 0.0; EH[2] = rhatcrossp * ikH1; - + EH[3] = -(rhat.x() * (pdotrhat/r * 0.25/Z)) * H1 + (rhat.y() * (rhatcrossp * omega*eps * 0.125)) * (H0 - H2); EH[4] = -(rhat.y() * (pdotrhat/r * 0.25/Z)) * H1 - @@ -250,14 +250,14 @@ void dft_near2far::farfield_lowlevel(std::complex *EH, const vec &x) std::complex EH6[6]; for (int i = 0; i < 6 * Nfreq; ++i) EH[i] = 0.0; - + for (dft_chunk *f = F; f; f = f->next_in_dft) { assert(Nfreq == f->Nomega); component c0 = component(f->vc); /* equivalent source component */ vec rshift(f->shift * (0.5*f->fc->gv.inva)); - int idx_dft = 0; + size_t idx_dft = 0; LOOP_OVER_IVECS(f->fc->gv, f->is, f->ie, idx) { IVEC_LOOP_LOC(f->fc->gv, x0); x0 = f->S.transform(x0, f->sn) + rshift; @@ -287,7 +287,8 @@ void dft_near2far::save_farfields(const char *fname, const char *prefix, int dims[4] = {1,1,1,1}; double dx[3] = {0,0,0}; direction dirs[3] = {X,Y,Z}; - int rank = 0, N = 1; + int rank = 0; + size_t N = 1; LOOP_OVER_DIRECTIONS(where.dim, d) { dims[rank] = int(floor(where.in_direction(d) * resolution)); if (dims[rank] <= 1) { @@ -313,13 +314,13 @@ void dft_near2far::save_farfields(const char *fname, const char *prefix, for (int i0 = 0; i0 < dims[0]; ++i0) { x.set_direction(dirs[0], where.in_direction_min(dirs[0]) + i0*dx[0]); for (int i1 = 0; i1 < dims[1]; ++i1) { - x.set_direction(dirs[1], + x.set_direction(dirs[1], where.in_direction_min(dirs[1]) + i1*dx[1]); for (int i2 = 0; i2 < dims[2]; ++i2) { - x.set_direction(dirs[2], + x.set_direction(dirs[2], where.in_direction_min(dirs[2]) + i2*dx[2]); farfield_lowlevel(EH1, x); - int idx = (i0 * dims[1] + i1) * dims[2] + i2; + ptrdiff_t idx = (i0 * dims[1] + i1) * dims[2] + i2; for (int i = 0; i < Nfreq; ++i) for (int k = 0; k < 6; ++k) { EH_[((k * 2 + 0) * N + idx) * Nfreq + i] = @@ -354,7 +355,7 @@ void dft_near2far::save_farfields(const char *fname, const char *prefix, char dataname[128]; for (int k = 0; k < 6; ++k) for (int reim = 0; reim < 2; ++reim) { - snprintf(dataname, 128, "%s.%c", + snprintf(dataname, 128, "%s.%c", component_name(c[k]), "ri"[reim]); ff.write(dataname, rank, dims, EH + (k*2 + reim)*N*Nfreq); } @@ -369,7 +370,7 @@ dft_near2far fields::add_dft_near2far(const volume_list *where, double freq_min, double freq_max, int Nfreq){ dft_chunk *F = 0; /* E and H chunks*/ double eps = 0, mu = 0; - + for (const volume_list *w = where; w; w = w->next) { direction nd = component_direction(w->c); if (nd == NO_DIRECTION) nd = normal_direction(w->v); @@ -382,8 +383,8 @@ dft_near2far fields::add_dft_near2far(const volume_list *where, abort("dft_near2far requires surfaces in a homogeneous medium"); eps = weps; mu = wmu; - - /* two transverse directions to normal (in cyclic order to get + + /* two transverse directions to normal (in cyclic order to get correct sign s below) */ switch (nd) { case X: fd[0] = Y; fd[1] = Z; break; @@ -407,7 +408,7 @@ dft_near2far fields::add_dft_near2far(const volume_list *where, component c0 = direction_component(i == 0 ? Hx : Ex, fd[1-j]); double s = j == 0 ? 1 : -1; /* sign of n x c */ if (is_electric(c)) s = -s; - + F = add_dft(c, w->v, freq_min, freq_max, Nfreq, true, s*w->weight, F, false, 1.0, false, c0); } diff --git a/src/sources.cpp b/src/sources.cpp index 00cf959a6..f6bbdff1d 100644 --- a/src/sources.cpp +++ b/src/sources.cpp @@ -156,7 +156,7 @@ bool custom_src_time::is_equal(const src_time &t) const /*********************************************************************/ -src_vol::src_vol(component cc, src_time *st, int n, int *ind, complex *amps) { +src_vol::src_vol(component cc, src_time *st, size_t n, ptrdiff_t *ind, complex *amps) { c = cc; if (is_D(c)) c = direction_component(Ex, component_direction(c)); if (is_B(c)) c = direction_component(Hx, component_direction(c)); @@ -170,9 +170,9 @@ src_vol::src_vol(const src_vol &sv) { c = sv.c; t = sv.t; npts = sv.npts; - index = new int[npts]; + index = new ptrdiff_t[npts]; A = new complex[npts]; - for (int j=0; jindex[j] != index[j]) abort("Different indices\n"); others->A[j] += A[j]; @@ -275,10 +275,10 @@ static void src_vol_chunkloop(fields_chunk *fc, int ichunk, component c, (void) dV0; (void) dV1; // grid_volume weighting is included in data->amp (void) ichunk; - int npts = 1; + size_t npts = 1; LOOP_OVER_DIRECTIONS(is.dim, d) npts *= (ie.in_direction(d) - is.in_direction(d)) / 2 + 1; - int *index_array = new int[npts]; + ptrdiff_t *index_array = new ptrdiff_t[npts]; complex *amps_array = new complex[npts]; complex amp = data->amp * conj(shift_phase); @@ -286,7 +286,7 @@ static void src_vol_chunkloop(fields_chunk *fc, int ichunk, component c, direction cd = component_direction(c); double inva = fc->gv.inva; - int idx_vol = 0; + size_t idx_vol = 0; LOOP_OVER_IVECS(fc->gv, is, ie, idx) { IVEC_LOOP_ILOC(fc->gv, iloc); if (!fc->gv.owns(iloc)) continue; @@ -308,7 +308,7 @@ static void src_vol_chunkloop(fields_chunk *fc, int ichunk, component c, } if (idx_vol > npts) - abort("add_volume_source: computed wrong npts (%d vs. %d)", npts, idx_vol); + abort("add_volume_source: computed wrong npts (%zd vs. %zd)", npts, idx_vol); src_vol *tmp = new src_vol(c, data->src, idx_vol, index_array, amps_array); field_type ft = is_magnetic(c) ? B_stuff : D_stuff; diff --git a/src/step.cpp b/src/step.cpp index efae55e95..b35c38592 100644 --- a/src/step.cpp +++ b/src/step.cpp @@ -36,7 +36,7 @@ void fields::step() { if (synchronized_magnetic_fields) { synchronized_magnetic_fields = 1; // reset synchronization count restore_magnetic_fields(); - } + } am_now_working_on(Stepping); @@ -45,8 +45,8 @@ void fields::step() { last_step_output_t = t; } if (!quiet && wall_time() > last_step_output_wall_time + MIN_OUTPUT_TIME) { - master_printf("on time step %d (time=%g), %g s/step\n", t, time(), - (wall_time() - last_step_output_wall_time) / + master_printf("on time step %d (time=%g), %g s/step\n", t, time(), + (wall_time() - last_step_output_wall_time) / (t - last_step_output_t)); if (save_synchronized_magnetic_fields) master_printf(" (doing expensive timestepping of synched fields)\n"); @@ -55,7 +55,7 @@ void fields::step() { } phase_material(); - + // update cached conductivity-inverse array, if needed for (int i=0;is->update_condinv(); @@ -115,8 +115,8 @@ void fields::phase_material() { if (is_phasing()) { for (int i=0;iis_mine()) { - chunks[i]->phase_material(phasein_time); - changed = changed || chunks[i]->new_s; + chunks[i]->phase_material(phasein_time); + changed = changed || chunks[i]->new_s; } phasein_time--; } @@ -157,27 +157,27 @@ void fields::step_boundaries(field_type ft) { if (chunks[j]->is_mine()) { int wh[3] = {0,0,0}; for (int i=0;iconnections[ft][ip][Outgoing][wh[ip]++]); - n0 += comm_sizes[ft][ip][pair]; - } + const int pair = j+i*num_chunks; + size_t n0 = 0; + for (int ip=0;ip<3;ip++) { + for (size_t n=0;nconnections[ft][ip][Outgoing][wh[ip]++]); + n0 += comm_sizes[ft][ip][pair]; + } } } boundary_communications(ft); - + // Finally, copy incoming data to the fields themselves, multiplying phases: for (int i=0;iis_mine()) { int wh[3] = {0,0,0}; for (int j=0;jconnection_phases[ft][wh[ip]/2]); const double phi = imag(chunks[i]->connection_phases[ft][wh[ip]/2]); *(chunks[i]->connections[ft][ip][Incoming][wh[ip]]) = @@ -185,16 +185,16 @@ void fields::step_boundaries(field_type ft) { *(chunks[i]->connections[ft][ip][Incoming][wh[ip]+1]) = phr*comm_blocks[ft][pair][n+1] + phi*comm_blocks[ft][pair][n]; } - int n0 = comm_sizes[ft][ip][pair]; - ip = CONNECT_NEGATE; - for (int n = 0; n < comm_sizes[ft][ip][pair]; ++n) + size_t n0 = comm_sizes[ft][ip][pair]; + ip = CONNECT_NEGATE; + for (size_t n = 0; n < comm_sizes[ft][ip][pair]; ++n) *(chunks[i]->connections[ft][ip][Incoming][wh[ip]++]) - = -comm_blocks[ft][pair][n0 + n]; - n0 += comm_sizes[ft][ip][pair]; - ip = CONNECT_COPY; - for (int n = 0; n < comm_sizes[ft][ip][pair]; ++n) + = -comm_blocks[ft][pair][n0 + n]; + n0 += comm_sizes[ft][ip][pair]; + ip = CONNECT_COPY; + for (size_t n = 0; n < comm_sizes[ft][ip][pair]; ++n) *(chunks[i]->connections[ft][ip][Incoming][wh[ip]++]) - = comm_blocks[ft][pair][n0 + n]; + = comm_blocks[ft][pair][n0 + n]; } } @@ -210,23 +210,23 @@ void fields::step_source(field_type ft, bool including_integrated) { void fields_chunk::step_source(field_type ft, bool including_integrated) { if (doing_solve_cw && !including_integrated) return; for (src_vol *sv = sources[ft]; sv; sv = sv->next) { - component c = direction_component(first_field_component(ft), + component c = direction_component(first_field_component(ft), component_direction(sv->c)); const realnum *cndinv = s->condinv[c][component_direction(sv->c)]; if ((including_integrated || !sv->t->is_integrated) && f[c][0] && ((ft == D_stuff && is_electric(sv->c)) || (ft == B_stuff && is_magnetic(sv->c)))) { if (cndinv) - for (int j=0; jnpts; j++) { - const int i = sv->index[j]; + for (size_t j=0; jnpts; j++) { + const ptrdiff_t i = sv->index[j]; const complex A = sv->current(j) * dt * double(cndinv[i]); f[c][0][i] -= real(A); if (!is_real) f[c][1][i] -= imag(A); } else - for (int j=0; jnpts; j++) { + for (size_t j=0; jnpts; j++) { const complex A = sv->current(j) * dt; - const int i = sv->index[j]; + const ptrdiff_t i = sv->index[j]; f[c][0][i] -= real(A); if (!is_real) f[c][1][i] -= imag(A); } diff --git a/src/step_db.cpp b/src/step_db.cpp index 5697f1b8f..20f3938bd 100644 --- a/src/step_db.cpp +++ b/src/step_db.cpp @@ -33,7 +33,7 @@ void fields::step_db(field_type ft) { for (int i=0;iis_mine()) if (chunks[i]->step_db(ft)) - chunk_connections_valid = false; + chunk_connections_valid = false; /* synchronize to avoid deadlocks in connect_the_chunks */ chunk_connections_valid = and_to_all(chunk_connections_valid); @@ -57,8 +57,8 @@ bool fields_chunk::step_db(field_type ft) { const direction dsig = s->sigsize[dsig0] > 1 ? dsig0 : NO_DIRECTION; const direction dsigu0 = cycle_direction(gv.dim,d_c,2); const direction dsigu = s->sigsize[dsigu0] > 1 ? dsigu0 : NO_DIRECTION; - int stride_p = have_p?gv.stride(d_deriv_p):0; - int stride_m = have_m?gv.stride(d_deriv_m):0; + ptrdiff_t stride_p = have_p?gv.stride(d_deriv_p):0; + ptrdiff_t stride_m = have_m?gv.stride(d_deriv_m):0; realnum *f_p = have_p?f[c_p][cmp]:NULL; realnum *f_m = have_m?f[c_m][cmp]:NULL; realnum *the_f = f[cc][cmp]; @@ -73,7 +73,7 @@ bool fields_chunk::step_db(field_type ft) { memcpy(f_u[cc][cmp], the_f, gv.ntot() * sizeof(realnum)); allocated_u = true; } - + if (ft == D_stuff) { // strides are opposite sign for H curl stride_p = -stride_p; stride_m = -stride_m; @@ -87,7 +87,7 @@ bool fields_chunk::step_db(field_type ft) { break; // curl works normally for phi component case Z: { f_m = NULL; // im/r Fr term will be handled separately - + /* Here we do a somewhat cool hack: the update of the z component gives a 1/r d(r Fp)/dr term, rather than just the derivative dg/dr expected in step_curl. @@ -99,14 +99,14 @@ bool fields_chunk::step_db(field_type ft) { the derivative and integral are replaced by differences and sums, but you get the idea). */ if (!f_rderiv_int) f_rderiv_int = new realnum[gv.ntot()]; - double ir0 = gv.origin_r() * gv.a + double ir0 = gv.origin_r() * gv.a + 0.5 * gv.iyee_shift(c_p).in_direction(R); for (int iz = 0; iz <= gv.nz(); ++iz) f_rderiv_int[iz] = 0; int sr = gv.nz() + 1; for (int ir = 1; ir <= gv.nr(); ++ir) { double rinv = 1.0 / ((ir+ir0)-0.5); for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; + ptrdiff_t idx = ir*sr + iz; f_rderiv_int[idx] = f_rderiv_int[idx - sr] + rinv * (f_p[idx] * (ir+ir0) - f_p[idx - sr] * ((ir-1)+ir0)); } @@ -116,11 +116,11 @@ bool fields_chunk::step_db(field_type ft) { } default: abort("bug - non-cylindrical field component in Dcyl"); } - - STEP_CURL(the_f, cc, f_p, f_m, stride_p, stride_m, gv, Courant, - dsig, s->sig[dsig], s->kap[dsig], s->siginv[dsig], - f_u[cc][cmp], dsigu, s->sig[dsigu], s->kap[dsigu], s->siginv[dsigu], - dt, + + STEP_CURL(the_f, cc, f_p, f_m, stride_p, stride_m, gv, Courant, + dsig, s->sig[dsig], s->kap[dsig], s->siginv[dsig], + f_u[cc][cmp], dsigu, s->sig[dsigu], s->kap[dsigu], s->siginv[dsigu], + dt, s->conductivity[cc][d_c], s->condinv[cc][d_c],f_cond[cc][cmp]); } @@ -137,7 +137,7 @@ bool fields_chunk::step_db(field_type ft) { at -beta.) The nice thing about this is that most calculations of flux, energy, etcetera, are insensitive to this implicit "i" factor. For complex fields, we implement i*beta directly. */ - if (gv.dim == D2 && beta != 0) DOCMP for (direction d_c=X; d_c <= Y; + if (gv.dim == D2 && beta != 0) DOCMP for (direction d_c=X; d_c <= Y; d_c = direction(d_c + 1)) { component cc = direction_component(first_field_component(ft), d_c); component c_g = direction_component(ft == D_stuff ? Hx : Ex, @@ -150,10 +150,10 @@ bool fields_chunk::step_db(field_type ft) { const direction dsigu = s->sigsize[dsigu0] > 1 ? dsigu0 : NO_DIRECTION; const double betadt = 2 * pi * beta * dt * (d_c == X ? +1 : -1) * (f[c_g][1-cmp] ? (ft == D_stuff ? -1 : +1) * (2*cmp-1) : 1); - STEP_BETA(the_f, cc, g, gv, betadt, - dsig, s->siginv[dsig], - f_u[cc][cmp], dsigu, s->siginv[dsigu], - s->condinv[cc][d_c], f_cond[cc][cmp]); + STEP_BETA(the_f, cc, g, gv, betadt, + dsig, s->siginv[dsig], + f_u[cc][cmp], dsigu, s->siginv[dsigu], + s->condinv[cc][d_c], f_cond[cc][cmp]); } // in cylindrical coordinates, we now have to add the i*m/r terms... */ @@ -172,110 +172,110 @@ bool fields_chunk::step_db(field_type ft) { const direction dsigu = cycle_direction(gv.dim,d_c,2); const double *siginvu = s->sigsize[dsigu] > 1 ? s->siginv[dsigu] : 0; const int dku = gv.iyee_shift(cc).in_direction(dsigu); - const double the_m = - m * (1-2*cmp) * (1-2*(ft==B_stuff)) * (1-2*(d_c==R)) * Courant; - const double ir0 = gv.origin_r() * gv.a - + 0.5 * gv.iyee_shift(cc).in_direction(R); + const double the_m = + m * (1-2*cmp) * (1-2*(ft==B_stuff)) * (1-2*(d_c==R)) * Courant; + const double ir0 = gv.origin_r() * gv.a + + 0.5 * gv.iyee_shift(cc).in_direction(R); int sr = gv.nz() + 1; // 8 special cases of the same loop (sigh): if (siginv) { // PML in f update - if (siginvu) { // PML + fu - if (cndinv) // PML + fu + conductivity - //////////////////// MOST GENERAL CASE ////////////////////// - for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir+ir0); - for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; - int k = dk + 2*(dsig==Z ? iz : ir); - int ku = dku + 2*(dsigu==Z ? iz : ir); - double df, dfcnd = rinv * g[idx] * cndinv[idx]; - fcnd[idx] += dfcnd; - fu[idx] += (df = dfcnd * siginv[k]); - the_f[idx] += siginvu[ku] * df; - } - } - ///////////////////////////////////////////////////////////// - else // PML + fu - conductivity - for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir+ir0); - for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; - int k = dk + 2*(dsig==Z ? iz : ir); - int ku = dku + 2*(dsigu==Z ? iz : ir); - double df, dfcnd = rinv * g[idx]; - fu[idx] += (df = dfcnd * siginv[k]); - the_f[idx] += siginvu[ku] * df; - } - } - } - else { // PML - fu - if (cndinv) // PML - fu + conductivity - for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir+ir0); - for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; - int k = dk + 2*(dsig==Z ? iz : ir); - double dfcnd = rinv * g[idx] * cndinv[idx]; - fcnd[idx] += dfcnd; - the_f[idx] += dfcnd * siginv[k]; - } - } - else // PML - fu - conductivity - for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir+ir0); - for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; - int k = dk + 2*(dsig==Z ? iz : ir); - double dfcnd = rinv * g[idx]; - the_f[idx] += dfcnd * siginv[k]; - } - } - } + if (siginvu) { // PML + fu + if (cndinv) // PML + fu + conductivity + //////////////////// MOST GENERAL CASE ////////////////////// + for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { + double rinv = the_m / (ir+ir0); + for (int iz = 0; iz <= gv.nz(); ++iz) { + ptrdiff_t idx = ir*sr + iz; + int k = dk + 2*(dsig==Z ? iz : ir); + int ku = dku + 2*(dsigu==Z ? iz : ir); + double df, dfcnd = rinv * g[idx] * cndinv[idx]; + fcnd[idx] += dfcnd; + fu[idx] += (df = dfcnd * siginv[k]); + the_f[idx] += siginvu[ku] * df; + } + } + ///////////////////////////////////////////////////////////// + else // PML + fu - conductivity + for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { + double rinv = the_m / (ir+ir0); + for (int iz = 0; iz <= gv.nz(); ++iz) { + ptrdiff_t idx = ir*sr + iz; + int k = dk + 2*(dsig==Z ? iz : ir); + int ku = dku + 2*(dsigu==Z ? iz : ir); + double df, dfcnd = rinv * g[idx]; + fu[idx] += (df = dfcnd * siginv[k]); + the_f[idx] += siginvu[ku] * df; + } + } + } + else { // PML - fu + if (cndinv) // PML - fu + conductivity + for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { + double rinv = the_m / (ir+ir0); + for (int iz = 0; iz <= gv.nz(); ++iz) { + ptrdiff_t idx = ir*sr + iz; + int k = dk + 2*(dsig==Z ? iz : ir); + double dfcnd = rinv * g[idx] * cndinv[idx]; + fcnd[idx] += dfcnd; + the_f[idx] += dfcnd * siginv[k]; + } + } + else // PML - fu - conductivity + for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { + double rinv = the_m / (ir+ir0); + for (int iz = 0; iz <= gv.nz(); ++iz) { + ptrdiff_t idx = ir*sr + iz; + int k = dk + 2*(dsig==Z ? iz : ir); + double dfcnd = rinv * g[idx]; + the_f[idx] += dfcnd * siginv[k]; + } + } + } } else { // no PML in f update - if (siginvu) { // no PML + fu - if (cndinv) // no PML + fu + conductivity - for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir+ir0); - for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; - int ku = dku + 2*(dsigu==Z ? iz : ir); - double df = rinv * g[idx] * cndinv[idx]; - fu[idx] += df; - the_f[idx] += siginvu[ku] * df; - } - } - else // no PML + fu - conductivity - for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir+ir0); - for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; - int ku = dku + 2*(dsigu==Z ? iz : ir); - double df = rinv * g[idx]; - fu[idx] += df; - the_f[idx] += siginvu[ku] * df; - } - } - } - else { // no PML - fu - if (cndinv) // no PML - fu + conductivity - for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir+ir0); - for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; - the_f[idx] += rinv * g[idx] * cndinv[idx]; - } - } - else // no PML - fu - conductivity - for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir+ir0); - for (int iz = 0; iz <= gv.nz(); ++iz) { - int idx = ir*sr + iz; - the_f[idx] += rinv * g[idx]; - } - } - } + if (siginvu) { // no PML + fu + if (cndinv) // no PML + fu + conductivity + for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { + double rinv = the_m / (ir+ir0); + for (int iz = 0; iz <= gv.nz(); ++iz) { + ptrdiff_t idx = ir*sr + iz; + int ku = dku + 2*(dsigu==Z ? iz : ir); + double df = rinv * g[idx] * cndinv[idx]; + fu[idx] += df; + the_f[idx] += siginvu[ku] * df; + } + } + else // no PML + fu - conductivity + for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { + double rinv = the_m / (ir+ir0); + for (int iz = 0; iz <= gv.nz(); ++iz) { + ptrdiff_t idx = ir*sr + iz; + int ku = dku + 2*(dsigu==Z ? iz : ir); + double df = rinv * g[idx]; + fu[idx] += df; + the_f[idx] += siginvu[ku] * df; + } + } + } + else { // no PML - fu + if (cndinv) // no PML - fu + conductivity + for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { + double rinv = the_m / (ir+ir0); + for (int iz = 0; iz <= gv.nz(); ++iz) { + ptrdiff_t idx = ir*sr + iz; + the_f[idx] += rinv * g[idx] * cndinv[idx]; + } + } + else // no PML - fu - conductivity + for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { + double rinv = the_m / (ir+ir0); + for (int iz = 0; iz <= gv.nz(); ++iz) { + ptrdiff_t idx = ir*sr + iz; + the_f[idx] += rinv * g[idx]; + } + } + } } } } @@ -299,11 +299,11 @@ bool fields_chunk::step_db(field_type ft) { realnum *fu = siginvu && f_u[Dz][cmp] ? f[Dz][cmp] : 0; realnum *the_f = fu ? f_u[Dz][cmp] : f[Dz][cmp]; for (int iz = 0; iz < nz; ++iz) { - // Note: old code (prior to Meep 0.2) was missing factor of 4?? - double df, dfcnd = g[iz] * (Courant * 4) * (cndinv ? cndinv[iz] : 1); - if (fcnd) fcnd[iz] += dfcnd; - the_f[iz] += (df = dfcnd * (siginv ? siginv[dk + 2*(dsig==Z)*iz] : 1)); - if (fu) fu[iz] += siginvu[dku + 2*(dsigu==Z)*iz] * df; + // Note: old code (prior to Meep 0.2) was missing factor of 4?? + double df, dfcnd = g[iz] * (Courant * 4) * (cndinv ? cndinv[iz] : 1); + if (fcnd) fcnd[iz] += dfcnd; + the_f[iz] += (df = dfcnd * (siginv ? siginv[dk + 2*(dsig==Z)*iz] : 1)); + if (fu) fu[iz] += siginvu[dku + 2*(dsigu==Z)*iz] * df; } ZERO_Z(f[Dp][cmp]); if (f_cond[Dp][cmp]) ZERO_Z(f_cond[Dp][cmp]); @@ -322,7 +322,7 @@ bool fields_chunk::step_db(field_type ft) { if (!f[cc][cmp]) continue; const realnum *f_p = f[ft == D_stuff ? Hr : Ep][cmp]; const realnum *f_m = ft == D_stuff ? f[Hz][cmp] - : (f[Ez][1-cmp] + (nz+1)); + : (f[Ez][1-cmp] + (nz+1)); const realnum *cndinv = s->condinv[cc][d_c]; realnum *fcnd = f_cond[cc][cmp]; const direction dsig = cycle_direction(gv.dim,d_c,1); @@ -337,77 +337,77 @@ bool fields_chunk::step_db(field_type ft) { double f_m_mult = ft == D_stuff ? 2 : (1-2*cmp); for (int iz = (ft == D_stuff); iz < nz + (ft == D_stuff); ++iz) { - double df; - double dfcnd = (sd*Courant) * (f_p[iz]-f_p[iz-sd] - f_m_mult*f_m[iz]) - * (cndinv ? cndinv[iz] : 1); - if (fcnd) fcnd[iz] += dfcnd; - the_f[iz] += (df = dfcnd * (siginv ? siginv[dk + 2*(dsig==Z)*iz] : 1)); - if (fu) fu[iz] += siginvu[dku + 2*(dsigu==Z)*iz] * df; + double df; + double dfcnd = (sd*Courant) * (f_p[iz]-f_p[iz-sd] - f_m_mult*f_m[iz]) + * (cndinv ? cndinv[iz] : 1); + if (fcnd) fcnd[iz] += dfcnd; + the_f[iz] += (df = dfcnd * (siginv ? siginv[dk + 2*(dsig==Z)*iz] : 1)); + if (fu) fu[iz] += siginvu[dku + 2*(dsigu==Z)*iz] * df; } if (ft == D_stuff) { - ZERO_Z(f[Dz][cmp]); - if (f_cond[Dz][cmp]) ZERO_Z(f_cond[Dz][cmp]); - if (f_u[Dz][cmp]) ZERO_Z(f_u[Dz][cmp]); + ZERO_Z(f[Dz][cmp]); + if (f_cond[Dz][cmp]) ZERO_Z(f_cond[Dz][cmp]); + if (f_u[Dz][cmp]) ZERO_Z(f_u[Dz][cmp]); } } else if (m != 0) { // m != {0,+1,-1} if (zero_fields_near_cylorigin) { /* default behavior */ - /* I seem to recall David telling me that this was for numerical - stability of some sort - the larger m is, the farther from - the origin we need to be before we can use nonzero fields - ... note that this is a fixed number of pixels for a given m, - so it should still converge. Still, this is weird... - - Update: experimentally, this seems to indeed be important - for stability. Setting these fields to zero, it seems to be - stable with a Courant number < 0.62 or so for all m. Without - this, it becomes unstable unless we set the Courant number to - about 1 / (|m| + 0.5) or less. - - Cons: setting fields near the origin to identically zero is - somewhat unexpected for users, and probably spoils 2nd-order - accuracy, and may not fix all stability issues anyway (based - on anecdotal evidence from Alex M. of having to reduce Courant - for large m). */ - double rmax = fabs(m) - int(gv.origin_r()*gv.a+0.5); - if (ft == D_stuff) - for (int r = 0; r <= gv.nr() && r < rmax; r++) { - const int ir = r*(nz+1); - ZERO_Z(f[Dp][cmp]+ir); - ZERO_Z(f[Dz][cmp]+ir); - if (f_cond[Dp][cmp]) ZERO_Z(f_cond[Dp][cmp]+ir); - if (f_cond[Dz][cmp]) ZERO_Z(f_cond[Dz][cmp]+ir); - if (f_u[Dp][cmp]) ZERO_Z(f_u[Dp][cmp]+ir); - if (f_u[Dz][cmp]) ZERO_Z(f_u[Dz][cmp]+ir); - } - else - for (int r = 0; r <= gv.nr() && r < rmax; r++) { - const int ir = r*(nz+1); - ZERO_Z(f[Br][cmp]+ir); - if (f_cond[Br][cmp]) ZERO_Z(f_cond[Br][cmp]+ir); - if (f_u[Br][cmp]) ZERO_Z(f_u[Br][cmp]+ir); - } + /* I seem to recall David telling me that this was for numerical + stability of some sort - the larger m is, the farther from + the origin we need to be before we can use nonzero fields + ... note that this is a fixed number of pixels for a given m, + so it should still converge. Still, this is weird... + + Update: experimentally, this seems to indeed be important + for stability. Setting these fields to zero, it seems to be + stable with a Courant number < 0.62 or so for all m. Without + this, it becomes unstable unless we set the Courant number to + about 1 / (|m| + 0.5) or less. + + Cons: setting fields near the origin to identically zero is + somewhat unexpected for users, and probably spoils 2nd-order + accuracy, and may not fix all stability issues anyway (based + on anecdotal evidence from Alex M. of having to reduce Courant + for large m). */ + double rmax = fabs(m) - int(gv.origin_r()*gv.a+0.5); + if (ft == D_stuff) + for (int r = 0; r <= gv.nr() && r < rmax; r++) { + const int ir = r*(nz+1); + ZERO_Z(f[Dp][cmp]+ir); + ZERO_Z(f[Dz][cmp]+ir); + if (f_cond[Dp][cmp]) ZERO_Z(f_cond[Dp][cmp]+ir); + if (f_cond[Dz][cmp]) ZERO_Z(f_cond[Dz][cmp]+ir); + if (f_u[Dp][cmp]) ZERO_Z(f_u[Dp][cmp]+ir); + if (f_u[Dz][cmp]) ZERO_Z(f_u[Dz][cmp]+ir); + } + else + for (int r = 0; r <= gv.nr() && r < rmax; r++) { + const int ir = r*(nz+1); + ZERO_Z(f[Br][cmp]+ir); + if (f_cond[Br][cmp]) ZERO_Z(f_cond[Br][cmp]+ir); + if (f_u[Br][cmp]) ZERO_Z(f_u[Br][cmp]+ir); + } } else { - /* Without David's hack: just set boundary conditions at r=0. - This seems to be unstable unless we make the Courant number - around 1 / (|m| + 0.5) or smaller. Pros: probably maintains - 2nd-order accuracy, is more sane for r near zero. Cons: - 1/(|m|+0.5) is purely empirical (no theory yet), and I'm not - sure how universal it is. Makes higher m's more expensive. */ - if (ft == D_stuff) { - ZERO_Z(f[Dp][cmp]); - ZERO_Z(f[Dz][cmp]); - if (f_cond[Dp][cmp]) ZERO_Z(f_cond[Dp][cmp]); - if (f_cond[Dz][cmp]) ZERO_Z(f_cond[Dz][cmp]); - if (f_u[Dp][cmp]) ZERO_Z(f_u[Dp][cmp]); - if (f_u[Dz][cmp]) ZERO_Z(f_u[Dz][cmp]); - } - else { - ZERO_Z(f[Br][cmp]); - if (f_cond[Br][cmp]) ZERO_Z(f_cond[Br][cmp]); - if (f_u[Br][cmp]) ZERO_Z(f_u[Br][cmp]); - } + /* Without David's hack: just set boundary conditions at r=0. + This seems to be unstable unless we make the Courant number + around 1 / (|m| + 0.5) or smaller. Pros: probably maintains + 2nd-order accuracy, is more sane for r near zero. Cons: + 1/(|m|+0.5) is purely empirical (no theory yet), and I'm not + sure how universal it is. Makes higher m's more expensive. */ + if (ft == D_stuff) { + ZERO_Z(f[Dp][cmp]); + ZERO_Z(f[Dz][cmp]); + if (f_cond[Dp][cmp]) ZERO_Z(f_cond[Dp][cmp]); + if (f_cond[Dz][cmp]) ZERO_Z(f_cond[Dz][cmp]); + if (f_u[Dp][cmp]) ZERO_Z(f_u[Dp][cmp]); + if (f_u[Dz][cmp]) ZERO_Z(f_u[Dz][cmp]); + } + else { + ZERO_Z(f[Br][cmp]); + if (f_cond[Br][cmp]) ZERO_Z(f_cond[Br][cmp]); + if (f_u[Br][cmp]) ZERO_Z(f_u[Br][cmp]); + } } } } diff --git a/src/step_generic.cpp b/src/step_generic.cpp index 2ae767bd8..8c304fdbc 100644 --- a/src/step_generic.cpp +++ b/src/step_generic.cpp @@ -49,9 +49,9 @@ namespace meep { of PML, cndinv should contain 1 / (1 + dt (cnd + sigma)/2). fcnd is an auxiliary field used ONLY when we simultaneously have - PML (dsig != NO_DIR) and conductivity, in which case fcnd solves + PML (dsig != NO_DIR) and conductivity, in which case fcnd solves dfcnd/dt = curl g - cnd*fcnd - and f satisfies + and f satisfies df/dt = dfcnd/dt - sigma*f. fu is another auxiliary field used only in PML (dsigu != NO_DIR), @@ -60,88 +60,88 @@ namespace meep { and fu replaces f in the equations above (fu += dt curl g etcetera). */ void step_curl(RPR f, component c, const RPR g1, const RPR g2, - int s1, int s2, // strides for g1/g2 shift + ptrdiff_t s1, ptrdiff_t s2, // strides for g1/g2 shift const grid_volume &gv, double dtdx, direction dsig, const DPR sig, const DPR kap, const DPR siginv, RPR fu, direction dsigu, const DPR sigu, const DPR kapu, const DPR siginvu, - double dt, + double dt, const RPR cnd, const RPR cndinv, RPR fcnd) { if (!g1) { // swap g1 and g2 SWAP(const RPR, g1, g2); - SWAP(int, s1, s2); + SWAP(ptrdiff_t, s1, s2); dtdx = -dtdx; // need to flip derivative sign } /* The following are a bunch of special cases of the "MOST GENERAL CASE" loop below. We make copies of the loop for each special case in order to keep the innermost loop efficient. This is especially - important because the non-PML cases are actually more common. - (The "right" way to do this is by partial evaluation of the + important because the non-PML cases are actually more common. + (The "right" way to do this is by partial evaluation of the most general case, but that would require a code generator.) */ if (dsig == NO_DIRECTION) { // no PML in f update if (dsigu == NO_DIRECTION) { // no fu update if (cnd) { - double dt2 = dt * 0.5; - if (g2) { - LOOP_OVER_VOL_OWNED0(gv, c, i) - f[i] = ((1 - dt2 * cnd[i]) * f[i] - - dtdx * (g1[i+s1] - g1[i] + g2[i] - g2[i+s2])) * cndinv[i]; - } - else { - LOOP_OVER_VOL_OWNED0(gv, c, i) - f[i] = ((1 - dt2 * cnd[i]) * f[i] - - dtdx * (g1[i+s1] - g1[i])) * cndinv[i]; - } + double dt2 = dt * 0.5; + if (g2) { + LOOP_OVER_VOL_OWNED0(gv, c, i) + f[i] = ((1 - dt2 * cnd[i]) * f[i] - + dtdx * (g1[i+s1] - g1[i] + g2[i] - g2[i+s2])) * cndinv[i]; + } + else { + LOOP_OVER_VOL_OWNED0(gv, c, i) + f[i] = ((1 - dt2 * cnd[i]) * f[i] + - dtdx * (g1[i+s1] - g1[i])) * cndinv[i]; + } } else { // no conductivity - if (g2) { - LOOP_OVER_VOL_OWNED0(gv, c, i) - f[i] -= dtdx * (g1[i+s1] - g1[i] + g2[i] - g2[i+s2]); - } - else { - LOOP_OVER_VOL_OWNED0(gv, c, i) - f[i] -= dtdx * (g1[i+s1] - g1[i]); - } + if (g2) { + LOOP_OVER_VOL_OWNED0(gv, c, i) + f[i] -= dtdx * (g1[i+s1] - g1[i] + g2[i] - g2[i+s2]); + } + else { + LOOP_OVER_VOL_OWNED0(gv, c, i) + f[i] -= dtdx * (g1[i+s1] - g1[i]); + } } } else { // fu update, no PML in f update KSTRIDE_DEF(dsigu, ku, gv.little_owned_corner0(c)); if (cnd) { - double dt2 = dt * 0.5; - if (g2) { - LOOP_OVER_VOL_OWNED0(gv, c, i) { - DEF_ku; double fprev = fu[i]; - fu[i] = ((1 - dt2 * cnd[i]) * fprev - - dtdx * (g1[i+s1] - g1[i] + g2[i] - g2[i+s2])) * cndinv[i]; - f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); - } - } - else { - LOOP_OVER_VOL_OWNED0(gv, c, i) { - DEF_ku; double fprev = fu[i]; - fu[i] = ((1 - dt2 * cnd[i]) * fprev - - dtdx * (g1[i+s1] - g1[i])) * cndinv[i]; - f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); - } - } + double dt2 = dt * 0.5; + if (g2) { + LOOP_OVER_VOL_OWNED0(gv, c, i) { + DEF_ku; double fprev = fu[i]; + fu[i] = ((1 - dt2 * cnd[i]) * fprev - + dtdx * (g1[i+s1] - g1[i] + g2[i] - g2[i+s2])) * cndinv[i]; + f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); + } + } + else { + LOOP_OVER_VOL_OWNED0(gv, c, i) { + DEF_ku; double fprev = fu[i]; + fu[i] = ((1 - dt2 * cnd[i]) * fprev + - dtdx * (g1[i+s1] - g1[i])) * cndinv[i]; + f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); + } + } } else { // no conductivity - if (g2) { - LOOP_OVER_VOL_OWNED0(gv, c, i) { - DEF_ku; double fprev = fu[i]; - fu[i] -= dtdx * (g1[i+s1] - g1[i] + g2[i] - g2[i+s2]); - f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); - } - } - else { - LOOP_OVER_VOL_OWNED0(gv, c, i) { - DEF_ku; double fprev = fu[i]; - fu[i] -= dtdx * (g1[i+s1] - g1[i]); - f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); - } - } + if (g2) { + LOOP_OVER_VOL_OWNED0(gv, c, i) { + DEF_ku; double fprev = fu[i]; + fu[i] -= dtdx * (g1[i+s1] - g1[i] + g2[i] - g2[i+s2]); + f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); + } + } + else { + LOOP_OVER_VOL_OWNED0(gv, c, i) { + DEF_ku; double fprev = fu[i]; + fu[i] -= dtdx * (g1[i+s1] - g1[i]); + f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); + } + } } } } @@ -154,7 +154,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; realnum fcnd_prev = fcnd[i]; - fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - + fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - dtdx * (g1[i+s1]-g1[i] + g2[i]-g2[i+s2])) * cndinv[i]; f[i] = ((kap[k] - sig[k]) * f[i] + (fcnd[i] - fcnd_prev)) * siginv[k]; } @@ -163,7 +163,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; realnum fcnd_prev = fcnd[i]; - fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - + fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - dtdx * (g1[i+s1] - g1[i])) * cndinv[i]; f[i] = ((kap[k] - sig[k]) * f[i] + (fcnd[i] - fcnd_prev)) * siginv[k]; } @@ -194,7 +194,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; DEF_ku; double fprev = fu[i]; realnum fcnd_prev = fcnd[i]; - fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - + fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - dtdx * (g1[i+s1]-g1[i] + g2[i]-g2[i+s2])) * cndinv[i]; fu[i] = ((kap[k] - sig[k]) * fu[i] + (fcnd[i] - fcnd_prev)) * siginv[k]; f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); @@ -205,7 +205,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; DEF_ku; double fprev = fu[i]; realnum fcnd_prev = fcnd[i]; - fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - + fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - dtdx * (g1[i+s1] - g1[i])) * cndinv[i]; fu[i] = ((kap[k] - sig[k]) * fu[i] + (fcnd[i] - fcnd_prev)) * siginv[k]; f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); @@ -233,7 +233,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, } } -/* field-update equation f += betadt * g (plus variants for conductivity +/* field-update equation f += betadt * g (plus variants for conductivity and/or PML). This is used in 2d calculations to add an exp(i beta z) time dependence, which gives an additional i \beta \hat{z} \times cross-product in the curl equations. */ @@ -317,12 +317,12 @@ void step_beta(RPR f, component c, const RPR g, /* Given Dsqr = |D|^2 and Di = component of D, compute the factor f so that Ei = chi1inv * f * Di. In principle, this would involve solving - a cubic equation, but instead we use a Pade approximant that is + a cubic equation, but instead we use a Pade approximant that is accurate to several orders. This is inaccurate if the nonlinear index change is large, of course, but in that case the chi2/chi3 power-series expansion isn't accurate anyway, so the cubic isn't physical there either. */ -inline double calc_nonlinear_u(const double Dsqr, +inline double calc_nonlinear_u(const double Dsqr, const double Di, const double chi1inv, const double chi2, const double chi3) { @@ -348,19 +348,19 @@ inline double calc_nonlinear_u(const double Dsqr, */ -void step_update_EDHB(RPR f, component fc, const grid_volume &gv, +void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, const RPR g1, const RPR g2, const RPR u, const RPR u1, const RPR u2, - int s, int s1, int s2, + ptrdiff_t s, ptrdiff_t s1, ptrdiff_t s2, const RPR chi2, const RPR chi3, RPR fw, direction dsigw, const DPR sigw, const DPR kapw) { if (!f) return; - + if ((!g1 && g2) || (g1 && g2 && !u1 && u2)) { /* swap g1 and g2 */ SWAP(const RPR, g1, g2); SWAP(const RPR, u1, u2); - SWAP(int, s1, s2); + SWAP(ptrdiff_t, s1, s2); } // stable averaging of offdiagonal components @@ -369,7 +369,7 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, /* As with step_curl, these loops are all essentially copies of the "MOST GENERAL CASE" loop with various terms thrown out. */ - + if (dsigw != NO_DIRECTION) { //////// PML case (with fw) ///////////// KSTRIDE_DEF(dsigw, kw, gv.little_owned_corner0(fc)); if (u1 && u2) { // 3x3 off-diagonal u @@ -382,7 +382,7 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, DEF_kw; double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us + OFFDIAG(u1,g1,s1) + OFFDIAG(u2,g2,s2)) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s*g1s + g2s*g2s), - gs, us, chi2[i], chi3[i]); + gs, us, chi2[i], chi3[i]); f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; } ///////////////////////////////////////////////////////////// @@ -481,7 +481,7 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, double gs = g[i]; double us = u[i]; f[i] = (gs * us + OFFDIAG(u1,g1,s1) + OFFDIAG(u2,g2,s2)) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s*g1s + g2s*g2s), - gs, us, chi2[i], chi3[i]); + gs, us, chi2[i], chi3[i]); } } else { diff --git a/src/stress.cpp b/src/stress.cpp index 2e5e78690..34d125096 100644 --- a/src/stress.cpp +++ b/src/stress.cpp @@ -25,8 +25,8 @@ using namespace std; namespace meep { dft_force::dft_force(dft_chunk *offdiag1_, dft_chunk *offdiag2_, - dft_chunk *diag_, - double fmin, double fmax, int Nf) + dft_chunk *diag_, + double fmin, double fmax, int Nf) { if (Nf <= 1) fmin = fmax = (fmin + fmax) * 0.5; freq_min = fmin; @@ -66,13 +66,13 @@ void dft_force::operator-=(const dft_force &st) { } static void stress_sum(int Nfreq, double *F, - const dft_chunk *F1, const dft_chunk *F2) + const dft_chunk *F1, const dft_chunk *F2) { for (const dft_chunk *curF1 = F1, *curF2 = F2; curF1 && curF2; curF1 = curF1->next_in_dft, curF2 = curF2->next_in_dft) { complex extra_weight(real(curF1->extra_weight), imag(curF1->extra_weight)); - for (int k = 0; k < curF1->N; ++k) + for (size_t k = 0; k < curF1->N; ++k) for (int i = 0; i < Nfreq; ++i) F[i] += real(extra_weight * curF1->dft[k*Nfreq + i] * conj(curF2->dft[k*Nfreq + i])); diff --git a/src/structure.cpp b/src/structure.cpp index c5b0b47b4..53d813892 100644 --- a/src/structure.cpp +++ b/src/structure.cpp @@ -70,8 +70,8 @@ structure::structure(const grid_volume &thegv, double eps(const vec &), } } -void structure::choose_chunkdivision(const grid_volume &thegv, - int desired_num_chunks, +void structure::choose_chunkdivision(const grid_volume &thegv, + int desired_num_chunks, const boundary_region &br, const symmetry &s) { user_volume = thegv; @@ -96,7 +96,7 @@ void structure::choose_chunkdivision(const grid_volume &thegv, (S.transform(d,n).d != d || S.transform(d,n).flipped)) { if (thegv.num_direction(d) & 1 && !break_this[d] && !quiet) master_printf("Padding %s to even number of grid points.\n", - direction_name(d)); + direction_name(d)); break_this[dd] = true; } } @@ -104,11 +104,11 @@ void structure::choose_chunkdivision(const grid_volume &thegv, for (int d=0;d<3;d++) { if (break_mult == S.multiplicity()) break_this[d] = false; if (break_this[d]) { - break_mult *= 2; - if (!quiet) - master_printf("Halving computational cell along direction %s\n", - direction_name(direction(d))); - gv = gv.halve((direction)d); + break_mult *= 2; + if (!quiet) + master_printf("Halving computational cell along direction %s\n", + direction_name(direction(d))); + gv = gv.halve((direction)d); } } // Before padding, find the corresponding geometric grid_volume. @@ -138,8 +138,8 @@ void structure::choose_chunkdivision(const grid_volume &thegv, for (int j = 0; j < num_effort_volumes; j++) { grid_volume vc; if (vi.intersect_with(effort_volumes[j], &vc)) { - chunks[num_chunks] = new structure_chunk(vc, v, Courant, proc); - br.apply(this, chunks[num_chunks++]); + chunks[num_chunks] = new structure_chunk(vc, v, Courant, proc); + br.apply(this, chunks[num_chunks++]); } } } @@ -162,14 +162,14 @@ void boundary_region::apply(structure *s) const { void boundary_region::apply(const structure *s, structure_chunk *sc) const { if (has_direction(s->gv.dim, d) && s->user_volume.has_boundary(side, d) - && s->user_volume.num_direction(d) > 1) { + && s->user_volume.num_direction(d) > 1) { switch (kind) { case NOTHING_SPECIAL: break; - case PML: + case PML: sc->use_pml(d, thickness, s->user_volume.boundary_location(side, d), Rasymptotic, mean_stretch, pml_profile, pml_profile_data, - pml_profile_integral, pml_profile_integral_u); + pml_profile_integral, pml_profile_integral_u); break; default: abort("unknown boundary region kind"); } @@ -200,14 +200,14 @@ double pml_quadratic_profile(double u, void *d) { (void)d; return u * u; } boundary_region pml(double thickness, direction d, boundary_side side, double Rasymptotic, double mean_stretch) { - return boundary_region(boundary_region::PML, thickness, + return boundary_region(boundary_region::PML, thickness, Rasymptotic, mean_stretch, pml_quadratic_profile, NULL, 1./3., 1./4., d, side, NULL); } boundary_region pml(double thickness, direction d, double Rasymptotic, double mean_stretch) { - return (pml(thickness, d, Low, Rasymptotic, mean_stretch) + return (pml(thickness, d, Low, Rasymptotic, mean_stretch) + pml(thickness, d, High, Rasymptotic, mean_stretch)); } boundary_region pml(double thickness, @@ -226,24 +226,23 @@ void structure::check_chunks() { for (int j=i+1; jgv.intersect_with(chunks[j]->gv, &vol_intersection)) abort("chunks[%d] intersects with chunks[%d]\n", i, j); - // FIXME: should use 'long long' else will fail if grid > 2e9 points - int sum = 0; + size_t sum = 0; for (int i=0; igv.dim, d) grid_points *= chunks[i]->gv.num_direction(d); sum += grid_points; } - int v_grid_points = 1; + size_t v_grid_points = 1; LOOP_OVER_DIRECTIONS(gv.dim, d) v_grid_points *= gv.num_direction(d); if (sum != v_grid_points) - abort("v_grid_points = %d, sum(chunks) = %d\n", v_grid_points, sum); + abort("v_grid_points = %zd, sum(chunks) = %zd\n", v_grid_points, sum); } void structure::add_to_effort_volumes(const grid_volume &new_effort_volume, double extra_effort) { grid_volume *temp_volumes = - new grid_volume[(2*number_of_directions(gv.dim)+1)*num_effort_volumes]; + new grid_volume[(2*number_of_directions(gv.dim)+1)*num_effort_volumes]; double *temp_effort = new double[(2*number_of_directions(gv.dim)+1)*num_effort_volumes]; // Intersect previous mat_volumes with this new_effort_volume @@ -266,12 +265,12 @@ void structure::add_to_effort_volumes(const grid_volume &new_effort_volume, temp_volumes[counter] = intersection; counter++; for (int k = 0; kset_chi1inv(c, eps, use_anisotropic_averaging, tol, maxeval); } -void structure::set_epsilon(material_function &eps, +void structure::set_epsilon(material_function &eps, bool use_anisotropic_averaging, double tol, int maxeval) { double tstart = wall_time(); - FOR_ELECTRIC_COMPONENTS(c) set_chi1inv(c, eps, use_anisotropic_averaging, + FOR_ELECTRIC_COMPONENTS(c) set_chi1inv(c, eps, use_anisotropic_averaging, tol, maxeval); if (!quiet) master_printf("time for set_epsilon = %g s\n", wall_time() - tstart); @@ -389,7 +388,7 @@ void structure::set_mu(material_function &m, bool use_anisotropic_averaging, double tol, int maxeval) { double tstart = wall_time(); - FOR_MAGNETIC_COMPONENTS(c) set_chi1inv(c, m, use_anisotropic_averaging, + FOR_MAGNETIC_COMPONENTS(c) set_chi1inv(c, m, use_anisotropic_averaging, tol, maxeval); if (!quiet) master_printf("time for set_mu = %g s\n", wall_time() - tstart); @@ -493,7 +492,7 @@ void structure::use_pml(direction d, boundary_side b, double dx) { if (b == High) pml_volume.set_origin(d, user_volume.big_corner().in_direction(d) - pml_volume.num_direction(d) * 2); - const int v_to_user_shift = (user_volume.little_corner().in_direction(d) + const int v_to_user_shift = (user_volume.little_corner().in_direction(d) - gv.little_corner().in_direction(d)) / 2; if (b == Low && v_to_user_shift != 0) pml_volume.set_num_direction(d, pml_volume.num_direction(d) + v_to_user_shift); @@ -542,7 +541,7 @@ structure_chunk::~structure_chunk() { delete[] chi2[c]; delete[] chi3[c]; } - FOR_DIRECTIONS(d) { + FOR_DIRECTIONS(d) { delete[] sig[d]; delete[] kap[d]; delete[] siginv[d]; @@ -556,33 +555,33 @@ void structure_chunk::mix_with(const structure_chunk *n, double f) { chi1inv[c][d] = new realnum[gv.ntot()]; trivial_chi1inv[c][d] = n->trivial_chi1inv[c][d]; if (component_direction(c) == d) // diagonal components = 1 by default - for (int i=0;iconductivity[c][d]) { conductivity[c][d] = new realnum[gv.ntot()]; - for (int i=0;itrivial_chi1inv[c][d]; if (n->chi1inv[c][d]) - for (int i=0;ichi1inv[c][d][i] - chi1inv[c][d][i]); else { double nval = component_direction(c) == d ? 1.0 : 0.0; // default - for (int i=0;iconductivity[c][d]) - for (int i=0;iconductivity[c][d][i] - conductivity[c][d][i]); else - for (int i=0;i 0) { - double s = pml_profile(x/dx, pml_profile_data); - sig[d][idx]=0.5*dt*prefac*s; - kap[d][idx] = 1 + kappa_prefac*s*(x/dx); - siginv[d][idx] = 1/(kap[d][idx]+sig[d][idx]); + double s = pml_profile(x/dx, pml_profile_data); + sig[d][idx]=0.5*dt*prefac*s; + kap[d][idx] = 1 + kappa_prefac*s*(x/dx); + siginv[d][idx] = 1/(kap[d][idx]+sig[d][idx]); } } } @@ -680,13 +679,13 @@ void structure_chunk::update_condinv() { structure_chunk::structure_chunk(const structure_chunk *o) : v(o->v) { refcount = 1; - + FOR_FIELD_TYPES(ft) { { susceptibility *cur = NULL; for (const susceptibility *ocur = o->chiP[ft]; ocur; ocur = ocur->next) { if (cur) { cur->next = ocur->clone(); cur = cur->next; } - else { chiP[ft] = cur = ocur->clone(); } + else { chiP[ft] = cur = ocur->clone(); } cur->next = NULL; } } @@ -701,14 +700,14 @@ structure_chunk::structure_chunk(const structure_chunk *o) : v(o->v) { if (is_mine() && o->chi3[c]) { chi3[c] = new realnum[gv.ntot()]; if (chi3[c] == NULL) abort("Out of memory!\n"); - for (int i=0;ichi3[c][i]; + for (size_t i=0;ichi3[c][i]; } else { chi3[c] = NULL; } if (is_mine() && o->chi2[c]) { chi2[c] = new realnum[gv.ntot()]; if (chi2[c] == NULL) abort("Out of memory!\n"); - for (int i=0;ichi2[c][i]; + for (size_t i=0;ichi2[c][i]; } else { chi2[c] = NULL; } @@ -730,8 +729,8 @@ structure_chunk::structure_chunk(const structure_chunk *o) : v(o->v) { } condinv_stale = o->condinv_stale; // Allocate the PML conductivity arrays: - FOR_DIRECTIONS(d) { - sig[d] = NULL; + FOR_DIRECTIONS(d) { + sig[d] = NULL; kap[d] = NULL; siginv[d] = NULL; sigsize[d] = 0; @@ -739,29 +738,29 @@ structure_chunk::structure_chunk(const structure_chunk *o) : v(o->v) { for (int i=0;i<5;++i) sigsize[i] = 0; // Copy over the PML conductivity arrays: if (is_mine()) - FOR_DIRECTIONS(d) + FOR_DIRECTIONS(d) if (o->sig[d]) { - sig[d] = new double[2*gv.num_direction(d)+1]; - kap[d] = new double[2*gv.num_direction(d)+1]; - siginv[d] = new double[2*gv.num_direction(d)+1]; - sigsize[d] = o->sigsize[d]; - for (int i=0;i<2*gv.num_direction(d)+1;i++) { - sig[d][i] = o->sig[d][i]; - kap[d][i] = o->kap[d][i]; - siginv[d][i] = o->siginv[d][i]; - } + sig[d] = new double[2*gv.num_direction(d)+1]; + kap[d] = new double[2*gv.num_direction(d)+1]; + siginv[d] = new double[2*gv.num_direction(d)+1]; + sigsize[d] = o->sigsize[d]; + for (int i=0;i<2*gv.num_direction(d)+1;i++) { + sig[d][i] = o->sig[d][i]; + kap[d][i] = o->kap[d][i]; + siginv[d][i] = o->siginv[d][i]; + } } } void structure_chunk::set_chi3(component c, material_function &epsilon) { if (!is_mine() || !gv.has_field(c)) return; if (!is_electric(c) && !is_magnetic(c)) abort("only E or H can have chi3"); - + epsilon.set_volume(gv.pad().surroundings()); if (!chi1inv[c][component_direction(c)]) { // require chi1 if we have chi3 chi1inv[c][component_direction(c)] = new realnum[gv.ntot()]; - for (int i = 0; i < gv.ntot(); ++i) + for (size_t i = 0; i < gv.ntot(); ++i) chi1inv[c][component_direction(c)][i] = 1.0; } @@ -772,12 +771,12 @@ void structure_chunk::set_chi3(component c, material_function &epsilon) { chi3[c][i] = epsilon.chi3(c, here); trivial = trivial && (chi3[c][i] == 0.0); } - + /* currently, our update_e_from_d routine requires that chi2 be present if chi3 is, and vice versa */ if (!chi2[c]) { if (!trivial) { - chi2[c] = new realnum[gv.ntot()]; + chi2[c] = new realnum[gv.ntot()]; memset(chi2[c], 0, gv.ntot() * sizeof(realnum)); // chi2 = 0 } else { // no chi3, and chi2 is trivial (== 0), so delete @@ -792,12 +791,12 @@ void structure_chunk::set_chi3(component c, material_function &epsilon) { void structure_chunk::set_chi2(component c, material_function &epsilon) { if (!is_mine() || !gv.has_field(c)) return; if (!is_electric(c) && !is_magnetic(c)) abort("only E or H can have chi2"); - + epsilon.set_volume(gv.pad().surroundings()); if (!chi1inv[c][component_direction(c)]) { // require chi1 if we have chi2 chi1inv[c][component_direction(c)] = new realnum[gv.ntot()]; - for (int i = 0; i < gv.ntot(); ++i) + for (size_t i = 0; i < gv.ntot(); ++i) chi1inv[c][component_direction(c)][i] = 1.0; } @@ -808,15 +807,15 @@ void structure_chunk::set_chi2(component c, material_function &epsilon) { chi2[c][i] = epsilon.chi2(c, here); trivial = trivial && (chi2[c][i] == 0.0); } - + /* currently, our update_e_from_d routine requires that chi3 be present if chi2 is, and vice versa */ if (!chi3[c]) { if (!trivial) { - chi3[c] = new realnum[gv.ntot()]; + chi3[c] = new realnum[gv.ntot()]; memset(chi3[c], 0, gv.ntot() * sizeof(realnum)); // chi3 = 0 } - else { // no chi2, and chi3 is trivial (== 0), so delete + else { // no chi2, and chi3 is trivial (== 0), so delete delete[] chi2[c]; chi2[c] = NULL; } @@ -837,8 +836,8 @@ void structure_chunk::set_conductivity(component c, material_function &C) { component c_C = is_electric(c) ? direction_component(Dx, c_d) : (is_magnetic(c) ? direction_component(Bx, c_d) : c); realnum *multby = is_electric(c) || is_magnetic(c) ? chi1inv[c][c_d] : 0; - if (!conductivity[c_C][c_d]) - conductivity[c_C][c_d] = new realnum[gv.ntot()]; + if (!conductivity[c_C][c_d]) + conductivity[c_C][c_d] = new realnum[gv.ntot()]; if (!conductivity[c_C][c_d]) abort("Memory allocation error.\n"); bool trivial = true; realnum *cnd = conductivity[c_C][c_d]; @@ -865,8 +864,8 @@ void structure_chunk::set_conductivity(component c, material_function &C) { C.unset_volume(); } -structure_chunk::structure_chunk(const grid_volume &thegv, - const volume &vol_limit, +structure_chunk::structure_chunk(const grid_volume &thegv, + const volume &vol_limit, double Courant, int pr) : Courant(Courant), v(thegv.surroundings() & vol_limit) { refcount = 1; @@ -888,8 +887,8 @@ structure_chunk::structure_chunk(const grid_volume &thegv, condinv[c][d] = NULL; } condinv_stale = false; - FOR_DIRECTIONS(d) { - sig[d] = NULL; + FOR_DIRECTIONS(d) { + sig[d] = NULL; kap[d] = NULL; siginv[d] = NULL; sigsize[d] = 0; @@ -914,16 +913,16 @@ double fields::max_eps() const { double structure_chunk::max_eps() const { double themax = 0.0; - FOR_COMPONENTS(c) { - direction d = component_direction(c); + FOR_COMPONENTS(c) { + direction d = component_direction(c); if (chi1inv[c][d]) - for (int i=0;iremove_susceptibilities(); } diff --git a/src/susceptibility.cpp b/src/susceptibility.cpp index a9a948f37..89fbd7a13 100644 --- a/src/susceptibility.cpp +++ b/src/susceptibility.cpp @@ -92,7 +92,7 @@ bool susceptibility::needs_W_notowned(component c, typedef struct { size_t sz_data; - int ntot; + size_t ntot; realnum *P[NUM_FIELD_COMPONENTS][2]; realnum *P_prev[NUM_FIELD_COMPONENTS][2]; realnum data[1]; @@ -119,7 +119,7 @@ void lorentzian_susceptibility::init_internal_data( size_t sz_data = d->sz_data; memset(d, 0, sz_data); d->sz_data = sz_data; - int ntot = d->ntot = gv.ntot(); + size_t ntot = d->ntot = gv.ntot(); realnum *P = d->data; realnum *P_prev = d->data + ntot; FOR_COMPONENTS(c) DOCMP2 if (needs_P(c, cmp, W)) { @@ -135,7 +135,7 @@ void *lorentzian_susceptibility::copy_internal_data(void *data) const { if (!d) return 0; lorentzian_data *dnew = (lorentzian_data *) malloc(d->sz_data); memcpy(dnew, d, d->sz_data); - int ntot = d->ntot; + size_t ntot = d->ntot; realnum *P = dnew->data; realnum *P_prev = dnew->data + ntot; FOR_COMPONENTS(c) DOCMP2 if (d->P[c][cmp]) { @@ -153,7 +153,7 @@ void *lorentzian_susceptibility::copy_internal_data(void *data) const { (z + 1/z - 2)/dt^2 + g*(z - 1/z)/(2*dt) + w^2 = 0 where w = 2*pi*omega_0 and g = 2*pi*gamma. It is just a little algebra from this to get the condition for a root with |z| > 1. - + FIXME: this test seems to be too conservative (issue #12) */ static bool lorentzian_unstable(double omega_0, double gamma, double dt) { double w = 2*pi*omega_0, g = 2*pi*gamma; @@ -170,7 +170,7 @@ static bool lorentzian_unstable(double omega_0, double gamma, double dt) { void lorentzian_susceptibility::update_P (realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], + realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, const grid_volume &gv, void *P_internal_data) const { lorentzian_data *d = (lorentzian_data *) P_internal_data; const double omega2pi = 2*pi*omega_0, g2pi = gamma*2*pi; @@ -180,7 +180,7 @@ void lorentzian_susceptibility::update_P (void) W_prev; // unused; // TODO: add back lorentzian_unstable(omega_0, gamma, dt) if we can improve the stability test - + FOR_COMPONENTS(c) DOCMP2 if (d->P[c][cmp]) { const realnum *w = W[c][cmp], *s = sigma[c][component_direction(c)]; if (w && s) { @@ -188,30 +188,30 @@ void lorentzian_susceptibility::update_P // directions/strides for offdiagonal terms, similar to update_eh const direction d = component_direction(c); - const int is = gv.stride(d) * (is_magnetic(c) ? -1 : +1); + const ptrdiff_t is = gv.stride(d) * (is_magnetic(c) ? -1 : +1); direction d1 = cycle_direction(gv.dim, d, 1); component c1 = direction_component(c, d1); - int is1 = gv.stride(d1) * (is_magnetic(c) ? -1 : +1); + ptrdiff_t is1 = gv.stride(d1) * (is_magnetic(c) ? -1 : +1); const realnum *w1 = W[c1][cmp]; const realnum *s1 = w1 ? sigma[c][d1] : NULL; direction d2 = cycle_direction(gv.dim, d, 2); component c2 = direction_component(c, d2); - int is2 = gv.stride(d2) * (is_magnetic(c) ? -1 : +1); + ptrdiff_t is2 = gv.stride(d2) * (is_magnetic(c) ? -1 : +1); const realnum *w2 = W[c2][cmp]; const realnum *s2 = w2 ? sigma[c][d2] : NULL; if (s2 && !s1) { // make s1 the non-NULL one if possible SWAP(direction, d1, d2); SWAP(component, c1, c2); - SWAP(int, is1, is2); + SWAP(ptrdiff_t, is1, is2); SWAP(const realnum *, w1, w2); SWAP(const realnum *, s1, s2); } if (s1 && s2) { // 3x3 anisotropic LOOP_OVER_VOL_OWNED(gv, c, i) { realnum pcur = p[i]; - p[i] = gamma1inv * (pcur * (2 - omega0dtsqr_denom) - - gamma1 * pp[i] + p[i] = gamma1inv * (pcur * (2 - omega0dtsqr_denom) + - gamma1 * pp[i] + omega0dtsqr * (s[i] * w[i] + OFFDIAG(s1,w1,is1,is) + OFFDIAG(s2,w2,is2,is))); @@ -221,8 +221,8 @@ void lorentzian_susceptibility::update_P else if (s1) { // 2x2 anisotropic LOOP_OVER_VOL_OWNED(gv, c, i) { realnum pcur = p[i]; - p[i] = gamma1inv * (pcur * (2 - omega0dtsqr_denom) - - gamma1 * pp[i] + p[i] = gamma1inv * (pcur * (2 - omega0dtsqr_denom) + - gamma1 * pp[i] + omega0dtsqr * (s[i] * w[i] + OFFDIAG(s1,w1,is1,is))); pp[i] = pcur; @@ -231,8 +231,8 @@ void lorentzian_susceptibility::update_P else { // isotropic LOOP_OVER_VOL_OWNED(gv, c, i) { realnum pcur = p[i]; - p[i] = gamma1inv * (pcur * (2 - omega0dtsqr_denom) - - gamma1 * pp[i] + p[i] = gamma1inv * (pcur * (2 - omega0dtsqr_denom) + - gamma1 * pp[i] + omega0dtsqr * (s[i] * w[i])); pp[i] = pcur; } @@ -242,17 +242,17 @@ void lorentzian_susceptibility::update_P } void lorentzian_susceptibility::subtract_P(field_type ft, - realnum *f_minus_p[NUM_FIELD_COMPONENTS][2], + realnum *f_minus_p[NUM_FIELD_COMPONENTS][2], void *P_internal_data) const { lorentzian_data *d = (lorentzian_data *) P_internal_data; field_type ft2 = ft == E_stuff ? D_stuff : B_stuff; // for sources etc. - int ntot = d->ntot; + size_t ntot = d->ntot; FOR_FT_COMPONENTS(ft, ec) DOCMP2 if (d->P[ec][cmp]) { component dc = field_type_component(ft2, ec); if (f_minus_p[dc][cmp]) { realnum *p = d->P[ec][cmp]; realnum *fmp = f_minus_p[dc][cmp]; - for (int i = 0; i < ntot; ++i) fmp[i] -= p[i]; + for (size_t i = 0; i < ntot; ++i) fmp[i] -= p[i]; } } } @@ -264,8 +264,8 @@ int lorentzian_susceptibility::num_cinternal_notowned_needed(component c, } realnum *lorentzian_susceptibility::cinternal_notowned_ptr( - int inotowned, component c, int cmp, - int n, + int inotowned, component c, int cmp, + int n, void *P_internal_data) const { lorentzian_data *d = (lorentzian_data *) P_internal_data; (void) inotowned; // always = 0 @@ -277,7 +277,7 @@ realnum *lorentzian_susceptibility::cinternal_notowned_ptr( void noisy_lorentzian_susceptibility::update_P (realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], + realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, const grid_volume &gv, void *P_internal_data) const { lorentzian_susceptibility::update_P(W, W_prev, dt, gv, P_internal_data); lorentzian_data *d = (lorentzian_data *) P_internal_data; diff --git a/src/update_eh.cpp b/src/update_eh.cpp index 762591851..25b24eae3 100644 --- a/src/update_eh.cpp +++ b/src/update_eh.cpp @@ -23,13 +23,13 @@ using namespace std; namespace meep { - + void fields::update_eh(field_type ft, bool skip_w_components) { if (ft != E_stuff && ft != H_stuff) abort("update_eh only works with E/H"); for (int i=0;iis_mine()) if (chunks[i]->update_eh(ft, skip_w_components)) - chunk_connections_valid = false; // E/H allocated - reconnect chunks + chunk_connections_valid = false; // E/H allocated - reconnect chunks /* synchronize to avoid deadlocks if one process decides it needs to allocate E or H ... */ @@ -80,7 +80,7 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) { break; } - const int ntot = s->gv.ntot(); + const size_t ntot = s->gv.ntot(); if (have_f_minus_p && doing_solve_cw) abort("dispersive materials are not yet implemented for solve_cw"); @@ -104,23 +104,23 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) { // Next, subtract time-integrated sources (i.e. polarizations, not currents) if (have_f_minus_p && !doing_solve_cw) { - for (src_vol *sv = sources[ft2]; sv; sv = sv->next) { + for (src_vol *sv = sources[ft2]; sv; sv = sv->next) { if (sv->t->is_integrated && f[sv->c][0] && ft == type(sv->c)) { - component c = field_type_component(ft2, sv->c); - for (int j = 0; j < sv->npts; ++j) { - const complex A = sv->dipole(j); - DOCMP { - f_minus_p[c][cmp][sv->index[j]] -= - (cmp) ? imag(A) : real(A); - } - } + component c = field_type_component(ft2, sv->c); + for (size_t j = 0; j < sv->npts; ++j) { + const complex A = sv->dipole(j); + DOCMP { + f_minus_p[c][cmp][sv->index[j]] -= + (cmp) ? imag(A) : real(A); + } + } } } } ////////////////////////////////////////////////////////////////////////// // Finally, compute E = chi1inv * D - + realnum *dmp[NUM_FIELD_COMPONENTS][2]; FOR_FT_COMPONENTS(ft2,dc) DOCMP2 dmp[dc][cmp] = f_minus_p[dc][cmp] ? f_minus_p[dc][cmp] : f[dc][cmp]; @@ -129,13 +129,13 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) { if (type(ec) != ft) abort("bug in FOR_FT_COMPONENTS"); component dc = field_type_component(ft2, ec); const direction d_ec = component_direction(ec); - const int s_ec = gv.stride(d_ec) * (ft == H_stuff ? -1 : +1); + const ptrdiff_t s_ec = gv.stride(d_ec) * (ft == H_stuff ? -1 : +1); const direction d_1 = cycle_direction(gv.dim, d_ec, 1); const component dc_1 = direction_component(dc,d_1); - const int s_1 = gv.stride(d_1) * (ft == H_stuff ? -1 : +1); + const ptrdiff_t s_1 = gv.stride(d_1) * (ft == H_stuff ? -1 : +1); const direction d_2 = cycle_direction(gv.dim, d_ec, 2); const component dc_2 = direction_component(dc,d_2); - const int s_2 = gv.stride(d_2) * (ft == H_stuff ? -1 : +1); + const ptrdiff_t s_2 = gv.stride(d_2) * (ft == H_stuff ? -1 : +1); direction dsigw0 = d_ec; direction dsigw = s->sigsize[dsigw0] > 1 ? dsigw0 : NO_DIRECTION; @@ -147,7 +147,7 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) { memcpy(f[ec][cmp], f[dc][cmp], gv.ntot() * sizeof(realnum)); allocated_eh = true; } - + // lazily allocate W auxiliary field if (!f_w[ec][cmp] && dsigw != NO_DIRECTION) { f_w[ec][cmp] = new realnum[gv.ntot()]; @@ -166,7 +166,7 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) { } if (f[ec][cmp] != f[dc][cmp]) - STEP_UPDATE_EDHB(f[ec][cmp], ec, gv, + STEP_UPDATE_EDHB(f[ec][cmp], ec, gv, dmp[dc][cmp], dmp[dc_1][cmp], dmp[dc_2][cmp], s->chi1inv[ec][d_ec], dmp[dc_1][cmp]?s->chi1inv[ec][d_1]:NULL, dmp[dc_2][cmp]?s->chi1inv[ec][d_2]:NULL, s_ec, s_1, s_2, s->chi2[ec], s->chi3[ec], @@ -190,17 +190,17 @@ bool fields_chunk::update_eh(field_type ft, bool skip_w_components) { const realnum *D = f_minus_p[dc][cmp] ? f_minus_p[dc][cmp] : f[dc][cmp]; const realnum *chi1inv = s->chi1inv[ec][d_ec]; if (chi1inv) - for (int iZ=0; iZis_mine()) if (chunks[i]->update_pols(ft)) - chunk_connections_valid = false; + chunk_connections_valid = false; /* synchronize to avoid deadlocks if one process decides it needs to allocate E or H ... */ diff --git a/src/vec.cpp b/src/vec.cpp index cb62e2954..98e290482 100644 --- a/src/vec.cpp +++ b/src/vec.cpp @@ -124,7 +124,7 @@ component first_field_component(field_type ft) { case H_stuff: return Hx; case D_stuff: return Dx; case B_stuff: return Bx; - default: abort("bug - only E/H/D/B stuff have components"); + default: abort("bug - only E/H/D/B stuff have components"); } } @@ -159,23 +159,23 @@ ivec max(const ivec &ivec1, const ivec &ivec2) { volume::volume(const vec &vec1, const vec &vec2) { min_corner = min(vec1, vec2); max_corner = max(vec1, vec2); - dim = vec1.dim; + dim = vec1.dim; } volume::volume(const vec &pt) { - dim = pt.dim; + dim = pt.dim; min_corner = pt; max_corner = pt; } double volume::computational_volume() const { - double vol = 1.0; + double vol = 1.0; LOOP_OVER_DIRECTIONS(dim,d) vol *= in_direction(d); return vol; } double volume::integral_volume() const { - double vol = 1.0; + double vol = 1.0; LOOP_OVER_DIRECTIONS(dim, d) if (in_direction(d) != 0.0) vol *= in_direction(d); if (dim == Dcyl) vol *= pi * (in_direction_max(R) + in_direction_min(R)); @@ -183,7 +183,7 @@ double volume::integral_volume() const { } double volume::full_volume() const { - double vol = computational_volume(); + double vol = computational_volume(); if (dim == Dcyl) vol *= pi * (in_direction_max(R) + in_direction_min(R)); return vol; } @@ -284,18 +284,18 @@ direction grid_volume::yucky_direction(int n) const { } volume grid_volume::surroundings() const { - return volume(operator[](little_corner()), + return volume(operator[](little_corner()), operator[](big_corner())); } volume grid_volume::interior() const { - return volume(operator[](little_corner()), + return volume(operator[](little_corner()), operator[](big_corner() - one_ivec(dim) * 2)); } void grid_volume::update_ntot() { the_ntot = 1; - LOOP_OVER_DIRECTIONS(dim, d) the_ntot *= num[d%3] + 1; + LOOP_OVER_DIRECTIONS(dim, d) the_ntot *= (size_t)(num[d%3] + 1); } void grid_volume::set_num_direction(direction d, int value) { @@ -329,14 +329,14 @@ vec grid_volume::yee_shift(component c) const { /* Return array offsets to average with a given array location of c in order to get c on the "centered" grid. Then, to get the centered grid point i, you should average c over the four - locations: i, i+offset1, i+offset2, i+offset1+offset2. + locations: i, i+offset1, i+offset2, i+offset1+offset2. (offset2, and possibly offset1, may be zero if only 2 or 1 locations need to be averaged). */ -void grid_volume::yee2cent_offsets(component c, int &offset1, int &offset2) const { +void grid_volume::yee2cent_offsets(component c, ptrdiff_t &offset1, ptrdiff_t &offset2) const { offset1 = offset2 = 0; LOOP_OVER_DIRECTIONS(dim,d) { if (!iyee_shift(c).in_direction(d)) { - if (offset2) + if (offset2) abort("weird yee shift for component %s", component_name(c)); if (offset1) offset2 = stride(d); else offset1 = stride(d); @@ -345,7 +345,7 @@ void grid_volume::yee2cent_offsets(component c, int &offset1, int &offset2) cons } /* Same as yee2cent_offsets, but averages centered grid to get c */ -void grid_volume::cent2yee_offsets(component c, int &offset1, int &offset2) const { +void grid_volume::cent2yee_offsets(component c, ptrdiff_t &offset1, ptrdiff_t &offset2) const { yee2cent_offsets(c, offset1, offset2); offset1 = -offset1; offset2 = -offset2; @@ -405,18 +405,18 @@ bool grid_volume::get_boundary_icorners(component c, int ib, LOOP_OVER_DIRECTIONS(dim, d) { if (cl.in_direction(d) < clo.in_direction(d)) { if (jb == ib) { - ce->set_direction(d, cs->in_direction(d)); - ib_found = true; - break; + ce->set_direction(d, cs->in_direction(d)); + ib_found = true; + break; } cs->set_direction(d, clo.in_direction(d)); jb++; } if (cb.in_direction(d) > cbo.in_direction(d)) { if (jb == ib) { - cs->set_direction(d, ce->in_direction(d)); - ib_found = true; - break; + cs->set_direction(d, ce->in_direction(d)); + ib_found = true; + break; } ce->set_direction(d, cbo.in_direction(d)); jb++; @@ -437,8 +437,8 @@ ivec grid_volume::little_owned_corner(component c) const { return iloc; } -int grid_volume::nowned(component c) const { - int n = 1; +size_t grid_volume::nowned(component c) const { + size_t n = 1; ivec pt = big_corner() - little_owned_corner(c); LOOP_OVER_DIRECTIONS(dim, d) n *= pt.in_direction(d) / 2 + 1; return n; @@ -480,9 +480,9 @@ int grid_volume::has_boundary(boundary_side b,direction d) const { return 0; // This should never be reached. } -int grid_volume::index(component c, const ivec &p) const { +ptrdiff_t grid_volume::index(component c, const ivec &p) const { const ivec offset = p - io - iyee_shift(c); - int idx = 0; + ptrdiff_t idx = 0; LOOP_OVER_DIRECTIONS(dim,d) idx += offset.in_direction(d)/2*stride(d); return idx; } @@ -493,14 +493,14 @@ void grid_volume::set_strides() { switch(d) { case Z: the_stride[d] = 1; break; case R: the_stride[d] = nz()+1; break; - case X: the_stride[d] = (nz()+1)*(ny() + 1); break; + case X: the_stride[d] = ptrdiff_t(nz()+1)*(ny() + 1); break; case Y: the_stride[d] = nz() + 1; break; case P: break; // There is no phi stride... case NO_DIRECTION: break; // no stride here, either } } -static inline void stupidsort(int *ind, double *w, int l) { +static inline void stupidsort(ptrdiff_t *ind, double *w, int l) { while (l) { if (fabs(w[0]) < 2e-15) { w[0] = w[l-1]; @@ -531,7 +531,7 @@ static inline void stupidsort(ivec *locs, double *w, int l) { } void grid_volume::interpolate(component c, const vec &p, - int indices[8], double weights[8]) const { + ptrdiff_t indices[8], double weights[8]) const { ivec locs[8]; interpolate(c, p, locs, weights); for (int i=0;i<8&&weights[i];i++) @@ -552,7 +552,7 @@ void grid_volume::interpolate(component c, const vec &p, } // Throw out out of range indices: for (int i=0;i<8&&weights[i];i++) - if (indices[0] < 0 || indices[0] >= ntot()) weights[i] = 0.0; + if (indices[0] < 0 || size_t(indices[0]) >= ntot()) weights[i] = 0.0; // Stupid very crude code to compactify arrays: stupidsort(indices, weights, 8); if (!contains(p) && weights[0]) { @@ -565,7 +565,7 @@ void grid_volume::interpolate(component c, const vec &p, } void grid_volume::interpolate(component c, const vec &pc, - ivec locs[8], double weights[8]) const { + ivec locs[8], double weights[8]) const { const double SMALL = 1e-13; const vec p = (pc - yee_shift(c))*a; ivec middle(dim); @@ -614,7 +614,7 @@ void grid_volume::interpolate(component c, const vec &pc, if (all_same) { int num_weights = 0; for (int i=0;i<8&&weights[i];i++) num_weights++; - for (int i=0;i<8&&weights[i];i++) weights[i] = 1.0/num_weights; + for (int i=0;i<8&&weights[i];i++) weights[i] = 1.0/num_weights; } } @@ -642,7 +642,7 @@ volume grid_volume::dV(const ivec &here, double diameter) const { return out; } -volume grid_volume::dV(component c, int ind) const { +volume grid_volume::dV(component c, ptrdiff_t ind) const { if (!owns(iloc(c, ind))) return empty_volume(dim); return dV(iloc(c,ind)); } @@ -734,7 +734,7 @@ ivec grid_volume::big_corner() const { return ivec(0); // This is never reached. } -vec grid_volume::corner(boundary_side b) const { +vec grid_volume::corner(boundary_side b) const { if (b == Low) return origin; // Low corner vec tmp = origin; LOOP_OVER_DIRECTIONS(dim, d) @@ -744,9 +744,9 @@ vec grid_volume::corner(boundary_side b) const { void grid_volume::print() const { LOOP_OVER_DIRECTIONS(dim, d) - printf("%s =%5g - %5g (%5g) \t", - direction_name(d), origin.in_direction(d), - origin.in_direction(d)+num_direction(d)/a, num_direction(d)/a); + printf("%s =%5g - %5g (%5g) \t", + direction_name(d), origin.in_direction(d), + origin.in_direction(d)+num_direction(d)/a, num_direction(d)/a); printf("\n"); } @@ -795,7 +795,7 @@ bool grid_volume::intersect_with(const grid_volume &vol_in, grid_volume *interse other.shift_origin(d, (vol_containing.num_direction(d) - thick)*2); others[counter] = other; counter++; - vol_containing.set_num_direction(d, vol_containing.num_direction(d) + vol_containing.set_num_direction(d, vol_containing.num_direction(d) - thick); if (vol_containing.big_corner().in_direction(d) < vol_in.big_corner().in_direction(d)) @@ -803,11 +803,11 @@ bool grid_volume::intersect_with(const grid_volume &vol_in, grid_volume *interse } } *num_others = counter; - - int initial_points = 1; + + size_t initial_points = 1; LOOP_OVER_DIRECTIONS(dim, d) initial_points *= num_direction(d); - int final_points , temp = 1; - LOOP_OVER_DIRECTIONS(dim, d) temp *= intersection->num_direction(d); + size_t final_points , temp = 1; + LOOP_OVER_DIRECTIONS(dim, d) temp *= intersection->num_direction(d); final_points = temp; for (int j=0; j<*num_others; j++) { temp = 1; @@ -815,13 +815,13 @@ bool grid_volume::intersect_with(const grid_volume &vol_in, grid_volume *interse final_points += temp; } if (initial_points != final_points) - abort("intersect_with: initial_points != final_points, %d, %d\n", - initial_points, final_points); + abort("intersect_with: initial_points != final_points, %zd, %zd\n", + initial_points, final_points); } return true; } -vec grid_volume::loc_at_resolution(int index, double res) const { +vec grid_volume::loc_at_resolution(ptrdiff_t index, double res) const { vec where = origin; for (int dd=X;dd<=R;dd++) { const direction d = (direction) dd; @@ -836,25 +836,25 @@ vec grid_volume::loc_at_resolution(int index, double res) const { return where; } -int grid_volume::ntot_at_resolution(double res) const { - int mytot = 1; +size_t grid_volume::ntot_at_resolution(double res) const { + size_t mytot = 1; for (int d=X;d<=R;d++) if (has_boundary(High,(direction)d)) { const double dist = boundary_location(High,(direction)d) - boundary_location(Low,(direction)d); - mytot *= max(1,(int)(dist*res+0.5)); + mytot *= max(size_t(1),(size_t)(dist*res+0.5)); } return mytot; } -vec grid_volume::loc(component c, int ind) const { +vec grid_volume::loc(component c, ptrdiff_t ind) const { return operator[](iloc(c,ind)); } -ivec grid_volume::iloc(component c, int ind) const { +ivec grid_volume::iloc(component c, ptrdiff_t ind) const { ivec out(dim); LOOP_OVER_DIRECTIONS(dim,d) { - int ind_over_stride = ind/stride(d); + ptrdiff_t ind_over_stride = ind/stride(d); while (ind_over_stride < 0) ind_over_stride += num_direction(d)+1; out.set_direction(d, 2*(ind_over_stride%(num_direction(d)+1))); } @@ -925,9 +925,9 @@ grid_volume volcyl(double rsize, double zsize, double a) { else return grid_volume(Dcyl, a, (int) (rsize*a + 0.5), 0, (int) (zsize*a + 0.5)); } -grid_volume grid_volume::split(int n, int which) const { +grid_volume grid_volume::split(size_t n, int which) const { if (n > nowned_min()) - abort("Cannot split %d grid points into %d parts\n", nowned_min(), n); + abort("Cannot split %zd grid points into %zd parts\n", nowned_min(), n); if (n == 1) return *this; // Try to get as close as we can... @@ -942,17 +942,17 @@ grid_volume grid_volume::split(int n, int which) const { } grid_volume grid_volume::split_by_effort(int n, int which, int Ngv, const grid_volume *v, double *effort) const { - const int grid_points_owned = nowned_min(); - if (n > grid_points_owned) - abort("Cannot split %d grid points into %d parts\n", nowned_min(), n); + const size_t grid_points_owned = nowned_min(); + if (size_t(n) > grid_points_owned) + abort("Cannot split %zd grid points into %d parts\n", nowned_min(), n); if (n == 1) return *this; int biglen = 0; direction splitdir = NO_DIRECTION; - LOOP_OVER_DIRECTIONS(dim, d) if (num_direction(d) > biglen) { biglen = num_direction(d); splitdir = d; } + LOOP_OVER_DIRECTIONS(dim, d) if (num_direction(d) > biglen) { biglen = num_direction(d); splitdir = d; } double best_split_measure = 1e20, left_effort_fraction = 0; int best_split_point = 0; vec corner = zero_vec(dim); - LOOP_OVER_DIRECTIONS(dim, d) corner.set_direction(d, origin.in_direction(d) + num_direction(d)/a); + LOOP_OVER_DIRECTIONS(dim, d) corner.set_direction(d, origin.in_direction(d) + num_direction(d)/a); for (int split_point = 1; split_point < biglen; split_point+=1) { grid_volume v_left = *this; @@ -983,11 +983,11 @@ grid_volume grid_volume::split_by_effort(int n, int which, int Ngv, const grid_v } } const int split_point = best_split_point; - - const int num_low = (int)(left_effort_fraction *n + 0.5); + + const int num_low = (size_t)(left_effort_fraction *n + 0.5); // Revert to split() when effort method gives less grid points than chunks - if (num_low > best_split_point*(grid_points_owned/biglen) || - (n-num_low) > (grid_points_owned - best_split_point*(grid_points_owned/biglen))) + if (size_t(num_low) > best_split_point*(grid_points_owned/biglen) || + size_t(n-num_low) > (grid_points_owned - best_split_point*(grid_points_owned/biglen))) return split(n, which); if (which < num_low) @@ -1027,7 +1027,7 @@ grid_volume grid_volume::split_at_fraction(bool want_high, int numer) const { grid_volume grid_volume::halve(direction d) const { grid_volume retval(*this); // note that icenter-io is always even by construction of grid_volume::icenter - retval.set_num_direction(d, (icenter().in_direction(d) + retval.set_num_direction(d, (icenter().in_direction(d) - io.in_direction(d)) / 2); return retval; } @@ -1364,7 +1364,7 @@ volume_list *symmetry::reduce(const volume_list *gl) const { } } if (sn == multiplicity() && g->weight != 0.0) { // no match, add to glnew - volume_list *gn = + volume_list *gn = new volume_list(g->v, g->c, g->weight, glnew); glnew = gn; } @@ -1377,7 +1377,7 @@ volume_list *symmetry::reduce(const volume_list *gl) const { bool halve[5] = {false,false,false,false,false}; complex weight = g->weight; for (int sn = 1; sn < multiplicity(); ++sn) - if (g->c == transform(g->c, sn) && + if (g->c == transform(g->c, sn) && g->v.round_float() == transform(g->v, sn).round_float()) { LOOP_OVER_DIRECTIONS(g->v.dim, d) if (transform(d,sn).flipped) { @@ -1390,7 +1390,7 @@ volume_list *symmetry::reduce(const volume_list *gl) const { if (halve[d]) g->v.set_direction_max(d, g->v.in_direction_min(d) + 0.5 * g->v.in_direction(d)); - + // now, delete it if it has zero weight if (g->weight == 0.0) { if (gprev) @@ -1462,7 +1462,7 @@ field_rfunction derived_component_func(derived_component c, const grid_volume &g if (nfields > 12) abort("too many field components"); return energy_fun; - default: + default: abort("unknown derived_component in derived_component_func"); } return 0; diff --git a/tests/h5test.cpp b/tests/h5test.cpp index 00bb79c4a..9b17bce9f 100644 --- a/tests/h5test.cpp +++ b/tests/h5test.cpp @@ -150,7 +150,7 @@ bool check_2d(double eps(const vec &), double a, int splitting, symfunc Sf, for (int i1 = 0; i1 < dims[1]; ++i1) { loc.set_direction(X, loc0.in_direction(X) + i0 * gv.inva); loc.set_direction(Y, loc0.in_direction(Y) + i1 * gv.inva); - int idx = i0 * dims[1] + i1; + ptrdiff_t idx = i0 * dims[1] + i1; /* Ugh, for rotational symmetries (which mix up components etc.), we can't guarantee that a component is *exactly* the @@ -264,7 +264,7 @@ bool check_3d(double eps(const vec &), double a, int splitting, symfunc Sf, loc.set_direction(X, loc0.in_direction(X) + i0 * gv.inva); loc.set_direction(Y, loc0.in_direction(Y) + i1 * gv.inva); loc.set_direction(Z, loc0.in_direction(Z) + i2 * gv.inva); - int idx = (i0 * dims[1] + i1) * dims[2] + i2; + ptrdiff_t idx = (i0 * dims[1] + i1) * dims[2] + i2; /* Ugh, for rotational symmetries (which mix up components etc.), we can't guarantee that a component is *exactly* the