Optimize spectra merge and normalization #81

Merged: 12 commits, Feb 15, 2024
4 changes: 2 additions & 2 deletions src/esssans/conversions.py
@@ -204,7 +204,7 @@ def monitor_to_wavelength(
     monitor: CalibratedMonitor[RunType, MonitorType], graph: MonitorCoordTransformGraph
 ) -> WavelengthMonitor[RunType, MonitorType]:
     return WavelengthMonitor[RunType, MonitorType](
-        monitor.transform_coords('wavelength', graph=graph)
+        monitor.transform_coords('wavelength', graph=graph, keep_inputs=False)
     )


@@ -229,7 +229,7 @@ def detector_to_wavelength(
     graph: ElasticCoordTransformGraph,
 ) -> CleanWavelength[ScatteringRunType, Numerator]:
     return CleanWavelength[ScatteringRunType, Numerator](
-        detector.transform_coords('wavelength', graph=graph)
+        detector.transform_coords('wavelength', graph=graph, keep_inputs=False)
     )
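
A note on the change: `keep_inputs=False` is an option of scipp's `transform_coords` that drops the consumed input coordinates (such as time-of-flight) from the result, so they are not carried through the rest of the workflow. A minimal sketch with a toy conversion graph (the factor and units here are made up, not the actual beamline graph):

```python
import scipp as sc

da = sc.DataArray(
    sc.ones(sizes={'tof': 4}),
    coords={'tof': sc.linspace('tof', 1.0, 2.0, 4, unit='ms')},
)
# Toy graph: parameter names of the callable refer to existing coords.
graph = {'wavelength': lambda tof: tof * sc.scalar(0.1, unit='angstrom/ms')}
out = da.transform_coords('wavelength', graph=graph, keep_inputs=False)
assert 'tof' not in out.coords  # input coord dropped, saving memory downstream
```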


10 changes: 9 additions & 1 deletion src/esssans/i_of_q.py
@@ -171,7 +171,14 @@ def merge_spectra(
 
     if data.bins is not None:
         q_all_pixels = data.bins.concat(dims_to_reduce)
-        out = q_all_pixels.bin(**edges)
+        # q_all_pixels may just have a single bin now, which currently yields
+        # inferior performance when binning (no/bad multi-threading?).
+        # We operate on the content buffer for better multi-threaded performance.
+        if q_all_pixels.ndim == 0:
+            content = q_all_pixels.bins.constituents['data']
+            out = content.bin(**edges).assign_coords(q_all_pixels.coords)
+        else:
+            out = q_all_pixels.bin(**edges)
     else:
         # We want to flatten data to make histogramming cheaper (avoiding allocation of
         # large output before summing). We strip unnecessary content since it makes
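
The fast path above addresses the case where concatenating over all pixel dims leaves a 0-D result (one big bin), for which `bin` currently parallelizes poorly. A rough sketch of the same pattern with synthetic data (scipp's `table_xyz` helper and the `x`/`y` coords stand in for real detector pixels and Q):

```python
import scipp as sc

events = sc.data.table_xyz(10_000)  # random event table with coords x, y, z
data = events.bin(x=100)  # stand-in for per-pixel binned detector data
edges = {'y': sc.linspace('y', 0.0, 1.0, 11, unit='m')}

q_all_pixels = data.bins.concat('x')  # 0-D: a single bin holding all events
if q_all_pixels.ndim == 0:
    # Bin the flat event buffer directly, then put back any outer coords.
    content = q_all_pixels.bins.constituents['data']
    out = content.bin(**edges).assign_coords(q_all_pixels.coords)
else:
    out = q_all_pixels.bin(**edges)
```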
@@ -209,6 +216,7 @@ def _reduce(da: sc.DataArray) -> sc.DataArray:
         out = (
             flat.flatten(to=str(uuid.uuid4()))
             .group(*[flat.coords[dim] for dim in flat.dims if dim != helper_dim])
+            .drop_coords(dims_to_keep or ())
             .hist(**edges)
         )
     return CleanSummedQ[ScatteringRunType, IofQPart](out.squeeze())
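
The added `drop_coords` removes the coords of the `dims_to_keep` dims once they have served to define the grouping, keeping the output lean before histogramming. A small illustration of the group-then-drop pattern, using made-up data (a fabricated `layer` coord) rather than the workflow's detector data:

```python
import scipp as sc

events = sc.data.table_xyz(100)
# Fabricate an integer 'layer' coord (0 or 1) to group by.
events.coords['layer'] = (events.coords['x'] < sc.scalar(0.5, unit='m')).to(
    dtype='int64'
)
grouped = events.group('layer')
# The grouping coord only defined the output dim; drop it before histogramming.
out = grouped.drop_coords(('layer',)).hist(y=10)
assert out.dims == ('layer', 'y')
```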
7 changes: 6 additions & 1 deletion src/esssans/normalization.py
@@ -379,6 +379,8 @@ def normalize(
         numerator = numerator.bin(wavelength=wavelength_bounds)
 
     def _reduce(da: sc.DataArray) -> sc.DataArray:
+        if da.sizes[wav] == 1:  # Can avoid costly event-data da.bins.concat
+            return da.squeeze(wav)
         return da.sum(wav) if da.bins is None else da.bins.concat(wav)
 
     num_parts = []
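
The new branch skips `bins.concat` when the wavelength dim already has size 1: `squeeze` merely drops the dim, leaving the event buffer untouched, whereas `bins.concat` would rebuild it. A hedged sketch mirroring this (toy event data; `wavelength` is a renamed `x` coord from scipp's `table_xyz`, so the unit is a stand-in):

```python
import scipp as sc

wav = 'wavelength'

def _reduce(da: sc.DataArray) -> sc.DataArray:
    if da.sizes[wav] == 1:  # fast path: avoid costly event-data concat
        return da.squeeze(wav)
    return da.sum(wav) if da.bins is None else da.bins.concat(wav)

table = sc.data.table_xyz(100).rename(x=wav)
bounds = sc.linspace(wav, 0.0, 1.0, 2, unit='m')  # a single wavelength band
da = table.bin(wavelength=bounds)  # sizes: {'wavelength': 1}
reduced = _reduce(da)  # takes the squeeze branch, no event copy
```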
@@ -408,7 +410,10 @@ def _reduce(da: sc.DataArray) -> sc.DataArray:
         )
     elif numerator.bins is not None:
         numerator = numerator.hist()
-    return IofQ[ScatteringRunType](numerator / denominator)
+    numerator /= denominator.drop_coords(
+        [name for name in denominator.coords if name not in denominator.dims]
+    )
+    return IofQ[ScatteringRunType](numerator)
 
 
 providers = (
29 changes: 7 additions & 22 deletions src/esssans/uncertainty.py
@@ -67,27 +67,12 @@ def broadcast_to_events_with_upper_bound_variances(
     """
     if da.variances is None:
         return da
-    constituents = events.bins.constituents
-
-    if 'Q' in constituents['data'].coords:
-        Q = constituents['data'].coords['Q']
-        constituents['data'] = sc.DataArray(
-            sc.ones(sizes=Q.sizes, dtype='float32'), coords={'Q': Q}
-        )
-        edges = {'Q': da.coords['Q']}
-    else:
-        Qx = constituents['data'].coords['Qx']
-        Qy = constituents['data'].coords['Qy']
-        constituents['data'] = sc.DataArray(
-            sc.ones(sizes=Qx.sizes, dtype='float32'),
-            coords={'Qx': Qx, 'Qy': Qy},
-        )
-        edges = {'Qy': da.coords['Qy'], 'Qx': da.coords['Qx']}
-    # Combine all bins of the events that correspond to the same bin in the input data
-    to_concat = set(events.dims) - set(da.dims)
-    binned = sc.DataArray(sc.bins(**constituents).bins.concat(to_concat))
-    counts = binned.hist(**edges)
     da = da.copy()
-    da.variances *= counts.values
+    if da.sizes != events.sizes:
+        # This is a safety check, but we should never get here.
+        raise ValueError(f"Sizes {da.sizes} do not match event sizes {events.sizes}")
+    # Given how this function is used currently (in the context of normalization
+    # with matching binning in numerator and denominator, not using scipp.lookup),
+    # we can simply count the events in the existing binning.
+    da.variances *= events.bins.size().values
     da.data = sc.bins_like(events, da.data)
     return da
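
The rewrite collapses the old re-binning logic: because numerator and denominator share the same binning, the upper-bound variance broadcast reduces to scaling each variance by its bin's event count and broadcasting with `sc.bins_like`. A standalone sketch with synthetic binned events (`binned_x` and the fabricated variances are stand-ins):

```python
import scipp as sc

events = sc.data.binned_x(1000, 10)  # synthetic binned event data
da = events.hist()                   # dense data with the same binning
da.variances = da.values.copy()      # fabricated variances, for the sketch only

da = da.copy()
# Upper-bound broadcast: scale each variance by the bin's event count...
da.variances *= events.bins.size().values
# ...then broadcast each (value, variance) pair to the events in its bin.
da.data = sc.bins_like(events, da.data)
```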
12 changes: 8 additions & 4 deletions tests/loki/iofq_test.py
@@ -81,13 +81,17 @@ def test_pipeline_can_compute_IofQ_in_event_mode(uncertainties, target, qxy: bool
     assert sc.allclose(
         sc.values(reference.data),
         sc.values(result.hist().data),
-        rtol=sc.scalar(1e-11),
-        atol=sc.scalar(1e-11),
+        # Could be 1e-11, but currently the workflow defaults to float32 data, as
+        # returned by ScippNexus.
+        rtol=sc.scalar(1e-7),

Review comment (Member):
Not sure I follow anymore. So the changes you made here have degraded the accuracy of the results?

Reply (Member, Author):
Yes, because I changed `numerator / denominator` to `numerator /= denominator`. So previously we ended up converting to float64 in that step.

+        atol=sc.scalar(1e-7),
     )
     if uncertainties == UncertaintyBroadcastMode.drop:
-        tol = sc.scalar(1e-14)
+        # Could both be 1e-14 if using float64
+        tol = sc.scalar(1e-7 if qxy else 1e-10)
     else:
-        tol = sc.scalar(1e-8 if qxy else 1e-9)
+        # Could be 1e-8 if using float64
+        tol = sc.scalar(1e-7 if qxy else 1e-9)
     assert sc.allclose(
         sc.variances(reference).data,
         sc.variances(result.hist()).data,
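
For context on the relaxed tolerances: float32 carries roughly seven significant decimal digits, so relative tolerances much below 1e-7 are not meaningful once the workflow operates on float32 data. A quick check:

```python
import numpy as np

print(np.finfo(np.float32).eps)  # ~1.19e-07: why the tolerances relax to ~1e-7
print(np.finfo(np.float64).eps)  # ~2.22e-16: float64 would permit 1e-11/1e-14
```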