From 508af127335a5d39e04e1f2edfa9de02e80828a2 Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Mon, 12 Feb 2024 12:35:03 +0100
Subject: [PATCH 1/9] Optimize spectra merge and normalization

This adds the following optimizations:

- Bypass some multi-threading shortcomings of DataArray.bin
- Avoid bins.concat where possible
- Use in-place normalization

I have tried this on a 10 GByte file and it shaves off several seconds
of runtime when returning event data.
---
 src/esssans/i_of_q.py        | 9 ++++++++-
 src/esssans/normalization.py | 5 ++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/esssans/i_of_q.py b/src/esssans/i_of_q.py
index 69ee1670..0bc1321c 100644
--- a/src/esssans/i_of_q.py
+++ b/src/esssans/i_of_q.py
@@ -171,7 +171,14 @@ def merge_spectra(
 
     if data.bins is not None:
         q_all_pixels = data.bins.concat(dims_to_reduce)
-        out = q_all_pixels.bin(**edges)
+        # q_all_pixels may just have a single bin now, which currently yields
+        # inferior performance when binning (no/bad multi-threading?).
+        # We operate on the content buffer for better multi-threaded performance.
+        if len(q_all_pixels.masks) == 0:
+            content = q_all_pixels.bins.constituents['data']
+            out = content.bin(**edges).assign_coords(q_all_pixels.coords)
+        else:
+            out = q_all_pixels.bin(**edges)
     else:
         # We want to flatten data to make histogramming cheaper (avoiding allocation of
         # large output before summing). We strip unnecessary content since it makes
diff --git a/src/esssans/normalization.py b/src/esssans/normalization.py
index d476abbc..1e212ee3 100644
--- a/src/esssans/normalization.py
+++ b/src/esssans/normalization.py
@@ -379,6 +379,8 @@ def normalize(
         numerator = numerator.bin(wavelength=wavelength_bounds)
 
     def _reduce(da: sc.DataArray) -> sc.DataArray:
+        if da.sizes[wav] == 1:  # Can avoid costly event-data da.bins.concat
+            return da.squeeze(wav)
         return da.sum(wav) if da.bins is None else da.bins.concat(wav)
 
     num_parts = []
@@ -408,7 +410,8 @@ def _reduce(da: sc.DataArray) -> sc.DataArray:
                 )
     elif numerator.bins is not None:
         numerator = numerator.hist()
-    return IofQ[ScatteringRunType](numerator / denominator)
+    numerator /= denominator
+    return IofQ[ScatteringRunType](numerator)
 
 
 providers = (

From 334f71917f9559041dac84f25a5c87e8a6e792fa Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Mon, 12 Feb 2024 14:59:27 +0100
Subject: [PATCH 2/9] Do not keep TOF

---
 src/esssans/conversions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/esssans/conversions.py b/src/esssans/conversions.py
index bcf4eda9..b8b54eb6 100644
--- a/src/esssans/conversions.py
+++ b/src/esssans/conversions.py
@@ -204,7 +204,7 @@ def monitor_to_wavelength(
     monitor: RawMonitor[RunType, MonitorType], graph: MonitorCoordTransformGraph
 ) -> WavelengthMonitor[RunType, MonitorType]:
     return WavelengthMonitor[RunType, MonitorType](
-        monitor.transform_coords('wavelength', graph=graph)
+        monitor.transform_coords('wavelength', graph=graph, keep_inputs=False)
     )
 
 
@@ -229,7 +229,7 @@ def detector_to_wavelength(
     graph: ElasticCoordTransformGraph,
 ) -> CleanWavelength[ScatteringRunType, Numerator]:
     return CleanWavelength[ScatteringRunType, Numerator](
-        detector.transform_coords('wavelength', graph=graph)
+        detector.transform_coords('wavelength', graph=graph, keep_inputs=False)
     )
 
 

From 50bd8db9e8a387207a9d2985fb6aa0fe8fac9766 Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Mon, 12 Feb 2024 14:59:38 +0100
Subject: [PATCH 3/9] Fix condition

---
 src/esssans/i_of_q.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/esssans/i_of_q.py b/src/esssans/i_of_q.py
index 0bc1321c..937fe5b4 100644
--- a/src/esssans/i_of_q.py
+++ b/src/esssans/i_of_q.py
@@ -174,7 +174,7 @@ def merge_spectra(
         # q_all_pixels may just have a single bin now, which currently yields
         # inferior performance when binning (no/bad multi-threading?).
         # We operate on the content buffer for better multi-threaded performance.
-        if len(q_all_pixels.masks) == 0:
+        if q_all_pixels.ndim == 0:
             content = q_all_pixels.bins.constituents['data']
             out = content.bin(**edges).assign_coords(q_all_pixels.coords)
         else:

From c8a3fdfb15ede180faef040a5a2c4d409ee054d3 Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Mon, 12 Feb 2024 14:59:51 +0100
Subject: [PATCH 4/9] Avoid issues from coords added by workaround scipp.hist
 limitation

---
 src/esssans/i_of_q.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/esssans/i_of_q.py b/src/esssans/i_of_q.py
index 937fe5b4..62b5df72 100644
--- a/src/esssans/i_of_q.py
+++ b/src/esssans/i_of_q.py
@@ -216,6 +216,7 @@ def merge_spectra(
             out = (
                 flat.flatten(to=str(uuid.uuid4()))
                 .group(*[flat.coords[dim] for dim in flat.dims if dim != helper_dim])
+                .drop_coords(dims_to_keep)
                 .hist(**edges)
             )
     return CleanSummedQ[ScatteringRunType, IofQPart](out.squeeze())

From 7d5fe429c8a7b86a8512d84e971190677546a4de Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Mon, 12 Feb 2024 15:00:31 +0100
Subject: [PATCH 5/9] Use 64 bit weights

---
 src/esssans/loki/io.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/esssans/loki/io.py b/src/esssans/loki/io.py
index b1177025..547dae96 100644
--- a/src/esssans/loki/io.py
+++ b/src/esssans/loki/io.py
@@ -38,6 +38,8 @@ def _patch_data(
 ) -> sc.DataArray:
     out = da.copy(deep=False)
     if out.bins is not None:
+        # Currently ScippNexus adds 32 bit event weights, this may cause trouble.
+        out.bins.data = out.bins.data.to(dtype='float64', copy=False)
         content = out.bins.constituents['data']
         if content.variances is None:
             content.variances = content.values

From c245b4a8c79257c26012bfbc397252b4dd1d0652 Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Mon, 12 Feb 2024 15:12:11 +0100
Subject: [PATCH 6/9] Drop aux coords

---
 src/esssans/normalization.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/esssans/normalization.py b/src/esssans/normalization.py
index 1e212ee3..eeccb07a 100644
--- a/src/esssans/normalization.py
+++ b/src/esssans/normalization.py
@@ -410,7 +410,9 @@ def _reduce(da: sc.DataArray) -> sc.DataArray:
                 )
     elif numerator.bins is not None:
         numerator = numerator.hist()
-    numerator /= denominator
+    numerator /= denominator.drop_coords(
+        [name for name in denominator.coords if name not in denominator.dims]
+    )
     return IofQ[ScatteringRunType](numerator)
 
 

From 081cf836c39d21f921d4cadb832795e7e1fefa06 Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Mon, 12 Feb 2024 15:14:04 +0100
Subject: [PATCH 7/9] Fix drop_coords call

---
 src/esssans/i_of_q.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/esssans/i_of_q.py b/src/esssans/i_of_q.py
index 62b5df72..0ce3c1bb 100644
--- a/src/esssans/i_of_q.py
+++ b/src/esssans/i_of_q.py
@@ -216,7 +216,7 @@ def merge_spectra(
             out = (
                 flat.flatten(to=str(uuid.uuid4()))
                 .group(*[flat.coords[dim] for dim in flat.dims if dim != helper_dim])
-                .drop_coords(dims_to_keep)
+                .drop_coords(dims_to_keep or ())
                 .hist(**edges)
             )
     return CleanSummedQ[ScatteringRunType, IofQPart](out.squeeze())

From 00d6d25ced518b242974fa43dc081f36e22df7b5 Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Tue, 13 Feb 2024 05:38:09 +0100
Subject: [PATCH 8/9] Simpler and faster event-mode upper-bound broadcast

Fixes #84.

For 1e9 events this reduces the time for the call to this function from
10 seconds to 1 second.
---
 src/esssans/uncertainty.py | 29 +++++++----------------------
 1 file changed, 7 insertions(+), 22 deletions(-)

diff --git a/src/esssans/uncertainty.py b/src/esssans/uncertainty.py
index 7e3346fe..a691e19a 100644
--- a/src/esssans/uncertainty.py
+++ b/src/esssans/uncertainty.py
@@ -67,27 +67,12 @@ def broadcast_to_events_with_upper_bound_variances(
     """
     if da.variances is None:
         return da
-    constituents = events.bins.constituents
-
-    if 'Q' in constituents['data'].coords:
-        Q = constituents['data'].coords['Q']
-        constituents['data'] = sc.DataArray(
-            sc.ones(sizes=Q.sizes, dtype='float32'), coords={'Q': Q}
-        )
-        edges = {'Q': da.coords['Q']}
-    else:
-        Qx = constituents['data'].coords['Qx']
-        Qy = constituents['data'].coords['Qy']
-        constituents['data'] = sc.DataArray(
-            sc.ones(sizes=Qx.sizes, dtype='float32'),
-            coords={'Qx': Qx, 'Qy': Qy},
-        )
-        edges = {'Qy': da.coords['Qy'], 'Qx': da.coords['Qx']}
-    # Combine all bins of the events that correspond to the same bin in the input data
-    to_concat = set(events.dims) - set(da.dims)
-    binned = sc.DataArray(sc.bins(**constituents).bins.concat(to_concat))
-    counts = binned.hist(**edges)
-    da = da.copy()
-    da.variances *= counts.values
+    if da.sizes != events.sizes:
+        # This is a safety check, but we should never get here.
+        raise ValueError(f"Sizes {da.sizes} do not match event sizes {events.sizes}")
+    # Given how this function is used currently (in the context of normalization
+    # with matching binning in numerator and denominator, not using scipp.lookup),
+    # we can simply count the events in the existing binning.
+    da.variances *= events.bins.size().values
     da.data = sc.bins_like(events, da.data)
     return da

From fdd1b514b1b365ad75384cf163d97c1ce62a66f8 Mon Sep 17 00:00:00 2001
From: Simon Heybrock <simon.heybrock@ess.eu>
Date: Thu, 15 Feb 2024 08:22:11 +0100
Subject: [PATCH 9/9] Revert switch to float64 and update tests

---
 src/esssans/loki/io.py  |  2 --
 tests/loki/iofq_test.py | 12 ++++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/esssans/loki/io.py b/src/esssans/loki/io.py
index 547dae96..b1177025 100644
--- a/src/esssans/loki/io.py
+++ b/src/esssans/loki/io.py
@@ -38,8 +38,6 @@ def _patch_data(
 ) -> sc.DataArray:
     out = da.copy(deep=False)
     if out.bins is not None:
-        # Currently ScippNexus adds 32 bit event weights, this may cause trouble.
-        out.bins.data = out.bins.data.to(dtype='float64', copy=False)
         content = out.bins.constituents['data']
         if content.variances is None:
             content.variances = content.values
diff --git a/tests/loki/iofq_test.py b/tests/loki/iofq_test.py
index 503fd236..db11c55f 100644
--- a/tests/loki/iofq_test.py
+++ b/tests/loki/iofq_test.py
@@ -81,13 +81,17 @@ def test_pipeline_can_compute_IofQ_in_event_mode(uncertainties, target, qxy: boo
     assert sc.allclose(
         sc.values(reference.data),
         sc.values(result.hist().data),
-        rtol=sc.scalar(1e-11),
-        atol=sc.scalar(1e-11),
+        # Could be 1e-11, but currently the workflow defaults to float32 data, as
+        # returned by ScippNexus.
+        rtol=sc.scalar(1e-7),
+        atol=sc.scalar(1e-7),
     )
     if uncertainties == UncertaintyBroadcastMode.drop:
-        tol = sc.scalar(1e-14)
+        # Could both be 1e-14 if using float64
+        tol = sc.scalar(1e-7 if qxy else 1e-10)
     else:
-        tol = sc.scalar(1e-8 if qxy else 1e-9)
+        # Could be 1e-8 if using float64
+        tol = sc.scalar(1e-7 if qxy else 1e-9)
     assert sc.allclose(
         sc.variances(reference).data,
         sc.variances(result.hist()).data,