Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix uncertainty calculation in StereoMeanCombiner #2658

Merged
merged 8 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/api-reference/vectorization/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
.. _vectorization:

*******************************************************************************
Helper Functions for Vectorized Operations on Tables (`~ctapipe.vectorization`)
*******************************************************************************

.. currentmodule:: ctapipe.vectorization

This module contains helper functions for performing vectorized operations
on tables, as used in e.g. :ref:`reco_stereo_combination`.


Reference/API
=============

.. automodapi:: ctapipe.vectorization
:no-inheritance-diagram:

.. automodapi:: ctapipe.vectorization.aggregate
:no-inheritance-diagram:
2 changes: 2 additions & 0 deletions docs/changes/2658.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix ``<reconstruction_property>_uncert`` calculations in ``ctapipe.reco.StereoMeanCombiner``.
Add helper functions for vectorized numpy calculations as new ``ctapipe.vectorization`` module.
maxnoe marked this conversation as resolved.
Show resolved Hide resolved
117 changes: 48 additions & 69 deletions src/ctapipe/reco/stereo_combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
ReconstructedEnergyContainer,
ReconstructedGeometryContainer,
)
from ..vectorization import get_subarray_index, weighted_mean_std_ufunc
from .utils import add_defaults_and_meta

_containers = {
Expand All @@ -31,38 +32,6 @@
]


def _grouped_add(tel_data, n_array_events, indices):
"""
Calculate the group-wise sum for each array event over the
corresponding telescope events. ``indices`` is an array
that gives the index of the subarray event for each telescope event,
returned by
``np.unique(tel_events[["obs_id", "event_id"]], return_inverse=True)``
"""
combined_values = np.zeros(n_array_events)
np.add.at(combined_values, indices, tel_data)
return combined_values


def _weighted_mean_ufunc(tel_values, weights, n_array_events, indices):
# avoid numerical problems by very large or small weights
weights = weights / weights.max()
sum_prediction = _grouped_add(
tel_values * weights,
n_array_events,
indices,
)
sum_of_weights = _grouped_add(
weights,
n_array_events,
indices,
)
mean = np.full(n_array_events, np.nan)
valid = sum_of_weights > 0
mean[valid] = sum_prediction[valid] / sum_of_weights[valid]
return mean


class StereoCombiner(Component):
"""
Base Class for algorithms combining telescope-wise predictions to common prediction.
Expand Down Expand Up @@ -299,26 +268,27 @@ def predict_table(self, mono_predictions: Table) -> Table:
prefix = f"{self.prefix}_tel"
# TODO: Integrate table quality query once its done
valid = mono_predictions[f"{prefix}_is_valid"]
valid_predictions = mono_predictions[valid]

array_events, indices, multiplicity = np.unique(
mono_predictions[["obs_id", "event_id"]],
return_inverse=True,
return_counts=True,
obs_ids, event_ids, multiplicity, tel_to_array_indices = get_subarray_index(
mono_predictions
)
stereo_table = Table(array_events)
n_array_events = len(obs_ids)
stereo_table = Table({"obs_id": obs_ids, "event_id": event_ids})
# copy metadata
for colname in ("obs_id", "event_id"):
stereo_table[colname].description = mono_predictions[colname].description

n_array_events = len(array_events)
weights = self._calculate_weights(valid_predictions)
weights = self._calculate_weights(mono_predictions[valid])

if self.property is ReconstructionProperty.PARTICLE_TYPE:
if len(valid_predictions) > 0:
mono_predictions = valid_predictions[f"{prefix}_prediction"]
stereo_predictions = _weighted_mean_ufunc(
mono_predictions, weights, n_array_events, indices[valid]
if np.sum(valid) > 0:
maxnoe marked this conversation as resolved.
Show resolved Hide resolved
stereo_predictions, _ = weighted_mean_std_ufunc(
mono_predictions[f"{prefix}_prediction"],
valid,
n_array_events,
tel_to_array_indices,
multiplicity,
weights=weights,
)
else:
stereo_predictions = np.full(n_array_events, np.nan)
Expand All @@ -328,29 +298,21 @@ def predict_table(self, mono_predictions: Table) -> Table:
stereo_table[f"{self.prefix}_goodness_of_fit"] = np.nan

elif self.property is ReconstructionProperty.ENERGY:
if len(valid_predictions) > 0:
mono_energies = valid_predictions[f"{prefix}_energy"].quantity.to_value(
if np.sum(valid) > 0:
maxnoe marked this conversation as resolved.
Show resolved Hide resolved
mono_energies = mono_predictions[f"{prefix}_energy"].quantity.to_value(
u.TeV
)

if self.log_target:
mono_energies = np.log(mono_energies)

stereo_energy = _weighted_mean_ufunc(
stereo_energy, std = weighted_mean_std_ufunc(
mono_energies,
weights,
valid,
n_array_events,
indices[valid],
tel_to_array_indices,
multiplicity,
weights=weights,
)
variance = _weighted_mean_ufunc(
(mono_energies - np.repeat(stereo_energy, multiplicity)[valid])
** 2,
weights,
n_array_events,
indices[valid],
)
std = np.sqrt(variance)

if self.log_target:
stereo_energy = np.exp(stereo_energy)
std = np.exp(std)
Expand All @@ -369,22 +331,37 @@ def predict_table(self, mono_predictions: Table) -> Table:
stereo_table[f"{self.prefix}_goodness_of_fit"] = np.nan

elif self.property is ReconstructionProperty.GEOMETRY:
if len(valid_predictions) > 0:
if np.sum(valid) > 0:
maxnoe marked this conversation as resolved.
Show resolved Hide resolved
coord = AltAz(
alt=valid_predictions[f"{prefix}_alt"],
az=valid_predictions[f"{prefix}_az"],
alt=mono_predictions[f"{prefix}_alt"],
az=mono_predictions[f"{prefix}_az"],
)
# https://en.wikipedia.org/wiki/Von_Mises%E2%80%93Fisher_distribution#Mean_direction
mono_x, mono_y, mono_z = coord.cartesian.get_xyz()

stereo_x = _weighted_mean_ufunc(
mono_x, weights, n_array_events, indices[valid]
stereo_x, _ = weighted_mean_std_ufunc(
mono_x,
valid,
n_array_events,
tel_to_array_indices,
multiplicity,
weights=weights,
)
stereo_y = _weighted_mean_ufunc(
mono_y, weights, n_array_events, indices[valid]
stereo_y, _ = weighted_mean_std_ufunc(
mono_y,
valid,
n_array_events,
tel_to_array_indices,
multiplicity,
weights=weights,
)
stereo_z = _weighted_mean_ufunc(
mono_z, weights, n_array_events, indices[valid]
stereo_z, _ = weighted_mean_std_ufunc(
mono_z,
valid,
n_array_events,
tel_to_array_indices,
multiplicity,
weights=weights,
)

mean_cartesian = CartesianRepresentation(
Expand Down Expand Up @@ -425,7 +402,9 @@ def predict_table(self, mono_predictions: Table) -> Table:

tel_ids = [[] for _ in range(n_array_events)]

for index, tel_id in zip(indices[valid], valid_predictions["tel_id"]):
for index, tel_id in zip(
tel_to_array_indices[valid], mono_predictions["tel_id"][valid]
):
tel_ids[index].append(tel_id)

stereo_table[f"{self.prefix}_telescopes"] = tel_ids
Expand Down
10 changes: 10 additions & 0 deletions src/ctapipe/reco/tests/test_stereo_combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,18 @@ def test_predict_mean_energy(weights, mono_table):
assert_array_equal(stereo["event_id"], np.array([1, 2, 1]))
if weights == "intensity":
assert_array_equal(stereo["dummy_energy"], [7, 0.5, np.nan] * u.TeV)
assert_allclose(
stereo["dummy_energy_uncert"].quantity,
[4.242641, 0, np.nan] * u.TeV,
atol=1e-7,
)
elif weights == "none":
assert_array_equal(stereo["dummy_energy"], [5, 0.5, np.nan] * u.TeV)
assert_allclose(
stereo["dummy_energy_uncert"].quantity,
[3.741657, 0, np.nan] * u.TeV,
atol=1e-7,
)

assert_array_equal(stereo["dummy_telescopes"][0], np.array([1, 2, 3]))
assert_array_equal(stereo["dummy_telescopes"][1], 5)
Expand Down
34 changes: 19 additions & 15 deletions src/ctapipe/tools/apply_models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Tool to apply machine learning models in bulk (as opposed to event by event).
"""

import numpy as np
import tables
from astropy.table import Table, join, vstack
Expand All @@ -9,9 +10,8 @@
from ctapipe.core.tool import Tool
from ctapipe.core.traits import Bool, Integer, List, Path, classes_with_traits, flag
from ctapipe.io import HDF5Merger, TableLoader, write_table
from ctapipe.io.astropy_helpers import read_table
from ctapipe.io.astropy_helpers import join_allow_empty, read_table
from ctapipe.io.tableio import TelListToMaskTransform
from ctapipe.io.tableloader import _join_subarray_events
from ctapipe.reco import Reconstructor

__all__ = [
Expand Down Expand Up @@ -218,6 +218,23 @@ def _apply(self, reconstructor, tel_tables, start, stop):
def _combine(self, reconstructor, tel_tables, start, stop):
stereo_table = vstack(list(tel_tables.values()))

# stacking the single telescope tables and joining
# potentially changes the order of the subarray events.
# to ensure events are stored in the correct order,
# we resort to trigger table order
trigger = read_table(
self.h5file, "/dl1/event/subarray/trigger", start=start, stop=stop
)[["obs_id", "event_id"]]
trigger["__sort_index__"] = np.arange(len(trigger))

stereo_table = join_allow_empty(
stereo_table,
trigger,
keys=["obs_id", "event_id"],
join_type="left",
)
stereo_table.sort("__sort_index__")

combiner = reconstructor.stereo_combiner
stereo_predictions = combiner.predict_table(stereo_table)
del stereo_table
Expand All @@ -230,19 +247,6 @@ def _combine(self, reconstructor, tel_tables, start, stop):
stereo_predictions[c.name] = np.array([trafo(r) for r in c])
stereo_predictions[c.name].description = c.description

# stacking the single telescope tables and joining
# potentially changes the order of the subarray events.
# to ensure events are stored in the correct order,
# we resort to trigger table order
trigger = read_table(
self.h5file, "/dl1/event/subarray/trigger", start=start, stop=stop
)[["obs_id", "event_id"]]
trigger["__sort_index__"] = np.arange(len(trigger))

stereo_predictions = _join_subarray_events(trigger, stereo_predictions)
stereo_predictions.sort("__sort_index__")
del stereo_predictions["__sort_index__"]

write_table(
stereo_predictions,
self.output_path,
Expand Down
6 changes: 6 additions & 0 deletions src/ctapipe/vectorization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .aggregate import get_subarray_index, weighted_mean_std_ufunc

__all__ = [
"get_subarray_index",
"weighted_mean_std_ufunc",
]
Loading
Loading