Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

signal extractor SlidingWindowMaxSum #1568

Merged
merged 15 commits into from
Feb 11, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions ctapipe/image/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
"FixedWindowSum",
"GlobalPeakWindowSum",
"LocalPeakWindowSum",
"SlidingWindowMaxSum",
"NeighborPeakWindowSum",
"BaselineSubtractedNeighborPeakWindowSum",
"TwoPassWindowSum",
"extract_around_peak",
"extract_sliding_window",
"neighbor_average_waveform",
"subtract_baseline",
"integration_correction",
Expand Down Expand Up @@ -119,6 +121,77 @@ def extract_around_peak(
sum_[0] = i_sum


@guvectorize(
[
(float64[:], int64, float64, float32[:], float32[:]),
(float32[:], int64, float64, float32[:], float32[:]),
],
"(s),(),()->(),()",
nopython=True,
)
def extract_sliding_window(waveforms, width, sampling_rate_ghz, sum_, peak_time):
"""
This function performs the following operations:

- Find the largest sum of width consecutive slices
- Obtain the pulse time within a window defined by a peak finding
algorithm, using the weighted average of the samples.

This function is a numpy universal function which defines the operation
applied on the waveform for every channel and pixel. Therefore in the
code body of this function:
- waveforms is a 1D array of size n_samples.
- width is integer

The ret argument is required by numpy to create the numpy array which is
returned. It can be ignored when calling this function.

Parameters
----------
waveforms : ndarray
Waveforms stored in a numpy array.
Shape: (n_pix, n_samples)
width : ndarray or int
Window size of integration window for each pixel.
sampling_rate_ghz : float
Sampling rate of the camera, in units of GHz
Astropy units should have to_value('GHz') applied before being passed
sum_ : ndarray
Return argument for ufunc (ignore)
Returns the sum of the waveform samples
peak_time : ndarray
Return argument for ufunc (ignore)
Returns the peak_time in units "ns"

Returns
-------
charge : ndarray
Extracted charge.
Shape: (n_pix)

"""

# first find the cumulative waveform
cwf = np.cumsum(waveforms)
# add zero at the begining so it is easier to substract the two arrays later
cwf = np.concatenate((np.zeros(1), cwf))
sums = cwf[width:] - cwf[:-width]
maxpos = np.argmax(sums) # start of the window with largest sum
sum_[0] = sums[maxpos]

time_num = float64(0.0)
time_den = float64(0.0)
# now compute the timing as the average of non negative slices
for isample in prange(maxpos, maxpos + width):
if waveforms[isample] > 0:
time_num += waveforms[isample] * isample
time_den += waveforms[isample]

peak_time[0] = time_num / time_den if time_den > 0 else maxpos + 0.5 * width
# Convert to units of ns
peak_time[0] /= sampling_rate_ghz


@njit(parallel=True, cache=True)
def neighbor_average_waveform(waveforms, neighbors_indices, neighbors_indptr, lwt):
"""
Expand Down Expand Up @@ -524,6 +597,66 @@ def __call__(self, waveforms, telid, selected_gain_channel):
return charge, peak_time


class SlidingWindowMaxSum(ImageExtractor):
"""
Sliding window extractor that maximizes the signal in window_width consecutive slices.
"""

window_width = IntTelescopeParameter(
default_value=7, help="Define the width of the integration window"
).tag(config=True)

apply_integration_correction = BoolTelescopeParameter(
default_value=True, help="Apply the integration window correction"
).tag(config=True)

@lru_cache(maxsize=128)
def _calculate_correction(self, telid):
"""
Calculate the correction for the extracted charge such that the value
returned would equal 1 for a noise-less unit pulse.

This method is decorated with @lru_cache to ensure it is only
calculated once per telescope.

WARNING: TO BE DONE properly, the current code reuses the function of
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you implement this directly here, does not sound to complicated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the main reason why I did not do so is because this feature does not seem to be used (at least in LST), so I did not have a proper set-up to test it, but I can look into making some dummy pulse shape and testing on it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would be great

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have the reference pulse shape in the CameraDescription. I guess it's a fairly small effect though, and the correction doesn't really matter much except to get the cleaning thresholds in the same units for all cameras.

Copy link
Contributor Author

@jsitarek jsitarek Feb 1, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the delay, I restarted working on this.
CameraDescription.readout is where the code is taking the pulse shape from. However this is not really reliable.
If I execute the following code

import numpy as np
import astropy.units as u
import matplotlib.pyplot as plt
plt.ion()

from ctapipe.instrument import SubarrayDescription, TelescopeDescription

subarray = SubarrayDescription(
        "LST1",
        tel_positions={1: np.zeros(3) * u.m},
        tel_descriptions={
            1: TelescopeDescription.from_name(
                optics_name="LST", camera_name="LSTCam"
            ),
        },
)
pulse_shape=subarray.tel[1].camera.readout.reference_pulse_shape[ dt=subarray.tel[1].camera.readout.reference_pulse_sample_width
xs=np.arange(len(pulse_shape))*dt
plt.plot(xs, pulse_shape)

I get the following figure:
pulse_shape_LST
which is a much broader pulse then it should

The calculation of the correction factor would be much simpler if the pulse shape in this class had the same binning as the actual readout, this is the case in the above example, and one would assume to take it from granted since the shape is taken from the "readout" object, which has the binning embedded, however in the first tests that i was doing in lstchain, when the array was being read from the data the pulse shapes there were actually a delta function with a SSC-like sampling, so obviously it cannot be taken for granted.

I will change the code to use a simple conversion and rounding of sampling to make it work also in this more general case, but the whole issue of the LST pulse shape deserves a separate "issue"

EDIT: I forgot to mention that there seems to be only one reference pulse shape in the CameraDescription, while in reality we should have HG and LG

Copy link
Contributor

@kosack kosack Feb 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The from_name() methods load up a file from ctapipe-extra (which is now a directory on a server rather than a package), and are just meant for unit-testing purposes. Currently everything in ctapipe-extra is from PROD3 or even PROD2 simulations, so quite out of date for real analysis. In the future I want to clean that up and have an option to select which "prod" to use, but there has not been manpower for that (see e.g #738 )

If you load real data from a SimTel file or something else supported, the correct waveform should be loaded into the instrument model that you get from source.subarray.

E.g. if you do:

with EventSource("some_prod5_sim.simtel.gz") as source:
     readout  = source.subarray.tel[2].camera.readout
     plt.plot(readout.reference_pulse_sample_time, readout.reference_pulse_sample_width)

You will get the "latest" pulse that is defined in Prod5

Copy link
Contributor

@kosack kosack Feb 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that if you don't want to always load up a file, you can take any reference prod5 file, for example, and run

ctapipe-dump-instrument --input=some_reference_file.simtel.gz

And it will dump a bunch of FITS files including the Camera geometry and readout definitions to the local directory. You can then setenv CTAPIPE_SVC_PATH=[directory where those files are], and ctapipe will use that when you run the from_name() functions instead of the defaults (by default it searches all paths listed in a ":" separated list in in CTAPIPE_SVC_PATH first, then if it doesn't find that, it will download the default file from the dataserver, which as I said are a bit out of date.

Copy link
Contributor

@kosack kosack Feb 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update: I found the actual problem... In fact, the camera readout definition for LSTCam does not even exist in the ctapipe-extra directory on the dataserver. It seems the default behavior is to just return some dummy pulse shape if the file is not found, which has no real meaning (you should see a logger warning message if logging is set up)... Clearly this is not good behavior (I think it was there to prevent tests from failing until we updated the testing files, which never happened).

here is a comparison with prod3b:
image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also why you get only 1 reference pulse shape if you use from_name()...
I had already opened an issue about this, but obviously forgot: See #1450

LocalPeakWindowSum assuming that the pulse is symmetric (which normally is not
true). The proper approach would be to apply a similar sliding window over the
reference pulse shape and compute the maximum fraction of the pulse contained
in window of a given size. Thus the correction for the leakage of signal outside
of the integration window is slightly overestimated.


Parameters
----------
telid : int

Returns
-------
correction : ndarray
The correction to apply to an extracted charge using this ImageExtractor
Has size n_channels, as a different correction value might be required
for different gain channels.
"""

readout = self.subarray.tel[telid].camera.readout
return integration_correction(
readout.reference_pulse_shape,
readout.reference_pulse_sample_width.to_value("ns"),
(1 / readout.sampling_rate).to_value("ns"),
self.window_width.tel[telid],
self.window_width.tel[telid] // 2, # assuming that shift = 0.5 * window
)

def __call__(self, waveforms, telid, selected_gain_channel):
charge, peak_time = extract_sliding_window(
waveforms, self.window_width.tel[telid], self.sampling_rate[telid]
)
if self.apply_integration_correction.tel[telid]:
charge *= self._calculate_correction(telid=telid)[selected_gain_channel]
return charge, peak_time


class NeighborPeakWindowSum(ImageExtractor):
"""
Extractor which sums in a window about the
Expand Down
31 changes: 31 additions & 0 deletions ctapipe/image/tests/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from ctapipe.core import non_abstract_children
from ctapipe.image.extractor import (
extract_around_peak,
extract_sliding_window,
neighbor_average_waveform,
subtract_baseline,
integration_correction,
Expand All @@ -16,6 +17,7 @@
NeighborPeakWindowSum,
TwoPassWindowSum,
FullWaveformSum,
SlidingWindowMaxSum,
)
from ctapipe.image.toymodel import WaveformModel
from ctapipe.instrument import SubarrayDescription, TelescopeDescription
Expand Down Expand Up @@ -103,6 +105,26 @@ def test_extract_around_peak(toymodel):
assert charge.dtype == np.float32


def test_extract_sliding_window(toymodel):
waveforms, _, _, _, _, _ = toymodel
n_pixels, n_samples = waveforms.shape
charge, peak_time = extract_sliding_window(waveforms, 7, 1)
assert (charge >= 0).all()
assert (peak_time >= 0).all() and (peak_time <= n_samples).all()

x = np.arange(100)
y = norm.pdf(x, 41.2, 6)
charge, peak_time = extract_sliding_window(y[np.newaxis, :], x.size, 1)
assert_allclose(charge[0], 1.0, rtol=1e-3)
assert_allclose(peak_time[0], 41.2, rtol=1e-3)

# Test negative amplitude
y_offset = y - y.max() / 2
charge, _ = extract_sliding_window(y_offset[np.newaxis, :], x.size, 1)
assert_allclose(charge, y_offset.sum(), rtol=1e-3)
assert charge.dtype == np.float32


def test_extract_around_peak_charge_expected(toymodel):
waveforms = np.ones((2048, 96))
n_samples = waveforms.shape[-1]
Expand Down Expand Up @@ -289,6 +311,15 @@ def test_fixed_window_sum(toymodel):
assert_allclose(peak_time, true_time, rtol=0.1)


def test_sliding_window_max_sum(toymodel):
waveforms, subarray, telid, selected_gain_channel, true_charge, true_time = toymodel
extractor = SlidingWindowMaxSum(subarray=subarray)
charge, peak_time = extractor(waveforms, telid, selected_gain_channel)
print(true_charge, charge, true_time, peak_time)
assert_allclose(charge, true_charge, rtol=0.1)
assert_allclose(peak_time, true_time, rtol=0.1)


def test_neighbor_peak_window_sum_lwt(toymodel):
waveforms, subarray, telid, selected_gain_channel, true_charge, true_time = toymodel
extractor = NeighborPeakWindowSum(subarray=subarray, lwt=4)
Expand Down