Skip to content

Commit

Permalink
Add Dask tools for VDS creation and simple image binning (#14)
Browse files Browse the repository at this point in the history
* Add VDS creation command-line tool
* Add Dask image binner command-line tool, with options for single-image, multi-image sweep and multi-image pump-probe binning.
* Add command line tool to inspect cue messages
* Avoid Dask v2021.03.0 due to a regression in that release — the dask.array.bincount function does not permit slicing in that version (see dask/dask#7391).
* Add more cue message info and tidy up the functions for discovering the timestamps of chosen cues
  • Loading branch information
benjaminhwilliams authored Mar 31, 2021
1 parent 9957949 commit d546b45
Show file tree
Hide file tree
Showing 11 changed files with 940 additions and 70 deletions.
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
dask
dask[array,diagnostics] != 2021.03.0
h5py
hdf5plugin
numpy
pint
zarr
4 changes: 3 additions & 1 deletion requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
dask
dask[array,diagnostics] != 2021.03.0
h5py
hdf5plugin
numpy
pint
pytest
pytest-cov
zarr
10 changes: 9 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ project-urls =
[options]
include_package_data = True
install_requires =
dask
dask[array,diagnostics] != 2021.03.0
h5py
hdf5plugin
numpy
pint
zarr
packages = find:
package_dir =
Expand All @@ -42,3 +44,9 @@ zip_safe = False

[options.packages.find]
where = src

[options.entry_points]
console_scripts =
cues = tristan.command_line.cues:main
vds = tristan.command_line.vds:main
images = tristan.command_line.images:main
142 changes: 75 additions & 67 deletions src/tristan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,53 +13,65 @@
__version__ = "0.0.0"
__version_tuple__ = tuple(int(x) for x in __version__.split("."))

import os
from typing import MutableSequence, Optional, Tuple
from typing import Dict, Optional, Tuple

import h5py
import numpy as np
import pint
from dask import array as da

try:
from numpy.typing import ArrayLike
except ImportError:
# NumPy versions compatible with Python 3.6 do not have the numpy.typing module.
import numpy as np

ArrayLike = np.ndarray

clock_frequency = 6.4e8

# Translations of the cue_id messages.
# Translations of the basic cue_id messages.
padding = 0
sync = 0x800
sync_module_1 = 0x801
sync_module_2 = 0x802
shutter_open = 0x840
shutter_open_module_1 = 0x841
shutter_open_module_2 = 0x842
shutter_close = 0x880
shutter_close_module_1 = 0x881
shutter_close_module_2 = 0x882
fem_falling = 0x8C1
fem_rising = 0x8E1
ttl_falling = 0x8C9
ttl_rising = 0x8E9
lvds_falling = 0x8CA
lvds_rising = 0x8EA
tzero_falling = 0x8CB
tzero_rising = 0x8EB
sync_falling = 0x8CC
sync_rising = 0x8EC
reserved = 0xF00

cues = {
padding: "Padding",
sync: "Extended time stamp, global synchronisation signal",
sync_module_1: "Extended time stamp, sensor module 1",
sync_module_2: "Extended time stamp, sensor module 2",
sync: "Extended time stamp, global synchronisation",
shutter_open: "Shutter open time stamp, global",
shutter_open_module_1: "Shutter open time stamp, sensor module 1",
shutter_open_module_2: "Shutter open time stamp, sensor module 2",
shutter_close: "Shutter close time stamp, global",
shutter_close_module_1: "Shutter close time stamp, sensor module 1",
shutter_close_module_2: "Shutter close time stamp, sensor module 2",
fem_falling: "FEM trigger input, falling edge",
fem_rising: "FEM trigger input, rising edge",
fem_falling: "FEM trigger, falling edge",
fem_rising: "FEM trigger, rising edge",
ttl_falling: "Clock trigger TTL input, falling edge",
ttl_rising: "Clock trigger TTL input, rising edge",
lvds_falling: "Clock trigger LVDS input, falling edge",
lvds_rising: "Clock trigger LVDS input, rising edge",
tzero_falling: "Clock trigger TZERO input, falling edge",
tzero_rising: "Clock trigger TZERO input, rising edge",
sync_falling: "Clock trigger SYNC input, falling edge",
sync_rising: "Clock trigger SYNC input, rising edge",
0xBC6: "Error: messages out of sync",
0xBCA: "Error: messages out of sync",
reserved: "Reserved",
**{
basic + n: f"{name} time stamp, sensor module {n}"
for basic, name in (
(sync, "Extended"),
(shutter_open, "Shutter open"),
(shutter_close, "Shutter close"),
)
for n in range(1, 64)
},
}

# Keys of event data in the HDF5 file structure.
Expand All @@ -68,61 +80,53 @@
event_location_key = "event_id"
event_time_key = "event_time_offset"
event_energy_key = "event_energy"
size_key = "entry/instrument/detector/module/data_size"

cue_keys = cue_id_key, cue_time_key
event_keys = event_location_key, event_time_key, event_energy_key

def fullpath(path: str) -> str:
"""Get an absolute path with tilde home directory shorthand expanded."""
return os.path.abspath(os.path.expanduser(path))
nx_size_key = "entry/instrument/detector/module/data_size"


def first_cue_time(
data: h5py.File, message: int, events_group: Optional[str] = "/"
) -> Optional[int]:
def first_cue_time(data: Dict[str, da.Array], message: int) -> Optional[int]:
"""
Find the timestamp of the first instance of a cue message in a data file.
Find the timestamp of the first instance of a cue message in a Tristan data set.
Args:
data: A NeXus-like LATRD data file.
data: A NeXus-like LATRD data dictionary (a dictionary with data set
names as keys and Dask arrays as values). Must contain one entry
for cue id messages and one for cue timestamps. The two arrays are
assumed to have the same length.
message: The message code, as defined in the Tristan standard.
events_group: HDF5 group containing the events data.
Returns:
The timestamp, measured in clock cycles from the global synchronisation signal.
If the message doesn't exist in the data set, this returns None.
"""
events_group = events_group or "/"

index = np.argmax(data[events_group + cue_id_key][...] == message)

# Catch the case in which the message is not present in the data set.
if index == 0 and data[events_group + cue_id_key][0] != message:
return None
index = da.argmax(data[cue_id_key] == message)
if index or data[cue_id_key][0] == message:
return data[cue_time_key][index]

return data[events_group + cue_time_key][index].astype(int)


def cue_times(
data: h5py.File, message: int, events_group: Optional[str] = "/"
) -> MutableSequence[int]:
def cue_times(data: Dict[str, da.Array], message: int) -> da.Array:
"""
Find the timestamps of all instances of a cue message in a data file.
Find the timestamps of all instances of a cue message in a Tristan data set.
Args:
data: A NeXus-like LATRD data file.
data: A NeXus-like LATRD data dictionary (a dictionary with data set
names as keys and Dask arrays as values). Must contain one entry
for cue id messages and one for cue timestamps. The two arrays are
assumed to have the same length.
message: The message code, as defined in the Tristan standard.
events_group: HDF5 group containing the events data.
Returns:
The timestamps, measured in clock cycles from the global synchronisation signal.
The timestamps, measured in clock cycles from the global synchronisation
signal, de-duplicated.
"""
events_group = events_group or "/"

index = np.nonzero(data[events_group + cue_id_key][...] == message)
return np.unique(data[events_group + cue_time_key][index].astype(int))
index = da.flatnonzero(data[cue_id_key] == message)
return da.unique(data[cue_time_key][index])


def seconds(timestamp: int, reference: int = 0) -> float:
def seconds(timestamp: ArrayLike, reference: ArrayLike = 0) -> ArrayLike:
"""
Convert a Tristan timestamp to seconds, measured from a given time.
Expand All @@ -138,28 +142,32 @@ def seconds(timestamp: int, reference: int = 0) -> float:
Returns:
The difference between the two timestamps in seconds.
"""
return (timestamp - reference) / clock_frequency
return pint.Quantity((timestamp - reference) / clock_frequency, "s").to_compact()


def coordinates(event_location: int) -> Tuple[int, int]:
def pixel_index(location: ArrayLike, image_size: Tuple[int, int]) -> ArrayLike:
"""
Extract pixel coordinate information from an event location message.
Extract pixel coordinate information from an event location (event_id) message.
Translate a Tristan event location message to the index of the corresponding
pixel in the flattened image array (i.e. numbered from zero, in row-major order).
The pixel coordinates of an event on a Tristan detector are encoded in a 32-bit
integer location message (the event_id) with 26 bits of useful information.
Extract the y coordinate (the 13 least significant bits) and the x coordinate
(the 13 next least significant bits). Find the corresponding pixel index in the
flattened image array by multiplying the y value by the size of the array in x,
and adding the x value.
The pixel coordinates of an event on a Tristan detector are encoded in an
integer location message with 26 bits of useful information. Extract the y
coordinate (the 13 least significant bits) and the x coordinate (the 13 next
least significant bits).
This function calls the Python built-in divmod and so can be broadcast over NumPy
and Dask arrays.
Args:
event_location: Either a single event location message (an integer) or a NumPy
array of multiple integers representing the coordinates for
several events.
location: Event location message (an integer).
image_size: Shape of the image array in (y, x), i.e. (slow, fast).
Returns:
A tuple (x, y) of decoded coordinates.
Index in the flattened image array of the pixel where the event occurred.
"""
x, y = divmod(event_location, 0x2000)
if isinstance(event_location, np.ndarray):
return x.astype(np.int16), y.astype(np.int16)
else:
return x, y
x, y = divmod(location, 0x2000)
return x + y * image_size[1]
70 changes: 70 additions & 0 deletions src/tristan/binning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Tools for binning events to images."""


from operator import mul
from typing import Dict, Tuple

import numpy as np
from dask import array as da

try:
from numpy.typing import ArrayLike
except ImportError:
# NumPy versions compatible with Python 3.6 do not have the numpy.typing module.
ArrayLike = np.ndarray

from . import (
event_location_key,
event_time_key,
first_cue_time,
pixel_index,
shutter_close,
shutter_open,
)


def find_start_end(data):
start_time = first_cue_time(data, shutter_open)
end_time = first_cue_time(data, shutter_close)
return da.compute(start_time, end_time)


def make_single_image(
data: Dict[str, da.Array], image_size: Tuple[int, int], start: int, end: int
) -> da.Array:
event_times = data[event_time_key]
event_locations = data[event_location_key]

valid_events = (start <= event_times) & (event_times < end)
event_locations = event_locations[valid_events]
event_locations = pixel_index(event_locations, image_size)

image = da.bincount(event_locations, minlength=mul(*image_size))
return image.astype(np.uint32).reshape(1, *image_size)


def make_multiple_images(
data: Dict[str, da.Array], image_size: Tuple[int, int], bins: ArrayLike
) -> da.Array:
event_times = data[event_time_key]
event_locations = data[event_location_key]

valid_events = (bins[0] <= event_times) & (event_times < bins[-1])
event_times = event_times[valid_events]
event_locations = event_locations[valid_events]

image_indices = da.digitize(event_times, bins) - 1
event_locations = pixel_index(event_locations, image_size)
num_images = bins.size - 1

image_indices = [
image_indices == image_number for image_number in range(num_images)
]
images = da.stack(
[
da.bincount(event_locations[indices], minlength=mul(*image_size))
for indices in image_indices
]
)

return images.astype(np.uint32).reshape(num_images, *image_size)
65 changes: 65 additions & 0 deletions src/tristan/command_line/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""General utilities for the command-line tools."""

import argparse

import pint

from .. import __version__

__all__ = ("version_parser", "input_parser", "image_output_parser", "exposure_parser")


version_parser = argparse.ArgumentParser(add_help=False)
version_parser.add_argument(
"-V",
"--version",
action="version",
version="%(prog)s: Tristan tools {version}".format(version=__version__),
)


input_parser = argparse.ArgumentParser(add_help=False)
input_parser.add_argument(
"input_file",
help="Tristan metadata ('_meta.h5') or raw data ('_000001.h5', etc.) file. "
"This file must be in the same directory as the HDF5 files containing all the "
"corresponding raw events data.",
metavar="input-file",
)


image_output_parser = argparse.ArgumentParser(add_help=False)
image_output_parser.add_argument(
"-o",
"--output-file",
help="File name or location for output image file, defaults to the working "
"directory. If only a directory location is given, the pattern of the raw data "
"files will be used, with '<name>_meta.h5' replaced with '<name>_single_image.h5'.",
)
image_output_parser.add_argument(
"-f",
"--force",
help="Force the output image file to over-write any existing file with the same "
"name.",
action="store_true",
)

image_output_parser.add_argument(
"-s",
"--image-size",
help="Dimensions of the detector in pixels, separated by a comma, as 'x,y', i.e. "
"'fast,slow'.",
)


exposure_parser = argparse.ArgumentParser(add_help=False)
group = exposure_parser.add_mutually_exclusive_group(required=True)
group.add_argument(
"-e",
"--exposure-time",
help="Duration of each image. This will be used to calculate the number of "
"images. Specify a value with units like '--exposure-time .5ms', '-e 500µs' or "
"'-e 500us'.",
type=pint.Quantity,
)
group.add_argument("-n", "--num-images", help="Number of images.", type=int)
Loading

0 comments on commit d546b45

Please sign in to comment.