Add Dask tools for VDS creation and simple image binning (#14)

* Add VDS creation command-line tool * Add Dask image binner command-line tool, with options for single-image, multi-image sweep and multi-image pump-probe binning. * Add command line tool to inspect cue messages * Avoid Dask v2021.03.0 due to a regression in that release — the dask.array.bincount function does not permit slicing in that version (see dask/dask#7391). * Add more cue message info and tidy up the functions for discovering the timestamps of chosen cues
DiamondLightSource · Mar 31, 2021 · d546b45 · d546b45
1 parent 9957949
commit d546b45
Show file tree

Hide file tree

Showing 11 changed files with 940 additions and 70 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,6 @@
-dask
+dask[array,diagnostics] != 2021.03.0
 h5py
+hdf5plugin
 numpy
+pint
 zarr
diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -1,6 +1,8 @@
-dask
+dask[array,diagnostics] != 2021.03.0
 h5py
+hdf5plugin
 numpy
+pint
 pytest
 pytest-cov
 zarr
diff --git a/setup.cfg b/setup.cfg
@@ -30,9 +30,11 @@ project-urls =
 [options]
 include_package_data = True
 install_requires =
-    dask
+    dask[array,diagnostics] != 2021.03.0
     h5py
+    hdf5plugin
     numpy
+    pint
     zarr
 packages = find:
 package_dir =
@@ -42,3 +44,9 @@ zip_safe = False
 
 [options.packages.find]
 where = src
+
+[options.entry_points]
+console_scripts =
+    cues = tristan.command_line.cues:main
+    vds = tristan.command_line.vds:main
+    images = tristan.command_line.images:main
diff --git a/src/tristan/__init__.py b/src/tristan/__init__.py
@@ -13,53 +13,65 @@
 __version__ = "0.0.0"
 __version_tuple__ = tuple(int(x) for x in __version__.split("."))
 
-import os
-from typing import MutableSequence, Optional, Tuple
+from typing import Dict, Optional, Tuple
 
-import h5py
-import numpy as np
+import pint
+from dask import array as da
+
+try:
+    from numpy.typing import ArrayLike
+except ImportError:
+    # NumPy versions compatible with Python 3.6 do not have the numpy.typing module.
+    import numpy as np
+
+    ArrayLike = np.ndarray
 
 clock_frequency = 6.4e8
 
-# Translations of the cue_id messages.
+# Translations of the basic cue_id messages.
 padding = 0
 sync = 0x800
-sync_module_1 = 0x801
-sync_module_2 = 0x802
 shutter_open = 0x840
-shutter_open_module_1 = 0x841
-shutter_open_module_2 = 0x842
 shutter_close = 0x880
-shutter_close_module_1 = 0x881
-shutter_close_module_2 = 0x882
 fem_falling = 0x8C1
 fem_rising = 0x8E1
 ttl_falling = 0x8C9
 ttl_rising = 0x8E9
 lvds_falling = 0x8CA
 lvds_rising = 0x8EA
+tzero_falling = 0x8CB
+tzero_rising = 0x8EB
+sync_falling = 0x8CC
+sync_rising = 0x8EC
 reserved = 0xF00
 
 cues = {
     padding: "Padding",
-    sync: "Extended time stamp, global synchronisation signal",
-    sync_module_1: "Extended time stamp, sensor module 1",
-    sync_module_2: "Extended time stamp, sensor module 2",
+    sync: "Extended time stamp, global synchronisation",
     shutter_open: "Shutter open time stamp, global",
-    shutter_open_module_1: "Shutter open time stamp, sensor module 1",
-    shutter_open_module_2: "Shutter open time stamp, sensor module 2",
     shutter_close: "Shutter close time stamp, global",
-    shutter_close_module_1: "Shutter close time stamp, sensor module 1",
-    shutter_close_module_2: "Shutter close time stamp, sensor module 2",
-    fem_falling: "FEM trigger input, falling edge",
-    fem_rising: "FEM trigger input, rising edge",
+    fem_falling: "FEM trigger, falling edge",
+    fem_rising: "FEM trigger, rising edge",
     ttl_falling: "Clock trigger TTL input, falling edge",
     ttl_rising: "Clock trigger TTL input, rising edge",
     lvds_falling: "Clock trigger LVDS input, falling edge",
     lvds_rising: "Clock trigger LVDS input, rising edge",
+    tzero_falling: "Clock trigger TZERO input, falling edge",
+    tzero_rising: "Clock trigger TZERO input, rising edge",
+    sync_falling: "Clock trigger SYNC input, falling edge",
+    sync_rising: "Clock trigger SYNC input, rising edge",
     0xBC6: "Error: messages out of sync",
     0xBCA: "Error: messages out of sync",
     reserved: "Reserved",
+    **{
+        basic + n: f"{name} time stamp, sensor module {n}"
+        for basic, name in (
+            (sync, "Extended"),
+            (shutter_open, "Shutter open"),
+            (shutter_close, "Shutter close"),
+        )
+        for n in range(1, 64)
+    },
 }
 
 # Keys of event data in the HDF5 file structure.
@@ -68,61 +80,53 @@
 event_location_key = "event_id"
 event_time_key = "event_time_offset"
 event_energy_key = "event_energy"
-size_key = "entry/instrument/detector/module/data_size"
 
+cue_keys = cue_id_key, cue_time_key
+event_keys = event_location_key, event_time_key, event_energy_key
 
-def fullpath(path: str) -> str:
-    """Get an absolute path with tilde home directory shorthand expanded."""
-    return os.path.abspath(os.path.expanduser(path))
+nx_size_key = "entry/instrument/detector/module/data_size"
 
 
-def first_cue_time(
-    data: h5py.File, message: int, events_group: Optional[str] = "/"
-) -> Optional[int]:
+def first_cue_time(data: Dict[str, da.Array], message: int) -> Optional[int]:
     """
-    Find the timestamp of the first instance of a cue message in a data file.
+    Find the timestamp of the first instance of a cue message in a Tristan data set.
 
     Args:
-        data:  A NeXus-like LATRD data file.
+        data:     A NeXus-like LATRD data dictionary (a dictionary with data set
+                  names as keys and Dask arrays as values).  Must contain one entry
+                  for cue id messages and one for cue timestamps.  The two arrays are
+                  assumed to have the same length.
         message:  The message code, as defined in the Tristan standard.
-        events_group:  HDF5 group containing the events data.
 
     Returns:
         The timestamp, measured in clock cycles from the global synchronisation signal.
         If the message doesn't exist in the data set, this returns None.
     """
-    events_group = events_group or "/"
-
-    index = np.argmax(data[events_group + cue_id_key][...] == message)
-
-    # Catch the case in which the message is not present in the data set.
-    if index == 0 and data[events_group + cue_id_key][0] != message:
-        return None
+    index = da.argmax(data[cue_id_key] == message)
+    if index or data[cue_id_key][0] == message:
+        return data[cue_time_key][index]
 
-    return data[events_group + cue_time_key][index].astype(int)
 
-
-def cue_times(
-    data: h5py.File, message: int, events_group: Optional[str] = "/"
-) -> MutableSequence[int]:
+def cue_times(data: Dict[str, da.Array], message: int) -> da.Array:
     """
-    Find the timestamps of all instances of a cue message in a data file.
+    Find the timestamps of all instances of a cue message in a Tristan data set.
 
     Args:
-        data:  A NeXus-like LATRD data file.
+        data:     A NeXus-like LATRD data dictionary (a dictionary with data set
+                  names as keys and Dask arrays as values).  Must contain one entry
+                  for cue id messages and one for cue timestamps.  The two arrays are
+                  assumed to have the same length.
         message:  The message code, as defined in the Tristan standard.
-        events_group:  HDF5 group containing the events data.
 
     Returns:
-        The timestamps, measured in clock cycles from the global synchronisation signal.
+        The timestamps, measured in clock cycles from the global synchronisation
+        signal, de-duplicated.
     """
-    events_group = events_group or "/"
-
-    index = np.nonzero(data[events_group + cue_id_key][...] == message)
-    return np.unique(data[events_group + cue_time_key][index].astype(int))
+    index = da.flatnonzero(data[cue_id_key] == message)
+    return da.unique(data[cue_time_key][index])
 
 
-def seconds(timestamp: int, reference: int = 0) -> float:
+def seconds(timestamp: ArrayLike, reference: ArrayLike = 0) -> ArrayLike:
     """
     Convert a Tristan timestamp to seconds, measured from a given time.
 
@@ -138,28 +142,32 @@ def seconds(timestamp: int, reference: int = 0) -> float:
     Returns:
         The difference between the two timestamps in seconds.
     """
-    return (timestamp - reference) / clock_frequency
+    return pint.Quantity((timestamp - reference) / clock_frequency, "s").to_compact()
 
 
-def coordinates(event_location: int) -> Tuple[int, int]:
+def pixel_index(location: ArrayLike, image_size: Tuple[int, int]) -> ArrayLike:
     """
-    Extract pixel coordinate information from an event location message.
+    Extract pixel coordinate information from an event location (event_id) message.
+
+    Translate a Tristan event location message to the index of the corresponding
+    pixel in the flattened image array (i.e. numbered from zero, in row-major order).
+
+    The pixel coordinates of an event on a Tristan detector are encoded in a 32-bit
+    integer location message (the event_id) with 26 bits of useful information.
+    Extract the y coordinate (the 13 least significant bits) and the x coordinate
+    (the 13 next least significant bits).  Find the corresponding pixel index in the
+    flattened image array by multiplying the y value by the size of the array in x,
+    and adding the x value.
 
-    The pixel coordinates of an event on a Tristan detector are encoded in an
-    integer location message with 26 bits of useful information.  Extract the y
-    coordinate (the 13 least significant bits) and the x coordinate (the 13 next
-    least significant bits).
+    This function calls the Python built-in divmod and so can be broadcast over NumPy
+    and Dask arrays.
 
     Args:
-        event_location:  Either a single event location message (an integer) or a NumPy
-                         array of multiple integers representing the coordinates for
-                         several events.
+        location:    Event location message (an integer).
+        image_size:  Shape of the image array in (y, x), i.e. (slow, fast).
 
     Returns:
-        A tuple (x, y) of decoded coordinates.
+        Index in the flattened image array of the pixel where the event occurred.
     """
-    x, y = divmod(event_location, 0x2000)
-    if isinstance(event_location, np.ndarray):
-        return x.astype(np.int16), y.astype(np.int16)
-    else:
-        return x, y
+    x, y = divmod(location, 0x2000)
+    return x + y * image_size[1]
diff --git a/src/tristan/binning.py b/src/tristan/binning.py
@@ -0,0 +1,70 @@
+"""Tools for binning events to images."""
+
+
+from operator import mul
+from typing import Dict, Tuple
+
+import numpy as np
+from dask import array as da
+
+try:
+    from numpy.typing import ArrayLike
+except ImportError:
+    # NumPy versions compatible with Python 3.6 do not have the numpy.typing module.
+    ArrayLike = np.ndarray
+
+from . import (
+    event_location_key,
+    event_time_key,
+    first_cue_time,
+    pixel_index,
+    shutter_close,
+    shutter_open,
+)
+
+
+def find_start_end(data):
+    start_time = first_cue_time(data, shutter_open)
+    end_time = first_cue_time(data, shutter_close)
+    return da.compute(start_time, end_time)
+
+
+def make_single_image(
+    data: Dict[str, da.Array], image_size: Tuple[int, int], start: int, end: int
+) -> da.Array:
+    event_times = data[event_time_key]
+    event_locations = data[event_location_key]
+
+    valid_events = (start <= event_times) & (event_times < end)
+    event_locations = event_locations[valid_events]
+    event_locations = pixel_index(event_locations, image_size)
+
+    image = da.bincount(event_locations, minlength=mul(*image_size))
+    return image.astype(np.uint32).reshape(1, *image_size)
+
+
+def make_multiple_images(
+    data: Dict[str, da.Array], image_size: Tuple[int, int], bins: ArrayLike
+) -> da.Array:
+    event_times = data[event_time_key]
+    event_locations = data[event_location_key]
+
+    valid_events = (bins[0] <= event_times) & (event_times < bins[-1])
+    event_times = event_times[valid_events]
+    event_locations = event_locations[valid_events]
+
+    image_indices = da.digitize(event_times, bins) - 1
+    event_locations = pixel_index(event_locations, image_size)
+    num_images = bins.size - 1
+
+    image_indices = [
+        image_indices == image_number for image_number in range(num_images)
+    ]
+    images = da.stack(
+        [
+            da.bincount(event_locations[indices], minlength=mul(*image_size))
+            for indices in image_indices
+        ]
+    )
+
+    return images.astype(np.uint32).reshape(num_images, *image_size)
diff --git a/src/tristan/command_line/__init__.py b/src/tristan/command_line/__init__.py
@@ -0,0 +1,65 @@
+"""General utilities for the command-line tools."""
+
+import argparse
+
+import pint
+
+from .. import __version__
+
+__all__ = ("version_parser", "input_parser", "image_output_parser", "exposure_parser")
+
+
+version_parser = argparse.ArgumentParser(add_help=False)
+version_parser.add_argument(
+    "-V",
+    "--version",
+    action="version",
+    version="%(prog)s:  Tristan tools {version}".format(version=__version__),
+)
+
+
+input_parser = argparse.ArgumentParser(add_help=False)
+input_parser.add_argument(
+    "input_file",
+    help="Tristan metadata ('_meta.h5') or raw data ('_000001.h5', etc.) file.  "
+    "This file must be in the same directory as the HDF5 files containing all the "
+    "corresponding raw events data.",
+    metavar="input-file",
+)
+
+
+image_output_parser = argparse.ArgumentParser(add_help=False)
+image_output_parser.add_argument(
+    "-o",
+    "--output-file",
+    help="File name or location for output image file, defaults to the working "
+    "directory.  If only a directory location is given, the pattern of the raw data "
+    "files will be used, with '<name>_meta.h5' replaced with '<name>_single_image.h5'.",
+)
+image_output_parser.add_argument(
+    "-f",
+    "--force",
+    help="Force the output image file to over-write any existing file with the same "
+    "name.",
+    action="store_true",
+)
+
+image_output_parser.add_argument(
+    "-s",
+    "--image-size",
+    help="Dimensions of the detector in pixels, separated by a comma, as 'x,y', i.e. "
+    "'fast,slow'.",
+)
+
+
+exposure_parser = argparse.ArgumentParser(add_help=False)
+group = exposure_parser.add_mutually_exclusive_group(required=True)
+group.add_argument(
+    "-e",
+    "--exposure-time",
+    help="Duration of each image.  This will be used to calculate the number of "
+    "images.  Specify a value with units like '--exposure-time .5ms', '-e 500µs' or "
+    "'-e 500us'.",
+    type=pint.Quantity,
+)
+group.add_argument("-n", "--num-images", help="Number of images.", type=int)