Skip to content

Commit

Permalink
TableLoader (#1771)
Browse files Browse the repository at this point in the history
* Start working on table loader

* Add test for reading by tel_type dl1 files

* Implement more loading

* add get_structure and TableLoader to ctapipe.io namespace

* Update conftest for ctapipe-process

* Produce and prepare DL2 shower geometry test file

* add ctapipe.io.tableloader to docs

* add GEOMETRY_GROUP and get_tel_ids function

* always load trigger and check table when loading shower_table

* Add read_subarray_events

* Add unit-test for get_tel_ids

* Add 'read_events' and 'read_events_by_tel_type'

* update and improve unit-testing

* add module docstring

* improve class docstring

* add traitlets' helpers

* add provenance

* fix and add missing docstrings

* Explicit required input types in 'read_events' & 'read_events_by_type'

* Add check to get_tel_ids for valid telescope types

* Add support for true image parameters

* Move get_tel_ids to SubarrayDescription

* Fix ref in docs

* Refactor table loader

* Address codacy issues

* Fix docs of ProcessTool, document it

Co-authored-by: Michele Peresano <[email protected]>
  • Loading branch information
maxnoe and HealthyPear authored Sep 20, 2021
1 parent af5018f commit 5ff5401
Show file tree
Hide file tree
Showing 14 changed files with 760 additions and 15 deletions.
1 change: 1 addition & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ good-names=i,
x,
y,
n,
f,
ex,
_

Expand Down
86 changes: 86 additions & 0 deletions ctapipe/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,69 @@ def prod5_proton_simtel_path():

@pytest.fixture(scope="session")
def dl1_tmp_path(tmp_path_factory):
    """Session-wide temporary directory for the shared dl1 test data"""
    dl1_dir = tmp_path_factory.mktemp("dl1")
    return dl1_dir


@pytest.fixture(scope="session")
def dl2_tmp_path(tmp_path_factory):
    """Session-wide temporary directory for the shared dl2 test data"""
    dl2_dir = tmp_path_factory.mktemp("dl2")
    return dl2_dir


@pytest.fixture(scope="session")
def dl2_shower_geometry_file(dl2_tmp_path, prod5_gamma_simtel_path):
    """
    Path to a file holding both parameters and shower geometry, produced
    from the gamma simulation set.

    The file is written by ``ProcessorTool`` the first time the fixture is
    requested; if it already exists it is returned as is.
    """
    from ctapipe.core import run_tool
    from ctapipe.tools.process import ProcessorTool

    output = dl2_tmp_path / "gamma.training.h5"
    lock_path = output.with_suffix(output.suffix + ".lock")

    # The lock keeps parallel test workers from running the tool at the same
    # time; a worker that gets the lock later finds the finished file.
    with FileLock(lock_path):
        if not output.is_file():
            exit_code = run_tool(
                ProcessorTool(),
                argv=[
                    f"--input={prod5_gamma_simtel_path}",
                    f"--output={output}",
                    "--write-images",
                    "--write-stereo-shower",
                    "--max-events=20",
                ],
                cwd=dl2_tmp_path,
            )
            assert exit_code == 0

    return output


@pytest.fixture(scope="session")
def dl2_shower_geometry_file_type(dl2_tmp_path, prod5_gamma_simtel_path):
    """
    Path to a file holding both parameters and shower geometry, produced
    from the gamma simulation set with ``DataWriter.split_datasets_by``
    set to ``tel_type``.

    The file is written by ``ProcessorTool`` the first time the fixture is
    requested; if it already exists it is returned as is.
    """
    from ctapipe.core import run_tool
    from ctapipe.tools.process import ProcessorTool

    output = dl2_tmp_path / "gamma_by_type.training.h5"
    lock_path = output.with_suffix(output.suffix + ".lock")

    # The lock keeps parallel test workers from running the tool at the same
    # time; a worker that gets the lock later finds the finished file.
    with FileLock(lock_path):
        if not output.is_file():
            exit_code = run_tool(
                ProcessorTool(),
                argv=[
                    f"--input={prod5_gamma_simtel_path}",
                    f"--output={output}",
                    "--write-images",
                    "--write-stereo-shower",
                    "--max-events=20",
                    "--DataWriter.split_datasets_by=tel_type",
                ],
                cwd=dl2_tmp_path,
            )
            assert exit_code == 0

    return output


@pytest.fixture(scope="session")
def dl1_file(dl1_tmp_path, prod5_gamma_simtel_path):
"""
Expand All @@ -166,6 +226,32 @@ def dl1_file(dl1_tmp_path, prod5_gamma_simtel_path):
return output


@pytest.fixture(scope="session")
def dl1_by_type_file(dl1_tmp_path, prod5_gamma_simtel_path):
    """
    Path to a DL1 file holding both images and parameters, produced from
    the gamma simulation set with ``DataWriter.split_datasets_by`` set to
    ``tel_type``.

    The file is written by ``ProcessorTool`` the first time the fixture is
    requested; if it already exists it is returned as is.
    """
    from ctapipe.tools.process import ProcessorTool
    from ctapipe.core import run_tool

    output = dl1_tmp_path / "gamma_by_type.dl1.h5"
    lock_path = output.with_suffix(output.suffix + ".lock")

    # The lock keeps parallel test workers from running the tool at the same
    # time; a worker that gets the lock later finds the finished file.
    with FileLock(lock_path):
        if not output.is_file():
            exit_code = run_tool(
                ProcessorTool(),
                argv=[
                    f"--input={prod5_gamma_simtel_path}",
                    f"--output={output}",
                    "--write-images",
                    "--max-events=20",
                    "--DataWriter.split_datasets_by=tel_type",
                ],
                cwd=dl1_tmp_path,
            )
            assert exit_code == 0

    return output


@pytest.fixture(scope="session")
def dl1_image_file(dl1_tmp_path, prod5_gamma_simtel_path):
"""
Expand Down
10 changes: 5 additions & 5 deletions ctapipe/core/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,16 @@ class Tool(Application):
``name``, ``description`` and ``examples`` class attributes as
strings. The ``aliases`` attribute can be set to cause a lower-level
`~ctapipe.core.Component` parameter to become a high-level command-line
parameter (See example below). The `setup()`, `start()`, and
`finish()` methods should be defined in the sub-class.
parameter (See example below). The `setup`, `start`, and
`finish` methods should be defined in the sub-class.
Additionally, any `ctapipe.core.Component` used within the `Tool`
should have their class in a list in the ``classes`` attribute,
which will automatically add their configuration parameters to the
tool.
Once a tool is constructed and the virtual methods defined, the
user can call the `run()` method to setup and start it.
user can call the `run` method to setup and start it.
.. code:: python
Expand Down Expand Up @@ -254,14 +254,14 @@ def setup(self):
@abstractmethod
def start(self):
"""main body of tool (override in subclass). This is automatically
called after `initialize()` when the `run()` is called.
called after `Tool.initialize` when the `Tool.run` is called.
"""
pass

@abstractmethod
def finish(self):
"""finish up (override in subclass). This is called automatically
after `start()` when `run()` is called."""
after `Tool.start` when `Tool.run` is called."""
self.log.info("Goodbye")

def run(self, argv=None):
Expand Down
44 changes: 40 additions & 4 deletions ctapipe/instrument/subarray.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Description of Arrays or Subarrays of telescopes
"""
from typing import Dict, List, Union
from pathlib import Path
import warnings

Expand Down Expand Up @@ -74,7 +75,7 @@ class SubarrayDescription:
def __init__(self, name, tel_positions=None, tel_descriptions=None):
self.name = name
self.positions = tel_positions or dict()
self.tels = tel_descriptions or dict()
self.tels: Dict[int, TelescopeDescription] = tel_descriptions or dict()

if self.positions.keys() != self.tels.keys():
raise ValueError("Telescope ids in positions and descriptions do not match")
Expand Down Expand Up @@ -360,17 +361,17 @@ def peek(self):
plt.tight_layout()

@lazyproperty
def telescope_types(self):
def telescope_types(self) -> List[TelescopeDescription]:
""" list of telescope types in the array"""
return list({tel for tel in self.tel.values()})

@lazyproperty
def camera_types(self):
def camera_types(self) -> List[CameraDescription]:
""" list of camera types in the array """
return list({tel.camera for tel in self.tel.values()})

@lazyproperty
def optics_types(self):
def optics_types(self) -> List[OpticsDescription]:
""" list of optics types in the array """
return list({tel.optics for tel in self.tel.values()})

Expand All @@ -391,6 +392,41 @@ def get_tel_ids_for_type(self, tel_type):

return [id for id, descr in self.tels.items() if str(descr) == tel_str]

def get_tel_ids(
    self, telescopes: List[Union[int, str, TelescopeDescription]]
) -> List[int]:
    """
    Convert a list of telescope ids and telescope descriptions to
    a list of unique telescope ids.

    Parameters
    ----------
    telescopes: List[Union[int, str, TelescopeDescription]]
        List of Telescope IDs and descriptions.
        Supported inputs for telescope descriptions are instances of
        `~ctapipe.instrument.TelescopeDescription` as well as their
        string representation.

    Returns
    -------
    tel_ids: List[int]
        Sorted list of unique telescope ids matching ``telescopes``

    Raises
    ------
    ValueError
        If a string in ``telescopes`` is not the string representation
        of one of the telescope types of this subarray.
    """
    ids = set()

    valid_tel_types = {str(tel_type) for tel_type in self.telescope_types}

    for telescope in telescopes:
        if isinstance(telescope, int):
            # A plain id is taken as-is. Stop here so the integer is not
            # also handed to the per-type lookup, which could never match.
            ids.add(telescope)
            continue

        if isinstance(telescope, str) and telescope not in valid_tel_types:
            raise ValueError(f"Invalid telescope type input: {telescope!r}.")

        ids.update(self.get_tel_ids_for_type(telescope))

    return sorted(ids)

def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
Expand Down
20 changes: 18 additions & 2 deletions ctapipe/instrument/telescope.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class TelescopeDescription:
Describes a Cherenkov Telescope and its associated
`~ctapipe.instrument.OpticsDescription` and `~ctapipe.instrument.CameraDescription`
Parameters
Attributes
----------
name: str
Telescope name
Expand All @@ -40,9 +40,25 @@ class TelescopeDescription:
"""

def __init__(
self, name, tel_type, optics: OpticsDescription, camera: CameraDescription
self,
name: str,
tel_type: str,
optics: OpticsDescription,
camera: CameraDescription,
):

if not isinstance(name, str):
raise TypeError("`name` must be a str")

if not isinstance(tel_type, str):
raise TypeError("`tel_type` must be a str")

if not isinstance(optics, OpticsDescription):
raise TypeError("`optics` must be an instance of `OpticsDescription`")

if not isinstance(camera, CameraDescription):
raise TypeError("`camera` must be an instance of `CameraDescription`")

self.name = name
self.type = tel_type
self.optics = optics
Expand Down
27 changes: 25 additions & 2 deletions ctapipe/instrument/tests/test_subarray.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
""" Tests for SubarrayDescriptions """
from copy import deepcopy

import numpy as np
from astropy import units as u
from astropy.coordinates import SkyCoord
from ctapipe.coordinates import TelescopeFrame
from copy import deepcopy
import pytest

from ctapipe.coordinates import TelescopeFrame
from ctapipe.instrument import (
CameraDescription,
OpticsDescription,
Expand Down Expand Up @@ -188,3 +190,24 @@ def test_hdf_duplicate_string_repr(tmp_path):
assert (
read.tel[1].optics.num_mirror_tiles == read.tel[2].optics.num_mirror_tiles + 1
)


def test_get_tel_ids(example_subarray):
    """Test for SubarrayDescription.get_tel_ids"""
    subarray = example_subarray
    sst = TelescopeDescription.from_name("SST-ASTRI", "CHEC")

    telescopes = [1, 2, "MST_MST_FlashCam", sst]
    tel_ids = subarray.get_tel_ids(telescopes)

    # get_tel_ids returns unique ids, so de-duplicate the expectation in
    # case ids 1 and 2 also belong to one of the requested telescope types
    true_tel_ids = set(
        subarray.get_tel_ids_for_type("MST_MST_FlashCam")
        + subarray.get_tel_ids_for_type(sst)
        + [1, 2]
    )

    assert sorted(tel_ids) == sorted(true_tel_ids)

    # an unknown telescope type string must be rejected with a ValueError;
    # catching bare Exception would also hide unrelated failures
    with pytest.raises(ValueError):
        subarray.get_tel_ids(["It's a-me, Mario!"])
2 changes: 2 additions & 0 deletions ctapipe/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .eventsource import EventSource
from .hdf5tableio import HDF5TableReader, HDF5TableWriter
from .tableio import TableWriter, TableReader
from .tableloader import TableLoader
from .datalevels import DataLevel
from .astropy_helpers import read_table
from .datawriter import DataWriter, DATA_MODEL_VERSION
Expand All @@ -21,6 +22,7 @@
"HDF5TableReader",
"TableWriter",
"TableReader",
"TableLoader",
"EventSeeker",
"EventSource",
"SimTelEventSource",
Expand Down
42 changes: 40 additions & 2 deletions ctapipe/io/astropy_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path

import tables
from astropy.table import Table
from astropy.table import Table, join
import numpy as np

from .tableio import (
Expand All @@ -18,7 +18,7 @@

from contextlib import ExitStack

__all__ = ["read_table"]
__all__ = ["read_table", "join_allow_empty"]


def read_table(h5file, path, start=None, stop=None, step=None, condition=None) -> Table:
Expand Down Expand Up @@ -119,3 +119,41 @@ def _parse_hdf5_attrs(table):
other_attrs[attr] = str(value) if isinstance(value, np.str_) else value

return column_transforms, column_descriptions, other_attrs


def join_allow_empty(left, right, keys, join_type="left", **kwargs):
    """
    Join two astropy tables, allowing both sides to be empty tables.

    See https://github.com/astropy/astropy/issues/12012 for why
    this is necessary.

    This behaves as `~astropy.table.join`, with the only difference of
    allowing empty tables to be joined: when at least one side has no rows,
    a copy of one of the inputs is returned instead of calling
    `~astropy.table.join`.
    """
    no_left_rows = len(left) == 0
    no_right_rows = len(right) == 0

    if no_left_rows or no_right_rows:
        if join_type == "inner":
            # the empty side is returned; left takes precedence if both are
            return left.copy() if no_left_rows else right.copy()

        if join_type == "left":
            return left.copy()

        if join_type == "right":
            return right.copy()

        if join_type == "outer":
            # the other side is returned; right if left is empty
            return right.copy() if no_left_rows else left.copy()

    # both sides have rows, or join_type is none of the four handled above
    return join(left, right, keys, join_type=join_type, **kwargs)
Loading

0 comments on commit 5ff5401

Please sign in to comment.