Skip to content

Commit

Permalink
Merge pull request #1816 from cta-observatory/fixed_point_nan
Browse files Browse the repository at this point in the history
Implement treatment of invalid values in FixedPointColumnTransform, fixes #1815
  • Loading branch information
maxnoe authored Feb 22, 2022
2 parents a838ae5 + 1ce535e commit 9ee0635
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 3 deletions.
4 changes: 4 additions & 0 deletions ctapipe/io/datawriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,10 @@ def _write_dl1_telescope_events(
)

if self.write_images:
if dl1_camera.image is None:
raise ValueError(
"DataWriter.write_images is True but event does not contain image"
)
# note that we always write the image, even if the image quality
# criteria are not met (those are only to determine if the parameters
# can be computed).
Expand Down
84 changes: 82 additions & 2 deletions ctapipe/io/tableio.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,26 +350,106 @@ class FixedPointColumnTransform(ColumnTransform):
Can be used to store values as fixed point by using an integer dtype
and a scale that is a power of 10.
The transforms reserves 3 integers to represent -inf, nan and inf.
Underflowing values are converted to -inf and overflowing to inf.
For unsigned target dtype:
-inf: maxval - 2
nan: maxval - 1
inf: maxval
For signed target dtype:
-inf: minval
nan: minval + 1
inf: maxval
When reading, these special values must not be interpreted as valid integer
values but be transformed back into -inf, nan, inf respectively.
This is a lossy transformation.
"""

def __init__(self, scale, offset, source_dtype, target_dtype):
self.scale = scale
self.offset = offset
self.source_dtype = np.dtype(source_dtype)
self.target_dtype = np.dtype(target_dtype)
self.unsigned = self.target_dtype.kind == "u"

iinfo = np.iinfo(self.target_dtype)

# use three highest values for nan markers for unsigned case
if self.unsigned:
self.neginf = iinfo.max - 2
self.nan = iinfo.max - 1
self.posinf = iinfo.max

# this leaves this inclusive range for the valid values
self.minval = 0
self.maxval = iinfo.max - 3
else:
self.neginf = iinfo.min
self.nan = iinfo.min + 1
self.posinf = iinfo.max

# this leaves this inclusive range for the valid values
self.minval = iinfo.min + 2
self.maxval = iinfo.max - 1

def __call__(self, value):
return (value * self.scale).astype(self.target_dtype) + self.offset
is_scalar = np.array(value, copy=False).shape == ()
value = np.atleast_1d(value).astype(self.source_dtype, copy=False)

scaled = np.round(value * self.scale) + self.offset

# convert under/overflow values to -inf/inf
scaled[scaled > self.maxval] = np.inf
scaled[scaled < self.minval] = -np.inf

nans = np.isnan(scaled)
pos_inf = np.isposinf(scaled)
neg_inf = np.isneginf(scaled)

result = scaled.astype(self.target_dtype)
result[nans] = self.nan
result[neg_inf] = self.neginf
result[pos_inf] = self.posinf

if is_scalar:
return np.squeeze(result)

return result

def inverse(self, value):
return (value - self.offset).astype(self.source_dtype) / self.scale
is_scalar = np.array(value, copy=False).shape == ()
value = np.atleast_1d(value)

result = (value.astype(self.source_dtype) - self.offset) / self.scale
result = np.atleast_1d(result)

nans = value == self.nan
pos_inf = value == self.posinf
neg_inf = value == self.neginf

result[nans] = np.nan
result[neg_inf] = -np.inf
result[pos_inf] = np.inf

if is_scalar:
return np.squeeze(result)

return result

def get_meta(self, colname: str):
return {
f"{colname}_TRANSFORM": "fixed_point",
f"{colname}_TRANSFORM_SCALE": self.scale,
f"{colname}_TRANSFORM_DTYPE": str(self.source_dtype),
f"{colname}_TRANSFORM_OFFSET": self.offset,
f"{colname}_NAN_VALUE": self.nan,
f"{colname}_POSINF_VALUE": self.posinf,
f"{colname}_NEGINF_VALUE": self.neginf,
}


Expand Down
105 changes: 105 additions & 0 deletions ctapipe/io/tests/test_column_transforms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import numpy as np
from numpy.testing import assert_array_equal


def test_fixed_point_unsigned():
from ctapipe.io.hdf5tableio import FixedPointColumnTransform

tr = FixedPointColumnTransform(
scale=10, offset=0, source_dtype=np.float32, target_dtype=np.uint16
)

iinfo = np.iinfo(tr.target_dtype)
assert tr.posinf == iinfo.max
assert tr.nan == iinfo.max - 1
assert tr.neginf == iinfo.max - 2

values = {
0: 0,
0.1: 1,
0.16: 2,
-np.inf: tr.neginf,
np.nan: tr.nan,
np.inf: tr.posinf,
(tr.maxval + 1) / 10: tr.posinf,
-1: tr.neginf,
}

for v, e in values.items():
transformed = tr(v)
assert transformed.dtype == tr.target_dtype
assert transformed == e

# test array
v = np.array(list(values.keys()))
e = np.array(list(values.values()))
assert_array_equal(tr(v), e)


def test_fixed_point_unsigned_offset():
from ctapipe.io.hdf5tableio import FixedPointColumnTransform

tr = FixedPointColumnTransform(
scale=10, offset=400, source_dtype=np.float32, target_dtype=np.uint16
)

values = {
-40: 0,
-30: 100,
0: 400,
0.1: 401,
0.16: 402,
-np.inf: tr.neginf,
np.nan: tr.nan,
np.inf: tr.posinf,
(tr.maxval - 400) / 10: tr.maxval,
(tr.maxval - 399) / 10: tr.posinf,
-40.1: tr.neginf,
}

# test single values
for v, e in values.items():
transformed = tr(v)
assert transformed.dtype == tr.target_dtype
assert transformed == e, f"Unexpected outcome transforming {v}"

# test array
v = np.array(list(values.keys()))
e = np.array(list(values.values()))
assert_array_equal(tr(v), e)


def test_fixed_point_signed():
from ctapipe.io.hdf5tableio import FixedPointColumnTransform

tr = FixedPointColumnTransform(
scale=10, offset=0, source_dtype=np.float32, target_dtype=np.int16
)

iinfo = np.iinfo(tr.target_dtype)
assert tr.posinf == iinfo.max
assert tr.nan == iinfo.min + 1
assert tr.neginf == iinfo.min

values = {
-np.inf: tr.neginf,
(tr.minval - 1) / 10: tr.neginf,
tr.minval / 10: tr.minval,
-50: -500,
0: 0,
50: 500,
tr.maxval / 10: tr.maxval,
(tr.maxval + 1) / 10: tr.posinf,
np.inf: tr.posinf,
}

# test single values
for v, e in values.items():
transformed = tr(v)
assert transformed.dtype == tr.target_dtype
assert transformed == e, f"Unexpected outcome transforming {v}"

# test array
v = np.array(list(values.keys()))
e = np.array(list(values.values()))
assert_array_equal(tr(v), e)
38 changes: 37 additions & 1 deletion ctapipe/io/tests/test_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,43 @@ class SomeContainer(Container):
assert isinstance(data.time, Time)
assert data.time == NAN_TIME
# rounded to two digits
assert np.all(data.image == np.array([1.23, 123.45]))
assert np.all(data.image == np.array([1.23, 123.46]))


def test_fixed_point_column_transform(tmp_path):
""" ensure a user-added column transform is applied """
from ctapipe.io.tableio import FixedPointColumnTransform

tmp_file = tmp_path / "test_column_transforms.hdf5"

class SomeContainer(Container):
container_prefix = ""
image = Field(np.array([np.nan, np.inf, -np.inf]))

cont = SomeContainer()

with HDF5TableWriter(tmp_file, group_name="data") as writer:
writer.add_column_transform(
"signed", "image", FixedPointColumnTransform(100, 0, np.float64, np.int32)
)
writer.add_column_transform(
"unsigned",
"image",
FixedPointColumnTransform(100, 0, np.float64, np.uint32),
)
# add user generated transform for the "value" column
writer.write("signed", cont)
writer.write("unsigned", cont)

with HDF5TableReader(tmp_file, mode="r") as reader:
signed = next(reader.read("/data/signed", SomeContainer()))
unsigned = next(reader.read("/data/unsigned", SomeContainer()))

for data in (signed, unsigned):
# check we get our original nans back
assert np.isnan(data.image[0])
assert np.isposinf(data.image[1])
assert np.isneginf(data.image[2])


def test_column_transforms_regexps(tmp_path):
Expand Down

0 comments on commit 9ee0635

Please sign in to comment.