From 59931737dee8b27c5e36655a538c87aee9c2c440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Thu, 10 Jan 2019 09:34:03 +0100 Subject: [PATCH] Remove serializer, fixes #887 (#913) --- ctapipe/io/serializer.py | 336 --------------------------- ctapipe/io/tests/test_serializer.py | 155 ------------ docs/development/code-guidelines.rst | 6 - docs/io/index.rst | 10 - examples/obsolete/serialization.py | 59 ----- 5 files changed, 566 deletions(-) delete mode 100644 ctapipe/io/serializer.py delete mode 100644 ctapipe/io/tests/test_serializer.py delete mode 100755 examples/obsolete/serialization.py diff --git a/ctapipe/io/serializer.py b/ctapipe/io/serializer.py deleted file mode 100644 index e16e2415c58..00000000000 --- a/ctapipe/io/serializer.py +++ /dev/null @@ -1,336 +0,0 @@ -""" -Serialize ctapipe containers to file -""" - -from abc import ABC, abstractmethod -from gzip import open as gzip_open -from pickle import dump - -import numpy as np -from astropy import log -from astropy.table import Table, Column -from traitlets import Unicode - -from ctapipe.core import Container - -__all__ = ['Serializer'] - - -class Serializer: - """ - Serializes ctapipe.core.Component, write it to a file thanks - to its Writer object - For some formats (i.e. pickle +gzip), read serialized components from - a file - - Examples - -------- - >>> writer = Serializer(filename='output.pickle', format='pickle', mode='w') - >>> for container in input_containers: - ... writer.add_container(container.r0) - >>> writer.close() - - or using the context manager syntax - >>> with Serializer(filename='output.fits', format='fits', mode='w') as writer: - >>> for container in input_containers: - ... 
writer.add_container(container.r0) - """ - - def __init__(self, filename, format='fits', mode='x'): - - """ - Parameters - ---------- - filename: str - full path name for i/o file - format: str ('fits', 'img', 'pickle') - mode: str ('write', 'read') - : use this serializer as writer or reader - mode: str - 'w' open for writing, truncating the file first - 'x' open for exclusive creation, failing if the file already exists - 'a' open for writing, appending to the end of the file if it exists - Raises - ------ - NotImplementedError: when format is not implemented - ValueError: when mode is not correct - """ - self.filename = filename - self.format = format - self._stat = None # TODO collect statistics about serialized contents - if mode not in ('x', 'w', 'a'): - raise ValueError('{} is not a valid write mode. Use x, w or a'. - format(mode)) - self._writer = None - - if self.format == 'fits': - self._writer = TableWriter(outfile=filename, mode=mode, - format=format) - elif self.format == 'pickle': - self._writer = GZipPickleWriter(outfile=filename, mode=mode) - elif self.format == 'img': - raise NotImplementedError('img serializer format is' - ' not yet implemented') - else: - raise ValueError('You can serialize only on pickle, fits or img') - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - """ - Exit the runtime context related to this object. - The parameters describe the exception that caused the context to be - exited. If the context was exited without an exception, - all three arguments will be None. - If an exception is supplied, and the method wishes to suppress - the exception (i.e., prevent it from being propagated), - it should return a true value. Otherwise, the exception will be - processed normally upon exit from this method. 
- """ - self.close() - - def add_container(self, container): - """ - Add a container to serializer - """ - self._writer.add_container(container) - - - def close(self): - """ - Write data to disk - """ - self._writer.close() - - -class Writer(ABC): - - def __init__(self, filename): - self.outfile = filename - - @abstractmethod - def add_container(self, container): - pass - - @abstractmethod - def close(self): - pass - - -class GZipPickleWriter(Writer): - """ - Serializes list of ctapipe.core.Components. - Write Component to file - """ - - def __init__(self, outfile, mode='x'): - """ - Parameters - ---------- - outfile: Unicode - full path output file name - mode: str - 'w' open for writing, truncating the file first - 'x' open for exclusive creation, failing if the file already exists - 'a' open for writing, appending to the end of the file if it exists - Raises - ------ - FileNotFoundError: When the file cannot be opened - FileExistsError: when infile exist and mode is x - """ - super().__init__(outfile) - mode += 'b' - try: - self.file_object = gzip_open(outfile, mode) - except FileExistsError: - raise FileExistsError('file exists: {} and mode is {}'. 
- format(outfile, mode)) - - def close(self): - """ - close opened file - Returns - ------- - """ - self.file_object.close() - - def add_container(self, container): - """ - Add a container to be serialized - - Raises - ------ - TypeError: When container is not type of container - """ - if not isinstance(container, Container): - raise TypeError('Can write only Containers') - dump(container, self.file_object) - - -# FITS Implementation - -not_writeable_fields = ('tel', 'tels_with_data', 'calibration_parameters', - 'pedestal_subtracted_adc', 'integration_window') - - -def is_writeable(key, out_format='fits'): - """ - check if a key is writable - - Parameters - ---------- - key: str - out_format: 'fits' or ´ pickle' - according to out_format a same key can be writable or not - - Returns - ------- - True if key is writable according to the out_format - - Raises - ------ - NameError: When out_format is not know - """ - if out_format is 'fits': - return not (key in not_writeable_fields) - elif out_format is 'pickle': - return True - else: - raise NameError('{} not implemented'.format(out_format)) - - -def writeable_items(container): - """ - # Strip off what we cannot write - Parameters - ---------- - container: ctapipe.core.Container - - Returns - ------- - a dictionary with writable values only - """ - - d = dict(container.items()) - for k in not_writeable_fields: - log.debug("Cannot write column {0}".format(k)) - d.pop(k, None) - return d - - -def to_table(container): - """ - Convert a `ctapipe.core.Container` to an `astropy.Table` with one row - - Parameters - ---------- - container: ctapipe.core.Container - - Returns - ------- - Table: astropy.Table - """ - names = list() - columns = list() - for k, v in writeable_items(container).items(): - - v_arr = np.array(v) - v_arr = v_arr.reshape((1,) + v_arr.shape) - log.debug("Creating column for item '{0}' of shape {1}". 
- format(k, v_arr.shape)) - names.append(k) - columns.append(Column(v_arr)) - - return Table(data=columns, # dtypes are inferred by columns - names=names, - meta=container.meta) - - -class TableWriter(Writer): - """ - Fits table writer - """ - - def __init__(self, outfile, format='fits', mode='w'): - """ - Parameters - ---------- - outfile: str - output file name - format: str - 'fits' or 'img' - mode: str - 'w' open for writing, truncating the file first - 'x' open for exclusive creation, failing if the file already exists - Raises - ------ - NotImplementedError: when mode is correct but not yet implemented - ValueError: when mode is not correct - """ - super().__init__(outfile) - self.table = Table() - self._created_table = False - self.format = format - self.outfile = outfile - if mode == 'w': - self.overwrite = True - elif mode == 'x': - self.overwrite = False - elif mode == 'a': - raise NotImplementedError('a is a valid write mode,' - ' but not yet implemented') - else: - raise ValueError('{} is not a valid write mode. Use x, w or a'. 
- format(mode)) - - def _setup_table(self, container): - """ - Create Fits table and HDU - - Parameters - ---------- - container: ctapipe.core.Container - """ - # Create Table from Container - self.table = to_table(container) - - # Write HDU name - if self.format == "fits": - self.table.meta["EXTNAME"] = type(container).__name__ - self._created_table = True - - def add_container(self, container): - """ - Add a container as a table row - Parameters - ---------- - container: ctapipe.core.Container - - Raises - ------ - TypeError: When add another type than Container - """ - if not isinstance(container, Container): - raise TypeError("Can write only Containers") - - if not self._created_table: - self._setup_table(container) - else: - self.table.add_row(writeable_items(container)) - - def close(self, **kwargs): - """ - Write Fits table to file - Parameters - ---------- - kwargs to be passed to `astropy.Table.write method` - - Returns - ------- - Fits Table - """ - # Write table using astropy.table write method - self.table.write(output=self.outfile, format=self.format, - overwrite=self.overwrite, **kwargs) - return self.table diff --git a/ctapipe/io/tests/test_serializer.py b/ctapipe/io/tests/test_serializer.py deleted file mode 100644 index 18d35f14c24..00000000000 --- a/ctapipe/io/tests/test_serializer.py +++ /dev/null @@ -1,155 +0,0 @@ -from copy import deepcopy -from os import remove - -import pytest -from astropy.io import fits - -from ctapipe.io import event_source -from ctapipe.io.serializer import Serializer -from ctapipe.io.sources import PickleSource -from ctapipe.utils import get_dataset_path - - -def compare(read_container, source_container): - # test if 4th adc value of telescope 17 HI_GAIN are equals - return (read_container.r0.tel[17].waveform[0][2][4] == - source_container.r0.tel[17].waveform[0][2][4]) - - -def generate_input_containers(): - # Get event from hessio file, append them into input_containers - input_filename = 
get_dataset_path("gamma_test.simtel.gz") - with event_source(input_filename, max_events=3) as source: - input_containers = [deepcopy(event) for event in source] - return input_containers - - -# Setup -input_containers = generate_input_containers() - - -@pytest.fixture(scope='session') -def binary_filename(tmpdir_factory): - return str(tmpdir_factory.mktemp('data') - .join('pickle_data.pickle.gz')) - - -@pytest.fixture(scope='session') -def fits_file_name(tmpdir_factory): - return str(tmpdir_factory.mktemp('data').join('output.fits')) - - -def test_pickle_serializer(binary_filename): - serial = Serializer(filename=binary_filename, format='pickle', mode='w') - # append all input file events in input_containers list and pickle serializer - for event in input_containers: - serial.add_container(event) - serial.close() - - # read Containers from pickle serializer - reader = PickleSource(filename=binary_filename) - # file read_containers from serializer generator - read_containers = [] - for container in reader: - read_containers.append(container) - # test if number of read Container correspond to input - assert len(read_containers) is len(input_containers) - # test if 4th adc value of telescope 17 HI_GAIN are equals - assert compare(input_containers[2], read_containers[2]) - reader.close() - remove(binary_filename) - - -# Test pickle reader/writer with statement -def test_pickle_with_statement(binary_filename): - with Serializer(filename=binary_filename, format='pickle', mode='w') as \ - containers_writer: - for container in input_containers: - containers_writer.add_container(container) - containers_writer.close() - - read_containers = [] - with PickleSource(filename=binary_filename) as reader: - for container in reader: - read_containers.append(container) - # test if number of read Container correspond to input - assert len(read_containers) is len(input_containers) - # test if 4th adc value of telescope 17 HI_GAIN are equals - assert compare(input_containers[2], 
read_containers[2]) - remove(binary_filename) - - -# Test pickle reader iterator -def test_pickle_iterator(binary_filename): - serial = Serializer(filename=binary_filename, format='pickle', - mode='w') - # append all events in input_containers list and pickle serializer - for event in input_containers: - serial.add_container(event) - serial.close() - - read_containers = [] - reader = PickleSource(filename=binary_filename) - for container in reader: - read_containers.append(container) - # test if number of read Container correspond to input - assert len(read_containers) is len(input_containers) - # test if 4th adc value of telescope 17 HI_GAIN are equals - assert compare(input_containers[2], read_containers[2]) - reader.close() - remove(binary_filename) - - - - - -def test_fits_dl0(fits_file_name): - serial = Serializer(filename=fits_file_name, format='fits', mode='w') - for container in input_containers: - serial.add_container(container.dl0) - serial.close() - hdu = fits.open(fits_file_name)[1] - assert hdu.data["event_id"][0] == 408 - assert hdu.data["event_id"][1] == 409 - assert hdu.data["event_id"][2] == 803 - assert hdu.data["obs_id"][2] == 31964 - remove(fits_file_name) - - -def test_exclusive_mode(fits_file_name): - serial = Serializer(filename=fits_file_name, format='fits', mode='w') - for container in input_containers: - serial.add_container(container.dl0) - serial.close() - # Try to write to fits_file_name in exclusive mode - with pytest.raises(OSError): - serial = Serializer(filename=fits_file_name, format='fits', mode='x') - serial.add_container(input_containers[2].dl0) - serial.close() - remove(fits_file_name) - -""" -def test_fits_dl1(): - input_test_file = get_datasets_path('example_container.pickle.gz') - with gzip_open(input_test_file, 'rb') as f: - data = load(f) - t38 = data[0].dl1.tel[38] - serial = Serializer('output.fits', 'fits', overwrite=True) - serial.add_container(t38) - serial.write() - # t11_1 = data[1].dl1.tel[11] - # 
S_cal.write(t11_1) # This will not work because shape of data is different from tel to tel. -""" - - -def test_fits_context_manager(fits_file_name): - with Serializer(filename=fits_file_name, format='fits', mode='w') as writer: - for container in input_containers: - writer.add_container(container.dl0) - - hdulist = fits.open(fits_file_name) - assert hdulist[1].data["event_id"][0] == 408 - remove(fits_file_name) - - -# TODO test FITSSource class diff --git a/docs/development/code-guidelines.rst b/docs/development/code-guidelines.rst index ca08c29c5a6..915313893c8 100644 --- a/docs/development/code-guidelines.rst +++ b/docs/development/code-guidelines.rst @@ -306,7 +306,6 @@ help when writing algorithms: source = calibrated_event_source(filename) ImageMangler mangler(geom.pix_x, geom.pix_y, "transformtable.fits") - Serializer serializer = ... # simple loop over events, calling each algorithm and directly #passing data @@ -317,11 +316,6 @@ help when writing algorithms: mangled_image = mangler.mangle(image) image_parameters = parameterize_image(mangled_image) - # here you may here pack your output values into a Container if - # they are not already in one. We assume here that mangled_image - # and image_parameters are already Container subclasses - - serializer.write([mangled_image, image_parameters]) * When your algorithm test code (as above) works well and you are happy with the results, you can do two things: diff --git a/docs/io/index.rst b/docs/io/index.rst index a7e0e07f7d7..3b6d79391a6 100644 --- a/docs/io/index.rst +++ b/docs/io/index.rst @@ -160,10 +160,6 @@ data: Serialization of Containers: ============================ -The `serializer` module provide support for storing -`ctapipe.io.Container` classes in output files (for example FITS -tables or pickle files) - The `ctapipe.io.TableWriter` and `ctapipe.io.TableReader` base classes provide an interface to implement subclasses that write/read Containers to/from table-like data files. 
Currently the only implementation is for writing @@ -188,9 +184,3 @@ Reference/API .. automodapi:: ctapipe.io.containers :no-inheritance-diagram: - ------------------------------- - -.. automodapi:: ctapipe.io.serializer - :no-inheritance-diagram: - diff --git a/examples/obsolete/serialization.py b/examples/obsolete/serialization.py deleted file mode 100755 index dc2d1d1caee..00000000000 --- a/examples/obsolete/serialization.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -from ctapipe.io.serializer import Serializer -from astropy import log -import pickle -import gzip - -log.setLevel('DEBUG') - - -def write_dl0_example(filename, data): - S = Serializer(filename, mode='w') - - # Create table - for container in data: - S.write(container.dl0) - - # Print table - print(S._writer.table) - - # Save table to disk - S.save() - return S - - -def write_dl1_tel_example(filename, data): - - t38 = data[0].dl1.tel[38] - - S_cal = Serializer(filename, mode='w') - S_cal.write(t38) - - print(S_cal._writer.table) - - # t11_1 = data[1].dl1.tel[11] - # S_cal.write(t11_1) - # This will not work because shape of data is different from tel to tel. - - S_cal.save() - return S_cal - - -def context_manager_example(filename, data): - with Serializer(filename, mode='w') as writer: - for container in data: - print(container.dl0) - writer.write(container.dl0) - print(writer._writer.table) - return 0 - - -if __name__ == "__main__": - - with gzip.open('example_container.pickle.gz', 'rb') as f: - data = pickle.load(f) - - S = write_dl0_example("output.fits", data) - S_cal = write_dl1_tel_example("cal_output.fits", data) - S_context = context_manager_example("output_context.fits", data)