From f899afc5ff4cc688fb5576f55c5bee54e9365ae4 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Wed, 16 Nov 2022 16:44:14 +0100 Subject: [PATCH 1/5] Add test for using energy in particle classifier --- ctapipe/conftest.py | 30 ++++++++-- .../resources/train_particle_classifier.yaml | 2 + ctapipe/tools/tests/test_apply_models.py | 56 +------------------ 3 files changed, 29 insertions(+), 59 deletions(-) diff --git a/ctapipe/conftest.py b/ctapipe/conftest.py index 3bacb02a2e5..42713804284 100644 --- a/ctapipe/conftest.py +++ b/ctapipe/conftest.py @@ -505,7 +505,8 @@ def energy_regressor_path(model_tmp_path): @pytest.fixture(scope="session") -def particle_classifier_path(model_tmp_path): +def particle_classifier_path(model_tmp_path, energy_regressor_path): + from ctapipe.tools.apply_models import ApplyModels from ctapipe.tools.train_particle_classifier import TrainParticleClassifier out_file = model_tmp_path / "particle_classifier.pkl" @@ -513,14 +514,33 @@ def particle_classifier_path(model_tmp_path): if out_file.is_file(): return out_file - tool = TrainParticleClassifier() config = resource_file("train_particle_classifier.yaml") + proton_train = model_tmp_path / "proton_train.dl2.h5" + gamma_train = model_tmp_path / "gamma_train.dl2.h5" + + for inpath, outpath in zip( + [ + "dataset://gamma_diffuse_dl2_train_small.dl2.h5", + "dataset://proton_dl2_train_small.dl2.h5", + ], + (proton_train, gamma_train), + ): + ret = run_tool( + ApplyModels(), + argv=[ + f"--input={inpath}", + f"--output={outpath}", + f"--energy-regressor={energy_regressor_path}", + ], + ) + assert ret == 0 + ret = run_tool( - tool, + TrainParticleClassifier(), argv=[ - "--signal=dataset://gamma_diffuse_dl2_train_small.dl2.h5", - "--background=dataset://proton_dl2_train_small.dl2.h5", + f"--signal={gamma_train}", + f"--background={proton_train}", f"--output={out_file}", f"--config={config}", "--log-level=INFO", diff --git a/ctapipe/resources/train_particle_classifier.yaml b/ctapipe/resources/train_particle_classifier.yaml index a99442a2f77..a7ef64ec98f 100644 --- a/ctapipe/resources/train_particle_classifier.yaml +++ b/ctapipe/resources/train_particle_classifier.yaml @@ -47,6 +47,8 @@ TrainParticleClassifier: - intensity_skewness - intensity_kurtosis - area + - ExtraTreesRegressor_energy + - ExtraTreesRegressor_tel_energy QualityQuery: quality_criteria: diff --git a/ctapipe/tools/tests/test_apply_models.py b/ctapipe/tools/tests/test_apply_models.py index 7393a9bfcd3..3aad3248990 100644 --- a/ctapipe/tools/tests/test_apply_models.py +++ b/ctapipe/tools/tests/test_apply_models.py @@ -1,10 +1,6 @@ import numpy as np -from ctapipe.containers import ( - EventIndexContainer, - ParticleClassificationContainer, - ReconstructedEnergyContainer, -) +from ctapipe.containers import EventIndexContainer, ReconstructedEnergyContainer from ctapipe.core import run_tool from ctapipe.io import TableLoader, read_table @@ -61,54 +57,6 @@ def test_apply_energy_regressor( assert f"{prefix}_tel_is_valid" in events.colnames -def test_apply_particle_classifier( - particle_classifier_path, - dl2_shower_geometry_file_lapalma, - tmp_path, -): - from ctapipe.tools.apply_models import ApplyModels - - input_path = dl2_shower_geometry_file_lapalma - output_path = tmp_path / "particle.dl2.h5" - - ret = run_tool( - ApplyModels(), - argv=[ - f"--input={input_path}", - f"--output={output_path}", - f"--particle-classifier={particle_classifier_path}", - "--StereoMeanCombiner.weights=konrad", - ], - ) - assert ret == 0 - - prefix = "ExtraTreesClassifier" - table = read_table(output_path, f"/dl2/event/subarray/classification/{prefix}") - for col in "obs_id", "event_id": - assert table[col].description == EventIndexContainer.fields[col].description - - for name, field in ParticleClassificationContainer.fields.items(): - colname = f"ExtraTreesClassifier_{name}" - assert colname in table.colnames - assert table[colname].description == field.description - - loader = TableLoader(output_path, load_dl2=True) - events = loader.read_subarray_events() - assert f"{prefix}_prediction" in events.colnames - assert f"{prefix}_telescopes" in events.colnames - assert f"{prefix}_is_valid" in events.colnames - assert f"{prefix}_goodness_of_fit" in events.colnames - - events = loader.read_telescope_events() - assert f"{prefix}_prediction" in events.colnames - assert f"{prefix}_telescopes" in events.colnames - assert f"{prefix}_is_valid" in events.colnames - assert f"{prefix}_goodness_of_fit" in events.colnames - - assert f"{prefix}_tel_prediction" in events.colnames - assert f"{prefix}_tel_is_valid" in events.colnames - - def test_apply_both( energy_regressor_path, particle_classifier_path, @@ -125,8 +73,8 @@ def test_apply_both( argv=[ f"--input={input_path}", f"--output={output_path}", - f"--particle-classifier={particle_classifier_path}", f"--energy-regressor={energy_regressor_path}", + f"--particle-classifier={particle_classifier_path}", "--StereoMeanCombiner.weights=konrad", ], ) From bba1b3271b2579ad3301fa8582a2bd346b1eb5e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Wed, 16 Nov 2022 16:48:01 +0100 Subject: [PATCH 2/5] Reopen h5file to allow reading just written new tables --- ctapipe/tools/apply_models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ctapipe/tools/apply_models.py b/ctapipe/tools/apply_models.py index 49aa949b7fc..3d2d11e3bfc 100644 --- a/ctapipe/tools/apply_models.py +++ b/ctapipe/tools/apply_models.py @@ -112,11 +112,11 @@ def setup(self): self.loader = TableLoader( parent=self, h5file=self.h5file, - load_dl1_images=False, load_dl1_parameters=True, load_dl2=True, - load_simulated=True, load_instrument=True, + load_dl1_images=False, + load_simulated=False, ) self._reconstructors = [] @@ -146,6 +146,9 @@ def start(self): reconstructor, ) self._combine(reconstructor.stereo_combiner, mono_predictions) + self.h5file.close() + self.h5file = tables.open_file(self.output_path, mode="r+") + self.loader.h5file = self.h5file def _apply(self, reconstructor): prefix = reconstructor.model_cls From 8f106530c48ecfecb09058d2ece725b7a39e7bf9 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Wed, 16 Nov 2022 16:50:14 +0100 Subject: [PATCH 3/5] Add back specific tests for prediction --- ctapipe/tools/tests/test_apply_models.py | 32 ++++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/ctapipe/tools/tests/test_apply_models.py b/ctapipe/tools/tests/test_apply_models.py index 3aad3248990..3f15aa2ebe2 100644 --- a/ctapipe/tools/tests/test_apply_models.py +++ b/ctapipe/tools/tests/test_apply_models.py @@ -1,6 +1,10 @@ import numpy as np -from ctapipe.containers import EventIndexContainer, ReconstructedEnergyContainer +from ctapipe.containers import ( + EventIndexContainer, + ParticleClassificationContainer, + ReconstructedEnergyContainer, +) from ctapipe.core import run_tool from ctapipe.io import TableLoader, read_table @@ -80,12 +84,30 @@ def test_apply_both( ) assert ret == 0 - loader = TableLoader(output_path, load_dl2=True) + prefix = "ExtraTreesClassifier" + table = read_table(output_path, f"/dl2/event/subarray/classification/{prefix}") + for col in "obs_id", "event_id": + assert table[col].description == EventIndexContainer.fields[col].description + + for name, field in ParticleClassificationContainer.fields.items(): + colname = f"ExtraTreesClassifier_{name}" + assert colname in table.colnames + assert table[colname].description == field.description + loader = TableLoader(output_path, load_dl2=True) events = loader.read_subarray_events() - assert "ExtraTreesRegressor_energy" in events.colnames - assert "ExtraTreesClassifier_prediction" in events.colnames + assert f"{prefix}_prediction" in events.colnames + assert f"{prefix}_telescopes" in events.colnames + assert f"{prefix}_is_valid" in events.colnames + assert f"{prefix}_goodness_of_fit" in events.colnames events = loader.read_telescope_events() - assert "ExtraTreesClassifier_prediction" in events.colnames + assert f"{prefix}_prediction" in events.colnames + assert f"{prefix}_telescopes" in events.colnames + assert f"{prefix}_is_valid" in events.colnames + assert f"{prefix}_goodness_of_fit" in events.colnames + + assert f"{prefix}_tel_prediction" in events.colnames + assert f"{prefix}_tel_is_valid" in events.colnames + assert "ExtraTreesRegressor_energy" in events.colnames From e51bdb3466f937df68f56f32899bd980dd1c6290 Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Thu, 17 Nov 2022 12:38:40 +0100 Subject: [PATCH 4/5] Properly add fixtures for train_clf files --- ctapipe/conftest.py | 67 ++++++++++++++++---------- ctapipe/tools/tests/test_process_ml.py | 7 ++- ctapipe/tools/tests/test_train.py | 26 ++-------- 3 files changed, 52 insertions(+), 48 deletions(-) diff --git a/ctapipe/conftest.py b/ctapipe/conftest.py index 42713804284..18396dba657 100644 --- a/ctapipe/conftest.py +++ b/ctapipe/conftest.py @@ -194,13 +194,13 @@ def prod3_astri(subarray_prod3_paranal): @pytest.fixture(scope="session") def dl1_tmp_path(tmp_path_factory): """Temporary directory for global dl1 test data""" - return tmp_path_factory.mktemp("dl1") + return tmp_path_factory.mktemp("dl1_") @pytest.fixture(scope="session") def dl2_tmp_path(tmp_path_factory): """Temporary directory for global dl2 test data""" - return tmp_path_factory.mktemp("dl2") + return tmp_path_factory.mktemp("dl2_") @pytest.fixture(scope="session") @@ -505,8 +505,45 @@ def energy_regressor_path(model_tmp_path): @pytest.fixture(scope="session") -def particle_classifier_path(model_tmp_path, energy_regressor_path): +def gamma_train_clf(model_tmp_path, energy_regressor_path): from ctapipe.tools.apply_models import ApplyModels + + inpath = "dataset://gamma_diffuse_dl2_train_small.dl2.h5" + outpath = model_tmp_path / "gamma_train_clf.dl2.h5" + run_tool( + ApplyModels(), + argv=[ + f"--input={inpath}", + f"--output={outpath}", + f"--energy-regressor={energy_regressor_path}", + ], + raises=True, + ) + return outpath + + +@pytest.fixture(scope="session") +def proton_train_clf(model_tmp_path, energy_regressor_path): + from ctapipe.tools.apply_models import ApplyModels + + inpath = "dataset://proton_dl2_train_small.dl2.h5" + outpath = model_tmp_path / "proton_train_clf.dl2.h5" + run_tool( + ApplyModels(), + argv=[ + f"--input={inpath}", + f"--output={outpath}", + f"--energy-regressor={energy_regressor_path}", + ], + raises=True, + ) + return outpath + + +@pytest.fixture(scope="session") +def particle_classifier_path( + model_tmp_path, energy_regressor_path, gamma_train_clf, proton_train_clf +): from ctapipe.tools.train_particle_classifier import TrainParticleClassifier out_file = model_tmp_path / "particle_classifier.pkl" @@ -516,31 +553,11 @@ def particle_classifier_path(model_tmp_path, energy_regressor_path): config = resource_file("train_particle_classifier.yaml") - proton_train = model_tmp_path / "proton_train.dl2.h5" - gamma_train = model_tmp_path / "gamma_train.dl2.h5" - - for inpath, outpath in zip( - [ - "dataset://gamma_diffuse_dl2_train_small.dl2.h5", - "dataset://proton_dl2_train_small.dl2.h5", - ], - (proton_train, gamma_train), - ): - ret = run_tool( - ApplyModels(), - argv=[ - f"--input={inpath}", - f"--output={outpath}", - f"--energy-regressor={energy_regressor_path}", - ], - ) - assert ret == 0 - ret = run_tool( TrainParticleClassifier(), argv=[ - f"--signal={gamma_train}", - f"--background={proton_train}", + f"--signal={gamma_train_clf}", + f"--background={proton_train_clf}", f"--output={out_file}", f"--config={config}", "--log-level=INFO", diff --git a/ctapipe/tools/tests/test_process_ml.py b/ctapipe/tools/tests/test_process_ml.py index 79f00f45e3f..3b011fd86f5 100644 --- a/ctapipe/tools/tests/test_process_ml.py +++ b/ctapipe/tools/tests/test_process_ml.py @@ -57,7 +57,10 @@ def test_process_apply_energy( def test_process_apply_classification( - tmp_path, particle_classifier_path, prod5_gamma_lapalma_simtel_path + tmp_path, + energy_regressor_path, + particle_classifier_path, + prod5_gamma_lapalma_simtel_path, ): from ctapipe.tools.process import ProcessorTool @@ -76,6 +79,7 @@ def test_process_apply_classification( "ShowerProcessor": { "reconstructor_types": [ "HillasReconstructor", + "EnergyRegressor", "ParticleClassifier", ] }, @@ -90,6 +94,7 @@ def test_process_apply_classification( f"--output={output}", "--write-images", "--write-showers", + f"--energy-regressor={energy_regressor_path}", f"--particle-classifier={particle_classifier_path}", f"--config={config_path}", ] diff --git a/ctapipe/tools/tests/test_train.py b/ctapipe/tools/tests/test_train.py index 4957d2e14fb..3d2e5f1e654 100644 --- a/ctapipe/tools/tests/test_train.py +++ b/ctapipe/tools/tests/test_train.py @@ -16,9 +16,8 @@ def test_train_particle_classifier(particle_classifier_path): ParticleClassifier.read(particle_classifier_path) -def test_too_few_events(tmp_path, dl2_shower_geometry_file, dl2_proton_geometry_file): +def test_too_few_events(tmp_path, dl2_shower_geometry_file): from ctapipe.tools.train_energy_regressor import TrainEnergyRegressor - from ctapipe.tools.train_particle_classifier import TrainParticleClassifier tool = TrainEnergyRegressor() config = resource_file("train_energy_regressor.yaml") @@ -36,25 +35,8 @@ def test_too_few_events(tmp_path, dl2_shower_geometry_file, dl2_proton_geometry_ raises=True, ) - tool = TrainParticleClassifier() - config = resource_file("train_particle_classifier.yaml") - out_file = tmp_path / "particle_classifier.pkl" - - with pytest.raises(ValueError, match="Only one class"): - run_tool( - tool, - argv=[ - f"--signal={dl2_shower_geometry_file}", - f"--background={dl2_proton_geometry_file}", - f"--output={out_file}", - f"--config={config}", - "--log-level=INFO", - ], - raises=True, - ) - -def test_cross_validation_results(tmp_path): +def test_cross_validation_results(tmp_path, gamma_train_clf, proton_train_clf): from ctapipe.tools.train_energy_regressor import TrainEnergyRegressor from ctapipe.tools.train_particle_classifier import TrainParticleClassifier @@ -84,8 +66,8 @@ def test_cross_validation_results(tmp_path): ret = run_tool( tool, argv=[ - "--signal=dataset://gamma_diffuse_dl2_train_small.dl2.h5", - "--background=dataset://proton_dl2_train_small.dl2.h5", + f"--signal={gamma_train_clf}", + f"--background={proton_train_clf}", f"--output={out_file}", f"--config={config}", f"--cv-output={classifier_cv_out_file}", From 0c789049fd79dc563755ff54af246b26f5bb2ddd Mon Sep 17 00:00:00 2001 From: Maximilian Linhoff Date: Wed, 23 Nov 2022 13:29:02 +0100 Subject: [PATCH 5/5] Add comment for table loader workaround in apply-models --- ctapipe/tools/apply_models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ctapipe/tools/apply_models.py b/ctapipe/tools/apply_models.py index 3d2d11e3bfc..2048987964f 100644 --- a/ctapipe/tools/apply_models.py +++ b/ctapipe/tools/apply_models.py @@ -146,6 +146,9 @@ def start(self): reconstructor, ) self._combine(reconstructor.stereo_combiner, mono_predictions) + # FIXME: this is a not-so-nice solution for the issues that + # the table loader does not seem to see the newly written tables + # we close and reopen the file and then table loader loads also the new tables self.h5file.close() self.h5file = tables.open_file(self.output_path, mode="r+") self.loader.h5file = self.h5file