Skip to content

Commit

Permalink
Merge pull request #2121 from cta-observatory/fix_energy_in_classifier
Browse files Browse the repository at this point in the history
Allow using predicted energy in particle classifier
  • Loading branch information
maxnoe authored Nov 23, 2022
2 parents 56fce14 + 0c78904 commit a356f11
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 66 deletions.
51 changes: 44 additions & 7 deletions ctapipe/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,13 @@ def prod3_astri(subarray_prod3_paranal):
@pytest.fixture(scope="session")
def dl1_tmp_path(tmp_path_factory):
"""Temporary directory for global dl1 test data"""
return tmp_path_factory.mktemp("dl1")
return tmp_path_factory.mktemp("dl1_")


@pytest.fixture(scope="session")
def dl2_tmp_path(tmp_path_factory):
"""Temporary directory for global dl2 test data"""
return tmp_path_factory.mktemp("dl2")
return tmp_path_factory.mktemp("dl2_")


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -505,22 +505,59 @@ def energy_regressor_path(model_tmp_path):


@pytest.fixture(scope="session")
def particle_classifier_path(model_tmp_path):
def gamma_train_clf(model_tmp_path, energy_regressor_path):
    """Run ``ApplyModels`` on the small diffuse-gamma training file.

    The energy regressor stored at ``energy_regressor_path`` is applied to
    the ``gamma_diffuse_dl2_train_small`` dataset, and the result is written
    to ``gamma_train_clf.dl2.h5`` inside ``model_tmp_path``.

    Returns
    -------
    The path of the written output file.
    """
    from ctapipe.tools.apply_models import ApplyModels

    source = "dataset://gamma_diffuse_dl2_train_small.dl2.h5"
    destination = model_tmp_path / "gamma_train_clf.dl2.h5"

    # Assemble the command line up front so the tool call stays on one line.
    cli_args = [
        f"--input={source}",
        f"--output={destination}",
        f"--energy-regressor={energy_regressor_path}",
    ]
    run_tool(ApplyModels(), argv=cli_args, raises=True)
    return destination


@pytest.fixture(scope="session")
def proton_train_clf(model_tmp_path, energy_regressor_path):
    """Run ``ApplyModels`` on the small proton training file.

    The energy regressor stored at ``energy_regressor_path`` is applied to
    the ``proton_dl2_train_small`` dataset, and the result is written to
    ``proton_train_clf.dl2.h5`` inside ``model_tmp_path``.

    Returns
    -------
    The path of the written output file.
    """
    from ctapipe.tools.apply_models import ApplyModels

    source = "dataset://proton_dl2_train_small.dl2.h5"
    destination = model_tmp_path / "proton_train_clf.dl2.h5"

    # Assemble the command line up front so the tool call stays on one line.
    cli_args = [
        f"--input={source}",
        f"--output={destination}",
        f"--energy-regressor={energy_regressor_path}",
    ]
    run_tool(ApplyModels(), argv=cli_args, raises=True)
    return destination


@pytest.fixture(scope="session")
def particle_classifier_path(
model_tmp_path, energy_regressor_path, gamma_train_clf, proton_train_clf
):
from ctapipe.tools.train_particle_classifier import TrainParticleClassifier

out_file = model_tmp_path / "particle_classifier.pkl"
with FileLock(out_file.with_suffix(out_file.suffix + ".lock")):
if out_file.is_file():
return out_file

tool = TrainParticleClassifier()
config = resource_file("train_particle_classifier.yaml")

ret = run_tool(
tool,
TrainParticleClassifier(),
argv=[
"--signal=dataset://gamma_diffuse_dl2_train_small.dl2.h5",
"--background=dataset://proton_dl2_train_small.dl2.h5",
f"--signal={gamma_train_clf}",
f"--background={proton_train_clf}",
f"--output={out_file}",
f"--config={config}",
"--log-level=INFO",
Expand Down
2 changes: 2 additions & 0 deletions ctapipe/resources/train_particle_classifier.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ TrainParticleClassifier:
- intensity_skewness
- intensity_kurtosis
- area
- ExtraTreesRegressor_energy
- ExtraTreesRegressor_tel_energy

QualityQuery:
quality_criteria:
Expand Down
10 changes: 8 additions & 2 deletions ctapipe/tools/apply_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,11 @@ def setup(self):
self.loader = TableLoader(
parent=self,
h5file=self.h5file,
load_dl1_images=False,
load_dl1_parameters=True,
load_dl2=True,
load_simulated=True,
load_instrument=True,
load_dl1_images=False,
load_simulated=False,
)

self._reconstructors = []
Expand Down Expand Up @@ -148,6 +148,12 @@ def start(self):
reconstructor,
)
self._combine(reconstructor.stereo_combiner, mono_predictions)
# FIXME: this is a workaround for the table loader apparently not seeing
# newly written tables. We close and reopen the file so that the table
# loader also picks up the new tables.
self.h5file.close()
self.h5file = tables.open_file(self.output_path, mode="r+")
self.loader.h5file = self.h5file

def _apply(self, reconstructor):
prefix = reconstructor.model_cls
Expand Down
38 changes: 4 additions & 34 deletions ctapipe/tools/tests/test_apply_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,21 +67,23 @@ def test_apply_energy_regressor(
check_equal_array_event_order(trigger, energy)


def test_apply_particle_classifier(
def test_apply_both(
energy_regressor_path,
particle_classifier_path,
dl2_shower_geometry_file_lapalma,
tmp_path,
):
from ctapipe.tools.apply_models import ApplyModels

input_path = dl2_shower_geometry_file_lapalma
output_path = tmp_path / "particle.dl2.h5"
output_path = tmp_path / "particle-and-energy.dl2.h5"

ret = run_tool(
ApplyModels(),
argv=[
f"--input={input_path}",
f"--output={output_path}",
f"--energy-regressor={energy_regressor_path}",
f"--particle-classifier={particle_classifier_path}",
"--StereoMeanCombiner.weights=konrad",
],
Expand Down Expand Up @@ -114,38 +116,6 @@ def test_apply_particle_classifier(
assert f"{prefix}_tel_prediction" in events.colnames
assert f"{prefix}_tel_is_valid" in events.colnames


def test_apply_both(
energy_regressor_path,
particle_classifier_path,
dl2_shower_geometry_file_lapalma,
tmp_path,
):
from ctapipe.tools.apply_models import ApplyModels

input_path = dl2_shower_geometry_file_lapalma
output_path = tmp_path / "particle-and-energy.dl2.h5"

ret = run_tool(
ApplyModels(),
argv=[
f"--input={input_path}",
f"--output={output_path}",
f"--particle-classifier={particle_classifier_path}",
f"--energy-regressor={energy_regressor_path}",
"--StereoMeanCombiner.weights=konrad",
],
)
assert ret == 0

loader = TableLoader(output_path, load_dl2=True)

events = loader.read_subarray_events()
assert "ExtraTreesRegressor_energy" in events.colnames
assert "ExtraTreesClassifier_prediction" in events.colnames

events = loader.read_telescope_events()
assert "ExtraTreesClassifier_prediction" in events.colnames
assert "ExtraTreesRegressor_energy" in events.colnames

from ctapipe.io.tests.test_table_loader import check_equal_array_event_order
Expand Down
7 changes: 6 additions & 1 deletion ctapipe/tools/tests/test_process_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ def test_process_apply_energy(


def test_process_apply_classification(
tmp_path, particle_classifier_path, prod5_gamma_lapalma_simtel_path
tmp_path,
energy_regressor_path,
particle_classifier_path,
prod5_gamma_lapalma_simtel_path,
):
from ctapipe.tools.process import ProcessorTool

Expand All @@ -76,6 +79,7 @@ def test_process_apply_classification(
"ShowerProcessor": {
"reconstructor_types": [
"HillasReconstructor",
"EnergyRegressor",
"ParticleClassifier",
]
},
Expand All @@ -90,6 +94,7 @@ def test_process_apply_classification(
f"--output={output}",
"--write-images",
"--write-showers",
f"--energy-regressor={energy_regressor_path}",
f"--particle-classifier={particle_classifier_path}",
f"--config={config_path}",
]
Expand Down
26 changes: 4 additions & 22 deletions ctapipe/tools/tests/test_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ def test_train_particle_classifier(particle_classifier_path):
ParticleClassifier.read(particle_classifier_path)


def test_too_few_events(tmp_path, dl2_shower_geometry_file, dl2_proton_geometry_file):
def test_too_few_events(tmp_path, dl2_shower_geometry_file):
from ctapipe.tools.train_energy_regressor import TrainEnergyRegressor
from ctapipe.tools.train_particle_classifier import TrainParticleClassifier

tool = TrainEnergyRegressor()
config = resource_file("train_energy_regressor.yaml")
Expand All @@ -36,25 +35,8 @@ def test_too_few_events(tmp_path, dl2_shower_geometry_file, dl2_proton_geometry_
raises=True,
)

tool = TrainParticleClassifier()
config = resource_file("train_particle_classifier.yaml")
out_file = tmp_path / "particle_classifier.pkl"

with pytest.raises(ValueError, match="Only one class"):
run_tool(
tool,
argv=[
f"--signal={dl2_shower_geometry_file}",
f"--background={dl2_proton_geometry_file}",
f"--output={out_file}",
f"--config={config}",
"--log-level=INFO",
],
raises=True,
)


def test_cross_validation_results(tmp_path):
def test_cross_validation_results(tmp_path, gamma_train_clf, proton_train_clf):
from ctapipe.tools.train_energy_regressor import TrainEnergyRegressor
from ctapipe.tools.train_particle_classifier import TrainParticleClassifier

Expand Down Expand Up @@ -84,8 +66,8 @@ def test_cross_validation_results(tmp_path):
ret = run_tool(
tool,
argv=[
"--signal=dataset://gamma_diffuse_dl2_train_small.dl2.h5",
"--background=dataset://proton_dl2_train_small.dl2.h5",
f"--signal={gamma_train_clf}",
f"--background={proton_train_clf}",
f"--output={out_file}",
f"--config={config}",
f"--cv-output={classifier_cv_out_file}",
Expand Down

0 comments on commit a356f11

Please sign in to comment.