cta-observatory · morcuended · Jan 14, 2022 · Jan 4, 2022 · Jan 4, 2022 · Dec 2, 2021
diff --git a/osa/job.py b/osa/job.py
@@ -289,13 +289,13 @@ def sequence_calibration_filenames(sequence_list):
 
         if not sequence.parent_list:
             drs4_pedestal_run_id = sequence.previousrun
-            calibration_run_id = sequence.run
+            pedcal_run_id = sequence.run
         else:
             drs4_pedestal_run_id = sequence.parent_list[0].previousrun
-            calibration_run_id = sequence.parent_list[0].run
+            pedcal_run_id = sequence.parent_list[0].run
 
         drs4_pedestal_file = f"drs4_pedestal.Run{drs4_pedestal_run_id:05d}.0000.h5"
-        calibration_file = f"calibration_filters_52.Run{calibration_run_id:05d}.0000.h5"
+        calibration_file = f"calibration_filters_52.Run{pedcal_run_id:05d}.0000.h5"
 
         # Assign the calibration and drive files to the sequence object
         sequence.drive = drive_file
@@ -305,7 +305,7 @@ def sequence_calibration_filenames(sequence_list):
         sequence.calibration = (
             Path(cfg.get("LST1", "CALIB_DIR")) / nightdir / "pro" / calibration_file
         )
-        sequence.time_calibration = get_time_calibration_file(calibration_run_id)
+        sequence.time_calibration = get_time_calibration_file(pedcal_run_id)
 
 
 def plot_job_statistics(sacct_output: pd.DataFrame, directory: Path):

diff --git a/osa/provenance/capture.py b/osa/provenance/capture.py
@@ -15,7 +15,6 @@
 import psutil
 import yaml
 
-from osa.provenance.io import read_prov
 from osa.provenance.utils import get_log_config, parse_variables
 
 # gammapy specific
@@ -52,9 +51,9 @@
 PROV_PREFIX = provconfig["PREFIX"]
 SUPPORTED_HASH_METHOD = ["md5"]
 SUPPORTED_HASH_BUFFER = ["content", "path"]
+REDUCTION_TASKS = ["r0_to_dl1", "dl1ab", "dl1_datacheck", "dl1_to_dl2"]
 
 # global variables
-sessions = set()
 traced_entities = {}
 session_name = ""
 session_tag = ""
@@ -99,8 +98,16 @@ def wrapper(*args, **kwargs):
         # variables parsing
         global session_name, session_tag
         class_instance = parse_variables(class_instance)
-        session_tag = f"{activity}:{class_instance.ObservationRun}"
-        session_name = f"{class_instance.ObservationRun}"
+        if class_instance.__name__ in REDUCTION_TASKS:
+            session_tag = f"{activity}:{class_instance.ObservationRun}"
+            session_name = f"{class_instance.ObservationRun}"
+        else:
+            session_tag = (
+                f"{activity}:{class_instance.PedestalRun}-{class_instance.CalibrationRun}"
+            )
+            session_name = f"{class_instance.PedestalRun}-{class_instance.CalibrationRun}"
+        # OSA specific
+        # variables parsing
 
         # provenance capture before execution
         derivation_records = get_derivation_records(class_instance, activity)
@@ -205,7 +212,7 @@ def get_entity_id(value, item):
         entity_name = item["entityName"]
         entity_type = definition["entities"][entity_name]["type"]
     except Exception as ex:
-        logger.warning(f"{ex} in {item}")
+        logger.warning(f"Not found in model {ex} in {item}")
         entity_name = ""
         entity_type = ""
 
@@ -220,7 +227,8 @@ def get_entity_id(value, item):
         # osa specific hash path
         # async calls does not allow for hash content
         return get_file_hash(value, buffer="path")
-
+        # osa specific hash path
+        # async calls do not allow hashing the file content
     try:
         entity_id = abs(hash(value) + hash(str(value)))
         if hasattr(value, "entity_version"):
@@ -331,7 +339,9 @@ def get_python_packages():
 
 def log_prov_info(prov_dict):
     """Write a dictionary to the logger."""
-    prov_dict["session_tag"] = session_tag  # OSA specific session tag
+    # OSA-specific session tag, used when merging provenance from parallel sessions
+    prov_dict["session_tag"] = session_tag
+    #
     record_date = datetime.datetime.now().isoformat()
     logger.info(f"{PROV_PREFIX}{record_date}{PROV_PREFIX}{prov_dict}")
 
@@ -341,32 +351,39 @@ def log_session(class_instance, start):
     # OSA specific
     # prov session is outside scripting and is run-wise
     # we may have different sessions/runs in the same log file
-    session_id = abs(hash(class_instance))
-    lines = read_prov(filename=LOG_FILENAME)
-    for line in lines:
-        if line.get("observation_run", 0) == class_instance.ObservationRun:
-            session_id = lines[0]["session_id"]
-            sessions.add(session_id)
-
-    if session_id not in sessions:
-        sessions.add(session_id)
-        system = get_system_provenance()
-        log_record = {
-            "session_id": session_id,
-            "name": session_name,
-            "startTime": start,
-            "system": system,
-            # OSA specific
-            "software_version": class_instance.SoftwareVersion,
-            "observation_date": class_instance.ObservationDate,
-            "observation_run": class_instance.ObservationRun,  # a session is run-wise
-            "config_file": class_instance.ProcessingConfigFile,
-            "config_file_hash": get_file_hash(
-                class_instance.ProcessingConfigFile, buffer="path"
-            ),
-            "config_file_hash_type": get_hash_method(),
-        }
-        log_prov_info(log_record)
+    # session_id = abs(hash(class_instance))
+    if class_instance.__name__ in REDUCTION_TASKS:
+        session_id = f"{class_instance.ObservationDate}{class_instance.ObservationRun}"
+    else:
+        session_id = f"{class_instance.PedestalRun}{class_instance. CalibrationRun}"
+    # OSA specific
+    # prov session is outside scripting and is run-wise
+    # we may have different sessions/runs in the same log file
+
+    system = get_system_provenance()
+    log_record = {
+        "session_id": session_id,
+        "name": session_name,
+        "startTime": start,
+        "system": system,
+        # OSA specific
+        "observation_date": class_instance.ObservationDate,
+        # OSA specific
+        "software_version": class_instance.SoftwareVersion,
+        "config_file": class_instance.ProcessingConfigFile,
+        "config_file_hash": get_file_hash(
+            class_instance.ProcessingConfigFile, buffer="path"
+        ),
+        "config_file_hash_type": get_hash_method(),
+    }
+    if class_instance.__name__ in REDUCTION_TASKS:
+        log_record[
+            "observation_run"
+        ] = class_instance.ObservationRun  # a session is run-wise
+    else:
+        log_record["pedestal_run"] = class_instance.PedestalRun
+        log_record["calibration_run"] = class_instance.CalibrationRun
+    log_prov_info(log_record)
     return session_id
 
 

diff --git a/osa/provenance/config/definition.yaml b/osa/provenance/config/definition.yaml
@@ -52,6 +52,74 @@
 #
 
 activities:
+    drs4_pedestal:
+        description:
+            "Create pedestal file"
+        parameters:
+        usage:
+            - role: "Subrun for pedestal"
+              description: "Raw observation file for pedestal"
+              entityName: RawObservationFile
+              value: RawObservationFilePedestal
+#              filepath: /fefs/aswg/data/real/R0/20210913/LST-1.1.Run06268.0000.fits.fz
+        generation:
+            - role: "Pedestal"
+              description: "Pedestal calibration file"
+              entityName: PedestalFile
+              value: PedestalFile
+#              filepath:  /fefs/aswg/data/real/calibration/20210913/v0.7.5/drs4_pedestal.Run06268.0000.fits
+            - role: "Check plot for pedestal"
+              description: "Pedestal check plot"
+              entityName: PedestalCheckPlot
+              value: PedestalCheckPlot
+#              filepath: /fefs/aswg/data/real/calibration/20210913/v0.7.5/log/drs4_pedestal.Run06268.0000.pdf
+
+    calibrate_charge:
+        description:
+            "Create charge calibration file"
+        parameters:
+        usage:
+            - role: "Subrun for calibration"
+              description: "Raw observation file for calibration"
+              entityName: RawObservationFile
+              value: RawObservationFileCalibration
+#              filepath: /fefs/aswg/data/real/R0/20210913/LST-1.1.Run06274.0000.fits.fz
+            - role: "Pedestal file"
+              description: "Pedestal file used"
+              entityName: PedestalFile
+              value: PedestalFile
+#              filepath: /fefs/aswg/data/real/calibration/20210913/v0.7.5/drs4_pedestal.Run06268.0000.fits
+#            - role: "Run summary"
+#              description: "Run summary configuration"
+#              entityName: RunSummaryFile
+#              value: RunSummaryFile
+#              filepath: /fefs/aswg/data/real/monitoring/RunSummary/RunSummary_20210913.ecsv
+#            - role: "Configuration file"
+#              description: "Configuration file for camera"
+#              entityName: AnalysisConfigFile
+#              value: CalibrationConfigurationFile
+#              filepath: /fefs/aswg/software/virtual_env/ctasoft/cta-lstchain/lstchain/data/onsite_camera_calibration_param.json
+#            - role: "Systematics correction file"
+#              description: "Systematics correction file"
+#              entityName: SystematicsCorrectionFile
+#              value: SystematicsCorrectionFile
+#              filepath: /path/to/ff_systematics_file.h5
+#            - role: "Time calibration file"
+#              description: "Time calibration file"
+#              entityName: TimeCalibrationFile
+#              value: TimeCalibrationFile
+#              filepath: /fefs/aswg/data/real/calibration/20210913/v0.7.5/time_calibration.Run06274.0000.hdf5
+        generation:
+            - role: "Coefficients calibration file"
+              description: "Coefficients calibration file"
+              entityName: CoefficientsCalibrationFile
+              value: CoefficientsCalibrationFile
+#              filepath: /fefs/aswg/data/real/calibration/20210913/v0.7.5/calibration.Run06274.0000.hdf5
+            - role: "Check plot for calibration"
+              description: "Calibration check plot"
+              entityName: CalibrationCheckPlot
+              value: CalibrationCheckPlot
+#              filepath: /fefs/aswg/data/real/calibration/20210913/v0.7.5/log/calibration.Run06274.0000.pedestal.Run06268.0000.pdf
     r0_to_dl1:
         description:
             "Create DL1 files for an observation run and subrun"
@@ -280,6 +348,10 @@ entities:
     PythonObject:
         description: "Python variable in memory"
         type: PythonObject
+    RawObservationFile:
+        description: "Raw observation compressed FITS file"
+        type: File
+        contentType: application/fits
     R0SubrunDataset:
         description: "R0 subrun file in FITS format on the disk"
         type: File
@@ -293,17 +365,29 @@ entities:
         type: File
         contentType: text/plain
     PedestalFile:
-        description: "Pedestal file in FITS format on the disk"
+        description: "Pedestal file in HDF5 format on the disk"
         type: File
-        contentType: application/fits
+        contentType: application/x-hdf
+    PedestalCheckPlot:
+        description: "Pedestal check plot PDF file"
+        type: File
+        contentType: application/pdf
     CoefficientsCalibrationFile:
         description: "Coefficients calibration file in HDF5 format on the disk"
         type: File
         contentType: application/x-hdf
+#    SystematicsCorrectionFile:
+#        description: "Systematics correction file in HDF5 format on the disk"
+#        type: File
+#        contentType: application/x-hdf
     TimeCalibrationFile:
         description: "Time calibration file in HDF5 format on the disk"
         type: File
         contentType: application/x-hdf
+    CalibrationCheckPlot:
+        description: "Calibration check plot PDF file"
+        type: File
+        contentType: application/pdf
     AnalysisConfigFile:
         description: "LSTChain analysis configuration file in JSON format on the disk"
         type: File

diff --git a/osa/provenance/config/logger.yaml b/osa/provenance/config/logger.yaml
@@ -7,7 +7,6 @@ formatters:
 handlers:
     provHandler:
         class: logging.handlers.WatchedFileHandler
-        level: INFO
         formatter: simple
         filename: prov.log
 loggers:

diff --git a/osa/provenance/io.py b/osa/provenance/io.py
@@ -186,7 +186,7 @@ def provlist2provdoc(provlist):
                     records[progen_id] = progen
                 ent.wasDerivedFrom(progen)
             for k, v in provdict.items():
-                if k != "session_tag":
+                if k not in["session_tag", "hash", "hash_type"]:
                     ent.add_attributes({k: str(v)})
         # agent
     return pdoc