From a8ffc5b5807f96766d55747d30c905c1fe94adad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?=
Date: Fri, 19 Jul 2024 16:39:55 +0200
Subject: [PATCH] #2300: scripts: fix dataset detection

---
 scripts/JSON_data_files_validator.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/scripts/JSON_data_files_validator.py b/scripts/JSON_data_files_validator.py
index 7b3a9ce58f..0d3e9876f8 100644
--- a/scripts/JSON_data_files_validator.py
+++ b/scripts/JSON_data_files_validator.py
@@ -1,4 +1,5 @@
 import os
+import re
 import sys
 
 try:
@@ -382,23 +383,30 @@ def __validate_file(self, file_path):
                             "Passing by default when schema type not found.")
 
         if self.__validate_comm_links and schema_type == "LBDatafile":
+            # FIXME: extract into a method
             basename = os.path.basename(file_path)
-            digits = ''.join(filter(lambda c: c.isdigit(), basename))
+            numbers = re.findall(r'\d+', basename)
 
-            all_jsons = []
-            if not digits.isnumeric():
+            if not numbers:
                 # validate single file
+                files = [file_path]
                 all_jsons = [json_data]
-            elif int(digits) == 0:
+            elif numbers[-1] == '0':
                 # validate complete dataset
                 dirname = os.path.dirname(file_path)
-                files = self.__get_files_for_validation(dirname, None, None)
+                index = basename.rfind('0')
+                base = basename[0:index]
+                # FIXME: files = get_complete_dataset...
+                files = [os.path.join(dirname, f) for f in os.listdir(dirname)
+                         if f.startswith(base)]
+                print(files)  # REMOVE_ME / logging
                 all_jsons = [get_json(file) for file in files]
             else:
-                # only datasets starting with 0
+                # this dataset is already validated
                 return
 
             if not self.validate_comm_links(all_jsons):
+                # FIXME: could be undefined
                 logging.error(f" Invalid dataset: {files}")
@@ -409,10 +417,13 @@ def validate_comm_links(all_jsons):
         task_ids = set()
 
         for data in all_jsons:
+            # FIXME: KeyError: 'communications'
+            # if data... get("communications") is not None:
             comms = data["phases"][n]["communications"]
-            tasks = data["phases"][n]["tasks"]
             comm_ids.update({int(comm["from"]["id"]) for comm in comms})
             comm_ids.update({int(comm["to"]["id"]) for comm in comms})
+
+            tasks = data["phases"][n]["tasks"]
             task_ids.update({int(task["entity"]["id"]) for task in tasks})
 
         if not comm_ids.issubset(task_ids):
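
Follow-up note (not part of the patch): the "FIXME: files = get_complete_dataset..." comment hints at extracting the dataset-collection logic into a helper, which would also give the REMOVE_ME print a proper home. A minimal sketch of what that helper could look like; the function name, its module-level placement, and the logging.debug call replacing the print are assumptions, not existing code in JSON_data_files_validator.py:

    import logging
    import os

    def get_complete_dataset(file_path):
        """Return every file that appears to belong to the same dataset.

        Assumes rank-numbered file names such as data.0.json, data.1.json,
        ... (hypothetical naming), so stripping the trailing '0' of the
        rank-0 file yields the prefix shared by the whole dataset.
        """
        dirname = os.path.dirname(file_path)
        basename = os.path.basename(file_path)
        base = basename[0:basename.rfind('0')]
        files = [os.path.join(dirname, f) for f in os.listdir(dirname)
                 if f.startswith(base)]
        logging.debug(f"Dataset files: {files}")  # replaces print(files)
        return files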
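Similarly, the KeyError FIXME in validate_comm_links could be resolved with dict.get, so a phase without a "communications" entry simply contributes no communication ids instead of raising. A sketch under that assumption; the phase index n is shown as a parameter only to keep the snippet self-contained (in the script it comes from enclosing scope):

    def validate_comm_links(all_jsons, n=0):
        """Check that every communication endpoint is a known task id."""
        comm_ids = set()
        task_ids = set()
        for data in all_jsons:
            # .get() avoids the KeyError flagged in the FIXME; a phase
            # without communications contributes no endpoint ids
            comms = data["phases"][n].get("communications", [])
            comm_ids.update(int(comm["from"]["id"]) for comm in comms)
            comm_ids.update(int(comm["to"]["id"]) for comm in comms)
            tasks = data["phases"][n]["tasks"]
            task_ids.update(int(task["entity"]["id"]) for task in tasks)
        return comm_ids.issubset(task_ids)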