Skip to content

Commit

Permalink
Be able to look up base from derived cluster
Browse files Browse the repository at this point in the history
  • Loading branch information
erwinpan1 committed Sep 14, 2024
1 parent e73ef4a commit 9401fe6
Showing 1 changed file with 28 additions and 14 deletions.
42 changes: 28 additions & 14 deletions ParseXmlFolder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,58 @@
import sys
import xml.etree.ElementTree as ET

# Lookup tables filled in by parse_xml_files_in_folder().

# Cluster name -> {'file': path of the XML file that declares the cluster}
name2XmlMap = {}
# Cluster id (int, parsed from its hex string) -> {'name': ..., 'file': ...}
id2XmlMap = {}
# Derived-cluster id (int) -> {'name': ..., 'file': ..., 'baseCluster': base cluster name}
parentClusterMap = {}

def parse_xml_files_in_folder(folder_path):
    """Index the clusters declared by every ``.xml`` file in *folder_path*.

    For each file the root's ``clusterIds/clusterId`` elements are read and
    recorded in the module-level maps:

    * ``name2XmlMap[name]`` gets ``{'file': path}`` for every cluster.
    * ``id2XmlMap[id]`` gets ``{'name', 'file'}`` for clusters whose file has
      no ``baseCluster`` attribute on its ``classification`` element.
    * ``parentClusterMap[id]`` gets ``{'name', 'file', 'baseCluster'}`` for
      clusters whose file does declare a ``baseCluster``.

    After all files are read, each derived cluster is added to ``id2XmlMap``
    with the file of its base cluster (looked up by name).

    Ids are parsed as base-16 integers. Files that fail to parse, clusters
    without a usable id, and derived clusters whose base is unknown are
    reported on stdout and skipped instead of aborting the whole scan.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        None. Results are stored in the module-level maps.
    """
    print(f"DEBUG: Starting to parse XML files in folder: {folder_path}")

    # os.listdir() order is unspecified; sort so output is reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.xml'):
            continue

        full_path = os.path.join(folder_path, filename)
        print("==========================================================")
        print(f"DEBUG: Processing file: {filename}")

        try:
            root = ET.parse(full_path).getroot()
        except ET.ParseError as e:
            print(f"ERROR: Error parsing {filename}: {e}")
            continue
        print(f"DEBUG: Successfully parsed {filename}")

        # A file describes a derived cluster when its classification element
        # carries a baseCluster attribute; the element itself may be absent.
        classification = root.find('classification')
        baseCluster = None
        if classification is not None:
            baseCluster = classification.get('baseCluster')

        # Compare with None explicitly: an Element's truth value reflects its
        # number of children, not whether find() located it.
        clusterIds = root.find('clusterIds')
        if clusterIds is None:
            continue
        print(clusterIds)

        clusterIdSet = clusterIds.findall('clusterId')
        print(clusterIdSet)

        for clusterId in clusterIdSet:
            cluster_name = clusterId.get('name')
            raw_id = clusterId.get('id')

            # Every cluster is indexed by name so that derived clusters can
            # locate the file of their base cluster afterwards.
            name2XmlMap[str(cluster_name)] = {'file': full_path}

            if raw_id is None:
                print(f'Found cluster without id {cluster_name}')
                continue

            try:
                cluster_key = int(raw_id, 16)
            except ValueError:
                print(f"ERROR: Invalid cluster id {raw_id!r} in {filename}")
                continue

            if baseCluster is not None:
                parentClusterMap[cluster_key] = {
                    'name': cluster_name,
                    'file': full_path,
                    'baseCluster': baseCluster,
                }
                print(f'Found cluster {raw_id} with parent cluster {baseCluster}')
            else:
                id2XmlMap[cluster_key] = {'name': cluster_name, 'file': full_path}
                print(f'Found cluster with id {raw_id}')

    print(name2XmlMap)

    # Resolve derived clusters only after every file has been read, because
    # the base cluster may live in a file processed later than the derived one.
    for key, value in parentClusterMap.items():
        print(f"Processing derived class: name: {value['name']}, parent: {value['baseCluster']}")
        base_entry = name2XmlMap.get(value['baseCluster'])
        if base_entry is None:
            print(f"ERROR: Base cluster {value['baseCluster']!r} of {value['name']} not found")
            continue
        id2XmlMap[key] = {'name': value['name'], 'file': base_entry['file']}

    print(id2XmlMap)


if __name__ == "__main__":
Expand Down

0 comments on commit 9401fe6

Please sign in to comment.