Skip to content

Commit

Permalink
some redundancy with linkml-run-examples
Browse files Browse the repository at this point in the history
  • Loading branch information
turbomam committed Oct 29, 2024
1 parent 7500265 commit c424733
Showing 1 changed file with 105 additions and 0 deletions.
105 changes: 105 additions & 0 deletions src/nmdc_submission_schema/scripts/check_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# could check globally, targeted in slot usages, or in all induced slots from all classes
# check though validation of an instance? requires population of all other required slots too
from pathlib import Path

from jsonasobj2 import JsonObj
from linkml_runtime import SchemaView
from collections import defaultdict
import csv

#
# tsv_file = Path("../schema/nmdc_submission_schema.tsv")
#
# # how to load schema? hardcoded file path, or pathlib?

schema_file = Path("../nmdc_submission_schema/schema/nmdc_submission_schema.yaml")
schema_view = SchemaView(schema_file) # time it

# # print(schema_view.schema.name)
#
# all_slots = schema_view.all_slots()
#
# attribute_usage_count = defaultdict(int)
#
# # Assume `slot_dict` is your dictionary with slot names as keys and SlotDefinitions as values
# for slot_name, slot_def in sorted(all_slots.items()):
# # Use vars() to get the dictionary of attributes for the SlotDefinition object
# attributes = vars(slot_def)
#
# # Filter the attributes to only show the populated ones (excluding None, empty lists, etc.)
# populated_attributes = {k: v for k, v in sorted(attributes.items()) if v not in [None, '', [], {}, ()]}
#
# # Print or log the results as needed
# # print(f"Slot Name: {slot_name}")
# for attr_name, attr_value in populated_attributes.items():
# # print(f" {attr_name}: {attr_value}")
# attribute_usage_count[attr_name] += 1
#
# # Convert defaultdict to a regular dict if needed
# attribute_usage_count = dict(attribute_usage_count)
#
# # Print or log the attribute usage count
# for attr_name, count in attribute_usage_count.items():
# print(f"{attr_name}: {count}")
#
# # Prepare data for TSV file
# # Using an explicit type conversion to str for keys and values
# rows = [{'Attribute Name': str(attr_name), 'Usage Count': int(count)} for attr_name, count in
# attribute_usage_count.items()]
#
# # Define the TSV file name
# tsv_file_name = "attribute_usage_count.tsv"
#
# # Write the results to a TSV file using DictWriter
# with open(tsv_file_name, mode='w', newline='') as tsv_file:
# writer = csv.DictWriter(tsv_file, fieldnames=['Attribute Name', 'Usage Count'], delimiter='\t')
# writer.writeheader() # Write the header row
# writer.writerows(rows) # Write the data rows
#
# print(f"Attribute usage count has been saved to {tsv_file_name}")

all_classes = schema_view.all_classes()
all_class_names = all_classes.keys()

attribute_usage_count = defaultdict(int)

for class_name in sorted(all_class_names):
print(f"Class Name: {class_name}")
current_class = schema_view.induced_class(class_name)
current_slots = current_class.attributes
for slot_name, slot_def in sorted(current_slots.items()):
# Use vars() to get the dictionary of attributes for the SlotDefinition object
attributes = vars(slot_def)

# Filter the attributes to only show the populated ones (excluding None, empty lists, etc.)
populated_attributes = {k: v for k, v in sorted(attributes.items()) if
v not in [None, '', [], {}, (), JsonObj()]}

# Print or log the results as needed
# print(f"Slot Name: {slot_name}")
for attr_name, attr_value in sorted(populated_attributes.items()):
print(f"{class_name}:{slot_name} ... {attr_name} = {attr_value}")
attribute_usage_count[attr_name] += 1

# Convert defaultdict to a regular dict if needed
attribute_usage_count = dict(attribute_usage_count)

# Print or log the attribute usage count
for attr_name, count in attribute_usage_count.items():
print(f"{attr_name}: {count}")

# Prepare data for TSV file
# Using an explicit type conversion to str for keys and values
rows = [{'Attribute Name': str(attr_name), 'Usage Count': int(count)} for attr_name, count in
attribute_usage_count.items()]

# Define the TSV file name
tsv_file_name = "attribute_usage_count.tsv"

# Write the results to a TSV file using DictWriter
with open(tsv_file_name, mode='w', newline='') as tsv_file:
writer = csv.DictWriter(tsv_file, fieldnames=['Attribute Name', 'Usage Count'], delimiter='\t')
writer.writeheader() # Write the header row
writer.writerows(rows) # Write the data rows

print(f"Attribute usage count has been saved to {tsv_file_name}")

0 comments on commit c424733

Please sign in to comment.