Skip to content

Commit

Permalink
Change implementation strategy to parameterize focus ontology file
Browse files Browse the repository at this point in the history
This implementation pattern should enable every directory with a Turtle
file to be able to call the script with the same command pattern,
whether in UCO or a downstream ontology.  Hard-coded logic moves out of
the script, and into maintaining a tab-separated-values file and Make
calls to specify what ontology file to inspect.  This way, any individual
ontology file can be loaded into Protege if desired.

This patch modifies the demonstrated Makefile call pattern.  After
demonstration by regenerating the catalog XML file for the root
`uco.ttl` graph, future patches will generate other catalog files.

This patch also removes some erroneously copy-pasted script text from
`/ontology/uco/master/Makefile`, and retires the first draft by
@DrSnowbird.

References:
* #449

Signed-off-by: Alex Nelson <[email protected]>
  • Loading branch information
ajnelson-nist committed Mar 13, 2023
1 parent ed283dd commit a86d280
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 172 deletions.
20 changes: 4 additions & 16 deletions ontology/uco/master/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,35 +17,23 @@ top_srcdir := $(shell cd ../../.. ; pwd)

all: \
catalog-v001.xml
$(MAKE) \
--directory $< \
--file $(top_srcdir)/src/review.mk

# TODO - Move virtual environment creation up to root directory and re-order top_srcdir's descent.
catalog-v001.xml: \
$(top_srcdir)/src/create-catalog-v001.xml.py \
$(top_srcdir)/tests/.venv.done.log
rm -f _$@
source $(top_srcdir)/tests/venv/bin/activate \
&& python3 $(top_srcdir)/src/create-catalog-v001.xml.py \
__$@ \
_$@ \
domain_directories.tsv \
"$(top_srcdir)" \
uco.ttl
xmllint \
--format \
__$@ \
> _$@
rm __$@
mv _$@ $@

check:
$(MAKE) \
--file $(top_srcdir)/src/review.mk \
check
check: \
catalog-v001.xml

clean:
@rm -f \
catalog-v001.xml
@$(MAKE) \
--file $(top_srcdir)/src/review.mk \
clean
242 changes: 194 additions & 48 deletions src/create-catalog-v001.xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,72 +14,218 @@
#
# We would appreciate acknowledgement if the software is used.

import xml.etree.ElementTree as ETree
__version__ = "0.0.3"

import argparse
import csv
import logging
import os
import xml.etree.ElementTree as ETree
from pathlib import Path
from typing import Dict, List, Set, Tuple
from xml.dom import minidom

from rdflib import OWL, RDF, Graph, URIRef

NS_OWL = OWL
NS_RDF = RDF


# XML version string to being file
# XML prolog, as generated by Protege.
XML_VERSION_INFO = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>'


def get_list_of_ttl_files(ont_dir):
"""
Returns the list of turtle files that needs to be made into an import statement
:param ont_dir: The directory that contains the uco ontology files
:return: A dictionary of turtle files per directory to convert into import statements
"""
ttl_files = {}
for root, dir, file in os.walk(ont_dir):
for name in file:
if name.endswith(".ttl") and not name.startswith("."):
dirs = root.split("/")
ttl_files[dirs[-1]] = name
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--debug", action="store_true")
# "x" mode - exclusive creation.
# https://docs.python.org/3/library/functions.html#open
parser.add_argument("out_xml", type=argparse.FileType("x"))
parser.add_argument(
"roots_tsv",
help="A two-column file, with column 1 being a string prefix in-common to ontology prefix IRIs, and column 2 being a file system directory relative to top_srcdir that is the root directory housing that ontology's files.",
type=argparse.FileType("r"),
)
parser.add_argument("top_srcdir")
parser.add_argument("in_ttl")
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

top_srcdir_abspath = Path(args.top_srcdir).resolve()
if not top_srcdir_abspath.exists():
raise FileNotFoundError(args.top_srcdir)
if not top_srcdir_abspath.is_dir():
raise NotADirectoryError(args.top_srcdir)

focus_graph_abspath = Path(args.in_ttl).resolve()
focus_graph = Graph()
focus_graph.parse(str(focus_graph_abspath))
focus_graph_srcdir_abspath = focus_graph_abspath.parent
focus_graph_relpath = focus_graph_abspath.relative_to(top_srcdir_abspath)
logging.debug(focus_graph_relpath)

top_srcdir_relpath = Path(os.path.relpath(top_srcdir_abspath, focus_graph_abspath))
logging.debug(top_srcdir_relpath)

return ttl_files
logging.debug(os.path.commonpath([top_srcdir_abspath, focus_graph_abspath]))

# Determine sole focus ontology IRI. Fail if there is not exactly 1 found.
n_focus_ontologies: Set[URIRef] = set()
for triple in focus_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)):
if isinstance(triple[0], URIRef):
n_focus_ontologies.add(triple[0])
if len(n_focus_ontologies) < 1:
raise ValueError("Found no focus ontology IRI.")
if len(n_focus_ontologies) > 1:
# TODO - Add --focus-iri flag?
raise NotImplementedError("Found multiple ontology IRIs to use as focus.")
n_focus_ontology: URIRef = sorted(n_focus_ontologies)[0]

def create_catalog_xml(ttl_files):
"""
Writes the catalog-v001.xml file to use for local importing
:param ttl_files: The dictionary containing turtle files to convert into import statements
"""
# Read TSV to get domain prefixes' housing directories.
ontology_string_prefix_to_domain_directory: Dict[str, Path] = dict()
reader = csv.reader(args.roots_tsv, delimiter="\t")
for row in reader:
ontology_string_prefix = row[0]
domain_directory_str = row[1].replace("${top_srcdir}", str(top_srcdir_abspath))
domain_directory = Path(domain_directory_str)
if not domain_directory.exists():
raise FileNotFoundError(domain_directory_str)
if not domain_directory.is_dir():
raise NotADirectoryError(domain_directory_str)
ontology_string_prefix_to_domain_directory[
ontology_string_prefix
] = domain_directory
logging.debug(ontology_string_prefix_to_domain_directory)

# Walk domain directories to associate ontology reference IRIs with backing files, and to build imports graph.
# Definition, possibly specialized to just this script:
# An ontology reference IRI is either an ontology IRI or a versionIRI of an ontology.
imports_graph = Graph()
n_ontology_reference_to_backing_file: Dict[URIRef, Path] = dict()
for domain_directory in ontology_string_prefix_to_domain_directory.values():
for dirpath, dirnames, filenames in os.walk(str(domain_directory)):
for filename in filenames:
# Skip build files (syntax normalization checks).
if filename.startswith("."):
continue
# Restrict to Turtle files.
if not filename.endswith(".ttl"):
continue
dirpath_path = Path(dirpath)
graph_filepath = dirpath_path / filename
tmp_graph = Graph()
tmp_graph.parse(str(graph_filepath))
for triple in tmp_graph.triples((None, NS_RDF.type, NS_OWL.Ontology)):
assert isinstance(triple[0], URIRef)
n_ontology_reference_to_backing_file[triple[0]] = graph_filepath
imports_graph.add(triple)
for triple in tmp_graph.triples((None, NS_OWL.imports, None)):
imports_graph.add(triple)
for triple in tmp_graph.triples((None, NS_OWL.versionIRI, None)):
assert isinstance(triple[2], URIRef)
n_ontology_reference_to_backing_file[triple[2]] = graph_filepath
imports_graph.add(triple)
logging.debug(len(imports_graph))
logging.debug(n_ontology_reference_to_backing_file)

unversioned_iri_imports_graph = Graph()
query = """\
SELECT ?nImportingOntology ?nImportedOntology
WHERE {
?nImportingOntology
owl:imports ?nVersionIRI ;
.
?nImportedOntology
owl:versionIRI ?nVersionIRI ;
.
}
"""
for versioned_iri_result in imports_graph.query(query):
unversioned_iri_imports_graph.add(
(versioned_iri_result[0], NS_OWL.imports, versioned_iri_result[1])
)
query = """\
SELECT ?nImportingOntology ?nImportedOntology
WHERE {
?nImportingOntology
owl:imports ?nImportedOntology ;
.
?nImportedOntology
a owl:Ontology ;
.
}
"""
for unversioned_iri_result in imports_graph.query(query):
unversioned_iri_imports_graph.add(
(unversioned_iri_result[0], NS_OWL.imports, unversioned_iri_result[1])
)
logging.debug(len(unversioned_iri_imports_graph))

n_imported_iri_to_relative_backing_path: Dict[URIRef, Path] = dict()

def _map_n_ontology_reference(n_ontology_reference: URIRef) -> None:
# Handle base case - node visited.
if n_ontology_reference in n_imported_iri_to_relative_backing_path:
return
n_imported_iris: Set[URIRef] = set()
for triple in imports_graph.triples(
(n_ontology_reference, NS_OWL.imports, None)
):
assert isinstance(triple[2], URIRef)
n_imported_iri = triple[2]
n_imported_iris.add(n_imported_iri)
imported_iri_backing_file_abspath = n_ontology_reference_to_backing_file[
n_imported_iri
]
imported_iri_backing_file_relpath = Path(
os.path.relpath(
imported_iri_backing_file_abspath, focus_graph_srcdir_abspath
)
)
n_imported_iri_to_relative_backing_path[
n_imported_iri
] = imported_iri_backing_file_relpath
# Recurse.
for n_imported_iri in n_imported_iris:
_map_n_ontology_reference(n_imported_iri)

_map_n_ontology_reference(n_focus_ontology)
logging.debug(n_imported_iri_to_relative_backing_path)

# Create catalog XML tree.
xml_root = ETree.Element("catalog")
# Creates the proper attributes for the root node

# Mimic attributes for the root node from exemplar generated by Protege.
xml_root.attrib = {
"prefer": "public",
"xmlns": "urn:oasis:names:tc:entity:xmlns:xml:catalog",
}

# Sorts turtle files to ensure imports are alphabetical
sorted_ttl_files = sorted(ttl_files.items())
# Creates each import statement as a child node
for key, val in sorted_ttl_files:
if key != "master": # Skips master (uco.ttl) import
uri_string = os.path.join("..", key, val)
name_string = f"https://ontology.unifiedcyberontology.org/uco/{key}"
uri = ETree.SubElement(xml_root, "uri")
uri.attrib = {
"id": "User Entered Import Resolution",
"uri": uri_string,
"name": name_string,
}

# Writes the xml tree to the specified output file
output_file = os.path.abspath(
os.path.join("ontology", "uco", "master", "catalog-v002.xml")
# Sort catalog entries by relative file path, again mimicking Protege behavior.
catalog_entries: List[Tuple[str, str]] = sorted(
[
(
str(n_imported_iri_to_relative_backing_path[n_ontology_reference]),
str(n_ontology_reference),
)
for n_ontology_reference in n_imported_iri_to_relative_backing_path.keys()
]
)
for catalog_entry in catalog_entries:
e_child = ETree.SubElement(xml_root, "uri")
e_child.attrib = {
"id": "User Entered Import Resolution",
"uri": catalog_entry[0],
"name": catalog_entry[1],
}
xml_tree_string = minidom.parseString(
ETree.tostring(xml_root, encoding="utf-8", method="xml").decode("utf-8")
).toprettyxml(indent=" ")
with open(output_file, "w") as output:
output.write(f"{XML_VERSION_INFO}\n")
# ETree.indent(xml_root, level=0) -- Only available in python 3.9 or later
# Writes the xml tree without the default 'version' statement
output.write(f"{xml_tree_string[23:]}\n")
).toprettyxml(indent=" ")
args.out_xml.write(f"{XML_VERSION_INFO}\n")
args.out_xml.write(f"{xml_tree_string[23:]}")

return


if __name__ == "__main__":
ontology_location = os.path.abspath(os.path.join("ontology", "uco"))
ttl_file_list = get_list_of_ttl_files(ontology_location)
create_catalog_xml(ttl_file_list)
main()
Loading

0 comments on commit a86d280

Please sign in to comment.