diff --git a/docs/introduction.rst b/docs/introduction.rst
index 06b93f4..c8fe220 100644
--- a/docs/introduction.rst
+++ b/docs/introduction.rst
@@ -27,10 +27,11 @@ Importing from alternative modeling frameworks
See :ref:`importers`
* OWL (but this only works for schema-style OWL)
+* SHACL (in progress)
* JSON-Schema
* SQL DDL
-In future other frameworks will be supported
+In future other frameworks will be supported.
Annotating schemas
------------------
diff --git a/docs/packages/importers.rst b/docs/packages/importers.rst
index 0aa4546..bdeb1fa 100644
--- a/docs/packages/importers.rst
+++ b/docs/packages/importers.rst
@@ -42,6 +42,16 @@ Use robot to convert ahead of time:
robot convert -i schemaorg.ttl -o schemaorg.ofn
schemauto import-owl schemaorg.ofn
+Importing from SHACL
+--------------------
+
+You can import from a SHACL shapes file (support is still in progress).
+
+.. code-block::
+
+    schemauto import-shacl tests/resources/shacl_simple.ttl
+
+
Importing from SQL
------------------
diff --git a/poetry.lock b/poetry.lock
index dd714bc..3666b03 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "airium"
@@ -3292,9 +3292,9 @@ files = [
[package.dependencies]
numpy = [
- {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@@ -3792,8 +3792,8 @@ files = [
annotated-types = ">=0.4.0"
pydantic-core = "2.20.1"
typing-extensions = [
- {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
{version = ">=4.6.1", markers = "python_version < \"3.13\""},
+ {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
]
[package.extras]
@@ -4116,7 +4116,6 @@ description = "A pure Python implementation of the trie data structure."
optional = false
python-versions = "*"
files = [
- {file = "PyTrie-0.4.0-py3-none-any.whl", hash = "sha256:f687c224ee8c66cda8e8628a903011b692635ffbb08d4b39c5f92b18eb78c950"},
{file = "PyTrie-0.4.0.tar.gz", hash = "sha256:8f4488f402d3465993fb6b6efa09866849ed8cda7903b50647b7d0342b805379"},
]
@@ -5095,7 +5094,7 @@ sphinx = ">=4.0"
name = "sphinx-pdj-theme"
version = "0.4.0"
description = "A cool theme for sphinx documentation"
-optional = false
+optional = true
python-versions = "*"
files = [
{file = "sphinx-pdj-theme-0.4.0.tar.gz", hash = "sha256:4b86bfd8b8e20344db56aba13473f634286149fa0203d18e0437157f48c7e0fa"},
@@ -5167,7 +5166,7 @@ test = ["flake8", "mypy", "pytest"]
name = "sphinxcontrib-mermaid"
version = "0.9.2"
description = "Mermaid diagrams in yours Sphinx powered docs"
-optional = false
+optional = true
python-versions = ">=3.7"
files = [
{file = "sphinxcontrib-mermaid-0.9.2.tar.gz", hash = "sha256:252ef13dd23164b28f16d8b0205cf184b9d8e2b714a302274d9f59eb708e77af"},
@@ -5959,10 +5958,9 @@ doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linke
test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
[extras]
-docs = []
-mariadb = []
+docs = ["Sphinx", "sphinx-pdj-theme", "sphinxcontrib-mermaid"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
-content-hash = "036cba73b6fd660157c70cb76be27a501017e8904b35c8d2ccb00d412bbba870"
+content-hash = "9c29a704add4aaf15c228f9d6a81164390f060582bee85a89d266e2232c4b0ed"
diff --git a/pyproject.toml b/pyproject.toml
index 684e019..1f31453 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,16 +54,19 @@ linkml-runtime = "^1.7.2"
duckdb = "^0.10.1"
numpy = "<2.0"
+Sphinx = { version = ">=4.4.0", optional = true }
+sphinx-pdj-theme = { version = ">=0.2.1", optional = true }
+sphinx-click = ">=3.1.0"
+sphinxcontrib-mermaid = { version = ">=0.9.2", optional = true }
+
[tool.poetry.dev-dependencies]
pytest = ">=7.1.1"
-Sphinx = ">=4.4.0"
-sphinx-pdj-theme = ">=0.2.1"
-sphinx-click = ">=3.1.0"
-sphinxcontrib-mermaid = ">=0.9.2"
myst-parser = "*"
jupyter = ">=1.0.0"
lxml = ">=4.9.1"
+#mariadb = { version = "^1.3", optional = true }
+
[tool.poetry.group.llm.dependencies]
llm = ">=0.12"
@@ -82,7 +85,7 @@ extract-schema = "schema_automator.utils.schema_extractor:cli"
[tool.poetry.extras]
docs = ["Sphinx", "sphinx-pdj-theme", "sphinxcontrib-mermaid"]
-mariadb = ["mariadb"]
+#mariadb = ["mariadb"]
[tool.codespell]
# Ref: https://github.com/codespell-project/codespell#using-a-config-file
diff --git a/schema_automator/cli.py b/schema_automator/cli.py
index bd76320..8e29b40 100644
--- a/schema_automator/cli.py
+++ b/schema_automator/cli.py
@@ -497,6 +497,34 @@ def import_rdfs(rdfsfile, output, metamodel_mappings, **args):
schema = sie.convert(rdfsfile, **args)
write_schema(schema, output)
+@main.command()
+@click.argument('shaclfile')
+@output_option
+@schema_name_option
+@click.option('--input-type', '-I',
+ default='turtle',
+ help="Input format, eg. turtle")
+@click.option('--identifier', '-I', help="Slot to use as identifier")
+@click.option('--model-uri', help="Model URI prefix")
+@click.option('--metamodel-mappings',
+ help="Path to metamodel mappings YAML dictionary")
+@click.option('--output', '-o', help="Path to saved yaml schema")
+def import_shacl(shaclfile, output, metamodel_mappings, **args):
+ """
+ Import an SHACL profile to LinkML
+
+ Example:
+
+ schemauto import-shacl mymodel.shacl.ttl -o mymodel.yaml
+ """
+ mappings_obj = None
+ if metamodel_mappings:
+ with open(metamodel_mappings) as f:
+ mappings_obj = yaml.safe_load(f)
+ sie = ShaclImportEngine(initial_metamodel_mappings=mappings_obj)
+ schema = sie.convert(shaclfile, **args)
+ write_schema(schema, output)
+
@main.command()
@click.argument('rdffile')
@output_option
diff --git a/schema_automator/importers/__init__.py b/schema_automator/importers/__init__.py
index 2011d25..fa187ac 100644
--- a/schema_automator/importers/__init__.py
+++ b/schema_automator/importers/__init__.py
@@ -3,3 +3,4 @@
from schema_automator.importers.dosdp_import_engine import DOSDPImportEngine
from schema_automator.importers.frictionless_import_engine import FrictionlessImportEngine
from schema_automator.importers.cadsr_import_engine import CADSRImportEngine
+from schema_automator.importers.shacl_import_engine import ShaclImportEngine
diff --git a/schema_automator/importers/shacl_import_engine.py b/schema_automator/importers/shacl_import_engine.py
new file mode 100644
index 0000000..352c9fa
--- /dev/null
+++ b/schema_automator/importers/shacl_import_engine.py
@@ -0,0 +1,241 @@
+from collections import defaultdict
+import logging
+
+from dataclasses import dataclass
+from typing import Dict, List, Any
+
+from rdflib import Graph, RDF, OWL, URIRef, RDFS, SKOS, SDO, Namespace
+
+from funowl import Literal
+
+from linkml.utils.schema_builder import SchemaBuilder
+from linkml_runtime import SchemaView
+from linkml_runtime.utils.formatutils import underscore
+from linkml_runtime.utils.introspection import package_schemaview
+from linkml_runtime.linkml_model import (
+ SchemaDefinition,
+ SlotDefinition,
+ ClassDefinition,
+)
+from schema_automator.importers.import_engine import ImportEngine
+
+# Module-level logger for this importer.
+logger = logging.getLogger(__name__)
+
+# schema.org namespace using the plain-http scheme; listed next to rdflib's
+# built-in SDO namespace in the mappings below so either spelling is matched.
+HTTP_SDO = Namespace("http://schema.org/")
+
+# Seed table: LinkML metamodel element name -> RDF terms treated as equivalent.
+# Keys are either metaslot names ("is_a", "domain_of", ...) or metamodel class
+# names (ClassDefinition / SlotDefinition); ShaclImportEngine.__post_init__
+# copies these into its forward and reverse lookup tables.
+DEFAULT_METAMODEL_MAPPINGS = {
+    "is_a": [RDFS.subClassOf, SKOS.broader],
+    "domain_of": [HTTP_SDO.domainIncludes, SDO.domainIncludes],
+    "rangeIncludes": [HTTP_SDO.rangeIncludes, SDO.rangeIncludes],
+    "exact_mappings": [OWL.sameAs, HTTP_SDO.sameAs],
+    # rdf:type objects that mark a subject as a class
+    ClassDefinition.__name__: [RDFS.Class, OWL.Class, SKOS.Concept],
+    # rdf:type objects that mark a subject as a slot
+    SlotDefinition.__name__: [
+        RDF.Property,
+        OWL.ObjectProperty,
+        OWL.DatatypeProperty,
+        OWL.AnnotationProperty,
+    ],
+}
+
+
+@dataclass
+class ShaclImportEngine(ImportEngine):
+ """
+ An ImportEngine that takes SHACL and converts it to a LinkML schema
+ """
+
+ mappings: dict = None
+ initial_metamodel_mappings: Dict[str, List[URIRef]] = None
+ metamodel_mappings: Dict[str, List[URIRef]] = None
+ reverse_metamodel_mappings: Dict[URIRef, List[str]] = None
+ include_unmapped_annotations = False
+ metamodel = None
+ metamodel_schemaview: SchemaView = None
+ classdef_slots: List[str] = None
+
+ def __post_init__(self):
+ sv = package_schemaview("linkml_runtime.linkml_model.meta")
+ self.metamodel_schemaview = sv
+ self.metamodel = sv
+ self.metamodel_mappings = defaultdict(list)
+ self.reverse_metamodel_mappings = defaultdict(list)
+ for k, vs in DEFAULT_METAMODEL_MAPPINGS.items():
+ self.metamodel_mappings[k].extend(vs)
+ for v in vs:
+ self.reverse_metamodel_mappings[v].append(k)
+ if self.initial_metamodel_mappings:
+ for k, vs in self.initial_metamodel_mappings.items():
+ if not isinstance(vs, list):
+ vs = [vs]
+ self.metamodel_mappings[k].extend(vs)
+ for v in vs:
+ self.reverse_metamodel_mappings[URIRef(v)].append(k)
+ logging.info(f"Adding mapping {k} -> {v}")
+ for e in sv.all_elements().values():
+ mappings = []
+ for ms in sv.get_mappings(e.name, expand=True).values():
+ for m in ms:
+ uri = URIRef(m)
+ mappings.append(uri)
+ self.reverse_metamodel_mappings[uri].append(e.name)
+ self.metamodel_mappings[e.name] = mappings
+ self.defclass_slots = [s.name for s in sv.class_induced_slots(ClassDefinition.class_name)]
+
+ def convert(
+ self,
+ file: str,
+ name: str = None,
+ format="turtle",
+ default_prefix: str = None,
+ model_uri: str = None,
+ identifier: str = None,
+ **kwargs,
+ ) -> SchemaDefinition:
+ """
+ Converts an shacl shapes file
+
+ :param file:
+ :param name:
+ :param model_uri:
+ :param identifier:
+ :param kwargs:
+ :return:
+ """
+ self.mappings = {}
+ g = Graph()
+ g.parse(file, format=format)
+ if name is not None and default_prefix is None:
+ default_prefix = name
+ if name is None:
+ name = default_prefix
+ if name is None:
+ name = "example"
+ sb = SchemaBuilder(name=name)
+ sb.add_defaults()
+ schema = sb.schema
+ for k, v in g.namespaces():
+ if k == "schema" and v != "http://schema.org/":
+ continue
+ sb.add_prefix(k, v, replace_if_present=True)
+ if default_prefix is not None:
+ schema.default_prefix = default_prefix
+ if default_prefix not in schema.prefixes:
+ sb.add_prefix(default_prefix, model_uri, replace_if_present=True)
+ schema.id = schema.prefixes[default_prefix].prefix_reference
+
+ cls_slots = defaultdict(list)
+ props = []
+ for rdfs_property_metaclass in self._rdfs_metamodel_iri(
+ SlotDefinition.__name__
+ ):
+ for p in g.subjects(RDF.type, rdfs_property_metaclass):
+ props.append(p)
+ # implicit properties
+ for metap in (
+ self.reverse_metamodel_mappings["domain_of"]
+ + self.reverse_metamodel_mappings["rangeIncludes"]
+ ):
+ for p, _, _o in g.triples((None, metap, None)):
+ props.append(p)
+ for p in set(props):
+ sn = self.iri_to_name(p)
+ init_dict = self._dict_for_subject(g, p)
+ if "domain_of" in init_dict:
+ for x in init_dict["domain_of"]:
+ cls_slots[x].append(sn)
+ del init_dict["domain_of"]
+ if "rangeIncludes" in init_dict:
+ init_dict["any_of"] = [{"range": x} for x in init_dict["rangeIncludes"]]
+ del init_dict["rangeIncludes"]
+ slot = SlotDefinition(sn, **init_dict)
+ slot.slot_uri = str(p.n3(g.namespace_manager))
+ sb.add_slot(slot)
+
+ rdfs_classes = []
+ for rdfs_class_metaclass in self._rdfs_metamodel_iri(ClassDefinition.__name__):
+ for s in g.subjects(RDF.type, rdfs_class_metaclass):
+ rdfs_classes.append(s)
+ # implicit classes
+ for metap in [RDFS.subClassOf]:
+ for s, _, o in g.triples((None, metap, None)):
+ rdfs_classes.append(s)
+ rdfs_classes.append(o)
+ for s in set(rdfs_classes):
+ cn = self.iri_to_name(s)
+ init_dict = self._dict_for_subject(g, s)
+ c = ClassDefinition(cn, **init_dict)
+ c.slots = cls_slots.get(cn, [])
+ c.class_uri = str(s.n3(g.namespace_manager))
+ sb.add_class(c)
+ if identifier is not None:
+ id_slot = SlotDefinition(identifier, identifier=True, range="uriorcurie")
+ schema.slots[identifier] = id_slot
+ for c in schema.classes.values():
+ if not c.is_a and not c.mixins:
+ if identifier not in c.slots:
+ c.slots.append(identifier)
+ return schema
+
+ def _dict_for_subject(self, g: Graph, s: URIRef) -> Dict[str, Any]:
+ """
+ Looks up triples for a subject and converts to dict using linkml keys.
+
+ :param g:
+ :param p:
+ :return:
+ """
+ init_dict = {}
+ for pp, obj in g.predicate_objects(s):
+ if pp == RDF.type:
+ continue
+ metaslot_name = self._element_from_iri(pp)
+ logging.debug(f"Mapping {pp} -> {metaslot_name}")
+ if metaslot_name not in self.defclass_slots:
+ continue
+ if metaslot_name is None:
+ logging.warning(f"Not mapping {pp}")
+ continue
+ if metaslot_name == "name":
+ metaslot_name = "title"
+ metaslot = self.metamodel.get_slot(metaslot_name)
+ v = self._object_to_value(obj, metaslot=metaslot)
+ metaslot_name_safe = underscore(metaslot_name)
+ if not metaslot or metaslot.multivalued:
+ if metaslot_name_safe not in init_dict:
+ init_dict[metaslot_name_safe] = []
+ init_dict[metaslot_name_safe].append(v)
+ else:
+ init_dict[metaslot_name_safe] = v
+ return init_dict
+
+ def _rdfs_metamodel_iri(self, name: str) -> List[URIRef]:
+ return self.metamodel_mappings.get(name, [])
+
+ def _element_from_iri(self, iri: URIRef) -> str:
+ r = self.reverse_metamodel_mappings.get(iri, [])
+ if len(r) > 0:
+ if len(r) > 1:
+ logger.debug(f"Multiple mappings for {iri}: {r}")
+ return r[0]
+
+ def _object_to_value(self, obj: Any, metaslot: SlotDefinition = None) -> Any:
+ if isinstance(obj, URIRef):
+ if metaslot.range == "uriorcurie" or metaslot.range == "uri":
+ return str(obj)
+ return self.iri_to_name(obj)
+ if isinstance(obj, Literal):
+ return obj.value
+ return obj
+
+ def iri_to_name(self, v: URIRef) -> str:
+ n = self._as_name(v)
+ if n != v:
+ self.mappings[n] = v
+ return n
+
+ def _as_name(self, v: URIRef):
+ v = str(v)
+ for sep in ["#", "/", ":"]:
+ if sep in v:
+ return v.split(sep)[-1]
+ return v
diff --git a/tests/__init__.py b/tests/__init__.py
index b092ca8..ad4619d 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,9 +1,6 @@
import os
-import pprint
ROOT = os.path.abspath(os.path.dirname(__file__))
INPUT_DIR = os.path.join(ROOT, 'resources')
OUTPUT_DIR = os.path.join(ROOT, 'outputs')
MODEL_DIR = os.path.join(ROOT, 'test_models')
-
-
diff --git a/tests/resources/shacl_simple.ttl b/tests/resources/shacl_simple.ttl
new file mode 100644
index 0000000..9c3b08b
--- /dev/null
+++ b/tests/resources/shacl_simple.ttl
@@ -0,0 +1,34 @@
+# example from http://book.validatingrdf.com/bookHtml011.html#ch050SHACLExample
+
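+@prefix schema: <http://schema.org/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix ex: <http://example.org/> .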
+
+ex:UserShape a sh:NodeShape;
+ sh:targetClass ex:User ;
+ sh:property [ # Blank node 1
+ sh:path schema:name ;
+ sh:minCount 1;
+ sh:maxCount 1;
+ sh:datatype xsd:string ;
+ ] ;
+ sh:property [ # Blank node 2
+ sh:path schema:gender ;
+ sh:minCount 1;
+ sh:maxCount 1;
+ sh:or (
+ [ sh:in (schema:Male schema:Female) ]
+ [ sh:datatype xsd:string]
+ )
+ ] ;
+ sh:property [ # Blank node 3
+ sh:path schema:birthDate ;
+ sh:maxCount 1;
+ sh:datatype xsd:date ;
+ ] ;
+ sh:property [ # Blank node 4
+ sh:path schema:knows ;
+ sh:nodeKind sh:IRI ;
+ sh:class ex:User ;
+ ] .
diff --git a/tests/resources/test_shacl_simple.ttl b/tests/resources/test_shacl_simple.ttl
new file mode 100644
index 0000000..9b869e0
--- /dev/null
+++ b/tests/resources/test_shacl_simple.ttl
@@ -0,0 +1 @@
+# tbw
diff --git a/tests/test_importers/test_shacl_importer.py b/tests/test_importers/test_shacl_importer.py
new file mode 100644
index 0000000..556c58f
--- /dev/null
+++ b/tests/test_importers/test_shacl_importer.py
@@ -0,0 +1,36 @@
+import os
+import pytest
+
+from linkml_runtime import SchemaView
+
+from schema_automator.importers.shacl_import_engine import ShaclImportEngine
+from linkml.generators.yamlgen import YAMLGenerator
+
+from schema_automator.utils.schemautils import write_schema
+from tests import INPUT_DIR, OUTPUT_DIR
+
+# TODO - Write tests (this is a copy of test_rdfs_importer)
+
+REPRO = os.path.join(INPUT_DIR, 'shacl_simple.ttl')
+OUTSCHEMA = os.path.join(OUTPUT_DIR, 'user_from_shacl_simple2.yaml')
+
+
+def test_from_shacl():
+ """Test Shacl conversion."""
+ sie = ShaclImportEngine()
+
+ schema = sie.convert(REPRO, default_prefix='usr', identifier='id')
+ write_schema(schema, OUTSCHEMA)
+ return
+ # roundtrip
+ s = YAMLGenerator(OUTSCHEMA).serialize()
+ print(s[0:100])
+ sv = SchemaView(OUTSCHEMA)
+ activity = sv.get_class("Activity")
+ assert activity
+ assert activity.name == "Activity"
+ assert activity.is_a == "CreativeWork"
+ slots = sv.class_induced_slots(activity.name)
+ assert len(slots) == 1
+ slot = slots[0]
+ assert slot.name == "id"