Skip to content
This repository has been archived by the owner on Apr 26, 2021. It is now read-only.

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
DaniFdezAlvarez committed Jan 14, 2021
2 parents 35c5441 + 9b21e36 commit 501f8bb
Show file tree
Hide file tree
Showing 160 changed files with 13,467 additions and 1,015 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ ENV/

dbshx/local_code/
local_code/
shexer/local_code/

# data
files/
logs/
40 changes: 34 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,29 +42,57 @@ The following code is handy for analyzing a) a file containing class-instance re

```python
from shexer.shaper import Shaper
from shexer.consts import NT

target_classes = [
"http://example.org/Person",
"http://example.org/Place"
"http://example.org/Gender"
]

output_file = "shaper_example.shex"

namespaces_dict = {"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
"http://example.org/": "foo"
"http://example.org/": "ex",
"http://weso.es/shapes/": "",
"http://www.w3.org/2001/XMLSchema#": "xml"
}

raw_graph = """
<http://example.org/sarah> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Person> .
<http://example.org/sarah> <http://example.org/age> "30"^^<http://www.w3.org/2001/XMLSchema#int> .
<http://example.org/sarah> <http://example.org/name> "Sarah" .
<http://example.org/sarah> <http://example.org/gender> <http://example.org/Female> .
<http://example.org/sarah> <http://example.org/occupation> <http://example.org/Doctor> .
<http://example.org/sarah> <http://example.org/brother> <http://example.org/Jim> .
<http://example.org/jim> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Person> .
<http://example.org/jim> <http://example.org/age> "28"^^<http://www.w3.org/2001/XMLSchema#int> .
<http://example.org/jim> <http://example.org/name> "Jimbo".
<http://example.org/jim> <http://example.org/surname> "Mendes".
<http://example.org/jim> <http://example.org/gender> <http://example.org/Male> .
<http://example.org/Male> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Gender> .
<http://example.org/Male> <http://www.w3.org/2000/01/rdf-schema#label> "Male" .
<http://example.org/Female> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Gender> .
<http://example.org/Female> <http://www.w3.org/2000/01/rdf-schema#label> "Female" .
<http://example.org/Other> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Gender> .
<http://example.org/Other> <http://www.w3.org/2000/01/rdf-schema#label> "Other gender" .
"""



input_nt_file = "target_graph.nt"


shaper = Shaper(target_classes=target_classes,
graph_file_input=input_nt_file,
raw_graph=raw_graph,
input_format=NT,
namespaces_dict=namespaces_dict, # Defaults: no prefixes
instantiation_property="http://example.org/type") # Default rdf:type
instantiation_property="http://www.w3.org/1999/02/22-rdf-syntax-ns#type") # Default rdf:type


shaper.shex_graph(output_file=shex_target_file,
aceptance_threshold=0.1)
shaper.shex_graph(output_file=output_file,
acceptance_threshold=0.1)

print("Done!")

Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Flask==1.0.3
Flask-Cors==3.0.7
rdflib==4.2.2
rdflib==4.2.2
SPARQLWrapper==1.8.4
rdflib-jsonld==0.4.0
15 changes: 12 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
from distutils.core import setup
from setuptools import find_packages

setup(
name = 'shexer',
packages = ['shexer'], # this must be the same as the name above
version = '0.0.1',
packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
version = '1.0.1',
description = 'Automatic schema extraction for RDF graphs',
author = 'Daniel Fernandez-Alvarez',
author_email = '[email protected]',
url = 'https://github.com/DaniFdezAlvarez/shexerp3',
download_url = 'https://github.com/DaniFdezAlvarez/shexer/tarball/0.0.1',
download_url = 'https://github.com/DaniFdezAlvarez/shexer/tarball/1.0.1',
keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "schema"],
classifiers = [],
install_requires=[
'Flask',
'Flask-Cors',
'rdflib',
'SPARQLWrapper',
'rdflib-jsonld'
],
)
3 changes: 3 additions & 0 deletions shexer/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
# Format identifiers (presumably RDF serializations accepted as input_format -- TODO confirm)
TURTLE = "turtle"
RDF_XML = "xml"
N3 = "n3"
JSON_LD = "json-ld"

# NOTE(review): these look like shape-map format identifiers -- confirm against callers
JSON = "json"
FIXED_SHAPE_MAP = "fsm"

# Full IRIs of properties; RDF_TYPE is the rdf:type IRI (presumably both are usable as instantiation property)
RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
WIKIDATA_INSTACE_OF = "http://www.wikidata.org/prop/direct/P31"  # NOTE(review): "INSTACE" is misspelled; name kept since it may be imported elsewhere

# Default value of the shapes_namespace parameter of ClassProfiler
SHAPES_DEFAULT_NAMESPACE = "http://weso.es/shapes/"
77 changes: 66 additions & 11 deletions shexer/core/class_profiler.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@

from shexer.model.IRI import IRI_ELEM_TYPE
from shexer.utils.shapes import build_shapes_name_for_class_uri
from shexer.utils.target_elements import determine_original_target_nodes_if_needed
from shexer.model.property import Property
from shexer.model.bnode import BNode
from shexer.utils.uri import remove_corners
from shexer.consts import SHAPES_DEFAULT_NAMESPACE

RDF_TYPE_STR = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

Expand All @@ -16,14 +19,22 @@

class ClassProfiler(object):

def __init__(self, triples_yielder, target_classes_dict, instantiation_property_str=RDF_TYPE_STR):
def __init__(self, triples_yielder, target_classes_dict, instantiation_property_str=RDF_TYPE_STR,
remove_empty_shapes=True, original_target_classes=None, original_shape_map=None,
shapes_namespace=SHAPES_DEFAULT_NAMESPACE):
self._triples_yielder = triples_yielder
self._target_classes_dict = target_classes_dict
self._instances_shape_dict = {}
self._shapes_namespace = shapes_namespace
self._classes_shape_dict = self._build_classes_shape_dict_with_just_classes()
self._shape_names_dict = self._build_shape_names_dict()
self._relevant_triples = 0
self._instantiation_property_str = self._decide_instantiation_property(instantiation_property_str)
self._remove_empty_shapes=remove_empty_shapes
self._original_target_nodes = determine_original_target_nodes_if_needed(remove_empty_shapes=remove_empty_shapes,
original_target_classes=original_target_classes,
original_shape_map=original_shape_map,
shapes_namespace=shapes_namespace)



Expand All @@ -34,6 +45,7 @@ def profile_classes(self):
# print("Profiler... shape of instances built!")
self._build_class_profile()
# print("Profiler... class profile built!")
self._clean_class_profile()
return self._classes_shape_dict

def get_target_classes_dict(self):
Expand All @@ -54,10 +66,12 @@ def _decide_instantiation_property(instantiation_property_str):
def _build_shape_names_dict(self):
result = {}
for a_class in self._target_classes_dict:
name = build_shapes_name_for_class_uri(a_class)
name = build_shapes_name_for_class_uri(class_uri=a_class,
shapes_namespace=self._shapes_namespace)
result[a_class] = name
return result


def _build_classes_shape_dict_with_just_classes(self):
result = {}
for a_class_key in self._target_classes_dict:
Expand All @@ -74,9 +88,11 @@ def _infer_3tuple_features(self, an_instance):
result.append( (a_prop, a_type, a_valid_cardinality) )
return result


def _infer_valid_cardinalities(self, a_property, a_cardinality):
"""
Special teratment for self._instantiation_property_str. If thats the property, we are targetting specific URIs instead of the type IRI.
Special treatment for self._instantiation_property_str. If that's the property, we are targeting specific URIs
instead of the type IRI.
Cardinality will be always "1"
:param a_property:
:param a_cardinality:
Expand All @@ -94,10 +110,46 @@ def _build_class_profile(self):
feautres_3tuple = self._infer_3tuple_features(an_instance)
for a_class in self._target_classes_dict:
if self._is_instance_of_class(an_instance, a_class):
self._anotate_instance_features_for_class(a_class, feautres_3tuple)


def _anotate_instance_features_for_class(self, a_class, features_3tuple):
self._annotate_instance_features_for_class(a_class, feautres_3tuple)

def _clean_class_profile(self):
    """
    Remove empty shapes from the class profile, pass after pass, until none is detected.

    Nothing is done when self._remove_empty_shapes is falsy. Detection is re-run after
    every removal pass, and the loop ends as soon as a detection returns nothing.
    """
    if not self._remove_empty_shapes:
        return

    pending_shapes = self._detect_shapes_to_remove()
    while pending_shapes:
        self._iteration_remove_empty_shapes(pending_shapes)
        pending_shapes = self._detect_shapes_to_remove()

def _detect_shapes_to_remove(self):
    """
    Collect the labels of the shapes that should be discarded.

    A shape is selected when it is not an original target shape and it has no
    annotated features.
    :return: set with the selected keys of self._classes_shape_dict
    """
    return {
        a_label
        for a_label in self._classes_shape_dict
        if not self._is_original_target_shape(a_label)
        and not self._has_it_annotated_features(a_label)
    }

def _is_original_target_shape(self, shape_label):
    """
    Tell whether a shape label is contained in self._original_target_nodes.
    :param shape_label: label to look for
    :return: True if it is contained, False otherwise
    """
    original_nodes = self._original_target_nodes
    return shape_label in original_nodes

def _has_it_annotated_features(self, shape_label):
    """
    Tell whether a shape has at least one entry annotated in self._classes_shape_dict.
    :param shape_label: key of the shape in self._classes_shape_dict
    :return: False if the label is unknown or its entry is empty, True otherwise
    """
    try:
        annotated = self._classes_shape_dict[shape_label]
    except KeyError:
        # Unknown label: nothing has been annotated for it
        return False
    return len(annotated) > 0

def _iteration_remove_empty_shapes(self, target_shapes):
    """
    Perform a single removal pass over self._classes_shape_dict.

    First, every target shape is dropped from the innermost dicts (those stored under
    each property of each shape). Then the top-level entries of the target shapes
    themselves are deleted.
    :param target_shapes: labels of the shapes to remove
    """
    profile = self._classes_shape_dict

    # Step 1: forget the target shapes wherever a property of some shape points to them
    for props_of_shape in profile.values():
        for types_of_prop in props_of_shape.values():
            for a_doomed_shape in target_shapes:
                types_of_prop.pop(a_doomed_shape, None)

    # Step 2: drop the entries of the target shapes (done after the traversal above,
    # so the outer dict does not change size while it is being iterated)
    for a_doomed_shape in target_shapes:
        profile.pop(a_doomed_shape, None)


def _annotate_instance_features_for_class(self, a_class, features_3tuple):
for a_feature_3tuple in features_3tuple:
self._introduce_needed_elements_in_shape_classes_dict(a_class, a_feature_3tuple)
# 3tuple: 0->str_prop, 1->str_type, 2->cardinality
Expand All @@ -115,7 +167,6 @@ def _introduce_needed_elements_in_shape_classes_dict(self, a_class, a_feature_3t
self._classes_shape_dict[a_class][str_prop][str_type][cardinality] = 0



def _is_instance_of_class(self, an_instance_str, a_class_str):
if an_instance_str in self._target_classes_dict[a_class_str]:
return True
Expand All @@ -125,10 +176,10 @@ def _is_instance_of_class(self, an_instance_str, a_class_str):
def _build_shape_of_instances(self):
for a_triple in self._yield_relevant_triples():
self._relevant_triples += 1
self._anotate_feature_of_target_instance(a_triple)
self._annotate_feature_of_target_instance(a_triple)


def _anotate_feature_of_target_instance(self, a_triple):
def _annotate_feature_of_target_instance(self, a_triple):
str_subj = a_triple[_S].iri
str_prop = a_triple[_P].iri
type_obj = self._decide_type_obj(a_triple[_O], str_prop)
Expand Down Expand Up @@ -192,8 +243,12 @@ def _is_a_relevant_triple(self, a_triple):
return True if self._is_subject_in_target_classes(a_triple) else False

def _is_subject_in_target_classes(self, a_triple):
str_subj = a_triple[_S].iri
subj = a_triple[_S]
if isinstance(subj, BNode):
return False
str_subj = subj.iri
for class_key in self._target_classes_dict:
if str_subj in self._target_classes_dict[class_key]:
return True
return False

Loading

0 comments on commit 501f8bb

Please sign in to comment.