Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
DaniFdezAlvarez committed Aug 28, 2024
2 parents c8c6442 + 61b6bdc commit fde2c15
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 32 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ def read(file_path):
setup(
name = 'shexer',
packages = find_packages(exclude=["*.local_code.*"]), # this must be the same as the name above
version = '2.5.5',
version = '2.5.6',
description = 'Automatic schema extraction for RDF graphs',
author = 'Daniel Fernandez-Alvarez',
author_email = '[email protected]',
url = 'https://github.com/DaniFdezAlvarez/shexer',
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.5.5.tar.gz',
download_url = 'https://github.com/DaniFdezAlvarez/shexer/archive/2.5.6.tar.gz',
keywords = ['testing', 'shexer', 'shexerp3', "rdf", "shex", "shacl", "schema"],
long_description = read('README.md'),
long_description_content_type='text/markdown',
Expand Down
31 changes: 29 additions & 2 deletions shexer/core/instances/abstract_instance_tracker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
from shexer.model.property import Property
from shexer.utils.uri import remove_corners
from shexer.utils.factories.h_tree import get_basic_h_tree

_TRACKERS_DISAM_COUNT = 0

_RDF_TYPE = Property(content="http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
_RDFS_SUBCLASS_OF = Property(content="http://www.w3.org/2000/01/rdf-schema#subClassOf")

class AbstractInstanceTracker(object):

def track_instances(self, verbose=False):
Expand All @@ -10,7 +16,7 @@ def track_instances(self, verbose=False):
@property
def disambiguator_prefix(self):
"""
It return an str that may help for disambiguation purposes if the instance_tracker is used to produce dicts
It returns a str that may help for disambiguation purposes if the instance_tracker is used to produce dicts
that may be integrated with other instance dicts, in case there is any key collision.
:return:
"""
Expand All @@ -19,4 +25,25 @@ def disambiguator_prefix(self):
return self._specific_disambiguator_prefix() + str(_TRACKERS_DISAM_COUNT )

def _specific_disambiguator_prefix(self):
raise NotImplementedError()
raise NotImplementedError()

@staticmethod
def _build_instances_dict():
return {} # Empty in every case. Instances, on the fly, will be the keys

@staticmethod
def _decide_instantiation_property(instantiation_property):
    """
    Normalize the received instantiation property into a Property object.

    :param instantiation_property: None, a Property, or a str containing the
                                   property URI.
    :return: _RDF_TYPE when None is received; the very same object when it is
             already a Property; a new Property wrapping the result of
             remove_corners(...) when a str is received.
    :raises ValueError: if the param is of any other type.
    """
    # Identity check: avoids calling a custom __eq__ on the received object.
    if instantiation_property is None:
        return _RDF_TYPE
    # isinstance (instead of exact type equality) also accepts subclasses.
    if isinstance(instantiation_property, Property):
        return instantiation_property
    if isinstance(instantiation_property, str):
        # raise_error_if_no_corners=False: the str is accepted even when
        # remove_corners finds no corners in it.
        return Property(remove_corners(a_uri=instantiation_property,
                                       raise_error_if_no_corners=False))
    raise ValueError("Unrecognized param type to define instantiation property")

def _reset_count(self):
    """
    Restart the tracking state: both triple counters go back to zero and
    self._htree is replaced by the result of a new get_basic_h_tree() call.
    """
    self._relevant_triples = self._not_relevant_triples = 0
    self._htree = get_basic_h_tree()

33 changes: 33 additions & 0 deletions shexer/core/instances/endpoint_instance_tracker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from shexer.core.instances.abstract_instance_tracker import _RDF_TYPE, _RDFS_SUBCLASS_OF
from shexer.core.instances.instance_tracker import InstanceTracker
from shexer.consts import SHAPES_DEFAULT_NAMESPACE
from shexer.model.bnode import BNode
from shexer.core.instances.pconsts import _S


class EndpointInstanceTracker(InstanceTracker):
    """
    InstanceTracker whose relevant-triple filter additionally discards every
    triple whose subject is a BNode.
    """

    def __init__(self,
                 target_classes,
                 triples_yielder,
                 instantiation_property=_RDF_TYPE,
                 all_classes_mode=False,
                 subclass_property=_RDFS_SUBCLASS_OF,
                 track_hierarchies=True,
                 shape_qualifiers_mode=False,
                 namespaces_for_qualifier_props=None,
                 shapes_namespace=SHAPES_DEFAULT_NAMESPACE,
                 instances_cap=-1):
        """
        Forward every param, unchanged and by keyword, to InstanceTracker.
        """
        super().__init__(
            target_classes=target_classes,
            triples_yielder=triples_yielder,
            instantiation_property=instantiation_property,
            all_classes_mode=all_classes_mode,
            subclass_property=subclass_property,
            track_hierarchies=track_hierarchies,
            shape_qualifiers_mode=shape_qualifiers_mode,
            namespaces_for_qualifier_props=namespaces_for_qualifier_props,
            shapes_namespace=shapes_namespace,
            instances_cap=instances_cap,
        )

    def _yield_relevant_triples(self):
        """
        Yield the triples accepted by the annotator whose subject is not a
        BNode, updating the relevant / not relevant counters on the way.

        :return: generator of triples
        """
        for candidate in self._triples_yielder.yield_triples():
            # The annotator is asked first; the BNode check only runs for
            # triples the annotator has already accepted.
            accepted = (self._annotator.is_relevant_triple(candidate)
                        and self._subject_is_not_bnode(candidate))
            if not accepted:
                self._not_relevant_triples += 1
                continue
            self._relevant_triples += 1
            yield candidate

    def _subject_is_not_bnode(self, a_triple):
        """
        :param a_triple: triple to check
        :return: False if the element at position _S of the triple is a BNode,
                 True otherwise
        """
        subject = a_triple[_S]
        return not isinstance(subject, BNode)
29 changes: 4 additions & 25 deletions shexer/core/instances/instance_tracker.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
from shexer.model.property import Property
from shexer.utils.uri import remove_corners
from shexer.utils.factories.h_tree import get_basic_h_tree
from shexer.core.instances.annotators.annotator_func import get_proper_annotator
from shexer.core.instances.abstract_instance_tracker import AbstractInstanceTracker
from shexer.core.instances.abstract_instance_tracker import AbstractInstanceTracker, _RDF_TYPE, _RDFS_SUBCLASS_OF
from shexer.consts import SHAPES_DEFAULT_NAMESPACE
from shexer.utils.log import log_msg
from shexer.core.instances.annotators.strategy_mode.instances_cap_exception import InstancesCapException
from shexer.utils.factories.h_tree import get_basic_h_tree
from shexer.core.instances.annotators.annotator_func import get_proper_annotator


_RDF_TYPE = Property(content="http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
_RDFS_SUBCLASS_OF = Property(content="http://www.w3.org/2000/01/rdf-schema#subClassOf")


class InstanceTracker(AbstractInstanceTracker):
Expand Down Expand Up @@ -75,31 +71,14 @@ def _yield_relevant_triples(self):
else:
self._not_relevant_triples += 1

def _reset_count(self):
self._relevant_triples = 0
self._not_relevant_triples = 0
self._htree = get_basic_h_tree()


def is_an_instantiation_prop(self, a_property):
    """
    Compare a property with the instantiation property of this tracker.

    :param a_property: property to check
    :return: result of a_property == self._instantiation_property
    """
    configured = self._instantiation_property
    return a_property == configured

def is_a_subclass_property(self, a_property):
    """
    Compare a property with the subclass property of this tracker.

    :param a_property: property to check
    :return: result of a_property == self._subclass_property
    """
    configured = self._subclass_property
    return a_property == configured

@staticmethod
def _build_instances_dict():
return {} # Empty in every case. Instances, on the fly, will be the keys

@staticmethod
def _decide_instantiation_property(instantiation_property):
if instantiation_property == None:
return _RDF_TYPE
if type(instantiation_property) == type(_RDF_TYPE):
return instantiation_property
if type(instantiation_property) == str:
return Property(remove_corners(a_uri=instantiation_property,
raise_error_if_no_corners=False))
raise ValueError("Unrecognized param type to define instantiation property")



Expand Down
3 changes: 1 addition & 2 deletions shexer/io/graph/yielder/nt_triples_yielder.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ def yield_triples(self):
tokens = self._look_for_tokens(a_line.strip())
if len(tokens) != 3:
self._error_triples += 1
log_msg(msg="This line was discarded: " + a_line,
source=self._source_file)
log_msg(verbose=False, msg="This line was discarded: " + a_line)
else:
yield (tune_token(a_token=tokens[0]),
tune_prop(a_token=tokens[1]),
Expand Down
2 changes: 1 addition & 1 deletion shexer/utils/translators/list_of_classes_to_shape_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _get_shape_label_for_class_uri(self, class_uri):
return class_uri

def _get_raw_selector_to_catch_instances_of_class_uri(self, class_uri, instantiation_property, limit_remote_instances):
return 'SPARQL "select ?s where {{ ?s <{prop}> <{class_uri}> }} {limit}"'.format(
return 'SPARQL "select ?s where {{ ?s <{prop}> <{class_uri}> . FILTER (!isBlank(?s)) }} {limit}"'.format( # FILTER (!isBlank(?c))
class_uri=class_uri,
prop=instantiation_property,
limit="" if limit_remote_instances < 0 else "LIMIT " + str(limit_remote_instances)
Expand Down
1 change: 1 addition & 0 deletions test/test_disable_endpoint_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,6 @@ def test_all_classes_mode(self):
limit_remote_instances=5,
disable_endpoint_cache=True)
str_result = shaper.shex_graph(string_output=True)
print(str_result)
self.assertTrue(number_of_shapes(str_result) > 2)
pass #
1 change: 1 addition & 0 deletions test/test_url_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,6 @@ def test_all_classes_mode(self):
track_classes_for_entities_at_last_depth_level=False,
limit_remote_instances=5)
str_result = shaper.shex_graph(string_output=True)
print(str_result)
self.assertTrue(number_of_shapes(str_result) > 2)
pass #

0 comments on commit fde2c15

Please sign in to comment.