Skip to content

Commit

Permalink
Merge pull request #505 from dice-group/drill_confusion_matrix_sparql…
Browse files Browse the repository at this point in the history
…_integrate

DRILL shifts the quality computation to triplestore
  • Loading branch information
Demirrr authored Dec 5, 2024
2 parents d328569 + d2f976a commit 79c58e8
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 14 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,16 @@ weighted avg 1.00 1.00 1.00 4

## Learning OWL Class Expression over DBpedia
```python
from ontolearn.learners import TDL
from ontolearn.learners import TDL, Drill
from ontolearn.triple_store import TripleStore
from ontolearn.learning_problem import PosNegLPStandard
from owlapy.owl_individual import OWLNamedIndividual
from owlapy import owl_expression_to_sparql, owl_expression_to_dl
from ontolearn.utils.static_funcs import save_owl_class_expressions
# (1) Initialize Triplestore
kb = TripleStore(url="http://dice-dbpedia.cs.upb.de:9080/sparql")
kb = TripleStore(url="https://dbpedia.data.dice-research.org/sparql")
# (3) Initialize a learner.
model = TDL(knowledge_base=kb)
model = Drill(knowledge_base=kb) # or TDL(knowledge_base=kb)
# (4) Define a description logic concept learning problem.
lp = PosNegLPStandard(pos={OWLNamedIndividual("http://dbpedia.org/resource/Angela_Merkel")},
neg={OWLNamedIndividual("http://dbpedia.org/resource/Barack_Obama")})
Expand All @@ -117,7 +117,7 @@ h = model.fit(learning_problem=lp).best_hypotheses()
print(h)
print(owl_expression_to_dl(h))
print(owl_expression_to_sparql(expression=h))
save_owl_class_expressions(expressions=h,path="owl_prediction")
save_owl_class_expressions(expressions=h,path="#owl_prediction")
```

For more, please refer to the [examples](https://github.com/dice-group/Ontolearn/tree/develop/examples) folder.
Expand Down
31 changes: 24 additions & 7 deletions ontolearn/learners/drill.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------

import pandas as pd
import json
from owlapy.class_expression import OWLClassExpression
Expand All @@ -42,11 +41,14 @@
import time
import os
# F1 class will be deprecated to become compute_f1_score function.
from ontolearn.utils.static_funcs import compute_f1_score
from ontolearn.utils.static_funcs import compute_f1_score, compute_f1_score_from_confusion_matrix
import random
from ontolearn.heuristics import CeloeBasedReward
from ontolearn.data_struct import PrepareBatchOfPrediction
from tqdm import tqdm
from owlapy.converter import owl_expression_to_sparql_with_confusion_matrix

from ..triple_store import TripleStore
from ..utils.static_funcs import make_iterable_verbose
from owlapy.utils import get_expression_length

Expand Down Expand Up @@ -162,7 +164,11 @@ def __init__(self, knowledge_base,
max_num_of_concepts_tested=max_num_of_concepts_tested,
max_runtime=max_runtime)
# CD: Setting this variable will be removed later.
self.quality_func = compute_f1_score

if isinstance(self.kb, TripleStore):
self.quality_func = compute_f1_score_from_confusion_matrix
else:
self.quality_func = compute_f1_score

def initialize_training_class_expression_learning_problem(self,
pos: FrozenSet[OWLNamedIndividual],
Expand Down Expand Up @@ -301,9 +307,9 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
if max_runtime:
assert isinstance(max_runtime, float) or isinstance(max_runtime, int)
self.max_runtime = max_runtime

# (1) Reinitialize few attributes to ensure a clean start.
self.clean()
# (1) Initialize the start time
# (2) Initialize the start time
self.start_time = time.time()
# (2) Two mappings from a unique OWL Concept to integer, where a unique concept represents the type info
# C(x) s.t. x \in E^+ and C(y) s.t. y \in E^-.
Expand Down Expand Up @@ -429,9 +435,20 @@ def compute_quality_of_class_expression(self, state: RL_State) -> None:
# (3) Increment the number of tested concepts attribute.
"""
if isinstance(self.kb,TripleStore):
sparql_query=owl_expression_to_sparql_with_confusion_matrix(expression=state.concept,
positive_examples=self.pos,
negative_examples=self.neg)
bindings=self.kb.query_results(sparql_query).json()["results"]["bindings"]
assert len(bindings) == 1
bindings=bindings.pop()
confusion_matrix={k : v["value"]for k,v in bindings.items()}
quality = self.quality_func(confusion_matrix=confusion_matrix)


individuals = frozenset([i for i in self.kb.individuals(state.concept)])
quality = self.quality_func(individuals=individuals, pos=self.pos, neg=self.neg)
else:
individuals = frozenset([i for i in self.kb.individuals(state.concept)])
quality = self.quality_func(individuals=individuals, pos=self.pos, neg=self.neg)
state.quality = quality
self._number_of_tested_concepts += 1

Expand Down
8 changes: 6 additions & 2 deletions ontolearn/triple_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,9 +985,10 @@ class TripleStore:
url: str
def __init__(self, reasoner=None, url: str = None):

self.url=url
if reasoner is None:
assert url is not None, f"Reasoner:{reasoner} and url of a triplestore {url} cannot be both None."
self.g = TripleStoreReasonerOntology(url=url)
assert url is not None, f"Reasoner:{reasoner} and url of a triplestore {self.url} cannot be both None."
self.g = TripleStoreReasonerOntology(url=self.url)
else:
self.g = reasoner
self.ontology = self.g
Expand Down Expand Up @@ -1192,3 +1193,6 @@ def least_general_named_concepts(self):

def query(self, sparql: str):
    """Run a SPARQL query and yield the items produced by the underlying store.

    This is a generator function: ``self.g.query`` is only invoked once the
    caller starts iterating over the returned generator.

    Args:
        sparql: SPARQL query string, forwarded as ``sparql_query``.

    Yields:
        Each item obtained by iterating over ``self.g.query(sparql_query=sparql)``.
        NOTE(review): the element type depends on ``self.g`` and is not visible
        here -- confirm against the reasoner/ontology implementation.
    """
    yield from self.g.query(sparql_query=sparql)

def query_results(self, sparql: str):
    """Run a SPARQL query and return the underlying store's result object as-is.

    Unlike ``query``, this is a regular method (not a generator): the call to
    ``self.g.query`` happens immediately and its return value is handed back
    without being iterated.

    Args:
        sparql: SPARQL query string, forwarded as ``sparql_query``.

    Returns:
        Whatever ``self.g.query(sparql_query=sparql)`` returns.
        NOTE(review): the Drill learner calls ``.json()["results"]["bindings"]``
        on this value, so it is presumably an HTTP-response-like object --
        confirm against the implementation of ``self.g.query``.
    """
    return self.g.query(sparql_query=sparql)
20 changes: 20 additions & 0 deletions ontolearn/utils/static_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,26 @@ def compute_f1_score(individuals, pos, neg) -> float:
return f_1


def compute_f1_score_from_confusion_matrix(confusion_matrix: dict) -> float:
    """Compute the F1 score from the counts of a confusion matrix.

    Args:
        confusion_matrix: Mapping that provides the keys ``"tp"``, ``"fn"`` and
            ``"fp"``. Each value may be an ``int`` or anything ``int()`` accepts,
            e.g. the numeric strings taken from SPARQL JSON bindings. A ``"tn"``
            entry may be present but is ignored, because F1 does not depend on
            the number of true negatives.

    Returns:
        The harmonic mean of precision and recall. ``0.0`` is returned when
        either of them is undefined (zero denominator) or equal to zero.

    Raises:
        KeyError: If ``"tp"``, ``"fn"`` or ``"fp"`` is missing.
        ValueError: If one of those values cannot be converted to ``int``.
    """
    tp = int(confusion_matrix["tp"])
    fn = int(confusion_matrix["fn"])
    fp = int(confusion_matrix["fp"])

    # Recall (tp + fn) or precision (tp + fp) is undefined: score the
    # expression as 0.0 instead of raising, as a useless hypothesis.
    if tp + fn == 0 or tp + fp == 0:
        return 0.0

    recall = tp / (tp + fn)
    precision = tp / (tp + fp)

    # Also guards the final division: precision + recall would be 0 only here.
    if precision == 0 or recall == 0:
        return 0.0

    return 2 * ((precision * recall) / (precision + recall))

def plot_umap_reduced_embeddings(X: pandas.DataFrame, y: List[float], name: str = "umap_visualization.pdf") -> None: # pragma: no cover
# TODO:AB: 'umap' is not part of the dependencies !?
import umap
Expand Down
2 changes: 1 addition & 1 deletion tests/test_example_concept_learning_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_learning(self):
0.2,
0.97,
0.1,
0.92,
0.90,
0.4,
0.95,
0.3])):
Expand Down

0 comments on commit 79c58e8

Please sign in to comment.