Add results to README, document owl_class_expression_learning_dbpedia…

… script, revert triple_store file removing multiple attempts to query
dice-group · Dec 11, 2024 · 0278fcf · 0278fcf
1 parent ca050ef
commit 0278fcf
Show file tree

Hide file tree

Showing 3 changed files with 103 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -45,7 +45,7 @@ wget https://files.dice-research.org/projects/Ontolearn/KGs.zip -O ./KGs.zip &&
 wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip
 ```
 
-## Learning OWL Class Expression
+## Learning OWL Class Expressions
 ```python
 from ontolearn.learners import TDL
 from ontolearn.triple_store import TripleStore
@@ -97,7 +97,8 @@ weighted avg       1.00      1.00      1.00         4
 """
 ```
 
-## Learning OWL Class Expression over DBpedia
+## Learning OWL Class Expressions over DBpedia
+1. Single learning problem
 ```python
 from ontolearn.learners import TDL, Drill
 from ontolearn.triple_store import TripleStore
@@ -120,6 +121,61 @@ print(owl_expression_to_sparql(expression=h))
 save_owl_class_expressions(expressions=h,path="#owl_prediction")
 ```
 
+2. On 106 learning problems from https://files.dice-research.org/projects/Ontolearn/LPs.zip
+
+- Commands:
+```bash
+python examples/owl_class_expresion_learning_dbpedia.py --model TDL
+```
+Or 
+```bash
+python examples/owl_class_expresion_learning_dbpedia.py --model Drill
+```
+- Results:
+
+```python
+"""
+                        Cardinality
+Type                      
+OWLObjectAllValuesFrom      7
+OWLObjectIntersectionOf    14
+OWLObjectUnionOf           85
+
+
+                         F1        Runtime   
+Type                                         
+OWLObjectAllValuesFrom   1.000000  206.287996
+OWLObjectIntersectionOf  0.717172   91.663047
+OWLObjectUnionOf         0.966652  129.699940
+"""
+
+# Or
+
+"""
+                        Cardinality
+Type                      
+OWLObjectAllValuesFrom      7
+OWLObjectIntersectionOf    14
+OWLObjectUnionOf           85
+
+
+                         F1        Runtime   
+Type                                         
+OWLObjectAllValuesFrom   1.000000  206.287996
+OWLObjectIntersectionOf  0.717172   91.663047
+OWLObjectUnionOf         0.966652  129.699940
+
+
+                         F1        Runtime   
+Type                                         
+OWLObjectAllValuesFrom   0.437500  240.330940
+OWLObjectIntersectionOf  0.212930  202.557878
+OWLObjectUnionOf         0.546334  187.144105
+
+"""
+```
+
+
+
 For more, please refer to the [examples](https://github.com/dice-group/Ontolearn/tree/develop/examples) folder.
 
 ## ontolearn-webservice 

diff --git a/examples/owl_class_expresion_learning_dbpedia.py b/examples/owl_class_expresion_learning_dbpedia.py
@@ -1,6 +1,25 @@
-"""$ python examples/retrieval_eval.py --path_kg "https://dbpedia.data.dice-research.org/sparql"
+"""$ python examples/owl_class_expresion_learning_dbpedia.py --endpoint_triple_store "https://dbpedia.data.dice-research.org/sparql" --model "TDL"
+Computing conjunctive_concepts...
 
+Constructing Description Logic Concepts:   0%|                                  Constructing Description Logic Concepts: 100%|██████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 870187.55it/s]
+Computing disjunction_of_conjunctive_concepts...
+
+Starting query after solution is computed!
+
+Computed solution: OWLClass(IRI('http://dbpedia.org/ontology/', 'President'))
+Expression: President ⊔ Actor | F1 :1.0000 | Runtime:171.275:  99%|██████████████████████████████████████████████████████████████▍| 105/106 [3:49:14<01:31, 91.8Expression: President ⊔ Actor | F1 :1.0000 | Runtime:171.275: 100%|██████████████████████████████████████████████████████████████| 106/106 [3:49:14<00:00, 115.6Expression: President ⊔ Actor | F1 :1.0000 | Runtime:171.275: 100%|██████████████████████████████████████████████████████████████| 106/106 [3:49:14<00:00, 129.76s/it]
+Type
+OWLObjectAllValuesFrom      7
+OWLObjectIntersectionOf    14
+OWLObjectUnionOf           85
+Name: Type, dtype: int64
+                         F1        Runtime   
+Type                                         
+OWLObjectAllValuesFrom   1.000000  206.287996
+OWLObjectIntersectionOf  0.717172   91.663047
+OWLObjectUnionOf         0.966652  129.699940
 """
+# Make imports
 import os
 from tqdm import tqdm
 import random
@@ -30,7 +49,7 @@
 pd.set_option('display.expand_frame_repr', False)
 
 def execute(args):
-    # (1) Initialize knowledge base.
+    # Initialize knowledge base.
     assert args.endpoint_triple_store, 'A SPARQL endpoint of DBpedia must be provided via `--endpoint_triple_store "url"`'
     try:
         kb = TripleStore(url=args.endpoint_triple_store)
@@ -49,6 +68,7 @@ def execute(args):
     print("Starting class expression learning on DBpedia...")
     print("#" * 50,end="\n\n")
 
+    # Define a query function to retrieve instances of class expressions
     def query_func(query):
         try:
             response = requests.post(args.endpoint_triple_store, data={"query": query}, timeout=300)
@@ -73,30 +93,38 @@ def query_func(query):
             yield from inds
         else:
             yield None
-
+
+    # Initialize the model
     model = Drill(knowledge_base=kb, max_runtime=240) if args.model.lower() == "drill" else TDL(knowledge_base=kb)
+    # Read learning problems from file
     with open("./LPs/DBpedia2022-12/lps.json") as f:
         lps = json.load(f)
-
+
+    # Check if the CSV already exists and delete it, because we want to overwrite it
     if os.path.exists(args.path_report):
         os.remove(args.path_report)
 
     file_exists = False
-
+    # Iterate over all problems and solve
     for item in (tqdm_bar := tqdm(lps, position=0, leave=True)):
+        # Create a learning problem object
         lp = PosNegLPStandard(pos=set(list(map(OWLNamedIndividual,map(IRI.create, item["examples"]["positive examples"])))),
                       neg=set(list(map(OWLNamedIndividual,map(IRI.create, item["examples"]["negative examples"])))))
-        # (5) Learn description logic concepts best fitting
+        # Learn description logic concepts best fitting
         t0 = time.time()
         h = model.fit(learning_problem=lp).best_hypotheses()
         t1 = time.time()
         print("\nStarting query after solution is computed!\n")
+        # Convert the learned expression into a sparql query
         concept_to_sparql_query = owl_expression_to_sparql(h) + "\nLIMIT 100" # Due to the size of DBpedia learning problems contain at most 100 pos and 100 neg examples
+        # Load actual instances of the target expression
         actual_instances = set(item["examples"]["positive examples"])
+        # Compute instances of the learned expression
         retrieved_instances = set(query_func(concept_to_sparql_query))
+        # Compute the quality of the learned expression
         f1 = compute_f1_score(retrieved_instances, set(item["examples"]["positive examples"]), set(item["examples"]["negative examples"]))
         print(f"Computed solution: {h}")
-
+        # Write results in a dictionary and create a dataframe
         df_row = pd.DataFrame(
             [{
                 "Expression": owl_expression_to_dl(dl_parser.parse(item["target expression"])),
@@ -109,41 +137,42 @@ def query_func(query):
         # Append the row to the CSV file
         df_row.to_csv(args.path_report, mode='a', header=not file_exists, index=False)
         file_exists = True
-        # () Update the progress bar.
+        # Update the progress bar.
         tqdm_bar.set_description_str(
             f"Expression: {owl_expression_to_dl(dl_parser.parse(item['target expression']))} | F1 :{f1:.4f} | Runtime:{t1 - t0:.3f}"
         )
-    # () Read the data into pandas dataframe
+    # Read the data into pandas dataframe
     df = pd.read_csv(args.path_report, index_col=0)
-    # () Assert that the mean Jaccard Similarity meets the threshold
+    # Assert that the mean f1 score meets the threshold
     assert df["F1"].mean() >= args.min_f1_score
 
-    # () Extract numerical features
+    # Extract numerical features
     numerical_df = df.select_dtypes(include=["number"])
 
-    # () Group by the type of OWL concepts
+    # Group by the type of OWL concepts
     df_g = df.groupby(by="Type")
     print(df_g["Type"].count())
 
-    # () Compute mean of numerical columns per group
+    # Compute mean of numerical columns per group
     mean_df = df_g[numerical_df.columns].mean()
     print(mean_df)
     return f1
 
 def get_default_arguments():
+    # Define an argument parser
     parser = ArgumentParser()
     parser.add_argument("--model", type=str, default="Drill")
     parser.add_argument("--path_kge_model", type=str, default=None)
     parser.add_argument("--endpoint_triple_store", type=str, default="https://dbpedia.data.dice-research.org/sparql")
     parser.add_argument("--seed", type=int, default=1)
     parser.add_argument("--min_f1_score", type=float, default=0.0, help="Minimum f1 score of computed solutions")
 
-    # H is obtained if the forward chain is applied on KG.
     parser.add_argument("--path_report", type=str, default=None)
     return parser.parse_args()
 
 if __name__ == "__main__":
+    # Get default or input values of arguments
     args = get_default_arguments()
-    if not args.path_report :
+    if not args.path_report:
         args.path_report = f"CEL_on_DBpedia_{args.model.upper()}.csv"
     execute(args)
diff --git a/ontolearn/triple_store.py b/ontolearn/triple_store.py
@@ -1195,16 +1195,4 @@ def query(self, sparql: str):
         yield from self.g.query(sparql_query=sparql)
 
     def query_results(self, sparql: str):
-        trials = 0
-        response = None
-        while True:
-            try:
-                response = self.g.query(sparql_query=sparql)
-                results = response.json()["results"]["bindings"]
-                break
-            except Exception:
-                trials += 1
-                if trials > 10:
-                    print("\n\nSPARQL endpoint not working properly. Sent a query 10 times but no results!!!\n")
-                    break
-        return response
+        return self.g.query(sparql_query=sparql)