Skip to content

Commit

Permalink
Add results to README, document owl_class_expression_learning_dbpedia…
Browse files Browse the repository at this point in the history
… script, revert triple_store file removing multiple attempts to query
  • Loading branch information
Jean-KOUAGOU committed Dec 11, 2024
1 parent ca050ef commit 0278fcf
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 30 deletions.
60 changes: 58 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ wget https://files.dice-research.org/projects/Ontolearn/KGs.zip -O ./KGs.zip &&
wget https://files.dice-research.org/projects/Ontolearn/LPs.zip -O ./LPs.zip && unzip LPs.zip
```

## Learning OWL Class Expression
## Learning OWL Class Expressions
```python
from ontolearn.learners import TDL
from ontolearn.triple_store import TripleStore
Expand Down Expand Up @@ -97,7 +97,8 @@ weighted avg 1.00 1.00 1.00 4
"""
```

## Learning OWL Class Expression over DBpedia
## Learning OWL Class Expressions over DBpedia
1. Single learning problem
```python
from ontolearn.learners import TDL, Drill
from ontolearn.triple_store import TripleStore
Expand All @@ -120,6 +121,61 @@ print(owl_expression_to_sparql(expression=h))
save_owl_class_expressions(expressions=h,path="#owl_prediction")
```

2. On 106 learning problems from https://files.dice-research.org/projects/Ontolearn/LPs.zip

- Commands:
```bash
python examples/owl_class_expresion_learning_dbpedia.py --model TDL
```
Or
```bash
python examples/owl_class_expresion_learning_dbpedia.py --model Drill
```
- Results:

```python
"""
Cardinality
Type
OWLObjectAllValuesFrom 7
OWLObjectIntersectionOf 14
OWLObjectUnionOf 85
F1 Runtime
Type
OWLObjectAllValuesFrom 1.000000 206.287996
OWLObjectIntersectionOf 0.717172 91.663047
OWLObjectUnionOf 0.966652 129.699940
"""

# Or

"""
Cardinality
Type
OWLObjectAllValuesFrom 7
OWLObjectIntersectionOf 14
OWLObjectUnionOf 85
F1 Runtime
Type
OWLObjectAllValuesFrom 1.000000 206.287996
OWLObjectIntersectionOf 0.717172 91.663047
OWLObjectUnionOf 0.966652 129.699940
Type
OWLObjectAllValuesFrom 0.437500 240.330940
OWLObjectIntersectionOf 0.212930 202.557878
OWLObjectUnionOf 0.546334 187.144105
"""
```



For more, please refer to the [examples](https://github.com/dice-group/Ontolearn/tree/develop/examples) folder.

## ontolearn-webservice
Expand Down
59 changes: 44 additions & 15 deletions examples/owl_class_expresion_learning_dbpedia.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
"""$ python examples/retrieval_eval.py --path_kg "https://dbpedia.data.dice-research.org/sparql"
"""$ python examples/owl_class_expresion_learning_dbpedia.py --endpoint_triple_store "https://dbpedia.data.dice-research.org/sparql" --model "TDL"
Computing conjunctive_concepts...
Constructing Description Logic Concepts: 0%| Constructing Description Logic Concepts: 100%|██████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 870187.55it/s]
Computing disjunction_of_conjunctive_concepts...
Starting query after solution is computed!
Computed solution: OWLClass(IRI('http://dbpedia.org/ontology/', 'President'))
Expression: President ⊔ Actor | F1 :1.0000 | Runtime:171.275: 99%|██████████████████████████████████████████████████████████████▍| 105/106 [3:49:14<01:31, 91.8Expression: President ⊔ Actor | F1 :1.0000 | Runtime:171.275: 100%|██████████████████████████████████████████████████████████████| 106/106 [3:49:14<00:00, 115.6Expression: President ⊔ Actor | F1 :1.0000 | Runtime:171.275: 100%|██████████████████████████████████████████████████████████████| 106/106 [3:49:14<00:00, 129.76s/it]
Type
OWLObjectAllValuesFrom 7
OWLObjectIntersectionOf 14
OWLObjectUnionOf 85
Name: Type, dtype: int64
F1 Runtime
Type
OWLObjectAllValuesFrom 1.000000 206.287996
OWLObjectIntersectionOf 0.717172 91.663047
OWLObjectUnionOf 0.966652 129.699940
"""
# Make imports
import os
from tqdm import tqdm
import random
Expand Down Expand Up @@ -30,7 +49,7 @@
pd.set_option('display.expand_frame_repr', False)

def execute(args):
# (1) Initialize knowledge base.
# Initialize knowledge base.
assert args.endpoint_triple_store, 'A SPARQL endpoint of DBpedia must be provided via `--endpoint_triple_store "url"`'
try:
kb = TripleStore(url=args.endpoint_triple_store)
Expand All @@ -49,6 +68,7 @@ def execute(args):
print("Starting class expression learning on DBpedia...")
print("#" * 50,end="\n\n")

# Define a query function to retrieve instances of class expressions
def query_func(query):
try:
response = requests.post(args.endpoint_triple_store, data={"query": query}, timeout=300)
Expand All @@ -73,30 +93,38 @@ def query_func(query):
yield from inds
else:
yield None


# Initialize the model
model = Drill(knowledge_base=kb, max_runtime=240) if args.model.lower() == "drill" else TDL(knowledge_base=kb)
# Read learning problems from file
with open("./LPs/DBpedia2022-12/lps.json") as f:
lps = json.load(f)


# Check if the CSV already exists and delete it, because we want to overwrite it
if os.path.exists(args.path_report):
os.remove(args.path_report)

file_exists = False

# Iterate over all problems and solve
for item in (tqdm_bar := tqdm(lps, position=0, leave=True)):
# Create a learning problem object
lp = PosNegLPStandard(pos=set(list(map(OWLNamedIndividual,map(IRI.create, item["examples"]["positive examples"])))),
neg=set(list(map(OWLNamedIndividual,map(IRI.create, item["examples"]["negative examples"])))))
# (5) Learn description logic concepts best fitting
# Learn description logic concepts best fitting
t0 = time.time()
h = model.fit(learning_problem=lp).best_hypotheses()
t1 = time.time()
print("\nStarting query after solution is computed!\n")
# Convert the learned expression into a sparql query
concept_to_sparql_query = owl_expression_to_sparql(h) + "\nLIMIT 100" # Due to the size of DBpedia learning problems contain at most 100 pos and 100 neg examples
# Load actual instances of the target expression
actual_instances = set(item["examples"]["positive examples"])
# Compute instances of the learned expression
retrieved_instances = set(query_func(concept_to_sparql_query))
# Compute the quality of the learned expression
f1 = compute_f1_score(retrieved_instances, set(item["examples"]["positive examples"]), set(item["examples"]["negative examples"]))
print(f"Computed solution: {h}")

# Write results in a dictionary and create a dataframe
df_row = pd.DataFrame(
[{
"Expression": owl_expression_to_dl(dl_parser.parse(item["target expression"])),
Expand All @@ -109,41 +137,42 @@ def query_func(query):
# Append the row to the CSV file
df_row.to_csv(args.path_report, mode='a', header=not file_exists, index=False)
file_exists = True
# () Update the progress bar.
# Update the progress bar.
tqdm_bar.set_description_str(
f"Expression: {owl_expression_to_dl(dl_parser.parse(item['target expression']))} | F1 :{f1:.4f} | Runtime:{t1 - t0:.3f}"
)
# () Read the data into pandas dataframe
# Read the data into pandas dataframe
df = pd.read_csv(args.path_report, index_col=0)
# () Assert that the mean Jaccard Similarity meets the threshold
# Assert that the mean f1 score meets the threshold
assert df["F1"].mean() >= args.min_f1_score

# () Extract numerical features
# Extract numerical features
numerical_df = df.select_dtypes(include=["number"])

# () Group by the type of OWL concepts
# Group by the type of OWL concepts
df_g = df.groupby(by="Type")
print(df_g["Type"].count())

# () Compute mean of numerical columns per group
# Compute mean of numerical columns per group
mean_df = df_g[numerical_df.columns].mean()
print(mean_df)
return f1

def get_default_arguments(argv=None):
    """Build the command-line parser of this script and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) keeps the original
            behaviour of parsing ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with the attributes ``model``,
        ``path_kge_model``, ``endpoint_triple_store``, ``seed``,
        ``min_f1_score`` and ``path_report``.
    """
    # Define an argument parser
    parser = ArgumentParser()
    parser.add_argument("--model", type=str, default="Drill",
                        help='Learner to run: "Drill" (case-insensitive); any other value selects TDL')
    # NOTE(review): --path_kge_model and --seed are not read by the parts of
    # this script visible here -- confirm they are still needed.
    parser.add_argument("--path_kge_model", type=str, default=None)
    parser.add_argument("--endpoint_triple_store", type=str, default="https://dbpedia.data.dice-research.org/sparql",
                        help="URL of the SPARQL endpoint used as knowledge base and for instance retrieval")
    parser.add_argument("--seed", type=int, default=1)
    parser.add_argument("--min_f1_score", type=float, default=0.0, help="Minimum f1 score of computed solutions")

    # Left as None here; the __main__ guard derives a model-specific CSV name
    # when no path is given.
    parser.add_argument("--path_report", type=str, default=None,
                        help="Path of the CSV report; an existing file at this path is replaced")
    return parser.parse_args(argv)

if __name__ == "__main__":
# Get default or input values of arguments
args = get_default_arguments()
if not args.path_report :
if not args.path_report:
args.path_report = f"CEL_on_DBpedia_{args.model.upper()}.csv"
execute(args)
14 changes: 1 addition & 13 deletions ontolearn/triple_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1195,16 +1195,4 @@ def query(self, sparql: str):
yield from self.g.query(sparql_query=sparql)

def query_results(self, sparql: str):
trials = 0
response = None
while True:
try:
response = self.g.query(sparql_query=sparql)
results = response.json()["results"]["bindings"]
break
except Exception:
trials += 1
if trials > 10:
print("\n\nSPARQL endpoint not working properly. Sent a query 10 times but no results!!!\n")
break
return response
return self.g.query(sparql_query=sparql)

0 comments on commit 0278fcf

Please sign in to comment.