Commit

Merge pull request #1 from Louis-Mozart/retrieval_eval_incomplete

Retrieval eval incomplete

Louis-Mozart authored Dec 5, 2024
2 parents cf7c395 + 01c9d81 commit 59b45c9
Showing 2 changed files with 90 additions and 156 deletions.
154 changes: 38 additions & 116 deletions examples/retrieval_with_cache.py
@@ -1,19 +1,18 @@

import argparse
import pandas as pd
from semantic_caching import run_cache, concept_generator
from ontolearn.semantic_caching import run_cache, concept_generator
from plot_metrics import *
import seaborn as sns

# Candidate cache sizes: 5, 16, 32, 128, 256, 512, 700, 800, 1024; KG: "KGs/Family/family.owl"; ratios: .2, .4, .6, .8
parser = argparse.ArgumentParser()
parser.add_argument('--cache_size_ratios', type=list, default=[1.], help="cache size is proportional to num_concepts, cache size = k * num_concepts")
parser.add_argument('--cache_size_ratios', type=list, default=[.1, .2, .4, .8, 1.], help="cache size is proportional to num_concepts, cache size = k * num_concepts")
parser.add_argument('--path_kg', type=list, default=["KGs/Family/family.owl"])
parser.add_argument('--path_kge', type=list, default=None)
parser.add_argument('--name_reasoner', type=str, default='EBR', choices=["EBR",'HermiT', 'Pellet', 'JFact', 'Openllet'])
parser.add_argument('--eviction_strategy', type=str, default='LRU', choices=['LIFO', 'FIFO', 'LRU', 'MRU', 'RP'])
parser.add_argument('--random_seed_for_RP', type=int, default=10, help="Random seed if the eviction strategy is RP")
parser.add_argument('--cache_type', type=str, default='cold', choices=['hot', 'cold'], help="Type of cache to be used. With cold cache we initialize the cache with NC, NNC")
parser.add_argument('--cache_type', type=str, default='hot', choices=['hot', 'cold'], help="Type of cache to be used. With cold cache we initialize the cache with NC, NNC")
parser.add_argument('--shuffle_concepts', action='store_true', help="If set, we shuffle the concepts for randomness")
args = parser.parse_args()

@@ -24,136 +23,59 @@ def get_cache_size(list_k, path_kg):
return [max(1, int(k * data_size)) for k in list_k]
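
The body of get_cache_size above this return is collapsed in the diff. A plausible reconstruction, assuming concept_generator(path_kg) (imported above) yields the generated ALC concepts, so each ratio k maps to a cache of size k * num_concepts:

def get_cache_size(list_k, path_kg):
    # Assumed reconstruction of the collapsed body
    concepts = list(concept_generator(path_kg))
    data_size = len(concepts)  # number of generated concepts
    return [max(1, int(k * data_size)) for k in list_k]

# e.g. 800 concepts with ratios [.1, .2, .4, .8, 1.] -> [80, 160, 320, 640, 800]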


# results = []
# for path_kg in args.path_kg:
# for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
# for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
# result, detailed = run_cache(
# path_kg=path_kg,
# path_kge=args.path_kge,
# cache_size=cache_size,
# name_reasoner=args.name_reasoner,
# eviction=strategy,
# random_seed=args.random_seed_for_RP,
# cache_type=args.cache_type,
# shuffle_concepts=args.shuffle_concepts
# )
# results.append(result)

# data_kg = result['dataset']
# df = pd.DataFrame(results)
# print(df)

# # Save to CSV
# df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)


results = []
detailed_results = []
for path_kg in args.path_kg:
for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
result, detailed = run_cache(
result, D = run_cache(
path_kg=path_kg,
path_kge=args.path_kge,
cache_size=cache_size,
name_reasoner=args.name_reasoner,
eviction=strategy,
eviction=args.eviction_strategy,
random_seed=args.random_seed_for_RP,
cache_type=args.cache_type,
shuffle_concepts=args.shuffle_concepts
)
results.append(result)

data_kg = result['dataset']
df = pd.DataFrame(results)
print(df)

# Save to CSV
# df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)


# name_reasoners = ["EBR",'HermiT','Pellet','JFact','Openllet']
# data_kgs = ["family"]

# for data_kg in data_kgs:

# for name_reasoner in name_reasoners:

# df = pd.read_csv(f'caching_results_{data_kg}/cache_experiments_{name_reasoner}_{data_kg}.csv')
# print(df)


# sns.set_context("talk", font_scale=3.6)

# plot1 = sns.catplot(
# data=df,
# kind="bar",
# x="cache_size",
# y="hit_ratio",
# hue="strategy",
# col="dataset",
# height=10,
# aspect=2
# )
# plt.show()
# plot1.savefig(f'caching_results_{data_kg}/cache_vs_hit_sns_{name_reasoner}_{data_kg}.pdf')

results.append(result)
detailed_results.append(D)

# plot2 = sns.catplot(
# data=df,
# kind="bar",
# x="cache_size",
# y="avg_jaccard",
# hue="strategy",
# col="dataset",
# height=10,
# aspect=2
# )
# plt.show()
# plot2.savefig(f'caching_results_{data_kg}/cache_vs_jaccard_sns_{name_reasoner}_{data_kg}.pdf')
all_detailed_results = [item for sublist in detailed_results for item in sublist]


# plot3 = sns.catplot(
# results = []
# detailed_results = []
# for path_kg in args.path_kg:
# for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
# result, D = run_cache(path_kg=path_kg, path_kge=args.path_kge, cache_size=cache_size, name_reasoner=args.name_reasoner,\
# eviction=args.eviction_strategy, random_seed=args.random_seed_for_RP)
# results.append(result)
# detailed_results.append(D)

# all_detailed_results = [item for sublist in detailed_results for item in sublist]

# results = pd.DataFrame(results)
results = pd.DataFrame(results)
# results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')

# plot_scale_factor(results, args.name_reasoner)
# plot_jaccard_vs_cache_size(results, args.name_reasoner)

plot_scale_factor(results, args.name_reasoner)
plot_jaccard_vs_cache_size(results, args.name_reasoner)

# # print(results.to_latex(index=False))

# all_detailed_results = pd.DataFrame(all_detailed_results)
# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
# data=df,
# kind="bar",
# x="cache_size",
# y="RT_cache",
# hue="strategy",
# col="dataset",
# height=10,
# aspect=2
# )
# plt.show()
# plot3.savefig(f'caching_results_{data_kg}/cache_vs_RT_sns_{name_reasoner}_{data_kg}.pdf')



# results = []
# detailed_results = []
# for path_kg in args.path_kg:
# for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
# result, D = run_cache(
# path_kg=path_kg,
# path_kge=args.path_kge,
# cache_size=cache_size,
# name_reasoner=args.name_reasoner,
# eviction=args.eviction_strategy,
# random_seed=args.random_seed_for_RP,
# cache_type=args.cache_type,
# shuffle_concepts=args.shuffle_concepts
# )
# results.append(result)
# detailed_results.append(D)

# all_detailed_results = [item for sublist in detailed_results for item in sublist]

# results = pd.DataFrame(results)
# # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')

# plot_scale_factor(results, args.name_reasoner)
# plot_jaccard_vs_cache_size(results, args.name_reasoner)

# # # print(results.to_latex(index=False))

# all_detailed_results = pd.DataFrame(all_detailed_results)
# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
all_detailed_results = pd.DataFrame(all_detailed_results)
bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
# bar_plot_all_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
# all_detailed_results.to_csv(f'caching_results/detailed_cache_experiments_{args.name_reasoner}.csv')

92 changes: 52 additions & 40 deletions semantic_caching.py → ontolearn/semantic_caching.py
@@ -165,13 +165,11 @@ def get_shuffled_concepts(path_kg, data_name):
random.shuffle(alc_concepts)
with open(save_file, "wb") as f:
pickle.dump(alc_concepts, f)
print("Generated, shuffled, and saved concepts.")

print("Generated, shuffled, and saved concepts.")
return alc_concepts


def concept_retrieval(retriever_func, c) -> Set[str]:

return {i.str for i in retriever_func.individuals(c)}


@@ -232,7 +230,7 @@ def put(self, key, value):
if self.strategy in ['LRU', 'MRU']:
self.access_times[key] = time.time() # Record access timestamp

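The eviction branch of put() is collapsed in this hunk. A minimal sketch of how LRU/MRU eviction could use the access_times recorded above (field names from the snippet; the logic is an assumption, not the committed code):

if self.is_full():
    # Assumed: LRU evicts the key with the oldest timestamp,
    # MRU the key with the newest timestamp.
    if self.strategy == 'LRU':
        victim = min(self.access_times, key=self.access_times.get)
    elif self.strategy == 'MRU':
        victim = max(self.access_times, key=self.access_times.get)
    self.cache.pop(victim, None)
    self.access_times.pop(victim, None)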
def initialize_cache(self, ontology, func, path_onto, third, All_individuals):
def initialize_cache(self, ontology, func, path_onto, third, All_individuals, handle_restriction_func=None):
"""
Initialize the cache with precomputed results.
:param ontology: The loaded ontology.
@@ -245,16 +243,37 @@ def initialize_cache(self, ontology, func, path_onto, third, All_individuals):
# Fetch object properties and classes from ontology
roles = list(ontology.object_properties())
classes = list(ontology.classes())

for cls in classes:
named_class = OWLClass(cls.iri)
named_class_str = str(cls).split(".")[-1]

# Add named concept
self.put(named_class_str, func(named_class, path_onto, third))
negated_named_class_str = f"¬{named_class_str}"

# Add negated named concept
self.put(negated_named_class_str, All_individuals-self.cache[named_class_str])
negated_class = OWLObjectComplementOf(named_class)

for role in roles:
role_property = OWLObjectProperty(role.iri)
existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class)
self.put(owl_expression_to_dl(existential_a), func(existential_a, path_onto, third))
existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class)

# Add ∃ r.C
if handle_restriction_func is not None:
self.put(owl_expression_to_dl(existential_a), handle_restriction_func(existential_a))
else:
self.put(owl_expression_to_dl(existential_a), func(existential_a, path_onto, third))

# Add ∃ r.(¬C)
existential_negated = OWLObjectSomeValuesFrom(property=role_property, filler=negated_class)
existential_negated_str = owl_expression_to_dl(existential_negated)
if handle_restriction_func is not None:
self.put(existential_negated_str, handle_restriction_func(existential_negated))
else:
self.put(existential_negated_str, func(existential_negated, path_onto, third))

self.initialized = True
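
In other words, a cold cache is seeded with every named concept, its negation, and the existential restrictions over each role. A toy illustration (assumed ontology with one class and one role, not from the diff):

# After initialize_cache on classes {Male} and roles {hasChild}, the cache holds:
#   'Male'                -> func(Male, path_onto, third)
#   '¬Male'               -> All_individuals - cache['Male']
#   '∃ hasChild.Male'     -> handle_restriction_func(∃ hasChild.Male)
#   '∃ hasChild.(¬Male)'  -> handle_restriction_func(∃ hasChild.(¬Male))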

def get_all_items(self):
@@ -265,8 +284,6 @@ def is_full(self):
return len(self.cache) >= self.max_size




def semantic_caching_size(func, cache_size, eviction_strategy, random_seed, cache_type):

'''This function implements the semantic caching algorithm for ALC concepts as presented in the paper'''
@@ -307,42 +324,43 @@ def retrieve_from_cache(expression):
stats['misses'] += 1
return None

def handle_owl_some_values_from():
def handle_owl_some_values_from(owl_expression):
"""
Process the OWLObjectSomeValuesFrom expression locally.
When called, return the retrieval of the OWLObjectSomeValuesFrom expression
based on the algorithm described in the paper.
"""
object_property = owl_expression.get_property()
filler_expression = owl_expression.get_filler()
instances = retrieve_from_cache(owl_expression_to_dl(filler_expression))
if instances:
result = set()
if isinstance(object_property, OWLObjectInverseOf):
r = onto.search_one(iri=object_property.get_inverse_property().str)
if isinstance(owl_expression, OWLObjectSomeValuesFrom):
object_property = owl_expression.get_property()
filler_expression = owl_expression.get_filler()
instances = retrieve_from_cache(owl_expression_to_dl(filler_expression))
if instances is not None:
result = set()
if isinstance(object_property, OWLObjectInverseOf):
r = onto.search_one(iri=object_property.get_inverse_property().str)
else:
r = onto.search_one(iri=object_property.str)
individual_map = {ind: onto.search_one(iri=ind) for ind in All_individuals | instances}
for ind_a in All_individuals:
a = individual_map[ind_a]
for ind_b in instances:
b = individual_map[ind_b]
if isinstance(object_property, OWLObjectInverseOf):
if a in getattr(b, r.name):
result.add(a)
else:
if b in getattr(a, r.name):
result.add(ind_a)
else:
r = onto.search_one(iri=object_property.str)
individual_map = {ind: onto.search_one(iri=ind) for ind in All_individuals | instances}
for ind_a in All_individuals:
a = individual_map[ind_a]
for ind_b in instances:
b = individual_map[ind_b]
if isinstance(object_property, OWLObjectInverseOf):
if a in getattr(b, r.name):
result.add(a)
else:
if b in getattr(a, r.name):
result.add(ind_a)
else:
result = func(*args)
return result
result = func(*args)
return result

start_time = time.time()  # start the timing before the cache initialization

# Cold cache initialization
start_time_initialization = time.time()
if cache_type == 'cold' and not cache.initialized:
cache.initialize_cache(onto, func, path_onto, args[-1], All_individuals)
cache.initialize_cache(onto, func, path_onto, args[-1], All_individuals, handle_restriction_func=handle_owl_some_values_from)
time_initialization = time.time() - start_time_initialization

# start_time = time.time()  # start the timing after the cache initialization
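
The set composition in handle_owl_some_values_from can be checked on a toy example (assumed data, not from the diff): an individual a belongs to ∃ r.C exactly when some cached instance b of the filler C satisfies r(a, b).

# Toy check of the ∃ r.C rule
male = {"bob", "tom"}                          # cached instances of the filler C
has_child = {("anna", "bob"), ("joe", "sue")}  # role assertions r(a, b)
exists_r_c = {a for (a, b) in has_child if b in male}
assert exists_r_c == {"anna"}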
@@ -385,9 +403,9 @@ def handle_owl_some_values_from():
if cached_result_cold is not None:
result = cached_result_cold
else:
result = handle_owl_some_values_from()
result = handle_owl_some_values_from(owl_expression)
else:
result = handle_owl_some_values_from()
result = handle_owl_some_values_from(owl_expression)

elif isinstance(owl_expression, OWLObjectAllValuesFrom):
all_values_expr = owl_expression_to_dl(owl_expression)
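
The ∀ r.C branch is collapsed below. A common way to service it from the same cache is the standard ALC identity; whether the committed code uses it is an assumption:

# Standard ALC identity: ∀ r.C ≡ ¬(∃ r.(¬C)), so the retrieval can be
# computed as All_individuals - retrieval(∃ r.(¬C)).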
@@ -508,8 +526,6 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic

ground_truth = concept_retrieval(symbolic_kb, expr)



jacc = jaccard_similarity(A, ground_truth)
jacc_reas = jaccard_similarity(retrieve_ebr, ground_truth)
Avg_jaccard.append(jacc)
@@ -519,8 +535,6 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic
print(f'Jaccard similarity: {jacc}')
# assert jacc == 1.0
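
# Jaccard similarity here is the standard set measure (assumed definition):
# jaccard_similarity(A, B) = |A ∩ B| / |A ∪ B|; 1.0 means the cached
# retrieval matches the ground-truth retrieval exactly.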



stats = cached_retriever.get_stats()

print('-'*50)
@@ -549,8 +563,6 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic





# def subsumption_based_caching(func, cache_size):
# cache = {} # Dictionary to store cached results
