From 31b7828129d5656dde591d805a951356d91b4594 Mon Sep 17 00:00:00 2001
From: Louis-Mozart
Date: Wed, 4 Dec 2024 19:04:19 +0100
Subject: [PATCH 1/3] Removing unnecessary comments

---
 semantic_caching.py | 78 ---------------------------------------------
 1 file changed, 78 deletions(-)

diff --git a/semantic_caching.py b/semantic_caching.py
index 664d3ba..db6757a 100644
--- a/semantic_caching.py
+++ b/semantic_caching.py
@@ -546,81 +546,3 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic
         'strategy': eviction
     }, D
-
-
-
-
-
-# def subsumption_based_caching(func, cache_size):
-#     cache = {}  # Dictionary to store cached results
-
-#     def store(concept, instances):
-#         # Check if cache limit will be exceeded
-#         if len(instances) + len(cache) > cache_size:
-#             purge(len(instances))  # Adjusted to ensure cache size limit
-#         # Add concept and instances to cache
-#         cache[concept] = instances
-
-#     def purge(needed_space):
-#         # Remove oldest items until there's enough space
-#         while len(cache) > needed_space:
-#             cache.pop(next(iter(cache)))
-
-#     def wrapper(*args):
-#         path_onto = args[1]
-#         onto = get_ontology(path_onto).load()
-
-#         # Synchronize the reasoner (e.g., using Pellet)
-#         # with onto:
-#         #     sync_reasoner(infer_property_values=True)
-
-#         all_individuals = {a for a in onto.individuals()}
-#         str_expression = owl_expression_to_dl(args[0])
-#         owl_expression = args[0]
-
-#         # Check cache for existing results
-#         if str_expression in cache:
-#             return cache[str_expression]
-
-#         super_concepts = set()
-#         namespace, class_name = owl_expression.str.split('#')
-#         class_expression = f"{namespace.split('/')[-1]}.{class_name}"
-
-#         all_classes = [i for i in list(onto.classes())]
-
-#         for j in all_classes:
-#             if str(j) == class_expression:
-#                 class_expression = j
-
-#         for D in list(cache.keys()):
-#             # print(owl_expression)
-#             # exit(0)
-#             if D in class_expression.ancestors():  # Check if C ⊑ D
-#                 super_concepts.add(D)
-
-#         print(super_concepts)
-#         exit(0)
-#         # Compute instances based on subsumption
-#         if len(super_concepts) == 0:
-#             instances = all_individuals
-#         else:
-#             instances = set.intersection(
-#                 *[wrapper(D, path_onto) for D in super_concepts]
-#             )
-
-#         # Filter instances by checking if each is an instance of the concept
-#         instance_set = set()
-
-#         for individual in instances:
-#             for type_entry in individual.is_a:
-#                 type_iri = str(type_entry.iri)
-#                 if owl_expression.str == type_iri:
-#                     instance_set.add(individual)
-#                     break
-
-#         # Store in cache
-#         store(str_expression, instance_set)
-#         return instance_set
-
-#     return wrapper
-
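Patch 2 below seeds a "cold" cache before the first query: for every named class C and every role r it precomputes C, ¬C, ∃ r.C and ∃ r.¬C, and it can route the two restrictions through a handle_restriction_func instead of a full reasoner call. A minimal sketch of the seeded expressions, assuming owlapy-style constructors as used in the diff (the import paths are an assumption and may differ between owlapy versions):

    from owlapy.class_expression import (OWLClass, OWLObjectComplementOf,
                                         OWLObjectSomeValuesFrom)
    from owlapy.owl_property import OWLObjectProperty

    def seed_expressions(class_iris, role_iris):
        """Yield C, ¬C, ∃ r.C and ∃ r.¬C for each named class C and role r."""
        for c_iri in class_iris:
            c = OWLClass(c_iri)
            yield c
            yield OWLObjectComplementOf(c)
            for r_iri in role_iris:
                r = OWLObjectProperty(r_iri)
                yield OWLObjectSomeValuesFrom(property=r, filler=c)
                yield OWLObjectSomeValuesFrom(property=r,
                                              filler=OWLObjectComplementOf(c))

Seeding therefore costs on the order of |classes| * (2 + 2 * |roles|) retrievals up front, which is why the wrapper below times it separately as time_initialization.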
From db2ab9b12c90dd509d90e225db60c8478f6ba109 Mon Sep 17 00:00:00 2001
From: Louis-Mozart
Date: Thu, 5 Dec 2024 15:04:58 +0100
Subject: [PATCH 2/3] Fixing minor issues at initialisation time

---
 semantic_caching.py | 157 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 128 insertions(+), 29 deletions(-)

diff --git a/semantic_caching.py b/semantic_caching.py
index db6757a..2f90761 100644
--- a/semantic_caching.py
+++ b/semantic_caching.py
@@ -232,7 +232,7 @@ def put(self, key, value):
         if self.strategy in ['LRU', 'MRU']:
             self.access_times[key] = time.time()  # Record access timestamp
 
-    def initialize_cache(self, ontology, func, path_onto, third, All_individuals):
+    def initialize_cache(self, ontology, func, path_onto, third, All_individuals, handle_restriction_func=None):
         """
         Initialize the cache with precomputed results.
         :param ontology: The loaded ontology.
@@ -245,16 +245,38 @@ def initialize_cache(self, ontology, func, path_onto, third, All_individuals):
         # Fetch object properties and classes from ontology
         roles = list(ontology.object_properties())
         classes = list(ontology.classes())
+
         for cls in classes:
             named_class = OWLClass(cls.iri)
             named_class_str = str(cls).split(".")[-1]
+
+            # Add named concept
             self.put(named_class_str, func(named_class, path_onto, third))
             negated_named_class_str = f"¬{named_class_str}"
+
+            # Add negated named concept
             self.put(negated_named_class_str, All_individuals-self.cache[named_class_str])
+
+            negated_class = OWLObjectComplementOf(named_class)
+            existential_negated = OWLObjectSomeValuesFrom(property=role_property, filler=negated_class)
+            existential_negated_str = owl_expression_to_dl(existential_negated)
+
             for role in roles:
                 role_property = OWLObjectProperty(role.iri)
-                existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class)
-                self.put(owl_expression_to_dl(existential_a), func(existential_a, path_onto, third))
+                existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class)
+
+                # Add ∃ r.C
+                if handle_restriction_func is not None:
+                    self.put(owl_expression_to_dl(existential_a), handle_restriction_func(existential_a))
+                else:
+                    self.put(owl_expression_to_dl(existential_a), func(existential_a, path_onto, third))
+
+                # Add ∃ r.(¬C)
+                if handle_restriction_func is not None:
+                    self.put(existential_negated_str, handle_restriction_func(existential_negated))
+                else:
+                    self.put(existential_negated_str, func(existential_negated, path_onto, third))
+
         self.initialized = True
 
     def get_all_items(self):
@@ -307,42 +329,43 @@ def retrieve_from_cache(expression):
             stats['misses'] += 1
             return None
 
-    def handle_owl_some_values_from():
+    def handle_owl_some_values_from(owl_expression):
         """
         Process the OWLObjectSomeValuesFrom expression locally.
         When called, return the retrieval of OWLObjectSomeValuesFrom based on the Algorithm described in the paper
         """
-        object_property = owl_expression.get_property()
-        filler_expression = owl_expression.get_filler()
-        instances = retrieve_from_cache(owl_expression_to_dl(filler_expression))
-        if instances:
-            result = set()
-            if isinstance(object_property, OWLObjectInverseOf):
-                r = onto.search_one(iri=object_property.get_inverse_property().str)
+        if isinstance(owl_expression, OWLObjectSomeValuesFrom):
+            object_property = owl_expression.get_property()
+            filler_expression = owl_expression.get_filler()
+            instances = retrieve_from_cache(owl_expression_to_dl(filler_expression))
+            if instances is not None:
+                result = set()
+                if isinstance(object_property, OWLObjectInverseOf):
+                    r = onto.search_one(iri=object_property.get_inverse_property().str)
+                else:
+                    r = onto.search_one(iri=object_property.str)
+                individual_map = {ind: onto.search_one(iri=ind) for ind in All_individuals | instances}
+                for ind_a in All_individuals:
+                    a = individual_map[ind_a]
+                    for ind_b in instances:
+                        b = individual_map[ind_b]
+                        if isinstance(object_property, OWLObjectInverseOf):
+                            if a in getattr(b, r.name):
+                                result.add(a)
+                        else:
+                            if b in getattr(a, r.name):
+                                result.add(ind_a)
             else:
-                r = onto.search_one(iri=object_property.str)
-            individual_map = {ind: onto.search_one(iri=ind) for ind in All_individuals | instances}
-            for ind_a in All_individuals:
-                a = individual_map[ind_a]
-                for ind_b in instances:
-                    b = individual_map[ind_b]
-                    if isinstance(object_property, OWLObjectInverseOf):
-                        if a in getattr(b, r.name):
-                            result.add(a)
-                    else:
-                        if b in getattr(a, r.name):
-                            result.add(ind_a)
-        else:
-            result = func(*args)
-        return result
+                result = func(*args)
+            return result
 
     start_time = time.time()  # state the timing before the cache initialization
 
     # Cold cache initialization
     start_time_initialization = time.time()
     if cache_type == 'cold' and not cache.initialized:
-        cache.initialize_cache(onto, func, path_onto, args[-1], All_individuals)
+        cache.initialize_cache(onto, func, path_onto, args[-1], All_individuals, handle_restriction_func=handle_owl_some_values_from)
     time_initialization = time.time() - start_time_initialization
     # start_time = time.time()  # state the timing after the cache initialization
@@ -385,9 +408,9 @@ def handle_owl_some_values_from():
             if cached_result_cold is not None:
                 result = cached_result_cold
             else:
-                result = handle_owl_some_values_from()
+                result = handle_owl_some_values_from(owl_expression)
         else:
-            result = handle_owl_some_values_from()
+            result = handle_owl_some_values_from(owl_expression)
 
     elif isinstance(owl_expression, OWLObjectAllValuesFrom):
         all_values_expr = owl_expression_to_dl(owl_expression)
@@ -546,3 +569,79 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic
         'strategy': eviction
     }, D
+
+
+
+# def subsumption_based_caching(func, cache_size):
+#     cache = {}  # Dictionary to store cached results
+
+#     def store(concept, instances):
+#         # Check if cache limit will be exceeded
+#         if len(instances) + len(cache) > cache_size:
+#             purge(len(instances))  # Adjusted to ensure cache size limit
+#         # Add concept and instances to cache
+#         cache[concept] = instances
+
+#     def purge(needed_space):
+#         # Remove oldest items until there's enough space
+#         while len(cache) > needed_space:
+#             cache.pop(next(iter(cache)))
+
+#     def wrapper(*args):
+#         path_onto = args[1]
+#         onto = get_ontology(path_onto).load()
+
+#         # Synchronize the reasoner (e.g., using Pellet)
+#         # with onto:
+#         #     sync_reasoner(infer_property_values=True)
+
+#         all_individuals = {a for a in onto.individuals()}
+#         str_expression = owl_expression_to_dl(args[0])
+#         owl_expression = args[0]
+
+#         # Check cache for existing results
+#         if str_expression in cache:
+#             return cache[str_expression]
+
+#         super_concepts = set()
+#         namespace, class_name = owl_expression.str.split('#')
+#         class_expression = f"{namespace.split('/')[-1]}.{class_name}"
+
+#         all_classes = [i for i in list(onto.classes())]
+
+#         for j in all_classes:
+#             if str(j) == class_expression:
+#                 class_expression = j
+
+#         for D in list(cache.keys()):
+#             # print(owl_expression)
+#             # exit(0)
+#             if D in class_expression.ancestors():  # Check if C ⊑ D
+#                 super_concepts.add(D)
+
+#         print(super_concepts)
+#         exit(0)
+#         # Compute instances based on subsumption
+#         if len(super_concepts) == 0:
+#             instances = all_individuals
+#         else:
+#             instances = set.intersection(
+#                 *[wrapper(D, path_onto) for D in super_concepts]
+#             )
+
+#         # Filter instances by checking if each is an instance of the concept
+#         instance_set = set()
+
+#         for individual in instances:
+#             for type_entry in individual.is_a:
+#                 type_iri = str(type_entry.iri)
+#                 if owl_expression.str == type_iri:
+#                     instance_set.add(individual)
+#                     break
+
+#         # Store in cache
+#         store(str_expression, instance_set)
+#         return instance_set
+
+#     return wrapper
+
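The rewritten handle_owl_some_values_from above implements the existential retrieval rule: an individual a belongs to ∃ r.C exactly when it has at least one r-successor among the cached instances of the filler C (for an inverse role, when some cached b in C has a as an r-successor). A hedged sketch of that rule over a plain successor map, standing in for the owlready2 getattr(a, r.name) lookups in the diff (the names below are illustrative, not the module's API):

    def existential_retrieval(filler_instances, r_successors):
        # r_successors maps each individual to the set of its r-successors;
        # filler_instances is the cached answer set for the filler C.
        return {a for a, succ in r_successors.items() if succ & filler_instances}

Only when the filler expression misses the cache does the wrapper fall back to the full reasoner call (result = func(*args)).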
From 01c9d81b8ebd479bc8980b0c102a7661497fd679 Mon Sep 17 00:00:00 2001
From: Louis-Mozart
Date: Thu, 5 Dec 2024 16:04:03 +0100
Subject: [PATCH 3/3] Moving the semantic_caching.py file to Ontolearn

---
 examples/retrieval_with_cache.py | 154 +++++-------------
 .../semantic_caching.py          |  19 +--
 2 files changed, 43 insertions(+), 130 deletions(-)
 rename semantic_caching.py => ontolearn/semantic_caching.py (98%)

diff --git a/examples/retrieval_with_cache.py b/examples/retrieval_with_cache.py
index 9123156..4fbe1db 100644
--- a/examples/retrieval_with_cache.py
+++ b/examples/retrieval_with_cache.py
@@ -1,19 +1,18 @@
 import argparse
 import pandas as pd
-from semantic_caching import run_cache, concept_generator
+from ontolearn.semantic_caching import run_cache, concept_generator
 from plot_metrics import *
 import seaborn as sns
 
-#5, 16, 32, 128, 256, 512, 700, 800, 1024, , "KGs/Family/family.owl" .2, .4, .6, .8
 parser = argparse.ArgumentParser()
-parser.add_argument('--cache_size_ratios', type=list, default=[1.], help="cache size is proportional to num_concepts, cache size = k * num_concepts")
+parser.add_argument('--cache_size_ratios', type=list, default=[.1, .2, .4, .8, 1.], help="cache size is proportional to num_concepts, cache size = k * num_concepts")
 parser.add_argument('--path_kg', type=list, default=["KGs/Family/family.owl"])
 parser.add_argument('--path_kge', type=list, default=None)
 parser.add_argument('--name_reasoner', type=str, default='EBR', choices=["EBR",'HermiT', 'Pellet', 'JFact', 'Openllet'])
 parser.add_argument('--eviction_strategy', type=str, default='LRU', choices=['LIFO', 'FIFO', 'LRU', 'MRU', 'RP'])
 parser.add_argument('--random_seed_for_RP', type=int, default=10, help="Random seed if the eviction strategy is RP")
-parser.add_argument('--cache_type', type=str, default='cold', choices=['hot', 'cold'], help="Type of cache to be used. With cold cache we initialize the cache with NC, NNC")
+parser.add_argument('--cache_type', type=str, default='hot', choices=['hot', 'cold'], help="Type of cache to be used. With cold cache we initialize the cache with NC, NNC")
 parser.add_argument('--shuffle_concepts', action='store_true', help="If set, we shuffle the concepts for randomness")
 
 args = parser.parse_args()
@@ -24,136 +23,59 @@ def get_cache_size(list_k, path_kg):
     return [max(1, int(k * data_size)) for k in list_k]
 
 
+# results = []
+# for path_kg in args.path_kg:
+#     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
+#         for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
+#             result, detailed = run_cache(
+#                 path_kg=path_kg,
+#                 path_kge=args.path_kge,
+#                 cache_size=cache_size,
+#                 name_reasoner=args.name_reasoner,
+#                 eviction=strategy,
+#                 random_seed=args.random_seed_for_RP,
+#                 cache_type=args.cache_type,
+#                 shuffle_concepts=args.shuffle_concepts
+#             )
+#             results.append(result)
+
+#             data_kg = result['dataset']
+#     df = pd.DataFrame(results)
+#     print(df)
+
+#     # Save to CSV
+#     df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)
+
+
 results = []
+detailed_results = []
 for path_kg in args.path_kg:
     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
-        for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
-            result, detailed = run_cache(
+        result, D = run_cache(
                 path_kg=path_kg,
                 path_kge=args.path_kge,
                 cache_size=cache_size,
                 name_reasoner=args.name_reasoner,
-                eviction=strategy,
+                eviction=args.eviction_strategy,
                 random_seed=args.random_seed_for_RP,
                 cache_type=args.cache_type,
                 shuffle_concepts=args.shuffle_concepts
            )
-            results.append(result)
-
-            data_kg = result['dataset']
-    df = pd.DataFrame(results)
-    print(df)
-
-    # Save to CSV
-    # df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)
-
-
-# name_reasoners = ["EBR",'HermiT','Pellet','JFact','Openllet']
-# data_kgs = ["family"]
-
-# for data_kg in data_kgs:
-
-#     for name_reasoner in name_reasoners:
-
-#         df = pd.read_csv(f'caching_results_{data_kg}/cache_experiments_{name_reasoner}_{data_kg}.csv')
-#         print(df)
-
-
-        # sns.set_context("talk", font_scale=3.6)
-
-        # plot1 = sns.catplot(
-        #     data=df,
-        #     kind="bar",
-        #     x="cache_size",
-        #     y="hit_ratio",
-        #     hue="strategy",
-        #     col="dataset",
-        #     height=10,
-        #     aspect=2
-        # )
-        # plt.show()
-        # plot1.savefig(f'caching_results_{data_kg}/cache_vs_hit_sns_{name_reasoner}_{data_kg}.pdf')
-
-        # plot2 = sns.catplot(
-        #     data=df,
-        #     kind="bar",
-        #     x="cache_size",
-        #     y="avg_jaccard",
-        #     hue="strategy",
-        #     col="dataset",
-        #     height=10,
-        #     aspect=2
-        # )
-        # plt.show()
-        # plot2.savefig(f'caching_results_{data_kg}/cache_vs_jaccard_sns_{name_reasoner}_{data_kg}.pdf')
-
-        # plot3 = sns.catplot(esults = []
-# detailed_results = []
-# for path_kg in args.path_kg:
-#     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
-#         result, D = run_cache(path_kg=path_kg, path_kge=args.path_kge, cache_size=cache_size, name_reasoner=args.name_reasoner,\
-#             eviction=args.eviction_strategy, random_seed=args.random_seed_for_RP)
-#         results.append(result)
-#         detailed_results.append(D)
-
-# all_detailed_results = [item for sublist in detailed_results for item in sublist]
-
-# results = pd.DataFrame(results)
-# # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')
-# plot_scale_factor(results, args.name_reasoner)
-# plot_jaccard_vs_cache_size(results, args.name_reasoner)
-
-# # print(results.to_latex(index=False))
-# all_detailed_results = pd.DataFrame(all_detailed_results)
-# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
-        #     data=df,
-        #     kind="bar",
-        #     x="cache_size",
-        #     y="RT_cache",
-        #     hue="strategy",
-        #     col="dataset",
-        #     height=10,
-        #     aspect=2
-        # )
-        # plt.show()
-        # plot3.savefig(f'caching_results_{data_kg}/cache_vs_RT_sns_{name_reasoner}_{data_kg}.pdf')
-
-
-# results = []
-# detailed_results = []
-# for path_kg in args.path_kg:
-#     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
-#         result, D = run_cache(
-#             path_kg=path_kg,
-#             path_kge=args.path_kge,
-#             cache_size=cache_size,
-#             name_reasoner=args.name_reasoner,
-#             eviction=args.eviction_strategy,
-#             random_seed=args.random_seed_for_RP,
-#             cache_type=args.cache_type,
-#             shuffle_concepts=args.shuffle_concepts
-#         )
-#         results.append(result)
-#         detailed_results.append(D)
-
-# all_detailed_results = [item for sublist in detailed_results for item in sublist]
-
-# results = pd.DataFrame(results)
-# # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')
-
-# plot_scale_factor(results, args.name_reasoner)
-# plot_jaccard_vs_cache_size(results, args.name_reasoner)
-
-# # # print(results.to_latex(index=False))
-
-# all_detailed_results = pd.DataFrame(all_detailed_results)
-# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
+        results.append(result)
+        detailed_results.append(D)
+
+all_detailed_results = [item for sublist in detailed_results for item in sublist]
+
+results = pd.DataFrame(results)
 # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')
+
+plot_scale_factor(results, args.name_reasoner)
+plot_jaccard_vs_cache_size(results, args.name_reasoner)
 
 # # print(results.to_latex(index=False))
 
+all_detailed_results = pd.DataFrame(all_detailed_results)
+bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
 # bar_plot_all_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
 # all_detailed_results.to_csv(f'caching_results/detailed_cache_experiments_{args.name_reasoner}.csv')
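In the script above, get_cache_size turns each ratio k into an absolute cache size of max(1, int(k * num_concepts)). A worked example (the concept count of 400 is hypothetical, not a measurement on the Family KG):

    ratios = [.1, .2, .4, .8, 1.]
    num_concepts = 400  # hypothetical size of concept_generator's output
    sizes = [max(1, int(k * num_concepts)) for k in ratios]
    assert sizes == [40, 80, 160, 320, 400]

The max(1, ...) floor guarantees a usable cache even for tiny ratios or very small ontologies.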
diff --git a/semantic_caching.py b/ontolearn/semantic_caching.py
similarity index 98%
rename from semantic_caching.py
rename to ontolearn/semantic_caching.py
index 2f90761..892069c 100644
--- a/semantic_caching.py
+++ b/ontolearn/semantic_caching.py
@@ -165,13 +165,11 @@ def get_shuffled_concepts(path_kg, data_name):
         random.shuffle(alc_concepts)
         with open(save_file, "wb") as f:
             pickle.dump(alc_concepts, f)
-        print("Generated, shuffled, and saved concepts.")
-
+        print("Generated, shuffled, and saved concepts.")
     return alc_concepts
 
 
 def concept_retrieval(retriever_func, c) -> Set[str]:
-
     return {i.str for i in retriever_func.individuals(c)}
 
 
@@ -256,11 +254,8 @@ def initialize_cache(self, ontology, func, path_onto, third, All_individuals, ha
             # Add negated named concept
             self.put(negated_named_class_str, All_individuals-self.cache[named_class_str])
 
-            negated_class = OWLObjectComplementOf(named_class)
-            existential_negated = OWLObjectSomeValuesFrom(property=role_property, filler=negated_class)
-            existential_negated_str = owl_expression_to_dl(existential_negated)
-
+
             for role in roles:
                 role_property = OWLObjectProperty(role.iri)
                 existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class)
@@ -272,6 +267,8 @@ def initialize_cache(self, ontology, func, path_onto, third, All_individuals, ha
                 # Add ∃ r.(¬C)
+                existential_negated = OWLObjectSomeValuesFrom(property=role_property, filler=negated_class)
+                existential_negated_str = owl_expression_to_dl(existential_negated)
                 if handle_restriction_func is not None:
                     self.put(existential_negated_str, handle_restriction_func(existential_negated))
                 else:
                     self.put(existential_negated_str, func(existential_negated, path_onto, third))
 
@@ -287,8 +284,6 @@ def is_full(self):
         return len(self.cache) >= self.max_size
 
-
-
 def semantic_caching_size(func, cache_size, eviction_strategy, random_seed, cache_type):
     '''This function implements the semantic caching algorithm for ALC concepts as presented in the paper'''
 
@@ -335,7 +330,7 @@ def handle_owl_some_values_from(owl_expression):
         When called, return the retrieval of OWLObjectSomeValuesFrom based on the Algorithm described in the paper
         """
-        if isinstance(owl_expression, OWLObjectSomeValuesFrom): 
+        if isinstance(owl_expression, OWLObjectSomeValuesFrom):
             object_property = owl_expression.get_property()
             filler_expression = owl_expression.get_filler()
             instances = retrieve_from_cache(owl_expression_to_dl(filler_expression))
@@ -531,8 +526,6 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic
         ground_truth = concept_retrieval(symbolic_kb, expr)
 
-
-
         jacc = jaccard_similarity(A, ground_truth)
         jacc_reas = jaccard_similarity(retrieve_ebr, ground_truth)
         Avg_jaccard.append(jacc)
         Avg_jaccard_reas.append(jacc_reas)
         print(f'Expression: {owl_expression_to_dl(expr)}')
         print(f'Jaccard similarity: {jacc}')
         # assert jacc == 1.0
 
-
-
         stats = cached_retriever.get_stats()
         print('-'*50)
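At the end of run_cache, every cached answer set A is validated against the ground truth from the symbolic reasoner via Jaccard similarity. A minimal sketch of that check, assuming the usual intersection-over-union definition (the module's jaccard_similarity may treat edge cases differently):

    def jaccard_similarity(a: set, b: set) -> float:
        # Intersection over union; two empty sets count as identical.
        if not a and not b:
            return 1.0
        return len(a & b) / len(a | b)

    # e.g. jaccard_similarity({"x", "y"}, {"y", "z"}) == 1/3

A value of 1.0 means the cached retrieval matches the reasoner exactly, which is what the commented-out assert above would enforce.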