From 31b7828129d5656dde591d805a951356d91b4594 Mon Sep 17 00:00:00 2001
From: Louis-Mozart
Date: Wed, 4 Dec 2024 19:04:19 +0100
Subject: [PATCH 1/3] Removing unnecessary comments

---
 semantic_caching.py | 78 ---------------------------------------------
 1 file changed, 78 deletions(-)

diff --git a/semantic_caching.py b/semantic_caching.py
index 664d3ba..db6757a 100644
--- a/semantic_caching.py
+++ b/semantic_caching.py
@@ -546,81 +546,3 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic
         'strategy': eviction
     }, D
-
-
-
-
-
-# def subsumption_based_caching(func, cache_size):
-#     cache = {}  # Dictionary to store cached results
-
-#     def store(concept, instances):
-#         # Check if cache limit will be exceeded
-#         if len(instances) + len(cache) > cache_size:
-#             purge(len(instances))  # Adjusted to ensure cache size limit
-#         # Add concept and instances to cache
-#         cache[concept] = instances
-
-#     def purge(needed_space):
-#         # Remove oldest items until there's enough space
-#         while len(cache) > needed_space:
-#             cache.pop(next(iter(cache)))
-
-#     def wrapper(*args):
-#         path_onto = args[1]
-#         onto = get_ontology(path_onto).load()
-
-#         # Synchronize the reasoner (e.g., using Pellet)
-#         # with onto:
-#         #     sync_reasoner(infer_property_values=True)
-
-#         all_individuals = {a for a in onto.individuals()}
-#         str_expression = owl_expression_to_dl(args[0])
-#         owl_expression = args[0]
-
-#         # Check cache for existing results
-#         if str_expression in cache:
-#             return cache[str_expression]
-
-#         super_concepts = set()
-#         namespace, class_name = owl_expression.str.split('#')
-#         class_expression = f"{namespace.split('/')[-1]}.{class_name}"
-
-#         all_classes = [i for i in list(onto.classes())]
-
-#         for j in all_classes:
-#             if str(j) == class_expression:
-#                 class_expression = j
-
-#         for D in list(cache.keys()):
-#             # print(owl_expression)
-#             # exit(0)
-#             if D in class_expression.ancestors():  # Check if C ⊑ D
-#                 super_concepts.add(D)
-
-#         print(super_concepts)
-#         exit(0)
-#         # Compute instances based on subsumption
-#         if len(super_concepts) == 0:
-#             instances = all_individuals
-#         else:
-#             instances = set.intersection(
-#                 *[wrapper(D, path_onto) for D in super_concepts]
-#             )
-
-#         # Filter instances by checking if each is an instance of the concept
-#         instance_set = set()
-
-#         for individual in instances:
-#             for type_entry in individual.is_a:
-#                 type_iri = str(type_entry.iri)
-#                 if owl_expression.str == type_iri:
-#                     instance_set.add(individual)
-#                     break
-
-#         # Store in cache
-#         store(str_expression, instance_set)
-#         return instance_set
-
-#     return wrapper
-
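Patch 2 below seeds a "cold" cache before the first query: for every named class C and every role r it precomputes C, ¬C, ∃ r.C and ∃ r.¬C, and it can route the two restrictions through a handle_restriction_func instead of a full reasoner call. A minimal sketch of the seeded expressions, assuming owlapy-style constructors as used in the diff (the import paths are an assumption and may differ between owlapy versions):

    from owlapy.class_expression import (OWLClass, OWLObjectComplementOf,
                                         OWLObjectSomeValuesFrom)
    from owlapy.owl_property import OWLObjectProperty

    def seed_expressions(class_iris, role_iris):
        """Yield C, ¬C, ∃ r.C and ∃ r.¬C for each named class C and role r."""
        for c_iri in class_iris:
            c = OWLClass(c_iri)
            yield c
            yield OWLObjectComplementOf(c)
            for r_iri in role_iris:
                r = OWLObjectProperty(r_iri)
                yield OWLObjectSomeValuesFrom(property=r, filler=c)
                yield OWLObjectSomeValuesFrom(property=r,
                                              filler=OWLObjectComplementOf(c))

Seeding therefore costs on the order of |classes| * (2 + 2 * |roles|) retrievals up front, which is why the wrapper below times it separately as time_initialization.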
From db2ab9b12c90dd509d90e225db60c8478f6ba109 Mon Sep 17 00:00:00 2001
From: Louis-Mozart
Date: Thu, 5 Dec 2024 15:04:58 +0100
Subject: [PATCH 2/3] Fixing minor issues at initialisation time

---
 semantic_caching.py | 157 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 128 insertions(+), 29 deletions(-)

diff --git a/semantic_caching.py b/semantic_caching.py
index db6757a..2f90761 100644
--- a/semantic_caching.py
+++ b/semantic_caching.py
@@ -232,7 +232,7 @@ def put(self, key, value):
         if self.strategy in ['LRU', 'MRU']:
             self.access_times[key] = time.time()  # Record access timestamp
 
-    def initialize_cache(self, ontology, func, path_onto, third, All_individuals):
+    def initialize_cache(self, ontology, func, path_onto, third, All_individuals, handle_restriction_func=None):
         """
         Initialize the cache with precomputed results.
         :param ontology: The loaded ontology.
@@ -245,16 +245,38 @@ def initialize_cache(self, ontology, func, path_onto, third, All_individuals):
         # Fetch object properties and classes from ontology
         roles = list(ontology.object_properties())
         classes = list(ontology.classes())
+
         for cls in classes:
             named_class = OWLClass(cls.iri)
             named_class_str = str(cls).split(".")[-1]
+
+            # Add named concept
             self.put(named_class_str, func(named_class, path_onto, third))
             negated_named_class_str = f"¬{named_class_str}"
+
+            # Add negated named concept
             self.put(negated_named_class_str, All_individuals-self.cache[named_class_str])
+
+            negated_class = OWLObjectComplementOf(named_class)
+            existential_negated = OWLObjectSomeValuesFrom(property=role_property, filler=negated_class)
+            existential_negated_str = owl_expression_to_dl(existential_negated)
+
             for role in roles:
                 role_property = OWLObjectProperty(role.iri)
-                existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class)
-                self.put(owl_expression_to_dl(existential_a), func(existential_a, path_onto, third))
+                existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class)
+
+                # Add ∃ r.C
+                if handle_restriction_func is not None:
+                    self.put(owl_expression_to_dl(existential_a), handle_restriction_func(existential_a))
+                else:
+                    self.put(owl_expression_to_dl(existential_a), func(existential_a, path_onto, third))
+
+                # Add ∃ r.(¬C)
+                if handle_restriction_func is not None:
+                    self.put(existential_negated_str, handle_restriction_func(existential_negated))
+                else:
+                    self.put(existential_negated_str, func(existential_negated, path_onto, third))
+
         self.initialized = True
 
     def get_all_items(self):
@@ -307,42 +329,43 @@ def retrieve_from_cache(expression):
             stats['misses'] += 1
             return None
 
-    def handle_owl_some_values_from():
+    def handle_owl_some_values_from(owl_expression):
         """
         Process the OWLObjectSomeValuesFrom expression locally.
         When called, return the retrieval of OWLObjectSomeValuesFrom based on the Algorithm described in the paper
         """
-        object_property = owl_expression.get_property()
-        filler_expression = owl_expression.get_filler()
-        instances = retrieve_from_cache(owl_expression_to_dl(filler_expression))
-        if instances:
-            result = set()
-            if isinstance(object_property, OWLObjectInverseOf):
-                r = onto.search_one(iri=object_property.get_inverse_property().str)
+        if isinstance(owl_expression, OWLObjectSomeValuesFrom):
+            object_property = owl_expression.get_property()
+            filler_expression = owl_expression.get_filler()
+            instances = retrieve_from_cache(owl_expression_to_dl(filler_expression))
+            if instances is not None:
+                result = set()
+                if isinstance(object_property, OWLObjectInverseOf):
+                    r = onto.search_one(iri=object_property.get_inverse_property().str)
+                else:
+                    r = onto.search_one(iri=object_property.str)
+                individual_map = {ind: onto.search_one(iri=ind) for ind in All_individuals | instances}
+                for ind_a in All_individuals:
+                    a = individual_map[ind_a]
+                    for ind_b in instances:
+                        b = individual_map[ind_b]
+                        if isinstance(object_property, OWLObjectInverseOf):
+                            if a in getattr(b, r.name):
+                                result.add(a)
+                        else:
+                            if b in getattr(a, r.name):
+                                result.add(ind_a)
             else:
-                r = onto.search_one(iri=object_property.str)
-            individual_map = {ind: onto.search_one(iri=ind) for ind in All_individuals | instances}
-            for ind_a in All_individuals:
-                a = individual_map[ind_a]
-                for ind_b in instances:
-                    b = individual_map[ind_b]
-                    if isinstance(object_property, OWLObjectInverseOf):
-                        if a in getattr(b, r.name):
-                            result.add(a)
-                    else:
-                        if b in getattr(a, r.name):
-                            result.add(ind_a)
-        else:
-            result = func(*args)
-        return result
+                result = func(*args)
+            return result
 
     start_time = time.time()  # state the timing before the cache initialization
 
     # Cold cache initialization
     start_time_initialization = time.time()
     if cache_type == 'cold' and not cache.initialized:
-        cache.initialize_cache(onto, func, path_onto, args[-1], All_individuals)
+        cache.initialize_cache(onto, func, path_onto, args[-1], All_individuals, handle_restriction_func=handle_owl_some_values_from)
     time_initialization = time.time() - start_time_initialization
     # start_time = time.time()  # state the timing after the cache initialization
@@ -385,9 +408,9 @@ def handle_owl_some_values_from():
             if cached_result_cold is not None:
                 result = cached_result_cold
             else:
-                result = handle_owl_some_values_from()
+                result = handle_owl_some_values_from(owl_expression)
         else:
-            result = handle_owl_some_values_from()
+            result = handle_owl_some_values_from(owl_expression)
 
     elif isinstance(owl_expression, OWLObjectAllValuesFrom):
         all_values_expr = owl_expression_to_dl(owl_expression)
@@ -546,3 +569,79 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic
         'strategy': eviction
     }, D
+
+
+
+# def subsumption_based_caching(func, cache_size):
+#     cache = {}  # Dictionary to store cached results
+
+#     def store(concept, instances):
+#         # Check if cache limit will be exceeded
+#         if len(instances) + len(cache) > cache_size:
+#             purge(len(instances))  # Adjusted to ensure cache size limit
+#         # Add concept and instances to cache
+#         cache[concept] = instances
+
+#     def purge(needed_space):
+#         # Remove oldest items until there's enough space
+#         while len(cache) > needed_space:
+#             cache.pop(next(iter(cache)))
+
+#     def wrapper(*args):
+#         path_onto = args[1]
+#         onto = get_ontology(path_onto).load()
+
+#         # Synchronize the reasoner (e.g., using Pellet)
+#         # with onto:
+#         #     sync_reasoner(infer_property_values=True)
+
+#         all_individuals = {a for a in onto.individuals()}
+#         str_expression = owl_expression_to_dl(args[0])
+#         owl_expression = args[0]
+
+#         # Check cache for existing results
+#         if str_expression in cache:
+#             return cache[str_expression]
+
+#         super_concepts = set()
+#         namespace, class_name = owl_expression.str.split('#')
+#         class_expression = f"{namespace.split('/')[-1]}.{class_name}"
+
+#         all_classes = [i for i in list(onto.classes())]
+
+#         for j in all_classes:
+#             if str(j) == class_expression:
+#                 class_expression = j
+
+#         for D in list(cache.keys()):
+#             # print(owl_expression)
+#             # exit(0)
+#             if D in class_expression.ancestors():  # Check if C ⊑ D
+#                 super_concepts.add(D)
+
+#         print(super_concepts)
+#         exit(0)
+#         # Compute instances based on subsumption
+#         if len(super_concepts) == 0:
+#             instances = all_individuals
+#         else:
+#             instances = set.intersection(
+#                 *[wrapper(D, path_onto) for D in super_concepts]
+#             )
+
+#         # Filter instances by checking if each is an instance of the concept
+#         instance_set = set()
+
+#         for individual in instances:
+#             for type_entry in individual.is_a:
+#                 type_iri = str(type_entry.iri)
+#                 if owl_expression.str == type_iri:
+#                     instance_set.add(individual)
+#                     break
+
+#         # Store in cache
+#         store(str_expression, instance_set)
+#         return instance_set
+
+#     return wrapper
+
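The rewritten handle_owl_some_values_from above implements the existential retrieval rule: an individual a belongs to ∃ r.C exactly when it has at least one r-successor among the cached instances of the filler C (for an inverse role, when some cached b in C has a as an r-successor). A hedged sketch of that rule over a plain successor map, standing in for the owlready2 getattr(a, r.name) lookups in the diff (the names below are illustrative, not the module's API):

    def existential_retrieval(filler_instances, r_successors):
        # r_successors maps each individual to the set of its r-successors;
        # filler_instances is the cached answer set for the filler C.
        return {a for a, succ in r_successors.items() if succ & filler_instances}

Only when the filler expression misses the cache does the wrapper fall back to the full reasoner call (result = func(*args)).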
From 01c9d81b8ebd479bc8980b0c102a7661497fd679 Mon Sep 17 00:00:00 2001
From: Louis-Mozart
Date: Thu, 5 Dec 2024 16:04:03 +0100
Subject: [PATCH 3/3] Moving the semantic_caching.py file to Ontolearn

---
 examples/retrieval_with_cache.py | 154 +++++-------------
 .../semantic_caching.py          |  19 +--
 2 files changed, 43 insertions(+), 130 deletions(-)
 rename semantic_caching.py => ontolearn/semantic_caching.py (98%)

diff --git a/examples/retrieval_with_cache.py b/examples/retrieval_with_cache.py
index 9123156..4fbe1db 100644
--- a/examples/retrieval_with_cache.py
+++ b/examples/retrieval_with_cache.py
@@ -1,19 +1,18 @@
 import argparse
 import pandas as pd
-from semantic_caching import run_cache, concept_generator
+from ontolearn.semantic_caching import run_cache, concept_generator
 from plot_metrics import *
 import seaborn as sns
 
-#5, 16, 32, 128, 256, 512, 700, 800, 1024, , "KGs/Family/family.owl" .2, .4, .6, .8
 parser = argparse.ArgumentParser()
-parser.add_argument('--cache_size_ratios', type=list, default=[1.], help="cache size is proportional to num_concepts, cache size = k * num_concepts")
+parser.add_argument('--cache_size_ratios', type=list, default=[.1, .2, .4, .8, 1.], help="cache size is proportional to num_concepts, cache size = k * num_concepts")
 parser.add_argument('--path_kg', type=list, default=["KGs/Family/family.owl"])
 parser.add_argument('--path_kge', type=list, default=None)
 parser.add_argument('--name_reasoner', type=str, default='EBR', choices=["EBR",'HermiT', 'Pellet', 'JFact', 'Openllet'])
 parser.add_argument('--eviction_strategy', type=str, default='LRU', choices=['LIFO', 'FIFO', 'LRU', 'MRU', 'RP'])
 parser.add_argument('--random_seed_for_RP', type=int, default=10, help="Random seed if the eviction strategy is RP")
-parser.add_argument('--cache_type', type=str, default='cold', choices=['hot', 'cold'], help="Type of cache to be used. With cold cache we initialize the cache with NC, NNC")
+parser.add_argument('--cache_type', type=str, default='hot', choices=['hot', 'cold'], help="Type of cache to be used. With cold cache we initialize the cache with NC, NNC")
 parser.add_argument('--shuffle_concepts', action='store_true', help="If set, we shuffle the concepts for randomness")
 
 args = parser.parse_args()
@@ -24,136 +23,59 @@ def get_cache_size(list_k, path_kg):
     return [max(1, int(k * data_size)) for k in list_k]
 
 
+# results = []
+# for path_kg in args.path_kg:
+#     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
+#         for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
+#             result, detailed = run_cache(
+#                 path_kg=path_kg,
+#                 path_kge=args.path_kge,
+#                 cache_size=cache_size,
+#                 name_reasoner=args.name_reasoner,
+#                 eviction=strategy,
+#                 random_seed=args.random_seed_for_RP,
+#                 cache_type=args.cache_type,
+#                 shuffle_concepts=args.shuffle_concepts
+#             )
+#             results.append(result)
+
+#             data_kg = result['dataset']
+#     df = pd.DataFrame(results)
+#     print(df)
+
+#     # Save to CSV
+#     df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)
+
+
 results = []
+detailed_results = []
 for path_kg in args.path_kg:
     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
-        for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
-            result, detailed = run_cache(
+        result, D = run_cache(
                 path_kg=path_kg,
                 path_kge=args.path_kge,
                 cache_size=cache_size,
                 name_reasoner=args.name_reasoner,
-                eviction=strategy,
+                eviction=args.eviction_strategy,
                 random_seed=args.random_seed_for_RP,
                 cache_type=args.cache_type,
                 shuffle_concepts=args.shuffle_concepts
            )
-            results.append(result)
-
-            data_kg = result['dataset']
-    df = pd.DataFrame(results)
-    print(df)
-
-    # Save to CSV
-    # df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)
-
-
-# name_reasoners = ["EBR",'HermiT','Pellet','JFact','Openllet']
-# data_kgs = ["family"]
-
-# for data_kg in data_kgs:
-
-#     for name_reasoner in name_reasoners:
-
-#         df = pd.read_csv(f'caching_results_{data_kg}/cache_experiments_{name_reasoner}_{data_kg}.csv')
-#         print(df)
-
-
-        # sns.set_context("talk", font_scale=3.6)
-
-        # plot1 = sns.catplot(
-        #     data=df,
-        #     kind="bar",
-        #     x="cache_size",
-        #     y="hit_ratio",
-        #     hue="strategy",
-        #     col="dataset",
-        #     height=10,
-        #     aspect=2
-        # )
-        # plt.show()
-        # plot1.savefig(f'caching_results_{data_kg}/cache_vs_hit_sns_{name_reasoner}_{data_kg}.pdf')
-
-        # plot2 = sns.catplot(
-        #     data=df,
-        #     kind="bar",
-        #     x="cache_size",
-        #     y="avg_jaccard",
-        #     hue="strategy",
-        #     col="dataset",
-        #     height=10,
-        #     aspect=2
-        # )
-        # plt.show()
-        # plot2.savefig(f'caching_results_{data_kg}/cache_vs_jaccard_sns_{name_reasoner}_{data_kg}.pdf')
-
-        # plot3 = sns.catplot(esults = []
-# detailed_results = []
-# for path_kg in args.path_kg:
-#     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
-#         result, D = run_cache(path_kg=path_kg, path_kge=args.path_kge, cache_size=cache_size, name_reasoner=args.name_reasoner,\
-#             eviction=args.eviction_strategy, random_seed=args.random_seed_for_RP)
-#         results.append(result)
-#         detailed_results.append(D)
-
-# all_detailed_results = [item for sublist in detailed_results for item in sublist]
-
-# results = pd.DataFrame(results)
-# # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')
-# plot_scale_factor(results, args.name_reasoner)
-# plot_jaccard_vs_cache_size(results, args.name_reasoner)
-
-# # print(results.to_latex(index=False))
-# all_detailed_results = pd.DataFrame(all_detailed_results)
-# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
-        #     data=df,
-        #     kind="bar",
-        #     x="cache_size",
-        #     y="RT_cache",
-        #     hue="strategy",
-        #     col="dataset",
-        #     height=10,
-        #     aspect=2
-        # )
-        # plt.show()
-        # plot3.savefig(f'caching_results_{data_kg}/cache_vs_RT_sns_{name_reasoner}_{data_kg}.pdf')
-
-
-# results = []
-# detailed_results = []
-# for path_kg in args.path_kg:
-#     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
-#         result, D = run_cache(
-#             path_kg=path_kg,
-#             path_kge=args.path_kge,
-#             cache_size=cache_size,
-#             name_reasoner=args.name_reasoner,
-#             eviction=args.eviction_strategy,
-#             random_seed=args.random_seed_for_RP,
-#             cache_type=args.cache_type,
-#             shuffle_concepts=args.shuffle_concepts
-#         )
-#         results.append(result)
-#         detailed_results.append(D)
-
-# all_detailed_results = [item for sublist in detailed_results for item in sublist]
-
-# results = pd.DataFrame(results)
-# # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')
-
-# plot_scale_factor(results, args.name_reasoner)
-# plot_jaccard_vs_cache_size(results, args.name_reasoner)
-
-# # # print(results.to_latex(index=False))
-
-# all_detailed_results = pd.DataFrame(all_detailed_results)
-# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
+        results.append(result)
+        detailed_results.append(D)
+
+all_detailed_results = [item for sublist in detailed_results for item in sublist]
+
+results = pd.DataFrame(results)
 # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')
+
+plot_scale_factor(results, args.name_reasoner)
+plot_jaccard_vs_cache_size(results, args.name_reasoner)
 
 # # print(results.to_latex(index=False))
 
+all_detailed_results = pd.DataFrame(all_detailed_results)
+bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
 # bar_plot_all_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
 # all_detailed_results.to_csv(f'caching_results/detailed_cache_experiments_{args.name_reasoner}.csv')
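In the script above, get_cache_size turns each ratio k into an absolute cache size of max(1, int(k * num_concepts)). A worked example (the concept count of 400 is hypothetical, not a measurement on the Family KG):

    ratios = [.1, .2, .4, .8, 1.]
    num_concepts = 400  # hypothetical size of concept_generator's output
    sizes = [max(1, int(k * num_concepts)) for k in ratios]
    assert sizes == [40, 80, 160, 320, 400]

The max(1, ...) floor guarantees a usable cache even for tiny ratios or very small ontologies.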
diff --git a/semantic_caching.py b/ontolearn/semantic_caching.py
similarity index 98%
rename from semantic_caching.py
rename to ontolearn/semantic_caching.py
index 2f90761..892069c 100644
--- a/semantic_caching.py
+++ b/ontolearn/semantic_caching.py
@@ -165,13 +165,11 @@ def get_shuffled_concepts(path_kg, data_name):
         random.shuffle(alc_concepts)
         with open(save_file, "wb") as f:
             pickle.dump(alc_concepts, f)
-        print("Generated, shuffled, and saved concepts.")
-
+        print("Generated, shuffled, and saved concepts.")
     return alc_concepts
 
 
 def concept_retrieval(retriever_func, c) -> Set[str]:
-
     return {i.str for i in retriever_func.individuals(c)}
 
 
@@ -256,11 +254,8 @@ def initialize_cache(self, ontology, func, path_onto, third, All_individuals, ha
             # Add negated named concept
             self.put(negated_named_class_str, All_individuals-self.cache[named_class_str])
 
-            negated_class = OWLObjectComplementOf(named_class)
-            existential_negated = OWLObjectSomeValuesFrom(property=role_property, filler=negated_class)
-            existential_negated_str = owl_expression_to_dl(existential_negated)
-
+
             for role in roles:
                 role_property = OWLObjectProperty(role.iri)
                 existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class)
@@ -272,6 +267,8 @@ def initialize_cache(self, ontology, func, path_onto, third, All_individuals, ha
                 # Add ∃ r.(¬C)
+                existential_negated = OWLObjectSomeValuesFrom(property=role_property, filler=negated_class)
+                existential_negated_str = owl_expression_to_dl(existential_negated)
                 if handle_restriction_func is not None:
                     self.put(existential_negated_str, handle_restriction_func(existential_negated))
                 else:
                     self.put(existential_negated_str, func(existential_negated, path_onto, third))
 
@@ -287,8 +284,6 @@ def is_full(self):
         return len(self.cache) >= self.max_size
 
-
-
 def semantic_caching_size(func, cache_size, eviction_strategy, random_seed, cache_type):
     '''This function implements the semantic caching algorithm for ALC concepts as presented in the paper'''
 
@@ -335,7 +330,7 @@ def handle_owl_some_values_from(owl_expression):
         When called, return the retrieval of OWLObjectSomeValuesFrom based on the Algorithm described in the paper
         """
-        if isinstance(owl_expression, OWLObjectSomeValuesFrom): 
+        if isinstance(owl_expression, OWLObjectSomeValuesFrom):
             object_property = owl_expression.get_property()
             filler_expression = owl_expression.get_filler()
             instances = retrieve_from_cache(owl_expression_to_dl(filler_expression))
@@ -531,8 +526,6 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic
         ground_truth = concept_retrieval(symbolic_kb, expr)
 
-
-
         jacc = jaccard_similarity(A, ground_truth)
         jacc_reas = jaccard_similarity(retrieve_ebr, ground_truth)
         Avg_jaccard.append(jacc)
         Avg_jaccard_reas.append(jacc_reas)
         print(f'Expression: {owl_expression_to_dl(expr)}')
         print(f'Jaccard similarity: {jacc}')
         # assert jacc == 1.0
 
-
-
         stats = cached_retriever.get_stats()
         print('-'*50)
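At the end of run_cache, every cached answer set A is validated against the ground truth from the symbolic reasoner via Jaccard similarity. A minimal sketch of that check, assuming the usual intersection-over-union definition (the module's jaccard_similarity may treat edge cases differently):

    def jaccard_similarity(a: set, b: set) -> float:
        # Intersection over union; two empty sets count as identical.
        if not a and not b:
            return 1.0
        return len(a & b) / len(a | b)

    # e.g. jaccard_similarity({"x", "y"}, {"y", "z"}) == 1/3

A value of 1.0 means the cached retrieval matches the reasoner exactly, which is what the commented-out assert above would enforce.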