diff --git a/examples/retrieval_with_cache.py b/examples/retrieval_with_cache.py index 4e13167..269ee6b 100644 --- a/examples/retrieval_with_cache.py +++ b/examples/retrieval_with_cache.py @@ -7,7 +7,7 @@ parser = argparse.ArgumentParser() parser.add_argument('--cache_size_ratios', type=list, default=[.1, .2, .4, .8, 1.], help="cache size is proportional to num_concepts, cache size = k * num_concepts") -parser.add_argument('--path_kg', type=list, default=["KGs/Family/family.owl"]) +parser.add_argument('--path_kg', type=str, default=["KGs/Family/father.owl"]) parser.add_argument('--path_kge', type=list, default=None) parser.add_argument('--name_reasoner', type=str, default='EBR', choices=["EBR",'HermiT', 'Pellet', 'JFact', 'Openllet']) parser.add_argument('--eviction_strategy', type=str, default='LRU', choices=['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']) @@ -24,6 +24,7 @@ def get_cache_size(list_k, path_kg): results = [] +detailed_results = [] for path_kg in args.path_kg: for cache_size in get_cache_size(args.cache_size_ratios, path_kg): for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']: @@ -38,44 +39,15 @@ def get_cache_size(list_k, path_kg): shuffle_concepts=args.shuffle_concepts ) results.append(result) + detailed_results.append(detailed) - data_kg = result['dataset'] + data_name = result['dataset'] df = pd.DataFrame(results) + all_detailed_results = pd.DataFrame([item for sublist in detailed_results for item in sublist]) print(df) - + # Save to CSV - df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False) - - -# results = [] -# detailed_results = [] -# for path_kg in args.path_kg: -# for cache_size in get_cache_size(args.cache_size_ratios, path_kg): -# result, D = run_cache( -# path_kg=path_kg, -# path_kge=args.path_kge, -# cache_size=cache_size, -# name_reasoner=args.name_reasoner, -# eviction=args.eviction_strategy, -# random_seed=args.random_seed_for_RP, -# cache_type=args.cache_type, -# shuffle_concepts=args.shuffle_concepts -# ) -# results.append(result) -# detailed_results.append(D) - -# all_detailed_results = [item for sublist in detailed_results for item in sublist] - -# results = pd.DataFrame(results) -# # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv') - -# plot_scale_factor(results, args.name_reasoner) -# plot_jaccard_vs_cache_size(results, args.name_reasoner) - -# # # print(results.to_latex(index=False)) + df.to_csv(f'caching_results_{data_name}/cache_experiments_{args.name_reasoner}_{data_name}_{args.cache_type}.csv', index=False) + df.to_csv(f'caching_results_{data_name}/detailled_experiments_{args.name_reasoner}_{data_name}_{args.cache_type}.csv', index=False) -# all_detailed_results = pd.DataFrame(all_detailed_results) -# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner) -# bar_plot_all_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner) -# all_detailed_results.to_csv(f'caching_results/detailed_cache_experiments_{args.name_reasoner}.csv') diff --git a/ontolearn/semantic_caching.py b/ontolearn/semantic_caching.py index 2050d45..776ac81 100644 --- a/ontolearn/semantic_caching.py +++ b/ontolearn/semantic_caching.py @@ -230,51 +230,39 @@ def put(self, key, value): if self.strategy in ['LRU', 'MRU']: self.access_times[key] = time.time() # Record access timestamp - def initialize_cache(self, ontology, func, path_onto, third, All_individuals, handle_restriction_func=None): + def initialize_cache(self, func, path_onto, third, All_individuals, handle_restriction_func, concepts): """ - Initialize the cache with precomputed results. + Initialize the cache with precomputed results for OWLClass and Existential concepts. :param ontology: The loaded ontology. :param func: Function to retrieve individuals for a given expression. + :param concepts: List of OWL concepts to precompute and store instances for. """ - if self.initialized: return - - # Fetch object properties and classes from ontology - roles = list(ontology.object_properties()) - classes = list(ontology.classes()) - - for cls in classes: - named_class = OWLClass(cls.iri) - named_class_str = str(cls).split(".")[-1] - - # Add named concept - self.put(named_class_str, func(named_class, path_onto, third)) - negated_named_class_str = f"¬{named_class_str}" - - # Add negated named concept - self.put(negated_named_class_str, All_individuals-self.cache[named_class_str]) - negated_class = OWLObjectComplementOf(named_class) - - for role in roles: - role_property = OWLObjectProperty(role.iri) - existential_a = OWLObjectSomeValuesFrom(property=role_property, filler=named_class) - # Add ∃ r.C - if handle_restriction_func is not None: - self.put(owl_expression_to_dl(existential_a), handle_restriction_func(existential_a)) - else: - self.put(owl_expression_to_dl(existential_a), func(existential_a, path_onto, third)) + # Filter OWLClass and OWLObjectSomeValuesFrom concepts + class_concepts = [concept for concept in concepts if isinstance(concept, OWLClass)] + existential_concepts = [concept for concept in concepts if isinstance(concept, OWLObjectSomeValuesFrom)] + + # Process OWLClass concepts + for cls in class_concepts: + concept_str = owl_expression_to_dl(cls) + self.put(concept_str, func(cls, path_onto, third)) + + # Compute and store complement + negated_cls = OWLObjectComplementOf(cls) + negated_cls_str = owl_expression_to_dl(negated_cls) + self.put(negated_cls_str, All_individuals - self.cache[concept_str]) + + # Process Existential concepts + for existential in existential_concepts: + existential_str = owl_expression_to_dl(existential) + if handle_restriction_func is not None: + self.put(existential_str, handle_restriction_func(existential)) + else: + self.put(existential_str, func(existential, path_onto, third)) - # Add ∃ r.(¬C) - existential_negated = OWLObjectSomeValuesFrom(property=role_property, filler=negated_class) - existential_negated_str = owl_expression_to_dl(existential_negated) - if handle_restriction_func is not None: - self.put(existential_negated_str, handle_restriction_func(existential_negated)) - else: - self.put(existential_negated_str, func(existential_negated, path_onto, third)) - - self.initialized = True + self.initialized = True def get_all_items(self): return list(self.cache.keys()) @@ -284,7 +272,7 @@ def is_full(self): return len(self.cache) >= self.max_size -def semantic_caching_size(func, cache_size, eviction_strategy, random_seed, cache_type): +def semantic_caching_size(func, cache_size, eviction_strategy, random_seed, cache_type, concepts): '''This function implements the semantic caching algorithm for ALC concepts as presented in the paper''' @@ -292,6 +280,7 @@ def semantic_caching_size(func, cache_size, eviction_strategy, random_seed, cach loaded_ontologies = {} #Cache for ontologies loaded_individuals = {} #cache for individuals cache_type = cache_type + concepts = concepts stats = { 'hits': 0, 'misses': 0, @@ -360,7 +349,7 @@ def handle_owl_some_values_from(owl_expression): # Cold cache initialization start_time_initialization = time.time() if cache_type == 'cold' and not cache.initialized: - cache.initialize_cache(onto, func, path_onto, args[-1], All_individuals, handle_restriction_func=handle_owl_some_values_from) + cache.initialize_cache(func, path_onto, args[-1], All_individuals, handle_owl_some_values_from, concepts) time_initialization = time.time()- start_time_initialization # start_time = time.time() #state the timing after the cache initialization @@ -482,13 +471,6 @@ def retrieve_other_reasoner(expression, path_kg, name_reasoner='HermiT'): # reas def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, eviction:str, random_seed:int, cache_type:str, shuffle_concepts:str): - if name_reasoner == 'EBR': - # cached_retriever = subsumption_based_caching(retrieve, cache_size=cache_size) - cached_retriever = semantic_caching_size(retrieve, cache_size=cache_size, eviction_strategy=eviction, random_seed=random_seed, cache_type=cache_type) - else: - # cached_retriever = subsumption_based_caching(retrieve, cache_size=cache_size) - cached_retriever = semantic_caching_size(retrieve_other_reasoner, cache_size=cache_size, eviction_strategy=eviction, random_seed=random_seed, cache_type=cache_type) - symbolic_kb = KnowledgeBase(path=path_kg) D = [] Avg_jaccard = [] @@ -500,6 +482,13 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic else: alc_concepts = concept_generator(path_kg) + if name_reasoner == 'EBR': + # cached_retriever = subsumption_based_caching(retrieve, cache_size=cache_size) + cached_retriever = semantic_caching_size(retrieve, cache_size=cache_size, eviction_strategy=eviction, random_seed=random_seed, cache_type=cache_type, concepts=alc_concepts) + else: + # cached_retriever = subsumption_based_caching(retrieve, cache_size=cache_size) + cached_retriever = semantic_caching_size(retrieve_other_reasoner, cache_size=cache_size, eviction_strategy=eviction, random_seed=random_seed, cache_type=cache_type, concepts=alc_concepts) + total_time_ebr = 0 for expr in alc_concepts: @@ -561,4 +550,3 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic -