Skip to content

Commit

Permalink
Merge pull request #2 from Louis-Mozart/retrieval_eval_incomplete
Browse files (browse the repository at this point in the history)
Refactoring
  • Loading branch information
Louis-Mozart authored Dec 11, 2024
2 parents 59b45c9 + e4f6aa6 commit 72dec51
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 113 deletions.
74 changes: 37 additions & 37 deletions examples/retrieval_with_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,59 +23,59 @@ def get_cache_size(list_k, path_kg):
return [max(1, int(k * data_size)) for k in list_k]


# results = []
# for path_kg in args.path_kg:
# for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
# for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
# result, detailed = run_cache(
# path_kg=path_kg,
# path_kge=args.path_kge,
# cache_size=cache_size,
# name_reasoner=args.name_reasoner,
# eviction=strategy,
# random_seed=args.random_seed_for_RP,
# cache_type=args.cache_type,
# shuffle_concepts=args.shuffle_concepts
# )
# results.append(result)

# data_kg = result['dataset']
# df = pd.DataFrame(results)
# print(df)

# # Save to CSV
# df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)


results = []
detailed_results = []
for path_kg in args.path_kg:
for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
result, D = run_cache(
for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
result, detailed = run_cache(
path_kg=path_kg,
path_kge=args.path_kge,
cache_size=cache_size,
name_reasoner=args.name_reasoner,
eviction=args.eviction_strategy,
eviction=strategy,
random_seed=args.random_seed_for_RP,
cache_type=args.cache_type,
shuffle_concepts=args.shuffle_concepts
)
results.append(result)
detailed_results.append(D)
results.append(result)

data_kg = result['dataset']
df = pd.DataFrame(results)
print(df)

# Save to CSV
df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)


# results = []
# detailed_results = []
# for path_kg in args.path_kg:
# for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
# result, D = run_cache(
# path_kg=path_kg,
# path_kge=args.path_kge,
# cache_size=cache_size,
# name_reasoner=args.name_reasoner,
# eviction=args.eviction_strategy,
# random_seed=args.random_seed_for_RP,
# cache_type=args.cache_type,
# shuffle_concepts=args.shuffle_concepts
# )
# results.append(result)
# detailed_results.append(D)

all_detailed_results = [item for sublist in detailed_results for item in sublist]
# all_detailed_results = [item for sublist in detailed_results for item in sublist]

results = pd.DataFrame(results)
# results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')
# results = pd.DataFrame(results)
# # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')

plot_scale_factor(results, args.name_reasoner)
plot_jaccard_vs_cache_size(results, args.name_reasoner)
# plot_scale_factor(results, args.name_reasoner)
# plot_jaccard_vs_cache_size(results, args.name_reasoner)

# # print(results.to_latex(index=False))
# # # print(results.to_latex(index=False))

all_detailed_results = pd.DataFrame(all_detailed_results)
bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
# all_detailed_results = pd.DataFrame(all_detailed_results)
# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
# bar_plot_all_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
# all_detailed_results.to_csv(f'caching_results/detailed_cache_experiments_{args.name_reasoner}.csv')

78 changes: 2 additions & 76 deletions ontolearn/semantic_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def __init__(self, cache_size, strategy='LIFO', random_seed=10):

def _evict(self):
'''empty the cache when it is full using different strategy'''
if len(self.cache) >= self.cache_size:
if len(self.cache) > self.cache_size:
if self.strategy == 'FIFO':
self.cache.popitem(last=False) # Evict the oldest item (first in)
elif self.strategy == 'LIFO':
Expand Down Expand Up @@ -412,15 +412,14 @@ def handle_owl_some_values_from(owl_expression):
some_values_expr = transform_forall_to_exists(all_values_expr)
cached_result = retrieve_from_cache(some_values_expr)
result = (All_individuals - cached_result) if cached_result is not None else func(*args)

else:
result = func(*args)

stats['time'] += (time.time() - start_time)
cache.put(str_expression, result)
return result



def transform_forall_to_exists(expression):
pattern_negated = r'∀ (\w+)\.\(¬(\w+)\)'
replacement_negated = r'∃ \1.\2'
Expand Down Expand Up @@ -563,76 +562,3 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic



# def subsumption_based_caching(func, cache_size):
# cache = {} # Dictionary to store cached results

# def store(concept, instances):
# # Check if cache limit will be exceeded
# if len(instances) + len(cache) > cache_size:
# purge(len(instances)) # Adjusted to ensure cache size limit
# # Add concept and instances to cache
# cache[concept] = instances

# def purge(needed_space):
# # Remove oldest items until there's enough space
# while len(cache) > needed_space:
# cache.pop(next(iter(cache)))

# def wrapper(*args):
# path_onto = args[1]
# onto = get_ontology(path_onto).load()

# # Synchronize the reasoner (e.g., using Pellet)
# # with onto:
# # sync_reasoner(infer_property_values=True)

# all_individuals = {a for a in onto.individuals()}
# str_expression = owl_expression_to_dl(args[0])
# owl_expression = args[0]

# # Check cache for existing results
# if str_expression in cache:
# return cache[str_expression]

# super_concepts = set()
# namespace, class_name = owl_expression.str.split('#')
# class_expression = f"{namespace.split('/')[-1]}.{class_name}"

# all_classes = [i for i in list(onto.classes())]

# for j in all_classes:
# if str(j) == class_expression:
# class_expression = j

# for D in list(cache.keys()):
# # print(owl_expression)
# # exit(0)
# if D in class_expression.ancestors(): # Check if C ⊑ D
# super_concepts.add(D)

# print(super_concepts)
# exit(0)
# # Compute instances based on subsumption
# if len(super_concepts) == 0:
# instances = all_individuals
# else:
# instances = set.intersection(
# *[wrapper(D, path_onto) for D in super_concepts]
# )

# # Filter instances by checking if each is an instance of the concept
# instance_set = set()

# for individual in instances:
# for type_entry in individual.is_a:
# type_iri = str(type_entry.iri)
# if owl_expression.str == type_iri:
# instance_set.add(individual)
# break

# # Store in cache
# store(str_expression, instance_set)
# return instance_set

# return wrapper

0 comments on commit 72dec51

Please sign in to comment.