From 6062e5085b40225a52d8813c979b6d97d43cf34c Mon Sep 17 00:00:00 2001
From: Floris-Jan Willemsen
Date: Tue, 22 Oct 2024 00:29:40 -0700
Subject: [PATCH] Avoid duplicate execution; resolve an issue that sometimes
 led to the hyperparameter tuner returning more results than the search space
 size

---
 kernel_tuner/backends/hypertuner.py |  1 +
 kernel_tuner/hyper.py               | 29 ++++++++++++++++++++++++++---
 kernel_tuner/interface.py           |  2 +-
 test/test_hyper.py                  |  4 ++--
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py
index ab89c39b..b5cf2b6a 100644
--- a/kernel_tuner/backends/hypertuner.py
+++ b/kernel_tuner/backends/hypertuner.py
@@ -38,6 +38,7 @@ def __init__(self, iterations):
         self.observers = [ScoreObserver(self)]
         self.name = platform.processor()
         self.max_threads = 1024
+        self.last_score = None
 
         # set the environment options
         env = dict()
diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py
index 289fcd27..0aacca6c 100644
--- a/kernel_tuner/hyper.py
+++ b/kernel_tuner/hyper.py
@@ -1,10 +1,21 @@
 """Module for functions related to hyperparameter optimization."""
 
+from pathlib import Path
+from random import randint
 import kernel_tuner
 
+def get_random_unique_filename(prefix='', suffix=''):
+    """Get a random, unique filename that does not yet exist."""
+    def randpath():
+        return Path(f"{prefix}{randint(1000, 9999)}{suffix}")
+    path = randpath()
+    while path.exists():
+        path = randpath()
+    return path
+
 def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs):
     """Tune hyperparameters for a given strategy and kernel.
 
@@ -46,8 +57,10 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs)
     if "iterations" in kwargs:
         iterations = kwargs['iterations']
         del kwargs['iterations']
-    if "cache" in kwargs:
-        del kwargs['cache']
+
+    # pass a temporary cache file to avoid duplicate execution
+    cachefile = get_random_unique_filename('temp_', '.json')
+    kwargs['cache'] = str(cachefile)
 
     def put_if_not_present(target_dict, key, value):
         target_dict[key] = value if key not in target_dict else target_dict[key]
@@ -59,8 +72,18 @@ def put_if_not_present(target_dict, key, value):
     kwargs['verify'] = None
     arguments = [target_strategy]
 
-    return kernel_tuner.tune_kernel('hyperparamtuning', None, [], arguments, hyper_params, *args, lang='Hypertuner',
+    # execute the hyperparameter tuning
+    result, env = kernel_tuner.tune_kernel('hyperparamtuning', None, [], arguments, hyper_params, *args, lang='Hypertuner',
                                     objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs)
+
+    # remove the temporary cache file and return only unique results, preserving order
+    cachefile.unlink()
+    result_unique = dict()
+    for r in result:
+        config_id = ",".join(str(r[k]) for k in hyper_params.keys())
+        if config_id not in result_unique:
+            result_unique[config_id] = r
+    return list(result_unique.values()), env
 
 if __name__ == "__main__": # TODO remove in production
     # hyperparams = {
diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py
index 3fcb769a..7570c321 100644
--- a/kernel_tuner/interface.py
+++ b/kernel_tuner/interface.py
@@ -404,7 +404,7 @@ def __deepcopy__(self, _):
     All strategies support the following two options:
 
     1. "max_fevals": the maximum number of unique valid function evaluations (i.e. compiling and
-       benchmarking a kernel configuration the strategy is allowed to perform as part of the optimization.
+       benchmarking a kernel configuration) the strategy is allowed to perform as part of the optimization.
        Note that some strategies implement a default max_fevals of 100.
 
     2. "time_limit": the maximum amount of time in seconds the strategy is allowed to spend on trying to
diff --git a/test/test_hyper.py b/test/test_hyper.py
index 1b7ccc55..7251cb3e 100644
--- a/test/test_hyper.py
+++ b/test/test_hyper.py
@@ -15,6 +15,6 @@ def test_hyper(env):
 
     target_strategy = "genetic_algorithm"
 
-    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, verbose=True, cache=cache_filename)
-    assert len(result) >= 2 # Look into why the hyperparamtuner returns more results than the searchspace size
+    result, env = tune_hyper_params(target_strategy, hyper_params, iterations=1, verbose=True)
+    assert len(result) == 2
     assert 'best_config' in env
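
Note on the deduplication step (illustrative, not part of the patch): tune_hyper_params now keys each result record on its hyperparameter values and keeps only the first occurrence, so the returned list can no longer exceed the search space size. A minimal standalone sketch of that logic, using made-up hyper_params and result records:

    # Standalone sketch of the dedup step added to tune_hyper_params.
    # The hyper_params dict and results list below are made-up examples.
    hyper_params = {"popsize": [10, 20], "maxiter": [50, 100]}
    results = [
        {"popsize": 10, "maxiter": 50, "score": 0.81},
        {"popsize": 10, "maxiter": 50, "score": 0.81},   # duplicate configuration
        {"popsize": 20, "maxiter": 100, "score": 0.93},
    ]

    # Key each result on its hyperparameter values; dict insertion order
    # preserves the original result order, and the first occurrence wins.
    result_unique = dict()
    for r in results:
        config_id = ",".join(str(r[k]) for k in hyper_params.keys())
        if config_id not in result_unique:
            result_unique[config_id] = r

    assert len(result_unique) == 2  # matches the tightened test assertion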