Skip to content

Commit

Permalink
#487: make subclustering filters configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
cwschilly committed Sep 23, 2024
1 parent db03cad commit 197d8b7
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/lbaf/Execution/lbsClusteringTransferStrategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,15 @@ def __init__(self, criterion, parameters: dict, lgr: Logger):
f"Enter subclustering immediately after cluster swapping: {self.__separate_subclustering}")

# Initialize fraction of maximum load a rank must carry before subclustering is attempted
self.pct_of_max_load = 0.0
self.__subclustering_threshold = parameters.get("subclustering_threshold", 0.0)
self._logger.info(
f"Percentage of maximum load required for subclustering: {self.__subclustering_threshold}")

# Initialize fraction of local imbalance that must be resolved by subcluster
self.minimum_fraction_of_local_imbalance = 0.0
self.__subclustering_minimum_improvement = parameters.get("subclustering_minimum_improvement", 0.0)
self._logger.info(
"Fraction of local imbalance that must be resolved by subcluster: "
f"{self.__subclustering_minimum_improvement}")

# Initialize cluster swap relative threshold
self.__cluster_swap_rtol = parameters.get("cluster_swap_rtol", 0.05)
Expand Down Expand Up @@ -175,8 +180,8 @@ def __transfer_subclusters(self, phase: Phase, r_src: Rank, targets: set, ave_lo
r_src, o_src, r_try)

# Additional filters prior to subclustering
if c_try <= self.minimum_fraction_of_local_imbalance * r_src.get_load() or \
r_src.get_load() < self.pct_of_max_load * max_load[0]:
if c_try <= self.__subclustering_minimum_improvement * r_src.get_load() or \
r_src.get_load() < self.__subclustering_threshold * max_load[0]:
continue

l_try = abs(r_try.get_load() + objects_load - ave_load)
Expand Down
8 changes: 8 additions & 0 deletions src/lbaf/IO/lbsConfigurationValidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,14 @@ def __init__(self, config_to_validate: dict, logger: Logger):
str,
lambda e: e in ALLOWED_TRANSFER_STRATEGIES,
error=f"{get_error_message(ALLOWED_TRANSFER_STRATEGIES)} must be chosen"),
Optional("subclustering_threshold"): And(
float,
lambda x: x >= 0.0,
error="Should be of type 'float' and >= 0.0"),
Optional("subclustering_minimum_improvement"): And(
float,
lambda x: x >= 0.0,
error="Should be of type 'float' and >= 0.0"),
Optional("cluster_swap_rtol"): And(
float,
lambda x: x > 0.0,
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/IO/test_configuration_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,5 +263,27 @@ def test_config_validator_wrong_separate_subclustering(self):
ConfigurationValidator(config_to_validate=configuration, logger=get_logger()).main()
self.assertEqual(err.exception.args[0], "Key 'parameters' error:\nKey 'separate_subclustering' error:\n'incorrect' should be instance of 'bool'")

def test_config_validator_correct_subclustering_filters(self):
    """Check that a configuration setting the subclustering filters is accepted.

    The test succeeds when the validator runs to completion without raising.
    """
    config_path = os.path.join(self.config_dir, "conf_correct_subclustering_filters.yml")
    with open(config_path, "rt", encoding="utf-8") as stream:
        raw_yaml = stream.read()
    parsed_config = yaml.safe_load(raw_yaml)

    # No assertion needed: any schema violation surfaces as an exception
    validator = ConfigurationValidator(config_to_validate=parsed_config, logger=get_logger())
    validator.main()

def test_config_validator_wrong_subclustering_minimum_improvement(self):
    """Check that an invalid ``subclustering_minimum_improvement`` is rejected.

    The validator must raise ``SchemaError`` carrying the custom error text
    attached to that key in the schema.
    """
    config_path = os.path.join(self.config_dir, "conf_wrong_subclustering_minimum_improvement.yml")
    with open(config_path, "rt", encoding="utf-8") as stream:
        raw_yaml = stream.read()
    parsed_config = yaml.safe_load(raw_yaml)

    with self.assertRaises(SchemaError) as raised:
        ConfigurationValidator(config_to_validate=parsed_config, logger=get_logger()).main()

    # Compare against the first exception argument, i.e. the reported message
    expected_message = "Should be of type 'float' and >= 0.0"
    self.assertEqual(raised.exception.args[0], expected_message)

def test_config_validator_wrong_subclustering_threshold(self):
    """Check that an invalid ``subclustering_threshold`` is rejected.

    The validator must raise ``SchemaError`` carrying the custom error text
    attached to that key in the schema.
    """
    config_path = os.path.join(self.config_dir, "conf_wrong_subclustering_threshold.yml")
    with open(config_path, "rt", encoding="utf-8") as stream:
        raw_yaml = stream.read()
    parsed_config = yaml.safe_load(raw_yaml)

    with self.assertRaises(SchemaError) as raised:
        ConfigurationValidator(config_to_validate=parsed_config, logger=get_logger()).main()

    # Compare against the first exception argument, i.e. the reported message
    expected_message = "Should be of type 'float' and >= 0.0"
    self.assertEqual(raised.exception.args[0], expected_message)

if __name__ == "__main__":
unittest.main()
35 changes: 35 additions & 0 deletions tests/unit/config/conf_correct_subclustering_filters.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Fixture: valid configuration exercising both subclustering filter keys.

# Input data
from_data:
  data_stem: ../data/synthetic_lb_data/data
  phase_ids:
    - 0
check_schema: false

# Work model
work_model:
  name: AffineCombination
  parameters:
    alpha: 1.0
    beta: 0.0
    gamma: 0.0

# Algorithm
algorithm:
  name: InformAndTransfer
  phase_id: 0
  parameters:
    n_iterations: 4
    n_rounds: 2
    fanout: 2
    order_strategy: arbitrary
    transfer_strategy: Clustering
    max_subclusters: 10
    # Both filters are optional and must be floats >= 0.0
    subclustering_threshold: 0.2
    subclustering_minimum_improvement: 0.3
    criterion: Tempered
    max_objects_per_transfer: 32
    deterministic_transfer: true

# Output location
output_dir: ../output
output_file_stem: output_file
34 changes: 34 additions & 0 deletions tests/unit/config/conf_wrong_subclustering_minimum_improvement.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Fixture: invalid configuration; subclustering_minimum_improvement is negative.

# Specify input
from_data:
  data_stem: ../data/synthetic_lb_data/data
  phase_ids:
    - 0
check_schema: false

# Specify work model
work_model:
  name: AffineCombination
  parameters:
    alpha: 1.0
    beta: 0.0
    gamma: 0.0

# Specify algorithm
algorithm:
  name: InformAndTransfer
  phase_id: 0
  parameters:
    n_iterations: 4
    n_rounds: 2
    fanout: 2
    order_strategy: arbitrary
    transfer_strategy: Clustering
    max_subclusters: 10
    # Written as a float on purpose: a bare -1 is parsed as an int and would
    # be rejected by the type check before the >= 0.0 bound is evaluated.
    subclustering_minimum_improvement: -1.0
    criterion: Tempered
    max_objects_per_transfer: 32
    deterministic_transfer: true

# Specify output
output_dir: ../output
output_file_stem: output_file
34 changes: 34 additions & 0 deletions tests/unit/config/conf_wrong_subclustering_threshold.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Fixture: invalid configuration; subclustering_threshold is negative.

# Specify input
from_data:
  data_stem: ../data/synthetic_lb_data/data
  phase_ids:
    - 0
check_schema: false

# Specify work model
work_model:
  name: AffineCombination
  parameters:
    alpha: 1.0
    beta: 0.0
    gamma: 0.0

# Specify algorithm
algorithm:
  name: InformAndTransfer
  phase_id: 0
  parameters:
    n_iterations: 4
    n_rounds: 2
    fanout: 2
    order_strategy: arbitrary
    transfer_strategy: Clustering
    max_subclusters: 10
    # Written as a float on purpose: a bare -1 is parsed as an int and would
    # be rejected by the type check before the >= 0.0 bound is evaluated.
    subclustering_threshold: -1.0
    criterion: Tempered
    max_objects_per_transfer: 32
    deterministic_transfer: true

# Specify output
output_dir: ../output
output_file_stem: output_file

0 comments on commit 197d8b7

Please sign in to comment.