diff --git a/eessi/testsuite/constants.py b/eessi/testsuite/constants.py
index b4139b0b..9b7d6ac3 100644
--- a/eessi/testsuite/constants.py
+++ b/eessi/testsuite/constants.py
@@ -11,6 +11,7 @@ INTEL = 'INTEL'
 NODE = 'NODE'
 NVIDIA = 'NVIDIA'
+ALWAYS_REQUEST_GPUS = 'ALWAYS_REQUEST_GPUS'
 
 DEVICE_TYPES = {
     CPU: 'cpu',
@@ -31,6 +32,7 @@ FEATURES = {
     CPU: 'cpu',
     GPU: 'gpu',
+    ALWAYS_REQUEST_GPUS: 'always_request_gpus',
 }
 
 GPU_VENDORS = {
diff --git a/eessi/testsuite/hooks.py b/eessi/testsuite/hooks.py
index d15ed76c..82c46430 100644
--- a/eessi/testsuite/hooks.py
+++ b/eessi/testsuite/hooks.py
@@ -7,12 +7,12 @@
 
 import reframe as rfm
 
-from eessi.testsuite.constants import *  # noqa
+from eessi.testsuite.constants import *
 from eessi.testsuite.utils import (get_max_avail_gpus_per_node, is_cuda_required_module, log,
                                    check_proc_attribute_defined)
 
 
-def assign_default_num_cpus_per_node(test: rfm.RegressionTest):
+def _assign_default_num_cpus_per_node(test: rfm.RegressionTest):
     """
     Check if the default number of cpus per node is already defined in the test
     (e.g. by earlier hooks like set_tag_scale).
@@ -34,6 +34,27 @@ def assign_default_num_cpus_per_node(test: rfm.RegressionTest):
     log(f'default_num_cpus_per_node set to {test.default_num_cpus_per_node}')
 
 
+def _assign_default_num_gpus_per_node(test: rfm.RegressionTest):
+    """
+    Check if the default number of gpus per node is already defined in the test
+    (e.g. by earlier hooks like set_tag_scale).
+    If so, check if it doesn't exceed the maximum available.
+    If not, set default_num_gpus_per_node based on the maximum available gpus and node_part
+    """
+
+    test.max_avail_gpus_per_node = get_max_avail_gpus_per_node(test)
+    if test.default_num_gpus_per_node:
+        # may skip if not enough GPUs
+        test.skip_if(
+            test.default_num_gpus_per_node > test.max_avail_gpus_per_node,
+            f'Number of GPUs per node in selected scale ({test.default_num_gpus_per_node}) is higher than max available'
+            f' ({test.max_avail_gpus_per_node}) in current partition ({test.current_partition.name}).'
+        )
+    else:
+        # no default set yet, so setting one
+        test.default_num_gpus_per_node = math.ceil(test.max_avail_gpus_per_node / test.node_part)
+
+
 def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, num_per: int = 1):
     """
     Assign one task per compute unit (COMPUTE_UNIT[CPU], COMPUTE_UNIT[CPU_SOCKET] or COMPUTE_UNIT[GPU]).
@@ -69,15 +90,18 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n
     # Check if either node_part, or default_num_cpus_per_node and default_num_gpus_per_node are set correctly
     if not (
-        type(test.node_part) == int or
-        (type(test.default_num_cpus_per_node) == int and type(test.default_num_gpus_per_node) == int)
+        type(test.node_part) == int
+        or (type(test.default_num_cpus_per_node) == int and type(test.default_num_gpus_per_node) == int)
     ):
         raise ValueError(
             f'Either node_part ({test.node_part}), or default_num_cpus_per_node ({test.default_num_cpus_per_node}) and'
            f' default num_gpus_per_node ({test.default_num_gpus_per_node}) must be defined and have integer values.'
         )
 
-    assign_default_num_cpus_per_node(test)
+    _assign_default_num_cpus_per_node(test)
+
+    if FEATURES[GPU] in test.current_partition.features:
+        _assign_default_num_gpus_per_node(test)
 
     if compute_unit == COMPUTE_UNIT[GPU]:
         _assign_one_task_per_gpu(test)
@@ -90,6 +114,8 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n
     else:
         raise ValueError(f'compute unit {compute_unit} is currently not supported')
 
+    _check_always_request_gpus(test)
+
 
 def _assign_num_tasks_per_node(test: rfm.RegressionTest, num_per: int = 1):
     """
@@ -112,7 +138,6 @@ def _assign_num_tasks_per_node(test: rfm.RegressionTest, num_per: int = 1):
         test.num_tasks_per_node = num_per
         test.num_cpus_per_task = int(test.default_num_cpus_per_node / test.num_tasks_per_node)
 
-    # num_tasks_per_node is not set, but num_cpus_per_task is
     elif not test.num_tasks_per_node:
         test.num_tasks_per_node = int(test.default_num_cpus_per_node / test.num_cpus_per_task)
@@ -222,11 +247,6 @@ def _assign_one_task_per_gpu(test: rfm.RegressionTest):
         --setvar num_cpus_per_task= and/or
         --setvar num_gpus_per_node=.
 
-    Variables:
-    - max_avail_gpus_per_node: maximum available number of GPUs per node
-    - default_num_gpus_per_node: default number of GPUs per node as defined in the test
-    (e.g. by earlier hooks like set_tag_scale)
-
     Default resources requested:
     - num_gpus_per_node = default_num_gpus_per_node
     - num_tasks_per_node = num_gpus_per_node
@@ -235,22 +255,6 @@ def _assign_one_task_per_gpu(test: rfm.RegressionTest):
     If num_tasks_per_node is set, set num_gpus_per_node equal to either num_tasks_per_node or default_num_gpus_per_node
     (whichever is smallest), unless num_gpus_per_node is also set.
     """
-    max_avail_gpus_per_node = get_max_avail_gpus_per_node(test)
-
-    # Check if the default number of gpus per node is already defined in the test
-    # (e.g. by earlier hooks like set_tag_scale).
-    # If so, check if it doesn't exceed the maximum available.
-    # If not, set default_num_gpus_per_node based on the maximum available gpus and node_part
-    if test.default_num_gpus_per_node:
-        # may skip if not enough GPUs
-        test.skip_if(
-            test.default_num_gpus_per_node > max_avail_gpus_per_node,
-            f'Requested GPUs per node ({test.default_num_gpus_per_node}) is higher than max available'
-            f' ({max_avail_gpus_per_node}) in current partition ({test.current_partition.name}).'
-        )
-    else:
-        # no default set yet, so setting one
-        test.default_num_gpus_per_node = math.ceil(max_avail_gpus_per_node / test.node_part)
 
     # neither num_tasks_per_node nor num_gpus_per_node are set
     if not test.num_tasks_per_node and not test.num_gpus_per_node:
@@ -273,7 +277,7 @@ def _assign_one_task_per_gpu(test: rfm.RegressionTest):
         # limit num_cpus_per_task to the maximum available cpus per gpu
         test.num_cpus_per_task = min(
             int(test.default_num_cpus_per_node / test.num_tasks_per_node),
-            int(test.max_avail_cpus_per_node / max_avail_gpus_per_node)
+            int(test.max_avail_cpus_per_node / test.max_avail_gpus_per_node)
         )
 
     test.num_tasks = test.num_nodes * test.num_tasks_per_node
@@ -303,8 +307,8 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str):
         return
 
     # test.valid_systems wasn't set yet, so set it
-    if len(test.valid_systems) == 0:
-        # test.valid_systems is empty, meaning all tests are filtered out. This hook shouldn't change that
+    if len(test.valid_systems) == 0 or test.valid_systems == [INVALID_SYSTEM]:
+        # test.valid_systems is empty or invalid, meaning all tests are filtered out. This hook shouldn't change that
         return
     # test.valid_systems still at default value, so overwrite
     elif len(test.valid_systems) == 1 and test.valid_systems[0] == '*':
@@ -314,8 +318,8 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str):
         test.valid_systems[0] = f'{test.valid_systems[0]} {valid_systems}'
     else:
         warn_msg = f"valid_systems has multiple ({len(test.valid_systems)}) items,"
-        warn_msg += f" which is not supported by this hook."
-        warn_msg += f" Make sure to handle filtering yourself."
+        warn_msg += " which is not supported by this hook."
+        warn_msg += " Make sure to handle filtering yourself."
         warnings.warn(warn_msg)
         return
@@ -333,6 +337,7 @@ def filter_supported_scales(test: rfm.RegressionTest):
 
     log(f'valid_systems set to {test.valid_systems}')
 
+
 def filter_valid_systems_by_device_type(test: rfm.RegressionTest, required_device_type: str):
     """
     Filter valid_systems by required device type and by whether the module supports CUDA,
@@ -459,3 +464,12 @@ def set_compact_thread_binding(test: rfm.RegressionTest):
     log(f'Set environment variable OMP_PLACES to {test.env_vars["OMP_PLACES"]}')
     log(f'Set environment variable OMP_PROC_BIND to {test.env_vars["OMP_PROC_BIND"]}')
     log(f'Set environment variable KMP_AFFINITY to {test.env_vars["KMP_AFFINITY"]}')
+
+
+def _check_always_request_gpus(test: rfm.RegressionTest):
+    """
+    Make sure we always request enough GPUs if required for the current GPU partition (cluster-specific policy)
+    """
+    if FEATURES[ALWAYS_REQUEST_GPUS] in test.current_partition.features and not test.num_gpus_per_node:
+        test.num_gpus_per_node = test.default_num_gpus_per_node
+        log(f'num_gpus_per_node set to {test.num_gpus_per_node} for partition {test.current_partition.name}')
diff --git a/eessi/testsuite/tests/apps/osu.py b/eessi/testsuite/tests/apps/osu.py
index 1a2b3d0a..4044cc0b 100644
--- a/eessi/testsuite/tests/apps/osu.py
+++ b/eessi/testsuite/tests/apps/osu.py
@@ -51,40 +51,50 @@ class EESSI_OSU_Micro_Benchmarks_pt2pt(osu_benchmark):
     # unset num_tasks_per_node from the hpctestlib.
     num_tasks_per_node = None
 
+    @run_after('init')
+    def filter_scales_2gpus(self):
+        """Filter out scales with < 2 GPUs if running on GPUs"""
+        if (
+            self.device_type == DEVICE_TYPES[GPU]
+            and SCALES[self.scale]['num_nodes'] == 1
+            and SCALES[self.scale].get('num_gpus_per_node', 2) < 2
+        ):
+            self.valid_systems = [INVALID_SYSTEM]
+            log(f'valid_systems set to {self.valid_systems} for scale {self.scale} and device_type {self.device_type}')
+
+    @run_after('init')
+    def filter_benchmark_pt2pt(self):
+        """ Filter out all non-mpi.pt2pt benchmarks """
+        if not self.benchmark_info[0].startswith('mpi.pt2pt'):
+            self.valid_systems = [INVALID_SYSTEM]
+
     @run_after('init')
     def run_after_init(self):
         """hooks to run after init phase"""
-        # Note: device_buffers variable is inherited from the hpctestlib class and adds options to the launcher
-        # commands (before setup) if not equal to 'cpu'. We set it to 'cpu' initially and change it later in this hook depending on the test.
-        self.device_buffers = 'cpu'
+        # Filter on which scales are supported by the partitions defined in the ReFrame configuration
         hooks.filter_supported_scales(self)
         hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type)
-        is_cuda_module = utils.is_cuda_required_module(self.module_name)
-        # This part of the hook is meant to be for the OSU cpu tests. This is required since the non CUDA module should
-        # be able to run in the GPU partition as well. This is specific for this test and not covered by the function
-        # above.
-        if is_cuda_module and self.device_type == DEVICE_TYPES[GPU]:
-            # Sets to cuda as device buffer only if the module is compiled with CUDA.
-            self.device_buffers = 'cuda'
-        # If the device_type is CPU then device buffer should always be CPU.
-        if self.device_type == DEVICE_TYPES[CPU]:
-            self.device_buffers = 'cpu'
-
-        # This part of the code removes the collective communication calls out of the run list since this test is only
-        # meant for pt2pt.
-        if not self.benchmark_info[0].startswith('mpi.pt2pt'):
-            self.valid_systems = []
         hooks.set_modules(self)
 
-    @run_after('setup')
-    def adjust_executable_opts(self):
-        """The option "D D" is only meant for Devices if and not for CPU tests. This option is added by hpctestlib to
-        all pt2pt tests which is not required."""
-        if(self.device_type == DEVICE_TYPES[CPU]):
-            self.executable_opts = [ele for ele in self.executable_opts if ele != 'D']
+        # Set scales as tags
+        hooks.set_tag_scale(self)
+
+    @run_after('init')
+    def set_device_buffers(self):
+        """
+        device_buffers is inherited from the hpctestlib class and adds options to the launcher
+        commands in a @run_before('setup') hook if not equal to 'cpu'.
+        Therefore, we must set device_buffers *before* the @run_before('setup') hooks.
+        """
+        if self.device_type == DEVICE_TYPES[GPU]:
+            self.device_buffers = 'cuda'
+
+        else:
+            # If the device_type is CPU then device_buffers should always be CPU.
+            self.device_buffers = 'cpu'
 
     @run_after('init')
     def set_tag_ci(self):
@@ -108,16 +118,21 @@ def set_mem(self):
         requirement."""
         self.extra_resources = {'memory': {'size': '12GB'}}
 
-    @run_after('init')
-    def set_num_tasks(self):
-        """ Setting scales as tags. """
-        hooks.set_tag_scale(self)
+    @run_after('setup')
+    def adjust_executable_opts(self):
+        """The option "D D" is only meant for Devices and not for CPU tests.
+        This option is added by hpctestlib in a @run_before('setup') hook to all pt2pt tests, which is not required.
+        Therefore, we must override it *after* the 'setup' phase.
+        """
+        if self.device_type == DEVICE_TYPES[CPU]:
+            self.executable_opts = [ele for ele in self.executable_opts if ele != 'D']
+
 
     @run_after('setup')
     def set_num_tasks_per_node(self):
         """ Setting number of tasks per node and cpus per task in this function. This function sets num_cpus_per_task
         for 1 node and 2 node options where the request is for full nodes."""
-        if(SCALES.get(self.scale).get('num_nodes') == 1):
+        if SCALES.get(self.scale).get('num_nodes') == 1:
             hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[NODE], 2)
         else:
             hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[NODE])
@@ -125,27 +140,18 @@ def set_num_tasks_per_node(self):
     @run_after('setup')
     def set_num_gpus_per_node(self):
         """
-        This test does not require gpus and is for host to host within GPU nodes. But some systems do require a GPU
-        allocation for to perform any activity in the GPU nodes.
+        Set number of GPUs per node for GPU-to-GPU tests
         """
-        if(FEATURES[GPU] in self.current_partition.features and not utils.is_cuda_required_module(self.module_name)):
-            max_avail_gpus_per_node = utils.get_max_avail_gpus_per_node(self)
-            # Here for the 2_node test we assign max_avail_gpus_per_node but some systems cannot allocate 1_cpn_2_nodes
-            # for GPUs but need all gpus allocated within the 2 nodes for this work which. The test may fail under such
-            # conditions for the scale 1_cpn_2_nodes because it is simply not allowed.
-            self.num_gpus_per_node = self.default_num_gpus_per_node or max_avail_gpus_per_node
-        elif(FEATURES[GPU] in self.current_partition.features and utils.is_cuda_required_module(self.module_name)):
-            max_avail_gpus_per_node = utils.get_max_avail_gpus_per_node(self)
-            if(SCALES.get(self.scale).get('num_nodes') == 1):
-                # Skip the single node test if there is only 1 device in the node.
-                if(max_avail_gpus_per_node == 1):
-                    self.skip(msg="There is only 1 device within the node. Skipping tests involving only 1 node.")
-                else:
-                    self.num_gpus_per_node = 2
-            else:
-                # Note these settings are for 1_cpn_2_nodes. In that case we want to test for only 1 GPU per node since
-                # we have not requested for full nodes.
-                self.num_gpus_per_node = self.default_num_gpus_per_node or max_avail_gpus_per_node
+        if self.device_type == DEVICE_TYPES[GPU]:
+            # Skip single-node tests with less than 2 GPU devices in the node
+            self.skip_if(
+                SCALES[self.scale]['num_nodes'] == 1 and self.default_num_gpus_per_node < 2,
+                "There are < 2 GPU devices present in the node."
+                f" Skipping tests with device_type={DEVICE_TYPES[GPU]} involving < 2 GPUs and 1 node."
+            )
+            if not self.num_gpus_per_node:
+                self.num_gpus_per_node = self.default_num_gpus_per_node
+                log(f'num_gpus_per_node set to {self.num_gpus_per_node} for partition {self.current_partition.name}')
 
 
 @rfm.simple_test
diff --git a/setup.cfg b/setup.cfg
index 9839603a..87b688e7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -20,3 +20,9 @@ namespace_packages = eessi
 
 [options.packages.find]
 include = eessi*
+
+[flake8]
+max-line-length = 120
+# ignore star imports (F403, F405)
+# ignore obsolete warning (W503)
+ignore = F403, F405, W503
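
To illustrate how the new 'always_request_gpus' feature is consumed, here is a minimal, hypothetical ReFrame site-configuration fragment (not part of this patch); the system name, hostnames, partition name, scheduler, access options, environment name and device count are placeholders and must be adapted to the actual cluster.

# Hypothetical (partial) ReFrame site configuration: only the 'systems' section is shown.
site_configuration = {
    'systems': [
        {
            'name': 'example_cluster',   # placeholder system name
            'descr': 'Example system whose GPU partition always requires a GPU allocation',
            'hostnames': ['login.*'],
            'partitions': [
                {
                    'name': 'gpu',
                    'scheduler': 'slurm',
                    'launcher': 'mpirun',
                    'access': ['-p gpu'],        # site-specific scheduler options
                    'environs': ['default'],
                    # 'gpu' corresponds to FEATURES[GPU]; 'always_request_gpus' corresponds to
                    # FEATURES[ALWAYS_REQUEST_GPUS] and opts this partition in to the
                    # cluster-specific policy implemented by _check_always_request_gpus().
                    'features': ['gpu', 'always_request_gpus'],
                    'devices': [
                        {'type': 'gpu', 'num_devices': 4},   # DEVICE_TYPES[GPU]
                    ],
                },
            ],
        },
    ],
}

With such a partition, assign_tasks_per_compute_unit() first calls _assign_default_num_gpus_per_node() (because the partition carries the 'gpu' feature) and _check_always_request_gpus() then sets num_gpus_per_node to default_num_gpus_per_node for any test that did not request GPUs itself, so a GPU allocation is always made on this partition.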