From 0436452c0370d017999daa3e48315c0ce300bf14 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 3 Dec 2024 16:28:48 +0100 Subject: [PATCH 1/2] Add gpu config back into reframe for vega --- config/izum_vega.py | 88 ++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/config/izum_vega.py b/config/izum_vega.py index 6577bf32..5973a3c3 100644 --- a/config/izum_vega.py +++ b/config/izum_vega.py @@ -68,50 +68,50 @@ }, 'descr': 'CPU partition Standard, see https://en-doc.vega.izum.si/architecture/' }, - # { - # 'name': 'gpu', - # 'scheduler': 'slurm', - # 'prepare_cmds': [ - # common_eessi_init(), - # # Pass job environment variables like $PATH, etc., into job steps - # 'export SLURM_EXPORT_ENV=ALL', - # # Needed when using srun launcher - # # 'export SLURM_MPI_TYPE=pmix', # WARNING: this broke the GROMACS on Vega - # # Avoid https://github.com/EESSI/software-layer/issues/136 - # # Can be taken out once we don't care about old OpenMPI versions anymore (pre-4.1.1) - # 'export OMPI_MCA_pml=ucx', - # ], - # 'launcher': 'mpirun', - # # Use --export=None to avoid that login environment is passed down to submitted jobs - # 'access': ['-p gpu', '--export=None'], - # 'environs': ['default'], - # 'max_jobs': 60, - # 'devices': [ - # { - # 'type': DEVICE_TYPES[GPU], - # 'num_devices': 4, - # } - # ], - # 'resources': [ - # { - # 'name': '_rfm_gpu', - # 'options': ['--gpus-per-node={num_gpus_per_node}'], - # }, - # { - # 'name': 'memory', - # 'options': ['--mem={size}'], - # } - # ], - # 'features': [ - # FEATURES[GPU], - # ] + list(SCALES.keys()), - # 'extras': { - # # Make sure to round down, otherwise a job might ask for more mem than is available - # # per node - # 'mem_per_node': 476.837 * 1024 # in MiB (should be checked, its unclear from slurm.conf) - # }, - # 'descr': 'GPU partition, see https://en-doc.vega.izum.si/architecture/' - # }, + { + 'name': 'gpu', + 'scheduler': 'slurm', + 'prepare_cmds': [ + common_eessi_init(), + # Pass job environment variables like $PATH, etc., into job steps + 'export SLURM_EXPORT_ENV=ALL', + # Needed when using srun launcher + # 'export SLURM_MPI_TYPE=pmix', # WARNING: this broke the GROMACS on Vega + # Avoid https://github.com/EESSI/software-layer/issues/136 + # Can be taken out once we don't care about old OpenMPI versions anymore (pre-4.1.1) + 'export OMPI_MCA_pml=ucx', + ], + 'launcher': 'mpirun', + # Use --export=None to avoid that login environment is passed down to submitted jobs + 'access': ['-p gpu', '--export=None'], + 'environs': ['default'], + 'max_jobs': 60, + 'devices': [ + { + 'type': DEVICE_TYPES[GPU], + 'num_devices': 4, + } + ], + 'resources': [ + { + 'name': '_rfm_gpu', + 'options': ['--gpus-per-node={num_gpus_per_node}'], + }, + { + 'name': 'memory', + 'options': ['--mem={size}'], + } + ], + 'features': [ + FEATURES[GPU], + ] + list(SCALES.keys()), + 'extras': { + # Make sure to round down, otherwise a job might ask for more mem than is available + # per node + 'mem_per_node': 476.837 * 1024 # in MiB (should be checked, its unclear from slurm.conf) + }, + 'descr': 'GPU partition, see https://en-doc.vega.izum.si/architecture/' + }, ] }, ], From 4e0d2d7645eb168d5b319f953bf898f099fc1be8 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 3 Dec 2024 16:53:01 +0100 Subject: [PATCH 2/2] Update vega config --- config/izum_vega.py | 1 + 1 file changed, 1 insertion(+) diff --git a/config/izum_vega.py b/config/izum_vega.py index 5973a3c3..86847128 100644 --- a/config/izum_vega.py +++ b/config/izum_vega.py @@ -106,6 +106,7 @@ FEATURES[GPU], ] + list(SCALES.keys()), 'extras': { + GPU_VENDOR: GPU_VENDORS[NVIDIA], # Make sure to round down, otherwise a job might ask for more mem than is available # per node 'mem_per_node': 476.837 * 1024 # in MiB (should be checked, its unclear from slurm.conf)