diff --git a/pytest_parallel/plugin.py b/pytest_parallel/plugin.py index f4d9577..4b78842 100644 --- a/pytest_parallel/plugin.py +++ b/pytest_parallel/plugin.py @@ -20,7 +20,6 @@ def pytest_addoption(parser): ) parser.addoption('--n-workers', dest='n_workers', type=int, help='Max number of processes to run in parallel') - parser.addoption('--scheduler-ip', dest='scheduler_ip', type=str, help='IP address of the SLURM-scheduling pytest process (i.e. IP of the host node where you launch pytest). Defaults to first IP returned by `hostname -I`') parser.addoption('--slurm-options', dest='slurm_options', type=str, help='list of SLURM options e.g. "--time=00:30:00 --qos=my_queue --n_tasks=4"') parser.addoption('--slurm-additional-cmds', dest='slurm_additional_cmds', type=str, help='list of commands to pass to SLURM job e.g. "source my_env.sh"') @@ -56,7 +55,6 @@ def pytest_configure(config): # Get options and check dependent/incompatible options scheduler = config.getoption('scheduler') n_workers = config.getoption('n_workers') - scheduler_ip = config.getoption('scheduler_ip') slurm_options = config.getoption('slurm_options') slurm_additional_cmds = config.getoption('slurm_additional_cmds') slurm_worker = config.getoption('_worker') @@ -68,7 +66,6 @@ def pytest_configure(config): assert not slurm_options, 'Option `--slurm-options` only available when `--scheduler=slurm`' assert not slurm_additional_cmds, 'Option `--slurm-additional-cmds` only available when `--scheduler=slurm`' assert not slurm_file, 'Option `--slurm-file` only available when `--scheduler=slurm`' - assert not scheduler_ip, 'Option `--scheduler-ip` only available when `--scheduler=slurm`' if scheduler == 'slurm' and not slurm_worker: assert slurm_options or slurm_file, 'You need to specify either `--slurm-options` or `--slurm-file` when `--scheduler=slurm`' @@ -101,7 +98,6 @@ def pytest_configure(config): 'additional_cmds': slurm_additional_cmds, 'file' : slurm_file, 'sub_command' : slurm_sub_command, - 
# https://stackoverflow.com/a/34177358
def command_exists(cmd_name):
    """Check whether `cmd_name` is on PATH and marked as executable."""
    from shutil import which
    return which(cmd_name) is not None


def _parse_tracepath_ip(tracepath_output):
    """Extract the IP address found on the first line of `tracepath` output.

    The first line is split on ':' and the first whitespace-separated token
    after the colon is taken as the address.
    NOTE(review): this presumes the first line looks like
    ` 1:  <address>  <latency> reached` -- confirm against the `tracepath`
    version deployed on the cluster.

    Args:
        tracepath_output: decoded standard output of the `tracepath` command.

    Returns:
        The address as a string, exactly as printed by `tracepath`.

    Raises:
        ValueError: if no token can be extracted, or if the extracted token is
            not a valid IP address.
    """
    import ipaddress

    first_line = tracepath_output.split('\n')[0]
    try:
        candidate = first_line.split(':')[1].split()[0]
    except IndexError:
        # No ':' on the first line, or nothing but whitespace after it
        raise ValueError(f'no address found in `{first_line}`') from None

    ipaddress.ip_address(candidate)  # raises ValueError if not a valid address
    return candidate


def _get_my_ip_address():
    """Return the IP address of the current host, as reported by `tracepath`.

    Runs `tracepath -4 -n <hostname>` (`-4`: IPv4 only, `-n`: print addresses
    numerically) and parses the address from the first line of its output.

    Returns:
        The IP address of this host, as a string.

    Raises:
        AssertionError: if `tracepath` is not available, if it exits with a
            non-zero status, or if its output cannot be parsed into a valid
            IP address.
    """
    hostname = socket.gethostname()

    # Errors are raised explicitly rather than through `assert` statements so
    # that the checks are still performed when Python runs with `-O`.
    # `AssertionError` is kept so that callers observe the same exception type.
    if not command_exists('tracepath'):
        raise AssertionError('pytest_parallel SLURM scheduler: command `tracepath` is not available')

    cmd = ['tracepath', '-4', '-n', hostname]
    r = subprocess.run(cmd, stdout=subprocess.PIPE)
    if r.returncode != 0:
        raise AssertionError(f'pytest_parallel SLURM scheduler: error running command `{" ".join(cmd)}`')
    ips = r.stdout.decode("utf-8")

    try:
        return _parse_tracepath_ip(ips)
    except ValueError as err:
        raise AssertionError(
            f'pytest_parallel SLURM scheduler: error parsing result `{ips}` of command `{" ".join(cmd)}`'
        ) from err
f'address {given_ip} given by `--scheduler-ip` is in those given by `hostname -I`' - SCHEDULER_IP_ADDRESS = given_ip - else: - SCHEDULER_IP_ADDRESS = ips[0] + SCHEDULER_IP_ADDRESS = _get_my_ip_address() # setup master's socket socket.bind((SCHEDULER_IP_ADDRESS, 0)) # 0: let the OS choose an available port