Skip to content

Commit

Permalink
Merge pull request #3009 from ComputeCanada/build_lock
Browse files Browse the repository at this point in the history
adding locking to prevent two parallel builds of the same installation directory
  • Loading branch information
boegel authored Mar 30, 2020
2 parents c5545c2 + 4b01f31 commit 2473b71
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 5 deletions.
35 changes: 35 additions & 0 deletions easybuild/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -3043,6 +3043,37 @@ def run_all_steps(self, run_test_cases):

print_msg("building and installing %s..." % self.full_mod_name, log=self.log, silent=self.silent)
trace_msg("installation prefix: %s" % self.installdir)

ignore_locks = build_option('ignore_locks')

if ignore_locks:
self.log.info("Ignoring locks...")
else:
locks_dir = build_option('locks_dir') or os.path.join(install_path('software'), '.locks')
lock_path = os.path.join(locks_dir, '%s.lock' % self.installdir.replace('/', '_'))

# if lock already exists, either abort or wait until it disappears
if os.path.exists(lock_path):
wait_on_lock = build_option('wait_on_lock')
if wait_on_lock:
while os.path.exists(lock_path):
print_msg("lock %s exists, waiting %d seconds..." % (lock_path, wait_on_lock),
silent=self.silent)
time.sleep(wait_on_lock)
else:
raise EasyBuildError("Lock %s already exists, aborting!", lock_path)

# create lock to avoid that another installation running in parallel messes things up;
# we use a directory as a lock, since that's atomically created
try:
mkdir(lock_path, parents=True)
except EasyBuildError as err:
# clean up the error message a bit, get rid of the "Failed to create directory" part + quotes
stripped_err = str(err).split(':', 1)[1].strip().replace("'", '').replace('"', '')
raise EasyBuildError("Failed to create lock %s: %s", lock_path, stripped_err)

self.log.info("Lock created: %s", lock_path)

try:
for (step_name, descr, step_methods, skippable) in steps:
if self._skip_step(step_name, skippable):
Expand All @@ -3057,6 +3088,10 @@ def run_all_steps(self, run_test_cases):

except StopException:
pass
finally:
if not ignore_locks:
remove_dir(lock_path)
self.log.info("Lock removed: %s", lock_path)

# return True for successful build (or stopped build)
return True
Expand Down
3 changes: 3 additions & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
'job_output_dir',
'job_polling_interval',
'job_target_resource',
'locks_dir',
'modules_footer',
'modules_header',
'mpi_cmd_template',
Expand Down Expand Up @@ -225,6 +226,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
'group_writable_installdir',
'hidden',
'ignore_checksums',
'ignore_locks',
'install_latest_eb_release',
'lib64_fallback_sanity_check',
'logtostdout',
Expand All @@ -249,6 +251,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
'use_f90cache',
'use_existing_modules',
'set_default_module',
'wait_on_lock',
],
True: [
'cleanup_builddir',
Expand Down
7 changes: 7 additions & 0 deletions easybuild/tools/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,13 @@ def basic_options(self):
'extended-dry-run-ignore-errors': ("Ignore errors that occur during dry run", None, 'store_true', True),
'force': ("Force to rebuild software even if it's already installed (i.e. if it can be found as module), "
"and skipping check for OS dependencies", None, 'store_true', False, 'f'),
'ignore-locks': ("Ignore locks that prevent two identical installations running in parallel",
None, 'store_true', False),
'job': ("Submit the build as a job", None, 'store_true', False),
'logtostdout': ("Redirect main log to stdout", None, 'store_true', False, 'l'),
'locks-dir': ("Directory to store lock files (should be on a shared filesystem); "
"None implies .locks subdirectory of software installation directory",
None, 'store_or_None', None),
'missing-modules': ("Print list of missing modules for dependencies of specified easyconfigs",
None, 'store_true', False, 'M'),
'only-blocks': ("Only build listed blocks", 'strlist', 'extend', None, 'b', {'metavar': 'BLOCKS'}),
Expand Down Expand Up @@ -434,6 +439,8 @@ def override_options(self):
None, 'store_true', False),
'verify-easyconfig-filenames': ("Verify whether filename of specified easyconfigs matches with contents",
None, 'store_true', False),
'wait-on-lock': ("Wait interval (in seconds) to use when waiting for existing lock to be removed "
"(0: implies no waiting, but exiting with an error)", int, 'store', 0),
'zip-logs': ("Zip logs that are copied to install directory, using specified command",
None, 'store_or_None', 'gzip'),

Expand Down
100 changes: 95 additions & 5 deletions test/framework/toy_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import os
import re
import shutil
import signal
import stat
import sys
import tempfile
Expand Down Expand Up @@ -1415,7 +1416,7 @@ def test_module_only(self):
self.assertTrue(os.path.exists(os.path.join(self.test_installpath, 'software', 'toy', '0.0-deps', 'bin')))
modtxt = read_file(toy_mod)
self.assertTrue(re.search("set root %s" % prefix, modtxt))
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 1)
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 2)
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software', 'toy'))), 1)

# install (only) additional module under a hierarchical MNS
Expand All @@ -1430,7 +1431,7 @@ def test_module_only(self):
# existing install is reused
modtxt2 = read_file(toy_core_mod)
self.assertTrue(re.search("set root %s" % prefix, modtxt2))
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 2)
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 3)
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software', 'toy'))), 1)

# make sure load statements for dependencies are included
Expand All @@ -1441,7 +1442,7 @@ def test_module_only(self):
os.remove(toy_core_mod)

# test installing (only) additional module in Lua syntax (if Lmod is available)
lmod_abspath = which('lmod')
lmod_abspath = os.environ.get('LMOD_CMD') or which('lmod')
if lmod_abspath is not None:
args = common_args[:-1] + [
'--allow-modules-tool-mismatch',
Expand All @@ -1455,7 +1456,7 @@ def test_module_only(self):
# existing install is reused
modtxt3 = read_file(toy_mod + '.lua')
self.assertTrue(re.search('local root = "%s"' % prefix, modtxt3))
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 2)
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 3)
self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software', 'toy'))), 1)

# make sure load statements for dependencies are included
Expand Down Expand Up @@ -2057,7 +2058,7 @@ def test_toy_modaltsoftname(self):
self.assertTrue(os.path.exists(os.path.join(modules_path, 'yot', yot_name)))

# only subdirectories for software should be created
self.assertEqual(os.listdir(software_path), ['toy'])
self.assertEqual(sorted(os.listdir(software_path)), sorted(['toy', '.locks']))
self.assertEqual(sorted(os.listdir(os.path.join(software_path, 'toy'))), ['0.0-one', '0.0-two'])

# only subdirectories for modules with alternative names should be created
Expand Down Expand Up @@ -2516,6 +2517,95 @@ def test_toy_ghost_installdir(self):

self.assertFalse(os.path.exists(toy_installdir))

def test_toy_build_lock(self):
    """
    Test toy installation when a lock is already in place.

    Scenarios exercised, in order:
    * lock present in the default locks directory => build aborts with an error;
    * custom --locks-dir without a lock => build proceeds;
    * lock present in the custom locks directory => build aborts with an error;
    * --ignore-locks => build proceeds despite the lock;
    * --wait-on-lock => build waits until the lock is removed, then proceeds;
    * --wait-on-lock without a lock in place => no waiting at all;
    * locks directory in which the lock cannot be created => clean error message.
    """

    locks_dir = os.path.join(self.test_installpath, 'software', '.locks')
    toy_installdir = os.path.join(self.test_installpath, 'software', 'toy', '0.0')
    # name of the lock is derived from the installation directory (path separators replaced by underscores)
    toy_lock_fn = toy_installdir.replace(os.path.sep, '_') + '.lock'

    # the lock itself is a directory
    toy_lock_path = os.path.join(locks_dir, toy_lock_fn)
    mkdir(toy_lock_path, parents=True)

    error_pattern = "Lock .*_software_toy_0.0.lock already exists, aborting!"
    self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build, raise_error=True, verbose=False)

    locks_dir = os.path.join(self.test_prefix, 'locks')

    # no lock in place, so installation proceeds as normal
    extra_args = ['--locks-dir=%s' % locks_dir]
    self.test_toy_build(extra_args=extra_args, verify=True, raise_error=True)

    # put lock in place in custom locks dir, try again
    toy_lock_path = os.path.join(locks_dir, toy_lock_fn)
    mkdir(toy_lock_path, parents=True)
    self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build,
                          extra_args=extra_args, raise_error=True, verbose=False)

    # also test use of --ignore-locks
    self.test_toy_build(extra_args=extra_args + ['--ignore-locks'], verify=True, raise_error=True)

    # define a context manager that removes a lock after a while, so we can check the use of --wait-on-lock
    class remove_lock_after(object):
        """Context manager that removes the specified lock once the specified number of seconds has passed."""

        def __init__(self, seconds, lock_fp):
            self.seconds = seconds
            self.lock_fp = lock_fp
            # SIGALRM handler that was active before entering the context (restored on exit)
            self.orig_handler = None

        def remove_lock(self, *args):
            """Signal handler: remove the lock (signal number and stack frame are ignored)."""
            remove_dir(self.lock_fp)

        def __enter__(self):
            # signal.signal returns the previously installed handler, keep it around so it can be restored
            self.orig_handler = signal.signal(signal.SIGALRM, self.remove_lock)
            signal.alarm(self.seconds)

        def __exit__(self, exc_type, exc_value, exc_tb):
            # cancel alarm that may still be pending (for example if the build failed before it went off),
            # to avoid that the handler is triggered while a later test is running
            signal.alarm(0)
            # previous handler is None if it was not installed from Python, in which case it can't be restored
            if self.orig_handler is not None:
                signal.signal(signal.SIGALRM, self.orig_handler)

    # wait for lock to be removed, with 1 second interval of checking
    extra_args.append('--wait-on-lock=1')

    wait_regex = re.compile("^== lock .*_software_toy_0.0.lock exists, waiting 1 seconds", re.M)
    ok_regex = re.compile("^== COMPLETED: Installation ended successfully", re.M)

    self.assertTrue(os.path.exists(toy_lock_path))

    # use context manager to remove lock after 3 seconds
    with remove_lock_after(3, toy_lock_path):
        self.mock_stderr(True)
        self.mock_stdout(True)
        self.test_toy_build(extra_args=extra_args, verify=False, raise_error=True, testing=False)
        stderr, stdout = self.get_stderr(), self.get_stdout()
        self.mock_stderr(False)
        self.mock_stdout(False)

        self.assertEqual(stderr, '')

        wait_matches = wait_regex.findall(stdout)
        # we can't rely on an exact number of 'waiting' messages, so let's go with a range...
        self.assertTrue(len(wait_matches) in range(2, 5),
                        "Unexpected number of 'waiting' messages (%d) in: %s" % (len(wait_matches), stdout))

        self.assertTrue(ok_regex.search(stdout), "Pattern '%s' not found in: %s" % (ok_regex.pattern, stdout))

    # when there is no lock in place, --wait-on-lock has no impact
    self.assertFalse(os.path.exists(toy_lock_path))
    self.mock_stderr(True)
    self.mock_stdout(True)
    self.test_toy_build(extra_args=extra_args, verify=False, raise_error=True, testing=False)
    stderr, stdout = self.get_stderr(), self.get_stdout()
    self.mock_stderr(False)
    self.mock_stdout(False)

    self.assertEqual(stderr, '')
    self.assertTrue(ok_regex.search(stdout), "Pattern '%s' not found in: %s" % (ok_regex.pattern, stdout))
    self.assertFalse(wait_regex.search(stdout), "Pattern '%s' found in: %s" % (wait_regex.pattern, stdout))

    # check for clean error on creation of lock
    extra_args = ['--locks-dir=/']
    error_pattern = r"Failed to create lock /.*_software_toy_0.0.lock:.* "
    error_pattern += r"(Read-only file system|Permission denied)"
    self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build,
                          extra_args=extra_args, raise_error=True, verbose=False)


def suite():
""" return all the tests in this file """
Expand Down

0 comments on commit 2473b71

Please sign in to comment.