Skip to content

Commit

Permalink
[config] Implement a process level lock (#857)
Browse files Browse the repository at this point in the history
Changes:
1.) Implement a class, which uses hsetnx for lock.
2.) lock is expired within timeout period or will be released by owner.
3.) After -y prompt, lock is reacquired, because timer could have expired,
    before user enters yes.
Signed-off-by: Praveen Chaudhary [email protected]
  • Loading branch information
Praveen Chaudhary authored Apr 14, 2020
1 parent cff92dd commit 3f651dc
Showing 1 changed file with 107 additions and 0 deletions.
107 changes: 107 additions & 0 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,98 @@ def is_ipaddress(val):
return False
return True

# class for locking entire config process
class ConfigDbLock():
def __init__(self):
self.lockName = "LOCK|configDbLock"
self.timeout = 10
self.pid = os.getpid()
self.client = None

self._acquireLock()
return

def _acquireLock(self):
try:
# connect to db
db_kwargs = dict()
configdb = ConfigDBConnector(**db_kwargs)
configdb.connect()

self.client = configdb.get_redis_client('CONFIG_DB')
# Set lock and expire time. Process may get killed b/w set lock and
# expire call.
if self.client.hsetnx(self.lockName, "PID", self.pid):
self.client.expire(self.lockName, self.timeout)
# if lock exists but expire timer not running, run expire time and
# abort.
elif not self.client.ttl(self.lockName):
click.echo(":::Unable to acquire lock. Resetting timer and aborting:::");
self.client.expire(self.lockName, self.timeout)
sys.exit(1)
else:
click.echo(":::Unable to acquire lock. Aborting:::");
sys.exit(1)
except Exception as e:
click.echo(":::Exception: {}:::".format(e))
sys.exit(1)
return

def reacquireLock(self):
try:
# Try to set lock first
if self.client.hsetnx(self.lockName, "PID", self.pid):
self.client.expire(self.lockName, self.timeout)
# if lock exists, check who owns it
else:
p = self.client.pipeline(True)
# watch, we do not want to work on modified lock
p.watch(self.lockName)
# if current process holding then extend the timer
if p.hget(self.lockName, "PID") == str(self.pid):
self.client.expire(self.lockName, self.timeout)
p.unwatch()
return
else:
# some other process is holding the lock.
click.echo(":::Unable to reacquire lock (lock PID: {}, self.pid: {}):::".\
format(p.hget(self.lockName, "PID"), self.pid))
p.unwatch()
sys.exit(1)
except Exception as e:
click.echo(":::Exception: {}:::".format(e))
sys.exit(1)
return

def _releaseLock(self):
try:
p = self.client.pipeline(True)
# watch, we do not want to work on modified lock
p.watch(self.lockName)
# if current process holding the lock then release it.
if p.hget(self.lockName, "PID") == str(self.pid):
p.multi()
p.delete(self.lockName)
p.execute()
return
# lock may be None, if timer has expired before releasing lock.
elif not self.lockName:
return
else:
# some other process is holding the lock.
click.echo(":::Unable to release lock (lock PID: {}, self.pid: {}):::".\
format(p.hget(self.lockName, "PID"), self.pid))
p.unwatch()
except Exception as e:
click.echo(":::Exception: {}:::".format(e))
return

def __del__(self):
self._releaseLock()
return
# end of class configdblock

configdb_lock = ConfigDbLock()
# This is our main entrypoint - the main 'config' command
@click.group(context_settings=CONTEXT_SETTINGS)
def config():
Expand All @@ -547,6 +638,8 @@ def config():
@click.argument('filename', default='/etc/sonic/config_db.json', type=click.Path())
def save(filename):
"""Export current config DB to a file on disk."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
command = "{} -d --print-data > {}".format(SONIC_CFGGEN_PATH, filename)
run_command(command, display_cmd=True)

Expand All @@ -557,6 +650,9 @@ def load(filename, yes):
"""Import a previous saved config DB dump file."""
if not yes:
click.confirm('Load config from the file %s?' % filename, abort=True)
# reacquire lock after prompt
configdb_lock.reacquireLock()

command = "{} -j {} --write-to-db".format(SONIC_CFGGEN_PATH, filename)
run_command(command, display_cmd=True)

Expand All @@ -568,6 +664,8 @@ def reload(filename, yes, load_sysinfo):
"""Clear current configuration and import a previous saved config DB dump file."""
if not yes:
click.confirm('Clear current config and reload config from the file %s?' % filename, abort=True)
# reacquire lock after prompt
configdb_lock.reacquireLock()

log_info("'reload' executing...")

Expand Down Expand Up @@ -617,6 +715,8 @@ def reload(filename, yes, load_sysinfo):
@click.argument('filename', default='/etc/sonic/device_desc.xml', type=click.Path(exists=True))
def load_mgmt_config(filename):
"""Reconfigure hostname and mgmt interface based on device description file."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
command = "{} -M {} --write-to-db".format(SONIC_CFGGEN_PATH, filename)
run_command(command, display_cmd=True)
#FIXME: After config DB daemon for hostname and mgmt interface is implemented, we'll no longer need to do manual configuration here
Expand All @@ -639,7 +739,10 @@ def load_mgmt_config(filename):
@click.option('-y', '--yes', is_flag=True, callback=_abort_if_false,
expose_value=False, prompt='Reload config from minigraph?')
def load_minigraph():

"""Reconfigure based on minigraph."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
log_info("'load_minigraph' executing...")

# get the device type
Expand Down Expand Up @@ -2304,6 +2407,8 @@ def ztp():
@click.argument('run', required=False, type=click.Choice(["run"]))
def run(run):
"""Restart ZTP of the device."""
# reacquire lock after prompt
configdb_lock.reacquireLock()
command = "ztp run -y"
run_command(command, display_cmd=True)

Expand All @@ -2313,6 +2418,8 @@ def run(run):
@click.argument('disable', required=False, type=click.Choice(["disable"]))
def disable(disable):
"""Administratively Disable ZTP."""
# reacquire lock after prompt
configdb_lock.reacquireLock()

This comment has been minimized.

Copy link
@lguohan

lguohan Apr 17, 2020

Contributor

why only these commands needs the lock? there are many config commands that touch the config db. why only these commands needs the lock?

This comment has been minimized.

Copy link
@praveen-li

praveen-li May 13, 2020

Member

@lguohan @zhenggen-xu
Every config command takes the lock. But for commands, where we provide a user prompt, we try to reacquire the lock, because lock could have been released due to time out.
Lets spend some time in next review meeting to see if it is good to lock at-least config command as of now.

command = "ztp disable -y"
run_command(command, display_cmd=True)

Expand Down

0 comments on commit 3f651dc

Please sign in to comment.