From 1fddefdc1cf6a994372f414c24ed330f4d3ee23e Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 26 Oct 2024 01:31:11 +0200 Subject: [PATCH] ipsec: Add a helper function to run commands from the monitor. Until now, functions that needed to call external programs like openssl or ipsec commands were using subprocess commands directly. Most of these calls had no failure checks or any logging making it hard to understand what is happening inside the daemon when something doesn't work as intended. Some commands also had a chance to not read the command output in full. That might sound like not a big problem, but in practice it causes ovs-monitor-ipsec to deadlock pluto and itself with certain versions of Libreswan (mainly Libreswan 5+). The order of events is following: 1. ovs-monitor-ipsec calls ipsec status redirecting the output to a pipe. 2. ipsec status calls ipsec whack. 3. ipsec whack connects to pluto and asks for status. 4. ovs-monitor-ipsec doesn't read the pipe in full. 5. ipsec whack blocks on write to the other side of the pipe when it runs out of buffer space. 6. pluto blocks on sendmsg to ipsec whack for the same reason. 7. ovs-monitor-ipsec calls another ipsec command and blocks on connection to pluto. In this scenario the running process is at the mercy of garbage collector and it doesn't run because we're blocked on calling another ipsec command. All the processes are completely blocked and will not do any work until ipsec whack is killed. With this change we're introducing a new function that will be used for all the external process execution commands and will read the full output before returning, avoiding the deadlock. It will also log all the failures as warnings and the commands themselves at the debug level. We'll be adding more logic into this function in later commits as well, so it will not stay that simple. Signed-off-by: Ilya Maximets --- ipsec/ovs-monitor-ipsec.in | 290 +++++++++++++++++-------------------- 1 file changed, 131 insertions(+), 159 deletions(-) diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in index 37c509ac682..4885e048fe1 100755 --- a/ipsec/ovs-monitor-ipsec.in +++ b/ipsec/ovs-monitor-ipsec.in @@ -84,6 +84,28 @@ monitor = None xfrm = None +def run_command(args, description=None): + """ This function runs the process args[0] with args[1:] arguments + and returns a tuple: return-code, stdout, stderr. """ + + if not description: + description = args[-1] + + vlog.dbg("Running %s" % args) + proc = subprocess.Popen(args, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + pout, perr = proc.communicate() + + if proc.returncode or perr: + vlog.warn("Failed to %s; exit code: %d" + % (description, proc.returncode)) + vlog.warn("cmdline: %s" % proc.args) + vlog.warn("stderr: %s" % perr) + vlog.warn("stdout: %s" % pout) + + return proc.returncode, pout.decode(), perr.decode() + + class XFRM(object): """This class is a simple wrapper around ip-xfrm (8) command line utility. We are using this class only for informational purposes @@ -99,13 +121,14 @@ class XFRM(object): where is destination IPv4 address and is SELECTOR of the IPsec policy.""" policies = {} - proc = subprocess.Popen([self.IP, 'xfrm', 'policy'], - stdout=subprocess.PIPE) - while True: - line = proc.stdout.readline().strip().decode() - if line == '': - break - a = line.split(" ") + + ret, pout, perr = run_command([self.IP, 'xfrm', 'policy'], + "get XFRM policies") + if ret: + return policies + + for line in pout.splitlines(): + a = line.strip().split(" ") if len(a) >= 4 and a[0] == "src" and a[2] == "dst": dst = (a[3].split("/"))[0] if dst not in policies: @@ -122,13 +145,14 @@ class XFRM(object): in a dictionary where is destination IPv4 address and is SELECTOR.""" securities = {} - proc = subprocess.Popen([self.IP, 'xfrm', 'state'], - stdout=subprocess.PIPE) - while True: - line = proc.stdout.readline().strip().decode() - if line == '': - break - a = line.split(" ") + + ret, pout, perr = run_command([self.IP, 'xfrm', 'state'], + "get XFRM state") + if ret: + return securities + + for line in pout.splitlines(): + a = line.strip().split(" ") if len(a) >= 4 and a[0] == "sel" \ and a[1] == "src" and a[3] == "dst": remote_ip = a[4].rstrip().split("/")[0] @@ -242,7 +266,7 @@ conn prevent_unencrypted_vxlan f.close() vlog.info("Restarting StrongSwan") - subprocess.call([self.IPSEC, "restart"]) + run_command([self.IPSEC, "restart"], "restart StrongSwan") def get_active_conns(self): """This function parses output from 'ipsec status' command. @@ -252,13 +276,13 @@ conn prevent_unencrypted_vxlan sample line from the parsed outpus as . """ conns = {} - proc = subprocess.Popen([self.IPSEC, 'status'], stdout=subprocess.PIPE) + ret, pout, perr = run_command([self.IPSEC, 'status'], + "get active connections") + if ret: + return conns - while True: - line = proc.stdout.readline().strip().decode() - if line == '': - break - tunnel_name = line.split(":") + for line in pout.splitlines(): + tunnel_name = line.strip().split(":") if len(tunnel_name) < 2: continue m = re.match(r"(.*)(-in-\d+|-out-\d+|-\d+).*", tunnel_name[0]) @@ -341,15 +365,11 @@ conn prevent_unencrypted_vxlan Once strongSwan vici bindings will be distributed with major Linux distributions this function could be simplified.""" vlog.info("Refreshing StrongSwan configuration") - proc = subprocess.Popen([self.IPSEC, "update"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - outs, errs = proc.communicate() - if proc.returncode != 0: - vlog.err("StrongSwan failed to update configuration:\n" - "%s \n %s" % (str(outs), str(errs))) - - subprocess.call([self.IPSEC, "rereadsecrets"]) + + run_command([self.IPSEC, "update"], + "update StrongSwan's configuration") + run_command([self.IPSEC, "rereadsecrets"], "re-read secrets") + # "ipsec update" command does not remove those tunnels that were # updated or that disappeared from the ipsec.conf file. So, we have # to manually remove them by calling "ipsec stroke down-nb " @@ -382,7 +402,8 @@ conn prevent_unencrypted_vxlan if not tunnel or tunnel.version != ver: vlog.info("%s is outdated %u" % (conn, ver)) - subprocess.call([self.IPSEC, "stroke", "down-nb", conn]) + run_command([self.IPSEC, "stroke", "down-nb", conn], + "stroke the outdated %s" % conn) class LibreSwanHelper(object): @@ -460,13 +481,11 @@ conn prevent_unencrypted_vxlan # Collect version infromation self.IPSEC = libreswan_root_prefix + "/usr/sbin/ipsec" self.IPSEC_AUTO = [self.IPSEC] - proc = subprocess.Popen([self.IPSEC, "--version"], - stdout=subprocess.PIPE, - encoding="latin1") - pout, perr = proc.communicate() - v = re.match("^Libreswan v?(.*)$", pout) + ret, pout, perr = run_command([self.IPSEC, "--version"], + "get Libreswan's version") try: + v = re.match("^Libreswan v?(.*)$", pout.strip()) version = int(v.group(1).split(".")[0]) except: version = 0 @@ -513,7 +532,7 @@ conn prevent_unencrypted_vxlan f.close() vlog.info("Restarting LibreSwan") - subprocess.call([self.IPSEC, "restart"]) + run_command([self.IPSEC, "restart"], "restart Libreswan") def config_init(self): self.conf_file = open(self.IPSEC_CONF, "w") @@ -599,8 +618,10 @@ conn prevent_unencrypted_vxlan def refresh(self, monitor): vlog.info("Refreshing LibreSwan configuration") - subprocess.call(self.IPSEC_AUTO + ["--ctlsocket", self.IPSEC_CTL, - "--config", self.IPSEC_CONF, "--rereadsecrets"]) + run_command(self.IPSEC_AUTO + ["--ctlsocket", self.IPSEC_CTL, + "--config", self.IPSEC_CONF, + "--rereadsecrets"], + "re-read secrets") tunnels = set(monitor.tunnels.keys()) # Delete old connections @@ -627,9 +648,10 @@ conn prevent_unencrypted_vxlan if not tunnel or tunnel.version != ver: vlog.info("%s is outdated %u" % (conn, ver)) - subprocess.call(self.IPSEC_AUTO + ["--ctlsocket", - self.IPSEC_CTL, "--config", - self.IPSEC_CONF, "--delete", conn]) + run_command(self.IPSEC_AUTO + + ["--ctlsocket", self.IPSEC_CTL, + "--config", self.IPSEC_CONF, + "--delete", conn], "delete %s" % conn) elif ifname in tunnels: tunnels.remove(ifname) @@ -649,43 +671,43 @@ conn prevent_unencrypted_vxlan # Update shunt policy if changed if monitor.conf_in_use["skb_mark"] != monitor.conf["skb_mark"]: if monitor.conf["skb_mark"]: - subprocess.call(self.IPSEC_AUTO + + run_command(self.IPSEC_AUTO + ["--config", self.IPSEC_CONF, "--ctlsocket", self.IPSEC_CTL, "--add", "--asynchronous", "prevent_unencrypted_gre"]) - subprocess.call(self.IPSEC_AUTO + + run_command(self.IPSEC_AUTO + ["--config", self.IPSEC_CONF, "--ctlsocket", self.IPSEC_CTL, "--add", "--asynchronous", "prevent_unencrypted_geneve"]) - subprocess.call(self.IPSEC_AUTO + + run_command(self.IPSEC_AUTO + ["--config", self.IPSEC_CONF, "--ctlsocket", self.IPSEC_CTL, "--add", "--asynchronous", "prevent_unencrypted_stt"]) - subprocess.call(self.IPSEC_AUTO + + run_command(self.IPSEC_AUTO + ["--config", self.IPSEC_CONF, "--ctlsocket", self.IPSEC_CTL, "--add", "--asynchronous", "prevent_unencrypted_vxlan"]) else: - subprocess.call(self.IPSEC_AUTO + + run_command(self.IPSEC_AUTO + ["--config", self.IPSEC_CONF, "--ctlsocket", self.IPSEC_CTL, "--delete", "--asynchronous", "prevent_unencrypted_gre"]) - subprocess.call(self.IPSEC_AUTO + + run_command(self.IPSEC_AUTO + ["--config", self.IPSEC_CONF, "--ctlsocket", self.IPSEC_CTL, "--delete", "--asynchronous", "prevent_unencrypted_geneve"]) - subprocess.call(self.IPSEC_AUTO + + run_command(self.IPSEC_AUTO + ["--config", self.IPSEC_CONF, "--ctlsocket", self.IPSEC_CTL, "--delete", "--asynchronous", "prevent_unencrypted_stt"]) - subprocess.call(self.IPSEC_AUTO + + run_command(self.IPSEC_AUTO + ["--config", self.IPSEC_CONF, "--ctlsocket", self.IPSEC_CTL, "--delete", @@ -700,14 +722,13 @@ conn prevent_unencrypted_vxlan sample line from the parsed outpus as . """ conns = {} - proc = subprocess.Popen([self.IPSEC, 'status', '--ctlsocket', - self.IPSEC_CTL], stdout=subprocess.PIPE) - - while True: - line = proc.stdout.readline().strip().decode() - if line == '': - break + ret, pout, perr = run_command([self.IPSEC, 'status', + '--ctlsocket', self.IPSEC_CTL], + "get active connections") + if ret: + return conns + for line in pout.splitlines(): m = re.search(r"#\d+: \"(.*)\".*", line) if not m: continue @@ -732,15 +753,12 @@ conn prevent_unencrypted_vxlan # the "ipsec auto --start" command is lost. Just retry to make sure # the command is received by LibreSwan. while True: - proc = subprocess.Popen(self.IPSEC_AUTO + - ["--config", self.IPSEC_CONF, - "--ctlsocket", self.IPSEC_CTL, - "--start", - "--asynchronous", conn], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - perr = str(proc.stderr.read()) - pout = str(proc.stdout.read()) + ret, pout, perr = run_command(self.IPSEC_AUTO + + ["--config", self.IPSEC_CONF, + "--ctlsocket", self.IPSEC_CTL, + "--start", + "--asynchronous", conn], + "start %s" % conn) if not re.match(r".*Connection refused.*", perr) and \ not re.match(r".*need --listen.*", pout): break @@ -748,101 +766,59 @@ conn prevent_unencrypted_vxlan if re.match(r".*[F|f]ailed to initiate connection.*", pout): vlog.err('Failed to initiate connection through' ' Interface %s.\n' % (conn.split('-')[0])) - vlog.err(pout) + vlog.err("stdout: %s" % pout) def _nss_clear_database(self): """Remove all OVS IPsec related state from the NSS database""" - try: - proc = subprocess.Popen(['certutil', '-L', '-d', - self.IPSEC_D], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True) - lines = proc.stdout.readlines() - - for line in lines: - s = line.strip().split() - if len(s) < 1: - continue - name = s[0] - if name.startswith(self.CERT_PREFIX): - self._nss_delete_cert(name) - elif name.startswith(self.CERTKEY_PREFIX): - self._nss_delete_cert_and_key(name) + ret, pout, perr = run_command(['certutil', '-L', '-d', self.IPSEC_D], + "clear NSS database") + if ret: + return - except Exception as e: - vlog.err("Failed to clear NSS database.\n" + str(e)) + for line in pout.splitlines(): + s = line.strip().split() + if len(s) < 1: + continue + name = s[0] + if name.startswith(self.CERT_PREFIX): + self._nss_delete_cert(name) + elif name.startswith(self.CERTKEY_PREFIX): + self._nss_delete_cert_and_key(name) def _nss_import_cert(self, cert, name, cert_type): """Cert_type is 'CT,,' for the CA certificate and 'P,P,P' for the normal certificate.""" - try: - proc = subprocess.Popen(['certutil', '-A', '-a', '-i', cert, - '-d', self.IPSEC_D, '-n', - name, '-t', cert_type], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - proc.wait() - if proc.returncode: - raise Exception(proc.stderr.read()) - except Exception as e: - vlog.err("Failed to import certificate into NSS.\n" + str(e)) + run_command(['certutil', '-A', '-a', '-i', cert, '-d', self.IPSEC_D, + '-n', name, '-t', cert_type], + "import certificate %s into NSS" % name) def _nss_delete_cert(self, name): - try: - proc = subprocess.Popen(['certutil', '-D', '-d', - self.IPSEC_D, '-n', name], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - proc.wait() - if proc.returncode: - raise Exception(proc.stderr.read()) - except Exception as e: - vlog.err("Failed to delete certificate from NSS.\n" + str(e)) + run_command(['certutil', '-D', '-d', self.IPSEC_D, '-n', name], + "delete certificate %s from NSS" % name) def _nss_import_cert_and_key(self, cert, key, name): - try: - # Avoid deleting other files - path = os.path.abspath('/tmp/%s.p12' % name) - if not path.startswith('/tmp/'): - raise Exception("Illegal certificate name!") - - # Create p12 file from pem files - proc = subprocess.Popen(['openssl', 'pkcs12', '-export', - '-in', cert, '-inkey', key, '-out', - path, '-name', name, '-passout', 'pass:'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - proc.wait() - if proc.returncode: - raise Exception(proc.stderr.read()) - - # Load p12 file to the database - proc = subprocess.Popen(['pk12util', '-i', path, '-d', - self.IPSEC_D, '-W', ''], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - proc.wait() - if proc.returncode: - raise Exception(proc.stderr.read()) - - except Exception as e: - vlog.err("Import cert and key failed.\n" + str(e)) + # Avoid deleting other files + path = os.path.abspath('/tmp/%s.p12' % name) + if not path.startswith('/tmp/'): + vlog.err("Illegal certificate name '%s'!" % name) + return + + if run_command(['openssl', 'pkcs12', '-export', + '-in', cert, '-inkey', key, + '-out', path, '-name', name, + '-passout', 'pass:'], + "create p12 file from pem files")[0]: + return + + # Load p12 file to the database + run_command(['pk12util', '-i', path, '-d', self.IPSEC_D, '-W', ''], + "load p12 file to the NSS database") os.remove(path) def _nss_delete_cert_and_key(self, name): - try: - # Delete certificate and private key - proc = subprocess.Popen(['certutil', '-F', '-d', - self.IPSEC_D, '-n', name], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - proc.wait() - if proc.returncode: - raise Exception(proc.stderr.read()) - - except Exception as e: - vlog.err("Delete cert and key failed.\n" + str(e)) + # Delete certificate and private key + run_command(['certutil', '-F', '-d', self.IPSEC_D, '-n', name], + "delete certificate and private key for %s" % name) class IPsecTunnel(object): @@ -1220,19 +1196,15 @@ class IPsecMonitor(object): self.ike_helper.refresh(self) def _get_cn_from_cert(self, cert): - try: - proc = subprocess.Popen(['openssl', 'x509', '-noout', '-subject', - '-nameopt', 'RFC2253', '-in', cert], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - proc.wait() - if proc.returncode: - raise Exception(proc.stderr.read()) - m = re.search(r"CN=(.+?),", proc.stdout.readline().decode()) - if not m: - raise Exception("No CN in the certificate subject.") - except Exception as e: - vlog.warn(str(e)) + ret, pout, perr = run_command(['openssl', 'x509', '-noout', '-subject', + '-nameopt', 'RFC2253', '-in', cert], + "get certificate %s options" % cert) + if ret: + return None + + m = re.search(r"CN=(.+?),", pout.strip()) + if not m: + vlog.warn("No CN in the certificate subject (%s)." % cert) return None return m.group(1)