Skip to content

Commit

Permalink
spire: pause for supervisor ssh before key scan
Browse files Browse the repository at this point in the history
We pull down the host key for the supervisor via a process involving
ssh-keyscan and a set of fingerprints scraped from the MOTD of the
supervisor. For some reason, on Buster, ssh isn't always running at the
point the MOTD is printed -- so we don't always find the SSH hostkeys
we need.

Add a loop to wait for the SSH server to be up before continuing with
the pull process.
  • Loading branch information
celskeggs committed Jan 16, 2020
1 parent b3ab4e3 commit 8e8bcf4
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 7 deletions.
9 changes: 5 additions & 4 deletions platform/spire/src/ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,24 @@
import util

# Base argv for ssh once the remote host key is trusted: strict host key
# checking is on, and connection attempts give up after ConnectTimeout=1.
SSH_BASE = ["ssh", "-o", "StrictHostKeyChecking=yes", "-o", "ConnectTimeout=1"]
# Base argv for ssh when the remote host key cannot be validated yet: host key
# and host IP checks are turned off and /dev/null is used as the known-hosts
# file. Same ConnectTimeout=1 as SSH_BASE.
SSH_BASE_INSECURE = ["ssh", "-o", "StrictHostKeyChecking=no", "-o", "CheckHostIP=no", "-o", "UserKnownHostsFile=/dev/null", "-o", "ConnectTimeout=1"]
# Base argv for scp, using the same strict options as SSH_BASE.
SCP_BASE = ["scp", "-o", "StrictHostKeyChecking=yes", "-o", "ConnectTimeout=1"]


def ssh_get_login(node: configuration.Node) -> str:
    """Return the root login target for *node*: root@<HOSTNAME>.<EXTERNAL_DOMAIN>."""
    external_name = node.external_dns_name()
    return "root@{}".format(external_name)


def build_ssh(node: configuration.Node, *script: str, insecure: bool = False) -> list:
    """Build the ssh argv that runs *script* as root on *node*.

    Args:
        node: the target node; its login is produced by ssh_get_login().
        *script: the remote command and its arguments, appended after "--".
        insecure: when true, start from SSH_BASE_INSECURE (no host key
            validation) instead of SSH_BASE. Keyword-only.

    Returns:
        A new list suitable for passing to subprocess.
    """
    # A single definition only: the older two-argument variant was shadowed by
    # this one and could never be called.
    base_options = SSH_BASE_INSECURE if insecure else SSH_BASE
    return base_options + [ssh_get_login(node), "--"] + list(script)


def build_scp_up(node: configuration.Node, source_path: str, dest_path: str) -> list:
    """Build the scp argv that uploads *source_path* to *dest_path* on *node*."""
    remote_target = ssh_get_login(node) + ":" + dest_path
    argv = list(SCP_BASE)
    # "--" ends option parsing so the paths are never read as scp options.
    argv.extend(["--", source_path, remote_target])
    return argv


def check_ssh(node: configuration.Node, *script: str, insecure: bool = False) -> None:
    """Run *script* on *node* over ssh and fail loudly if it does not succeed.

    Args:
        node: the target node.
        *script: the remote command and its arguments.
        insecure: forwarded to build_ssh(); when true the connection is made
            without host key validation. Keyword-only.

    Raises:
        subprocess.CalledProcessError: if ssh exits with a non-zero status.
    """
    # A single definition only: the older variant without ``insecure`` was
    # shadowed by this one and could never be called.
    subprocess.check_call(build_ssh(node, *script, insecure=insecure))


def check_ssh_output(node: configuration.Node, *script: str) -> bytes:
Expand Down
4 changes: 2 additions & 2 deletions platform/spire/src/verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ def expect_prometheus_query_bool(query, message, accept_missing=False):


@command.wrap
def check_supervisor_accessible(insecure: bool=False):
    "check whether the supervisor node is accessible over ssh"
    config = configuration.get_config()
    # Probe exactly once by running "true" on the keyserver node. A second,
    # strict-host-key probe ahead of this one would fail before the host key is
    # known and make insecure=True useless.
    ssh.check_ssh(config.keyserver, "true", insecure=insecure)


@command.wrap
Expand Down
18 changes: 17 additions & 1 deletion platform/spire/src/virt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import iso
import seq
import util
import verify


def get_bridge(ip):
Expand Down Expand Up @@ -348,11 +349,26 @@ def boot_with_io(self, phase, output_callback=None, text: bytes=None, delay=None
log_output.close()
raise e

def wait_and_pull_supervisor_key(self, fingerprints):
    """Wait for the supervisor's ssh server to answer, then pull its host key.

    Probes the supervisor up to 20 times, sleeping one second before each
    attempt. As soon as a probe succeeds the loop stops. If every probe fails,
    a warning and the last error are printed. Either way the key pull is then
    attempted with *fingerprints*.

    Args:
        fingerprints: passed through unchanged to access.pull_supervisor_key().
    """
    # Keep the failure under its own name: Python deletes the ``as`` target of
    # an except clause when the handler exits, so reusing that name after the
    # loop would raise UnboundLocalError instead of reporting the error.
    last_error = None
    for _ in range(20):
        time.sleep(1)
        try:
            # needs to be insecure because we aren't ready to validate the new hostkey yet
            verify.check_supervisor_accessible(insecure=True)
            break
        except Exception as err:
            # Best-effort wait: any failure just means "not up yet, try again".
            last_error = err
            print("[delayed supervisor key pull due to error]")
    else:
        # Only reached when no probe succeeded (the loop never hit ``break``).
        print("[supervisor key pull timeout reached; key pull will likely fail]")
        print("[last error: %s]" % last_error)
    access.pull_supervisor_key(fingerprints)

def boot_launch(self, autoadd_fingerprint=False):
    """Run the "launch" boot phase, optionally pulling the supervisor host key.

    Args:
        autoadd_fingerprint: when false, simply boot and return whatever
            boot_with_io("launch") returns. When true, boot output lines are
            fed to a FingerprintExtractor whose callback is
            wait_and_pull_supervisor_key, and this method returns None after
            extractor.wait() completes.
    """
    if not autoadd_fingerprint:
        return self.boot_with_io("launch")
    # Build exactly one extractor. Its callback waits for the supervisor's ssh
    # server before pulling the key, rather than calling
    # access.pull_supervisor_key directly.
    extractor = FingerprintExtractor(self.wait_and_pull_supervisor_key)
    self.boot_with_io("launch", extractor.process_line)
    extractor.wait()

Expand Down

0 comments on commit 8e8bcf4

Please sign in to comment.