From 2b3c7bddab605328d843b373c75bc074b8f74c1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillermo=20Juli=C3=A1n?= Date: Fri, 12 Apr 2024 10:25:07 +0200 Subject: [PATCH] [EBPF] Minor fixes for KMT system-probe build (#24598) * Ensure binaries on /root are accessible * Use correct SSH options on instances * Fix SSH names when multiple domains have the same tag * Fix paths in system-probe.build * Add kmt.tmux task * Use all vms by default in kmt.build * Document kmt.tmux --- tasks/kernel_matrix_testing/README.md | 6 ++ tasks/kernel_matrix_testing/compiler.py | 1 + tasks/kmt.py | 77 ++++++++++++++++++++++--- 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/tasks/kernel_matrix_testing/README.md b/tasks/kernel_matrix_testing/README.md index 27c65a125271ef..9cb41aec1a7632 100644 --- a/tasks/kernel_matrix_testing/README.md +++ b/tasks/kernel_matrix_testing/README.md @@ -143,6 +143,12 @@ Then connect to the VM as follows ssh -i /home/kernel-version-testing/ddvm_rsa -o StrictHostKeyChecking=no root@ ``` +#### Connecting to all VMs with tmux + +You can connect to all VMs at once using the `kmt.tmux` task. It will automatically create a new session for your stack (deleting it if it already exists), will open a new window for each instance, and a new pane for each VM in the window. + +A useful command for tmux in these cases is `:set synchronize-panes on`, which will send the same command to all panes at once. This is useful for running the same command in all VMs at once, especially running system-probe all at once. 
+ ### Destroy stack Tear down the stack diff --git a/tasks/kernel_matrix_testing/compiler.py b/tasks/kernel_matrix_testing/compiler.py index d9a348803e3ca0..91ff9d1bfdb687 100644 --- a/tasks/kernel_matrix_testing/compiler.py +++ b/tasks/kernel_matrix_testing/compiler.py @@ -119,6 +119,7 @@ def start(self) -> None: f"chown {uid}:{gid} {CONTAINER_AGENT_PATH} && chown -R {uid}:{gid} {CONTAINER_AGENT_PATH}", user="root" ) + self.exec("chmod a+rx /root", user="root") # Some binaries will be in /root and need to be readable self.exec("apt install sudo", user="root") self.exec("usermod -aG sudo compiler && echo 'compiler ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers", user="root") self.exec("echo conda activate ddpy3 >> /home/compiler/.bashrc", user="compiler") diff --git a/tasks/kmt.py b/tasks/kmt.py index 642988faff1a72..d7614ccc73a36c 100644 --- a/tasks/kmt.py +++ b/tasks/kmt.py @@ -430,6 +430,10 @@ def tests_archive(self): def tools(self): return self.root / self.arch / "tools" + @property + def shared_archive(self): + return self.arch_dir / "shared.tar" + def build_tests_package(ctx: Context, source_dir: str, stack: str, arch: Arch, ci: bool, verbose=True): paths = KMTPaths(stack, arch) @@ -690,7 +694,7 @@ def test( @task( help={ - "vms": "Comma seperated list of vms to target when running tests", + "vms": "Comma seperated list of vms to target when running tests. If None, use all VMs", "stack": "Stack in which the VMs exist. If not provided stack is autogenerated based on branch name", "ssh-key": "SSH key to use for connecting to a remote EC2 instance hosting the target VM. Can be either a name of a file in ~/.ssh, a key name (the comment in the public key) or a full path", "full-rebuild": "Do a full rebuild of all test dependencies to share with VMs, before running tests. 
Useful when changes are not being picked up correctly", @@ -700,7 +704,7 @@ def test( ) def build( ctx: Context, - vms: str, + vms: Optional[str] = None, stack: Optional[str] = None, ssh_key: Optional[str] = None, full_rebuild=False, @@ -715,6 +719,10 @@ def build( if arch is None: arch = "local" + if vms is None: + vms = ",".join(stacks.get_all_vms_in_stack(stack)) + info(f"[+] Running tests on all VMs in stack {stack}: vms={vms}") + arch = full_arch(arch) paths = KMTPaths(stack, arch) paths.arch_dir.mkdir(parents=True, exist_ok=True) @@ -742,21 +750,21 @@ def build( d.run_cmd(ctx, f"/root/fetch_dependencies.sh {arch_mapping[platform.machine()]}") info(f"[+] Dependencies shared with target VM {d}") - shared_archive = os.path.join(CONTAINER_AGENT_PATH, os.path.relpath(paths.arch_dir / "shared.tar", paths.repo_root)) + shared_archive_rel = os.path.join(CONTAINER_AGENT_PATH, os.path.relpath(paths.shared_archive, paths.repo_root)) cc.exec( f"cd {CONTAINER_AGENT_PATH} && git config --global --add safe.directory {CONTAINER_AGENT_PATH} && inv -e system-probe.build --no-bundle", ) - cc.exec(f"tar cf {shared_archive} {EMBEDDED_SHARE_DIR}") + cc.exec(f"tar cf {shared_archive_rel} {EMBEDDED_SHARE_DIR}") if not os.path.exists(system_probe_yaml): raise Exit(f"file {system_probe_yaml} not found") for d in domains: d.copy(ctx, "./bin/system-probe", "/root") - d.copy(ctx, shared_archive, "/") + d.copy(ctx, paths.shared_archive, "/") d.run_cmd(ctx, "tar xf /shared.tar -C /", verbose=verbose) - d.run_cmd(ctx, "mkdir /opt/datadog-agent/run") - d.run_cmd(ctx, "mkdir /etc/datadog-agent") + d.run_cmd(ctx, "mkdir -p /opt/datadog-agent/run") + d.run_cmd(ctx, "mkdir -p /etc/datadog-agent") d.copy(ctx, DEFAULT_CONFIG_PATH, "/etc/datadog-agent/system-probe.yaml") info(f"[+] system-probe built for {d.name} @ /root") @@ -831,10 +839,21 @@ def ssh_config( if instance.ssh_key_path is not None: print(f" IdentityFile {instance.ssh_key_path}") print(" IdentitiesOnly yes") + for key, value in 
SSH_OPTIONS.items(): + print(f" {key} {value}") print("") + multiple_instances_with_same_tag = len({i.tag for i in instance.microvms}) != len(instance.microvms) + for domain in instance.microvms: - print(f"Host kmt-{stack_name}-{instance.arch}-{domain.tag}") + domain_name = domain.tag + if multiple_instances_with_same_tag: + id_parts = domain.name.split('-') + mem = id_parts[-1] + cpu = id_parts[-2] + domain_name += f"-mem{mem}-cpu{cpu}" + + print(f"Host kmt-{stack_name}-{instance.arch}-{domain_name}") print(f" HostName {domain.ip}") if instance.arch != "local": print(f" ProxyJump kmt-{stack_name}-{instance.arch}") @@ -1123,3 +1142,45 @@ def groupby_arch_comp(job: KMTTestRunJob) -> Tuple[str, str]: headers=["Distro", "Login prompt found", "setup-ddvm ok", "Assigned IP", "Downloaded boot log"], ) ) + + +@task() +def tmux(ctx: Context, stack: Optional[str] = None): + """Create a tmux session with panes for each VM in the stack. + + Note that this task requires the tmux command to be available on the system, and the SSH + config to have been generated with the kmt.ssh-config task. 
+ """ + stack = check_and_get_stack(stack) + stack_name = stack.replace('-ddvm', '') + + ctx.run(f"tmux kill-session -t kmt-{stack_name} || true") + ctx.run(f"tmux new-session -d -s kmt-{stack_name}") + + for i, (_, instance) in enumerate(build_infrastructure(stack, try_get_ssh_key(ctx, None)).items()): + window_name = instance.arch + if i == 0: + ctx.run(f"tmux rename-window -t kmt-{stack_name} {window_name}") + else: + ctx.run(f"tmux new-window -t kmt-{stack_name} -n {window_name}") + + multiple_instances_with_same_tag = len({i.tag for i in instance.microvms}) != len(instance.microvms) + + needs_split = False + for domain in instance.microvms: + domain_name = domain.tag + if multiple_instances_with_same_tag: + id_parts = domain.name.split('-') + mem = id_parts[-1] + cpu = id_parts[-2] + domain_name += f"-mem{mem}-cpu{cpu}" + ssh_name = f"kmt-{stack_name}-{instance.arch}-{domain_name}" + + if needs_split: + ctx.run(f"tmux split-window -h -t kmt-{stack_name}:{i}") + needs_split = True + + ctx.run(f"tmux send-keys -t kmt-{stack_name}:{i} 'ssh {ssh_name}' Enter") + ctx.run(f"tmux select-layout -t kmt-{stack_name}:{i} tiled") + + info(f"[+] Tmux session kmt-{stack_name} created. Attach with 'tmux attach -t kmt-{stack_name}'")