diff --git a/Dockerfile b/Dockerfile index 6356f0ef..a5f2ec84 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,14 +22,15 @@ COPY patches patches # QEMU_PATCHES defines additional patches to apply before compilation ARG QEMU_PATCHES=cpu-max # QEMU_PATCHES_ALL defines all patches to apply before compilation -ARG QEMU_PATCHES_ALL=${QEMU_PATCHES},alpine-patches,zero-init-msghdr,sched +ARG QEMU_PATCHES_ALL=${QEMU_PATCHES},alpine-patches ARG QEMU_PRESERVE_ARGV0 RUN < +Date: Mon, 1 Jun 2020 23:08:25 +0000 +Subject: [PATCH] linux-user: have execve call qemu via /proc/self/exe to not + rely on binfmt_misc + +It is assumed that when a guest program calls execve syscall it wants to +execute a program on the same guest architecture and not the host architecture. + +Previously, such a guest program would have execve syscall error out with: +"exec format error". + +A common solution is to register the qemu binary in binfmt_misc but that is not a +userland-friendly solution, requiring to modify kernel state. + +This patch injects /proc/self/exe as the first parameter and the qemu program name +as argv[0] to execve. 
+ +Signed-off-by: Tibor Vass +--- + linux-user/syscall.c | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index f65045efe6..73054926a0 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8776,10 +8776,37 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + envc++; + } + +- argp = g_new0(char *, argc + 1); ++ argp = g_new0(char *, argc + 4); + envp = g_new0(char *, envc + 1); + +- for (gp = guest_argp, q = argp; gp; ++ if (!(p = lock_user_string(arg1))) ++ goto execve_efault; ++ ++ /* if pathname is /proc/self/exe then retrieve the path passed to qemu via command line */ ++ if (is_proc_myself(p, "exe")) { ++ CPUState *cpu = env_cpu((CPUArchState *)cpu_env); ++ TaskState *ts = cpu->opaque; ++ p = ts->bprm->filename; ++ } ++ ++ /* retrieve guest argv0 */ ++ if (get_user_ual(addr, guest_argp)) ++ goto execve_efault; ++ ++ /* ++ * From the guest, the call ++ * execve(pathname, [argv0, argv1], envp) ++ * on the host, becomes: ++ * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) ++ * where qemu_progname is the error message prefix for qemu ++ */ ++ argp[0] = (char*)error_get_progname(); ++ argp[1] = (char*)"-0"; ++ argp[2] = (char*)lock_user_string(addr); ++ argp[3] = p; ++ ++ /* copy guest argv1 onwards to host argv4 onwards */ ++ for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; + gp += sizeof(abi_ulong), q++) { + if (get_user_ual(addr, gp)) + goto execve_efault; +@@ -8801,8 +8828,6 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + } + *q = NULL; + +- if (!(p = lock_user_string(arg1))) +- goto execve_efault; + /* Although execve() is not an interruptible syscall it is + * a special case where we must use the safe_syscall wrapper: + * if we allow a signal to happen before we make the host +@@ -8813,7 +8838,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long 
arg1, + * before the execve completes and makes it the other + * program's problem. + */ +- ret = get_errno(safe_execve(p, argp, envp)); ++ ret = get_errno(safe_execve("/proc/self/exe", argp, envp)); + unlock_user(p, arg1, 0); + + goto execve_end; +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v7.0/0002-linux-user-lookup-user-program-in-PATH.patch b/patches/buildkit-direct-execve-v7.0/0002-linux-user-lookup-user-program-in-PATH.patch new file mode 100644 index 00000000..30e103c5 --- /dev/null +++ b/patches/buildkit-direct-execve-v7.0/0002-linux-user-lookup-user-program-in-PATH.patch @@ -0,0 +1,76 @@ +From d83023eb7a0574cad224c7d88ac8dcf9d745afa3 Mon Sep 17 00:00:00 2001 +From: Tibor Vass +Date: Tue, 2 Jun 2020 10:39:48 +0000 +Subject: [PATCH] linux-user: lookup user program in PATH + +Signed-off-by: Tibor Vass +--- + linux-user/main.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 44 insertions(+), 1 deletion(-) + +diff --git a/linux-user/main.c b/linux-user/main.c +index fbc9bcfd5f..30f163de81 100644 +--- a/linux-user/main.c ++++ b/linux-user/main.c +@@ -558,6 +558,45 @@ static void usage(int exitcode) + exit(exitcode); + } + ++/* ++ * path_lookup searches for an executable filename in the directories named by the PATH environment variable. ++ * Returns a copy of filename if it is an absolute path or could not find a match. ++ * Caller is responsible to free returned string. ++ * Adapted from musl's execvp implementation. 
++ */
++static char *path_lookup(char *filename) {
++    const char *p, *z, *path = getenv("PATH");
++    size_t l, k;
++    struct stat buf;
++    char *b;
++
++    /* reject NULL; an absolute path or an unset PATH needs no search (full-length copy) */
++    if (!filename) { errno = EINVAL; return NULL; }
++    if (!path || filename[0] == '/') return strdup(filename);
++
++    k = strnlen(filename, NAME_MAX+1);
++    if (k > NAME_MAX) {
++        errno = ENAMETOOLONG;
++        return NULL;
++    }
++    l = strnlen(path, PATH_MAX-1)+1;
++    if (!(b = malloc(l+k+1))) return NULL; /* one scratch buffer reused for every candidate */
++
++    for (p = path; ; p = z) {
++        z = strchrnul(p, ':');
++        if ((size_t)(z-p) < l) { /* entries that cannot fit in b are skipped */
++            memcpy(b, p, z-p);
++            b[z-p] = '/';
++            memcpy(b+(z-p)+(z>p), filename, k+1); /* empty entry: name overwrites the '/' */
++            if (!stat(b, &buf) && !(buf.st_mode & S_IFDIR) && (buf.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
++                return b;
++        }
++        if (!*z++) break;
++    }
++    free(b); /* no match: drop the scratch buffer instead of leaking it */
++    return strdup(filename);
++}
++
+ static int parse_args(int argc, char **argv)
+ {
+     const char *r;
+@@ -623,7 +662,11 @@ static int parse_args(int argc, char **argv)
+         exit(EXIT_FAILURE);
+     }
+ 
+-    exec_path = argv[optind];
++    /* not freeing exec_path as it is needed for the lifetime of the process */
++    if (!(exec_path = path_lookup(argv[optind]))) {
++        (void) fprintf(stderr, "qemu: could not find user program %s: %s\n", argv[optind], strerror(errno));
++        exit(EXIT_FAILURE);
++    }
+ 
+     return optind;
+ }
+-- 
+2.34.0
+
diff --git a/patches/buildkit-direct-execve-v7.0/0003-linux-user-path-in-execve-should-be-relative-to-work.patch b/patches/buildkit-direct-execve-v7.0/0003-linux-user-path-in-execve-should-be-relative-to-work.patch
new file mode 100644
index 00000000..81de7e7a
--- /dev/null
+++ b/patches/buildkit-direct-execve-v7.0/0003-linux-user-path-in-execve-should-be-relative-to-work.patch
@@ -0,0 +1,100 @@
+From 1f69e640ec9a5a9f8b7ab9101f4807808f59bc1d Mon Sep 17 00:00:00 2001
+From: Tibor Vass
+Date: Sat, 27 Jun 2020 21:42:51 +0000
+Subject: [PATCH] linux-user: path in execve should be relative to working dir
+
+Fixes regression introduced in
parent commit where PATH handling was introduced. + +When guest calls execve(filename, argp, envp) filename can be relative in which +case Linux makes it relative to the working directory. + +However, since execve is now handled by exec-ing qemu process again, filename +would first get looked up in PATH in main() before calling host's execve. + +With this change, if filename is relative and exists in working directory as +well as in PATH, working directory will get precedence over PATH if guest is +doing an execve syscall, but not if relative filename comes from qemu's argv. + +Signed-off-by: Tibor Vass +--- + include/qemu/path.h | 1 + + linux-user/syscall.c | 9 +++++++-- + util/path.c | 30 ++++++++++++++++++++++++++++++ + 3 files changed, 38 insertions(+), 2 deletions(-) + +diff --git a/include/qemu/path.h b/include/qemu/path.h +index c6292a9709..a81fb51e1f 100644 +--- a/include/qemu/path.h ++++ b/include/qemu/path.h +@@ -3,5 +3,6 @@ + + void init_paths(const char *prefix); + const char *path(const char *pathname); ++const char *prepend_workdir_if_relative(const char *path); + + #endif +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 73054926a0..b92be0963e 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8798,12 +8798,17 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + * execve(pathname, [argv0, argv1], envp) + * on the host, becomes: + * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) +- * where qemu_progname is the error message prefix for qemu ++ * where qemu_progname is the error message prefix for qemu. ++ * Note: if pathname is relative, it will be prepended with the current working directory. 
+              */
+             argp[0] = (char*)error_get_progname();
+             argp[1] = (char*)"-0";
+             argp[2] = (char*)lock_user_string(addr);
+-            argp[3] = p;
++            argp[3] = (char*)prepend_workdir_if_relative(p);
++            if (!argp[3]) {
++                ret = -host_to_target_errno(errno);
++                goto execve_end;
++            }
+ 
+             /* copy guest argv1 onwards to host argv4 onwards */
+             for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp;
+diff --git a/util/path.c b/util/path.c
+index 8e174eb436..f7907b8238 100644
+--- a/util/path.c
++++ b/util/path.c
+@@ -68,3 +68,33 @@ const char *path(const char *name)
+     qemu_mutex_unlock(&lock);
+     return ret;
+ }
++
++/* Prepends working directory if path is relative.
++ * If path is absolute (or NULL), it is returned as-is without any allocation.
++ * Otherwise, caller is responsible to free returned path.
++ * Returns NULL and sets errno upon error (ERANGE if the joined path is too long).
++ * Note: realpath is not called to let the kernel do the rest of the resolution.
++ */
++const char *prepend_workdir_if_relative(const char *path)
++{
++    char buf[PATH_MAX];
++    char *p;
++    size_t i, j, k;
++
++    if (!path || path[0] == '/') return path;
++
++    if (!getcwd(buf, PATH_MAX)) return NULL;
++    i = strlen(buf);
++    j = strlen(path);
++    k = i + 1 + j + 1; /* workdir + '/' + path + '\0' */
++    if (k > PATH_MAX) { /* the joined path, NUL included, must fit in PATH_MAX */
++        errno = ERANGE;
++        return NULL;
++    }
++    if (!(p = malloc(k))) return NULL; /* k bytes of char, not k pointers */
++
++    memcpy(p, buf, i); /* malloc'd memory is uninitialized, so copy instead of strncat */
++    p[i] = '/';
++    memcpy(p + i + 1, path, j + 1); /* j + 1 also copies the terminating NUL */
++    return p;
++}
+-- 
+2.34.0
+
diff --git a/patches/buildkit-direct-execve-v7.0/0004-linux-user-support-loading-scripts-with-shebang.patch b/patches/buildkit-direct-execve-v7.0/0004-linux-user-support-loading-scripts-with-shebang.patch
new file mode 100644
index 00000000..6288333e
--- /dev/null
+++ b/patches/buildkit-direct-execve-v7.0/0004-linux-user-support-loading-scripts-with-shebang.patch
@@ -0,0 +1,221 @@
+From 14efa42c9bc061ffcad00bfa4a643e73f9a056ee Mon Sep 17 00:00:00 2001
+From: Tibor Vass
+Date: Thu, 18
Jun 2020 20:57:22 +0000 +Subject: [PATCH] linux-user: support loading scripts with shebang (#!) + +The interpreter is assumed to be compatible with the target architecture. + +The script loading logic is taken from Linux source code to match logic as closely as possible. + +An interpreter can itself be a script (#!/other.script), and thus load another interpreter. +This happens in a loop therefore the loading chain of interpreter-scripts is limited to 5 like in Linux. + +Warning: there might be issues with m68k, mips, and mips64 architectures +since the cpu_model returned by those architectures (see linux-user/$arch/target_elf.h) +is dependent on the ELF header of the payload, but in this case the payload +is a script and not a binary. + This could be fixed either by moving the loading logic or +parts of it to before the cpu_model is set, so that the final ELF binary is available. +An alternative fix is to avoid the loop altogether and call qemu binary again with different arguments. +The downside is that it would require one extra exec syscall per interpreter. 
+ +Signed-off-by: Tibor Vass +Signed-off-by: Tonis Tiigi +--- + linux-user/elfload.c | 2 +- + linux-user/linuxload.c | 137 ++++++++++++++++++++++++++++++++++++----- + linux-user/loader.h | 2 + + 3 files changed, 124 insertions(+), 17 deletions(-) + +diff --git a/linux-user/elfload.c b/linux-user/elfload.c +index c45da4d633..3c27aef5d4 100644 +--- a/linux-user/elfload.c ++++ b/linux-user/elfload.c +@@ -3219,10 +3219,10 @@ uint32_t get_elf_eflags(int fd) + return 0; + } + ret = read(fd, &ehdr, sizeof(ehdr)); ++ offset = lseek(fd, offset, SEEK_SET); /* reset seek regardless of error */ + if (ret < sizeof(ehdr)) { + return 0; + } +- offset = lseek(fd, offset, SEEK_SET); + if (offset == (off_t) -1) { + return 0; + } +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index 2ed5fc45ed..354650ef90 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -128,7 +128,7 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + struct target_pt_regs *regs, struct image_info *infop, + struct linux_binprm *bprm) + { +- int retval; ++ int retval, depth; + + bprm->fd = fdexec; + bprm->filename = (char *)filename; +@@ -137,24 +137,33 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + bprm->envc = count(envp); + bprm->envp = envp; + +- retval = prepare_binprm(bprm); +- +- if (retval >= 0) { +- if (bprm->buf[0] == 0x7f +- && bprm->buf[1] == 'E' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'F') { +- retval = load_elf_binary(bprm, infop); ++ for (depth = 0; ; depth++) { ++ if (depth > 5) { ++ return -ELOOP; ++ } ++ retval = prepare_binprm(bprm); ++ if (retval >= 0) { ++ if (bprm->buf[0] == 0x7f ++ && bprm->buf[1] == 'E' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'F') { ++ retval = load_elf_binary(bprm, infop); + #if defined(TARGET_HAS_BFLT) +- } else if (bprm->buf[0] == 'b' +- && bprm->buf[1] == 'F' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'T') { +- retval = load_flt_binary(bprm, infop); ++ } 
else if (bprm->buf[0] == 'b'
++                    && bprm->buf[1] == 'F'
++                    && bprm->buf[2] == 'L'
++                    && bprm->buf[3] == 'T') {
++                retval = load_flt_binary(bprm, infop);
+ #endif
+-        } else {
+-            return -ENOEXEC;
++            } else if (bprm->buf[0] == '#'
++                    && bprm->buf[1] == '!') {
++                retval = load_script(bprm);
++                if (retval >= 0) continue;
++            } else {
++                return -ENOEXEC;
++            }
+         }
++        break;
+     }
+ 
+     if (retval >= 0) {
+@@ -165,3 +174,99 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp,
+ 
+     return retval;
+ }
++
++static inline bool spacetab(char c) { return c == ' ' || c == '\t'; }
++static inline const char *next_non_spacetab(const char *first, const char *last)
++{
++    for (; first <= last; first++)
++        if (!spacetab(*first))
++            return first;
++    return NULL;
++}
++static inline const char *next_terminator(const char *first, const char *last)
++{
++    for (; first <= last; first++)
++        if (spacetab(*first) || !*first)
++            return first;
++    return NULL;
++}
++
++/*
++ * Reads the interpreter (shebang #!) line and modifies bprm object accordingly
++ * Modified version of Linux's load_script; returns 0 or a negative errno value.
++*/
++int load_script(struct linux_binprm *bprm)
++{
++    const char *i_name, *i_sep, *i_arg, *i_end, *buf_end;
++    int execfd, i, argc_delta;
++    char **new_argv;
++    buf_end = bprm->buf + sizeof(bprm->buf) - 1;
++    i_end = (const char*)memchr(bprm->buf, '\n', sizeof(bprm->buf));
++    if (!i_end) {
++        i_end = next_non_spacetab(bprm->buf + 2, buf_end);
++        if (!i_end) {
++            perror("script_prepare_binprm: no interpreter name found");
++            return -ENOEXEC; /* Entire buf is spaces/tabs */
++        }
++        /*
++         * If there is no later space/tab/NUL we must assume the
++         * interpreter path is truncated.
++         */
++        if (!next_terminator(i_end, buf_end)) {
++            perror("script_prepare_binprm: truncated interpreter path");
++            return -ENOEXEC;
++        }
++        i_end = buf_end;
++    }
++    /* Trim any trailing spaces/tabs from i_end */
++    while (spacetab(i_end[-1]))
++        i_end--;
++    *((char *)i_end) = '\0';
++    /* Skip over leading spaces/tabs */
++    i_name = next_non_spacetab(bprm->buf+2, i_end);
++    if (!i_name || (i_name == i_end)) {
++        perror("script_prepare_binprm: no interpreter name found");
++        return -ENOEXEC; /* No interpreter name found */
++    }
++
++    /* Is there an optional argument? */
++    i_arg = NULL;
++    i_sep = next_terminator(i_name, i_end);
++    if (i_sep && (*i_sep != '\0')) {
++        i_arg = next_non_spacetab(i_sep, i_end);
++        *((char *)i_sep) = '\0';
++    }
++
++    /*
++     * OK, we've parsed out the interpreter name and
++     * (optional) argument.
++     * Splice in (1) the interpreter's name for argv[0]
++     *           (2) (optional) argument to interpreter
++     *           (3) filename of shell script (replace argv[0])
++     *           (4) user arguments (argv[1:])
++     */
++
++    execfd = open(i_name, O_RDONLY);
++    if (execfd < 0) {
++        perror("script_prepare_binprm: could not open script");
++        return -ENOEXEC; /* Could not open interpreter */
++    }
++
++    argc_delta = 1 /* extra filename */ + (i_arg ? 1 : 0);
++    new_argv = realloc(bprm->argv, sizeof(char*) * (bprm->argc + argc_delta + 1));
++    if (!new_argv) { close(execfd); return -ENOMEM; } /* old argv is still valid and untouched */
++    bprm->argv = new_argv;
++    bprm->argc += argc_delta;
++    /* shift argv by argc_delta */
++    for (i = bprm->argc; i >= argc_delta; i--)
++        bprm->argv[i] = bprm->argv[i-argc_delta];
++    bprm->argv[0] = (char *)strdup(i_name);
++    if (i_arg)
++        bprm->argv[1] = (char *)strdup(i_arg);
++
++    bprm->fd = execfd; /* not closing fd as it is needed for the duration of the program */
++    bprm->filename = (char *)strdup(i_name); /* replace filename with script interpreter */
++    /* envc and envp are kept unchanged */
++
++    return 0;
++}
+diff --git a/linux-user/loader.h b/linux-user/loader.h
+index f375ee0679..f3f3b9ce1b 100644
+--- a/linux-user/loader.h
++++ b/linux-user/loader.h
+@@ -56,4 +56,6 @@ abi_long memcpy_to_target(abi_ulong dest, const void *src,
+ 
+ extern unsigned long guest_stack_size;
+ 
++int load_script(struct linux_binprm *bprm);
++
+ #endif /* LINUX_USER_LOADER_H */
+-- 
+2.34.0
+
diff --git a/patches/buildkit-direct-execve-v7.0/0005-set-script-path-as-argv0-in-shebang-handler.patch b/patches/buildkit-direct-execve-v7.0/0005-set-script-path-as-argv0-in-shebang-handler.patch
new file mode 100644
index 00000000..6ba6f6b8
--- /dev/null
+++ b/patches/buildkit-direct-execve-v7.0/0005-set-script-path-as-argv0-in-shebang-handler.patch
@@ -0,0 +1,26 @@
+From baadf95fbcc53dc609480c8432569b01c2ab60a9 Mon Sep 17 00:00:00 2001
+From: Tonis Tiigi
+Date: Thu, 26 Aug 2021 01:18:32 +0200
+Subject: [PATCH] set script path as argv0 in shebang handler
+
+Signed-off-by: Tonis Tiigi
+---
+ linux-user/linuxload.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c
+index 354650ef90..161a0b05bf 100644
+--- a/linux-user/linuxload.c
++++ b/linux-user/linuxload.c
+@@ -246,6 +246,8 @@ int load_script(struct linux_binprm *bprm)
+      *           (4) user arguments (argv[1:])
+      */
+ 
++    bprm->argv[0] = bprm->filename;
++
+     execfd = open(i_name, O_RDONLY);
+     if (execfd < 0) {
+         
perror("script_prepare_binprm: could not open script"); +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v7.0/0006-linux-user-use-GLib-to-remember-the-program-name.patch b/patches/buildkit-direct-execve-v7.0/0006-linux-user-use-GLib-to-remember-the-program-name.patch new file mode 100644 index 00000000..d113576a --- /dev/null +++ b/patches/buildkit-direct-execve-v7.0/0006-linux-user-use-GLib-to-remember-the-program-name.patch @@ -0,0 +1,26 @@ +From 4151ddd9c57d0442b4adf38be4730629465fe743 Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Mon, 11 Jul 2022 15:04:24 +0200 +Subject: [PATCH] linux-user: use GLib to remember the program name + +Signed-off-by: CrazyMax +--- + linux-user/syscall.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index b92be0963e..8f57f26c44 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8801,7 +8801,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + * where qemu_progname is the error message prefix for qemu. + * Note: if pathname is relative, it will be prepended with the current working directory. + */ +- argp[0] = (char*)error_get_progname(); ++ argp[0] = (char*)g_get_prgname(); + argp[1] = (char*)"-0"; + argp[2] = (char*)lock_user_string(addr); + argp[3] = (char*)prepend_workdir_if_relative(p); +-- +2.34.0 +