diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 565b2ca2030..627ddc40e65 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -95,6 +95,12 @@ struct nlconfig_t { size_t timensoffset_len; }; +struct namespace_t { + int fd; + char type[PATH_MAX]; + char path[PATH_MAX]; +}; + /* * List of netlink message types sent to us as part of bootstrapping the init. * These constants are defined in libcontainer/message_linux.go. @@ -444,16 +450,11 @@ void nl_free(struct nlconfig_t *config) free(config->data); } -void join_namespaces(char *nslist) +struct namespace_t * init_namespaces(char *nslist, int *num) { - int num = 0, i; char *saveptr = NULL; char *namespace = strtok_r(nslist, ",", &saveptr); - struct namespace_t { - int fd; - char type[PATH_MAX]; - char path[PATH_MAX]; - } *namespaces = NULL; + struct namespace_t *namespaces = NULL; if (!namespace || !strlen(namespace) || !strlen(nslist)) bail("ns paths are empty"); @@ -469,10 +470,10 @@ void join_namespaces(char *nslist) struct namespace_t *ns; /* Resize the namespace array. */ - namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t)); + namespaces = realloc(namespaces, ++*num * sizeof(struct namespace_t)); if (!namespaces) bail("failed to reallocate namespace array"); - ns = &namespaces[num - 1]; + ns = &namespaces[*num - 1]; /* Split 'ns:path'. */ path = strstr(namespace, ":"); @@ -490,20 +491,40 @@ void join_namespaces(char *nslist) ns->path[PATH_MAX - 1] = '\0'; } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); - /* - * The ordering in which we join namespaces is important. We should - * always join the user namespace *first*. This is all guaranteed - * from the container_linux.go side of this, so we're just going to - * follow the order given to us. 
- */ + return namespaces; +} + +void join_namespaces(struct namespace_t *namespaces, int num, bool ignoreError) { + int i; for (i = 0; i < num; i++) { struct namespace_t *ns = &namespaces[i]; int flag = nsflag(ns->type); + if (ns->fd < 0) + continue; + + /* + * The ordering in which we join namespaces is important. We should join + * as many namespaces as possible *first* except the user namespace, + * because some ns paths may not be owned by the user namespace + * we want to join; then we can join the remaining namespaces after we + * join/unshare user ns. (#4390) + * + * When we join remaining namespaces or for rootless container, we should + * always join the user namespace *first*. This is all guaranteed from the + * container_linux.go side of this, so we're just going to follow the order + * given to us. + */ + if (ignoreError && flag == CLONE_NEWUSER) + continue; + write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", flag, ns->type, ns->path); - if (setns(ns->fd, flag) < 0) - bail("failed to setns into %s namespace", ns->type); + if (setns(ns->fd, flag) < 0) { + if (!ignoreError) + bail("failed to setns into %s namespace", ns->type); + continue; + } /* * If we change user namespaces, make sure we switch to root in the @@ -517,9 +538,8 @@ void join_namespaces(char *nslist) } close(ns->fd); + ns->fd = -1; } - - free(namespaces); } static inline int sane_kill(pid_t pid, int signum) @@ -840,6 +860,8 @@ void nsexec(void) case STAGE_CHILD:{ pid_t stage2_pid = -1; enum sync_t s; + int nslen = 0; + struct namespace_t *namespaces = NULL; /* For debugging. */ current_stage = STAGE_CHILD; @@ -859,8 +881,11 @@ void nsexec(void) * [stage 2: STAGE_INIT]) would be meaningless). We could send it * using cmsg(3) but that's just annoying.
*/ - if (config.namespaces) - join_namespaces(config.namespaces); + if (config.namespaces) { + namespaces = init_namespaces(config.namespaces, &nslen); + if (nslen > 0) + join_namespaces(namespaces, nslen, !config.is_rootless_euid); + } /* * Deal with user namespaces first. They are quite special, as they @@ -923,6 +948,11 @@ void nsexec(void) if (setresuid(0, 0, 0) < 0) bail("failed to become root in user namespace"); } + /* Join remaining namespaces after we join/unshare user ns. */ + if (nslen > 0) { + join_namespaces(namespaces, nslen, false); + free(namespaces); + } /* * Unshare all of the namespaces. Now, it should be noted that this