Skip to content

Commit

Permalink
Merge pull request #3701 from AkihiroSuda/cherrypick-3697
Browse files Browse the repository at this point in the history
[0.11 backport] rootless: support Bottlerocket OS
  • Loading branch information
crazy-max authored Mar 20, 2023
2 parents 237fee9 + 58c8024 commit a0f2992
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 0 deletions.
8 changes: 8 additions & 0 deletions cache/refs.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/leases"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/userns"
"github.com/containerd/containerd/snapshots"
"github.com/docker/docker/pkg/idtools"
"github.com/hashicorp/go-multierror"
Expand All @@ -27,6 +28,7 @@ import (
"github.com/moby/buildkit/util/flightcontrol"
"github.com/moby/buildkit/util/leaseutil"
"github.com/moby/buildkit/util/progress"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
"github.com/moby/buildkit/util/winlayers"
"github.com/moby/sys/mountinfo"
digest "github.com/opencontainers/go-digest"
Expand Down Expand Up @@ -1640,6 +1642,12 @@ func (sm *sharableMountable) Mount() (_ []mount.Mount, _ func() error, retErr er
os.Remove(dir)
}
}()
if userns.RunningInUserNS() {
mounts, err = rootlessmountopts.FixUp(mounts)
if err != nil {
return nil, nil, err
}
}
if err := mount.All(mounts, dir); err != nil {
return nil, nil, err
}
Expand Down
11 changes: 11 additions & 0 deletions docs/rootless.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ spec:
See also the [example manifests](#Kubernetes).
### Bottlerocket OS
Bottlerocket OS requires running `sysctl -w user.max_user_namespaces=N` (where N is a positive integer, e.g. 63359) on the host nodes.

See [`../examples/kubernetes/sysctl-userns.privileged.yaml`](../examples/kubernetes/sysctl-userns.privileged.yaml).

<details>
<summary>Old distributions</summary>

Expand Down Expand Up @@ -104,6 +110,11 @@ See https://rootlesscontaine.rs/getting-started/common/subuid/
### Error `Options:[rbind ro]}]: operation not permitted`
Make sure to mount an `emptyDir` volume on `/home/user/.local/share/buildkit`.

### Error `fork/exec /proc/self/exe: no space left on device` with `level=warning msg="/proc/sys/user/max_user_namespaces needs to be set to non-zero."`
Run `sysctl -w user.max_user_namespaces=N` (N=positive integer, like 63359) on the host nodes.

See [`../examples/kubernetes/sysctl-userns.privileged.yaml`](../examples/kubernetes/sysctl-userns.privileged.yaml).

## Containerized deployment

### Kubernetes
Expand Down
26 changes: 26 additions & 0 deletions examples/kubernetes/sysctl-userns.privileged.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Run `sysctl -w user.max_user_namespaces=63359` on all the nodes,
# for errors like "/proc/sys/user/max_user_namespaces needs to be set to non-zero"
# on running rootless buildkitd pods.
#
# This workaround is known to be needed on Bottlerocket OS.
apiVersion: apps/v1
# A DaemonSet is used because the sysctl has to be applied on the nodes
# themselves (see the header of this file: "on all the nodes").
kind: DaemonSet
metadata:
labels:
app: sysctl-userns
name: sysctl-userns
spec:
selector:
matchLabels:
app: sysctl-userns
template:
metadata:
labels:
app: sysctl-userns
spec:
containers:
- name: sysctl-userns
image: busybox
# Apply the sysctl once, then `sleep infinity` so that the container keeps
# running after the value has been written.
# `sh -euxc`: exit on error (-e), treat unset variables as errors (-u),
# trace commands (-x), and run the following string as the script (-c).
command: ["sh", "-euxc", "sysctl -w user.max_user_namespaces=63359 && sleep infinity"]
securityContext:
# NOTE(review): presumably required so that the container is allowed to
# write this sysctl - confirm before relaxing.
privileged: true
10 changes: 10 additions & 0 deletions executor/oci/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ import (
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/pkg/userns"
"github.com/containerd/continuity/fs"
"github.com/docker/docker/pkg/idtools"
"github.com/mitchellh/hashstructure/v2"
"github.com/moby/buildkit/executor"
"github.com/moby/buildkit/snapshot"
"github.com/moby/buildkit/util/network"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
traceexec "github.com/moby/buildkit/util/tracing/exec"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
Expand Down Expand Up @@ -192,6 +194,14 @@ func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mou
}

s.Mounts = dedupMounts(s.Mounts)

if userns.RunningInUserNS() {
s.Mounts, err = rootlessmountopts.FixUpOCI(s.Mounts)
if err != nil {
return nil, nil, err
}
}

return s, releaseAll, nil
}

Expand Down
10 changes: 10 additions & 0 deletions snapshot/localmounter_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"syscall"

"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/userns"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
"github.com/pkg/errors"
)

Expand All @@ -24,6 +26,14 @@ func (lm *localMounter) Mount() (string, error) {
lm.release = release
}

if userns.RunningInUserNS() {
var err error
lm.mounts, err = rootlessmountopts.FixUp(lm.mounts)
if err != nil {
return "", err
}
}

if len(lm.mounts) == 1 && (lm.mounts[0].Type == "bind" || lm.mounts[0].Type == "rbind") {
ro := false
for _, opt := range lm.mounts[0].Options {
Expand Down
88 changes: 88 additions & 0 deletions util/rootless/mountopts/mountopts_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package mountopts

import (
"github.com/containerd/containerd/mount"
"github.com/moby/buildkit/util/strutil"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)

// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given
// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
// bind-mounting "with options" will not fail with user namespaces, due to
// kernel restrictions that require user namespace mounts to preserve
// CL_UNPRIVILEGED locked flags.
//
// The flags are returned as mount option strings, always in the order of the
// table below. The result is nil when none of the flags is set. An error from
// statfs(2) on path is returned as-is.
//
// From https://github.com/moby/moby/blob/v23.0.1/daemon/oci_linux.go#L430-L460
func UnprivilegedMountFlags(path string) ([]string, error) {
	var statfs unix.Statfs_t
	if err := unix.Statfs(path, &statfs); err != nil {
		return nil, err
	}

	// statfs(2) reports the mount flags in f_flags as ST_* bits, not MS_* bits.
	// Most of them share the same numeric value, but ST_RELATIME (0x1000) is
	// different from MS_RELATIME (1<<21), so testing f_flags against
	// MS_RELATIME can never match.
	//
	// The set of flags comes from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
	// An ordered table is used instead of a map so that the returned options
	// come out in a deterministic order.
	unprivilegedFlags := []struct {
		mask uint64
		name string
	}{
		{unix.ST_RDONLY, "ro"},
		{unix.ST_NODEV, "nodev"},
		{unix.ST_NOEXEC, "noexec"},
		{unix.ST_NOSUID, "nosuid"},
		{unix.ST_NOATIME, "noatime"},
		{unix.ST_RELATIME, "relatime"},
		{unix.ST_NODIRATIME, "nodiratime"},
	}

	// Statfs_t.Flags is a signed integer whose width depends on the
	// architecture; normalize it once before masking.
	fsFlags := uint64(statfs.Flags)

	var flags []string
	for _, uf := range unprivilegedFlags {
		if fsFlags&uf.mask == uf.mask {
			flags = append(flags, uf.name)
		}
	}

	return flags, nil
}

// FixUp is for https://github.com/moby/buildkit/issues/3098
//
// For every mount whose options contain "bind" or "rbind", the flags reported
// by UnprivilegedMountFlags for the mount source are appended to the options
// and duplicates are removed. Other mounts are left untouched. Entries are
// updated in place and the same slice is returned. On failure the result is
// nil and the error is wrapped with the offending mount.
func FixUp(mounts []mount.Mount) ([]mount.Mount, error) {
	for idx := range mounts {
		cur := mounts[idx]

		bind := false
		for _, opt := range cur.Options {
			if opt == "bind" || opt == "rbind" {
				bind = true
				break
			}
		}

		if bind {
			locked, err := UnprivilegedMountFlags(cur.Source)
			if err != nil {
				return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", cur)
			}
			cur.Options = strutil.DedupeSlice(append(cur.Options, locked...))
			mounts[idx] = cur
		}
	}
	return mounts, nil
}

// FixUpOCI does for OCI runtime-spec mounts what FixUp does for containerd
// mounts: every mount whose options contain "bind" or "rbind" gets the flags
// reported by UnprivilegedMountFlags for its source appended, with duplicates
// removed. Entries are updated in place and the same slice is returned. On
// failure the result is nil and the error is wrapped with the offending mount.
func FixUpOCI(mounts []specs.Mount) ([]specs.Mount, error) {
	for idx := range mounts {
		entry := mounts[idx]

		hasBind := false
		for _, opt := range entry.Options {
			if opt == "bind" || opt == "rbind" {
				hasBind = true
				break
			}
		}
		if !hasBind {
			// Not a bind mount: nothing to preserve.
			continue
		}

		locked, err := UnprivilegedMountFlags(entry.Source)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", entry)
		}
		entry.Options = strutil.DedupeSlice(append(entry.Options, locked...))
		mounts[idx] = entry
	}
	return mounts, nil
}
21 changes: 21 additions & 0 deletions util/rootless/mountopts/mountopts_others.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//go:build !linux
// +build !linux

package mountopts

import (
"github.com/containerd/containerd/mount"
specs "github.com/opencontainers/runtime-spec/specs-go"
)

// UnprivilegedMountFlags is the non-Linux stub: path is ignored and an empty,
// non-nil list of flags is always returned with a nil error.
func UnprivilegedMountFlags(path string) ([]string, error) {
	return make([]string, 0), nil
}

// FixUp is the non-Linux stub: it returns mounts unchanged and never fails.
func FixUp(mounts []mount.Mount) ([]mount.Mount, error) {
return mounts, nil
}

// FixUpOCI is the non-Linux stub: it returns mounts unchanged and never fails.
func FixUpOCI(mounts []specs.Mount) ([]specs.Mount, error) {
return mounts, nil
}
30 changes: 30 additions & 0 deletions util/strutil/strutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package strutil

// DedupeSlice returns the distinct strings of in, keeping the order in which
// each value first appears. The input slice is not modified. The result is nil
// when in has no elements.
//
// DedupeSlice is from https://github.com/containerd/nerdctl/blob/v1.2.1/pkg/strutil/strutil.go#L72-L82
func DedupeSlice(in []string) []string {
	var uniq []string
	seen := map[string]struct{}{}
	for i := range in {
		item := in[i]
		if _, dup := seen[item]; dup {
			continue
		}
		seen[item] = struct{}{}
		uniq = append(uniq, item)
	}
	return uniq
}

0 comments on commit a0f2992

Please sign in to comment.