diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9466d5996be..754fd87f155 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,7 +28,6 @@ jobs: rootless: ["rootless", ""] race: ["-race", ""] criu: ["", "criu-dev"] - dmz: ["", "runc_nodmz"] exclude: # Disable most of criu-dev jobs, as they are expensive # (need to compile criu) and don't add much value/coverage. @@ -38,26 +37,12 @@ jobs: rootless: rootless - criu: criu-dev race: -race - - criu: criu-dev - dmz: runc_nodmz - # Disable most of runc_nodmz jobs, as they don't add much value - # (as dmz is disabled by default anyway). - - dmz: runc_nodmz - os: ubuntu-20.04 - - dmz: runc_nodmz - go-version: 1.22.x - - dmz: runc_nodmz - rootless: rootless - - dmz: runc_nodmz - race: -race - go-version: 1.22.x os: actuated-arm64-6cpu-8gb - race: "-race" os: actuated-arm64-6cpu-8gb - criu: criu-dev os: actuated-arm64-6cpu-8gb - - dmz: runc_nodmz - os: actuated-arm64-6cpu-8gb runs-on: ${{ matrix.os }} @@ -150,8 +135,6 @@ jobs: check-latest: true - name: build - env: - EXTRA_BUILDTAGS: ${{ matrix.dmz }} run: sudo -E PATH="$PATH" make EXTRA_FLAGS="${{ matrix.race }}" all - name: Setup Bats and bats libs @@ -171,8 +154,6 @@ jobs: - name: unit test if: matrix.rootless != 'rootless' - env: - EXTRA_BUILDTAGS: ${{ matrix.dmz }} run: sudo -E PATH="$PATH" -- make TESTFLAGS="${{ matrix.race }}" localunittest - name: add rootless user @@ -209,8 +190,6 @@ jobs: timeout-minutes: 15 strategy: fail-fast: false - matrix: - dmz: ["", "runc_nodmz"] runs-on: ubuntu-22.04 steps: @@ -234,8 +213,6 @@ jobs: check-latest: true - name: unit test - env: - EXTRA_BUILDTAGS: ${{ matrix.dmz }} run: sudo -E PATH="$PATH" -- make GOARCH=386 localunittest all-done: diff --git a/.golangci-extra.yml b/.golangci-extra.yml index 23b57e040b6..be33f90d7f9 100644 --- a/.golangci-extra.yml +++ b/.golangci-extra.yml @@ -7,7 +7,6 @@ run: build-tags: - seccomp - - runc_nodmz linters: disable-all: true diff --git 
a/.golangci.yml b/.golangci.yml index 25d94ecf485..c6959dd690f 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -3,7 +3,6 @@ run: build-tags: - seccomp - - runc_nodmz linters: enable: diff --git a/Makefile b/Makefile index 6bdd031addc..0a15fd908ea 100644 --- a/Makefile +++ b/Makefile @@ -3,9 +3,6 @@ SHELL = /bin/bash CONTAINER_ENGINE := docker GO ?= go -# Get CC values for cross-compilation. -include cc_platform.mk - PREFIX ?= /usr/local BINDIR := $(PREFIX)/sbin MANDIR := $(PREFIX)/share/man @@ -73,10 +70,10 @@ endif .DEFAULT: runc .PHONY: runc -runc: runc-bin verify-dmz-arch +runc: runc-bin .PHONY: runc-bin -runc-bin: runc-dmz +runc-bin: $(GO_BUILD) -o runc . .PHONY: all @@ -92,7 +89,7 @@ recvtty sd-helper seccompagent fs-idmap pidfd-kill remap-rootfs: .PHONY: clean clean: - rm -f runc runc-* libcontainer/dmz/binary/runc-dmz + rm -f runc runc-* rm -f contrib/cmd/memfd-bind/memfd-bind rm -f tests/cmd/recvtty/recvtty rm -f tests/cmd/sd-helper/sd-helper @@ -104,17 +101,12 @@ clean: rm -rf man/man8 .PHONY: static -static: static-bin verify-dmz-arch +static: static-bin .PHONY: static-bin -static-bin: runc-dmz +static-bin: $(GO_BUILD_STATIC) -o runc . -.PHONY: runc-dmz -runc-dmz: - rm -f libcontainer/dmz/binary/runc-dmz - $(GO) generate -tags "$(BUILDTAGS)" ./libcontainer/dmz - .PHONY: releaseall releaseall: RELEASE_ARGS := "-a 386 -a amd64 -a arm64 -a armel -a armhf -a ppc64le -a riscv64 -a s390x" releaseall: release @@ -253,16 +245,6 @@ verify-dependencies: vendor || (echo -e "git status:\n $$(git status -- go.mod go.sum vendor/)\nerror: vendor/, go.mod and/or go.sum not up to date. Run \"make vendor\" to update"; exit 1) \ && echo "all vendor files are up to date." 
-.PHONY: verify-dmz-arch -verify-dmz-arch: - @if test -s libcontainer/dmz/binary/runc-dmz; then \ - set -Eeuo pipefail; \ - export LC_ALL=C; \ - diff -u \ - <(readelf -h runc | grep -E "(Machine|Flags):") \ - <(readelf -h libcontainer/dmz/binary/runc-dmz | grep -E "(Machine|Flags):"); \ - fi - .PHONY: validate-keyring validate-keyring: script/keyring_validate.sh diff --git a/README.md b/README.md index 134acc65cba..8cbe1fe6878 100644 --- a/README.md +++ b/README.md @@ -106,15 +106,14 @@ make BUILDTAGS="" | Build Tag | Feature | Enabled by Default | Dependencies | |---------------|---------------------------------------|--------------------|---------------------| | `seccomp` | Syscall filtering using `libseccomp`. | yes | `libseccomp` | -| `!runc_nodmz` | Reduce memory usage for CVE-2019-5736 protection by using a small C binary, [see `memfd-bind` for more details][contrib-memfd-bind]. `runc_nodmz` disables this **experimental feature** and causes runc to use a different protection mechanism which will further increases memory usage temporarily during container startup. To enable this feature you also need to set the `RUNC_DMZ=true` environment variable. | yes || The following build tags were used earlier, but are now obsoleted: + - **runc_nodmz** (since runc v1.2.1 the runc-dmz binary is dropped) - **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored) - **apparmor** (since runc v1.0.0-rc93 the feature is always enabled) - **selinux** (since runc v1.0.0-rc93 the feature is always enabled) [contrib-memfd-bind]: /contrib/cmd/memfd-bind/README.md - [dmz README]: /libcontainer/dmz/README.md ### Running the test suite diff --git a/cc_platform.mk b/cc_platform.mk deleted file mode 100644 index 6aa2b5ecb8b..00000000000 --- a/cc_platform.mk +++ /dev/null @@ -1,61 +0,0 @@ -# NOTE: Make sure you keep this file in sync with scripts/lib.sh.
- -GO ?= go -GOARCH ?= $(shell $(GO) env GOARCH) - -ifneq ($(shell grep -i "ID_LIKE=.*suse" /etc/os-release),) - # openSUSE has a custom PLATFORM - PLATFORM ?= suse-linux - IS_SUSE := 1 -else - PLATFORM ?= linux-gnu -endif - -ifeq ($(GOARCH),$(shell GOARCH= $(GO) env GOARCH)) - # use the native CC and STRIP - HOST := -else ifeq ($(GOARCH),386) - # Always use the 64-bit compiler to build the 386 binary, which works for - # the more common cross-build method for x86 (namely, the equivalent of - # dpkg --add-architecture). - ifdef IS_SUSE - # There is no x86_64-suse-linux-gcc, so use the native one. - HOST := - CPU_TYPE := i586 - else - HOST := x86_64-$(PLATFORM)- - CPU_TYPE := i686 - endif - CFLAGS := -m32 -march=$(CPU_TYPE) $(CFLAGS) -else ifeq ($(GOARCH),amd64) - ifdef IS_SUSE - # There is no x86_64-suse-linux-gcc, so use the native one. - HOST := - else - HOST := x86_64-$(PLATFORM)- - endif -else ifeq ($(GOARCH),arm64) - HOST := aarch64-$(PLATFORM)- -else ifeq ($(GOARCH),arm) - # HOST already configured by release_build.sh in this case. -else ifeq ($(GOARCH),armel) - HOST := arm-$(PLATFORM)eabi- -else ifeq ($(GOARCH),armhf) - HOST := arm-$(PLATFORM)eabihf- -else ifeq ($(GOARCH),ppc64le) - HOST := powerpc64le-$(PLATFORM)- -else ifeq ($(GOARCH),riscv64) - HOST := riscv64-$(PLATFORM)- -else ifeq ($(GOARCH),s390x) - HOST := s390x-$(PLATFORM)- -else -$(error Unsupported GOARCH $(GOARCH)) -endif - -ifeq ($(origin CC),$(filter $(origin CC),undefined default)) - # Override CC if it's undefined or just the default value set by Make. 
- CC := $(HOST)gcc - export CC -endif -STRIP ?= $(HOST)strip -export STRIP diff --git a/contrib/cmd/memfd-bind/README.md b/contrib/cmd/memfd-bind/README.md index c4887fa7f0e..0220eef8770 100644 --- a/contrib/cmd/memfd-bind/README.md +++ b/contrib/cmd/memfd-bind/README.md @@ -1,8 +1,8 @@ ## memfd-bind ## -`runc` normally has to make a binary copy of itself (or of a smaller helper -binary called `runc-dmz`) when constructing a container process in order to -defend against certain container runtime attacks such as CVE-2019-5736. +`runc` normally has to make a binary copy of itself when constructing a +container process in order to defend against certain container runtime attacks +such as CVE-2019-5736. This cloned binary only exists until the container process starts (this means for `runc run` and `runc exec`, it only exists for a few hundred milliseconds @@ -34,15 +34,6 @@ much memory usage they can use: * `memfd-bind` only creates a single in-memory copy of the `runc` binary (about 10MB), regardless of how many containers are running. -* `runc-dmz` is (depending on which libc it was compiled with) between 10kB and - 1MB in size, and a copy is created once per process spawned inside a - container by runc (both the pid1 and every `runc exec`). The `RUNC_DMZ=true` - environment variable needs to be set to opt-in. There are circumstances where - using `runc-dmz` will fail in ways that runc cannot predict ahead of time (such - as restrictive LSMs applied to containers). `runc-dmz` also requires an - additional `execve` over the other options, though since the binary is so small - the cost is probably not even noticeable. - * The classic method of making a copy of the entire `runc` binary during container process setup takes up about 10MB per process spawned inside the container by runc (both pid1 and `runc exec`). 
diff --git a/docs/experimental.md b/docs/experimental.md index a61cf60ed74..3b2e65d33be 100644 --- a/docs/experimental.md +++ b/docs/experimental.md @@ -6,3 +6,4 @@ Feature | Experimental release | Graduation rel ---------------------------------------- | -------------------- | ------------------ cgroup v2 | v1.0.0-rc91 | v1.0.0-rc93 The `runc features` command | v1.1.0 | v1.2.0 +runc-dmz | v1.2.0-rc1 | Dropped in v1.2.1 diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 12ee6a3ef95..c02116177ad 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -10,7 +10,6 @@ import ( "path" "path/filepath" "reflect" - "slices" "strconv" "strings" "sync" @@ -26,7 +25,6 @@ import ( "github.com/opencontainers/runc/libcontainer/dmz" "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/runc/libcontainer/system/kernelversion" "github.com/opencontainers/runc/libcontainer/utils" ) @@ -474,54 +472,21 @@ func (c *Container) includeExecFifo(cmd *exec.Cmd) error { return nil } -func isDmzBinarySafe(c *configs.Config) bool { - // Because we set the dumpable flag in nsexec, the only time when it is - // unsafe to use runc-dmz is when the container process would be able to - // race against "runc init" and bypass the ptrace_may_access() checks. - // - // This is only the case if the container processes could have - // CAP_SYS_PTRACE somehow (i.e. the capability is present in the bounding, - // inheritable, or ambient sets). Luckily, most containers do not have this - // capability. 
- if c.Capabilities == nil || - (!slices.Contains(c.Capabilities.Bounding, "CAP_SYS_PTRACE") && - !slices.Contains(c.Capabilities.Inheritable, "CAP_SYS_PTRACE") && - !slices.Contains(c.Capabilities.Ambient, "CAP_SYS_PTRACE")) { - return true - } - - // Since Linux 4.10 (see bfedb589252c0) user namespaced containers cannot - // access /proc/$pid/exe of runc after it joins the namespace (until it - // does an exec), regardless of the capability set. This has been - // backported to other distribution kernels, but there's no way of checking - // this cheaply -- better to be safe than sorry here. - linux410 := kernelversion.KernelVersion{Kernel: 4, Major: 10} - if ok, err := kernelversion.GreaterEqualThan(linux410); ok && err == nil { - if c.Namespaces.Contains(configs.NEWUSER) { - return true - } - } - - // Assume it's unsafe otherwise. - return false -} - func (c *Container) newParentProcess(p *Process) (parentProcess, error) { comm, err := newProcessComm() if err != nil { return nil, err } - // Make sure we use a new safe copy of /proc/self/exe or the runc-dmz - // binary each time this is called, to make sure that if a container - // manages to overwrite the file it cannot affect other containers on the - // system. For runc, this code will only ever be called once, but - // libcontainer users might call this more than once. + // Make sure we use a new safe copy of the /proc/self/exe binary each time this + // is called, to make sure that if a container manages to overwrite the file, + // it cannot affect other containers on the system. For runc, this code will + // only ever be called once, but libcontainer users might call this more than + // once.
p.closeClonedExes() var ( exePath string - // only one of dmzExe or safeExe are used at a time - dmzExe, safeExe *os.File + safeExe *os.File ) if dmz.IsSelfExeCloned() { // /proc/self/exe is already a cloned binary -- no need to do anything @@ -532,42 +497,13 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) { exePath = "/proc/self/exe" } else { var err error - if isDmzBinarySafe(c.config) { - dmzExe, err = dmz.Binary(c.stateDir) - if err == nil { - // We can use our own executable without cloning if we are - // using runc-dmz. We don't need to use /proc/thread-self here - // because the exe mm of a thread-group is guaranteed to be the - // same for all threads by definition. This lets us avoid - // having to do runtime.LockOSThread. - exePath = "/proc/self/exe" - p.clonedExes = append(p.clonedExes, dmzExe) - logrus.Debug("runc-dmz: using runc-dmz") // used for tests - } else if errors.Is(err, dmz.ErrNoDmzBinary) { - logrus.Debug("runc-dmz binary not embedded in runc binary, falling back to /proc/self/exe clone") - } else { - return nil, fmt.Errorf("failed to create runc-dmz binary clone: %w", err) - } - } else { - // If the configuration makes it unsafe to use runc-dmz, pretend we - // don't have it embedded so we do /proc/self/exe cloning. - logrus.Debug("container configuration unsafe for runc-dmz, falling back to /proc/self/exe clone") - err = dmz.ErrNoDmzBinary - } - if errors.Is(err, dmz.ErrNoDmzBinary) { - safeExe, err = dmz.CloneSelfExe(c.stateDir) - if err != nil { - return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err) - } - exePath = "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd())) - p.clonedExes = append(p.clonedExes, safeExe) - logrus.Debug("runc-dmz: using /proc/self/exe clone") // used for tests - } - // Just to make sure we don't run without protection. - if dmzExe == nil && safeExe == nil { - // This should never happen. 
- return nil, fmt.Errorf("[internal error] attempted to spawn a container with no /proc/self/exe protection") + safeExe, err = dmz.CloneSelfExe(c.stateDir) + if err != nil { + return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err) } + exePath = "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd())) + p.clonedExes = append(p.clonedExes, safeExe) + logrus.Debug("runc-dmz: using /proc/self/exe clone") // used for tests } cmd := exec.Command(exePath, "init") @@ -597,12 +533,6 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) { "_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1), ) - if dmzExe != nil { - cmd.ExtraFiles = append(cmd.ExtraFiles, dmzExe) - cmd.Env = append(cmd.Env, - "_LIBCONTAINER_DMZEXEFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1)) - } - cmd.ExtraFiles = append(cmd.ExtraFiles, comm.logPipeChild) cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1)) diff --git a/libcontainer/dmz/Makefile b/libcontainer/dmz/Makefile deleted file mode 100644 index 959529c5fd9..00000000000 --- a/libcontainer/dmz/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Get GO, GOARCH and CC values for cross-compilation. -include ../../cc_platform.mk - -# List of GOARCH that nolibc supports, from: -# https://go.dev/doc/install/source#environment (with GOOS=linux) -# -# See nolibc supported arches in ./nolibc/arch-*.h -NOLIBC_GOARCHES := 386 amd64 arm arm64 loong64 ppc64le riscv64 s390x - -ifneq (,$(filter $(GOARCH), $(NOLIBC_GOARCHES))) - # We use the flags suggested in nolibc/nolibc.h, it makes the binary very small. 
- CFLAGS += -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib -lgcc -else - CFLAGS += -DRUNC_USE_STDLIB -endif - -binary/runc-dmz: _dmz.c - $(CC) $(CFLAGS) -static -o $@ $^ - $(STRIP) -gs $@ diff --git a/libcontainer/dmz/README.md b/libcontainer/dmz/README.md deleted file mode 100644 index 3cfa913ff68..00000000000 --- a/libcontainer/dmz/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Runc-dmz - -runc-dmz is a small and very simple binary used to execute the container's entrypoint. - -## Making it small - -To make it small we use the Linux kernel's [nolibc include files][nolibc-upstream], so we don't use the libc. - -A full `cp` of it is here in `nolibc/`, but removing the Makefile that is GPL. DO NOT FORGET to -remove the GPL code if updating the nolibc/ directory. - -The current version in that folder is from Linux 6.6-rc3 tag (556fb7131e03b0283672fb40f6dc2d151752aaa7). - -It also support all the architectures we support in runc. - -If the GOARCH we use for compiling doesn't support nolibc, it fallbacks to using the C stdlib. - -[nolibc-upstream]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/include/nolibc?h=v6.6-rc3 diff --git a/libcontainer/dmz/_dmz.c b/libcontainer/dmz/_dmz.c deleted file mode 100644 index e4cfcf087e7..00000000000 --- a/libcontainer/dmz/_dmz.c +++ /dev/null @@ -1,27 +0,0 @@ -#ifdef RUNC_USE_STDLIB -# include -# include -# include -# include -#else -# include "xstat.h" -# include "nolibc/nolibc.h" -#endif - -extern char **environ; - -int main(int argc, char **argv) -{ - if (argc < 1) - return 127; - int ret = execve(argv[0], argv, environ); - if (ret) { - /* NOTE: This error message format MUST match Go's format. 
*/ - char err_msg[5 + PATH_MAX + 1] = "exec "; // "exec " + argv[0] + '\0' - strncat(err_msg, argv[0], PATH_MAX); - err_msg[sizeof(err_msg) - 1] = '\0'; - - perror(err_msg); - } - return ret; -} diff --git a/libcontainer/dmz/binary/.gitignore b/libcontainer/dmz/binary/.gitignore deleted file mode 100644 index f163ef41c1f..00000000000 --- a/libcontainer/dmz/binary/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/runc-dmz diff --git a/libcontainer/dmz/binary/dummy-file.txt b/libcontainer/dmz/binary/dummy-file.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/libcontainer/dmz/dmz.go b/libcontainer/dmz/dmz.go deleted file mode 100644 index 9b6b500807c..00000000000 --- a/libcontainer/dmz/dmz.go +++ /dev/null @@ -1,9 +0,0 @@ -package dmz - -import ( - "errors" -) - -// ErrNoDmzBinary is returned by Binary when there is no runc-dmz binary -// embedded in the runc program. -var ErrNoDmzBinary = errors.New("runc-dmz binary not embedded in this program") diff --git a/libcontainer/dmz/dmz_fallback_linux.go b/libcontainer/dmz/dmz_fallback_linux.go deleted file mode 100644 index 4f624e048b9..00000000000 --- a/libcontainer/dmz/dmz_fallback_linux.go +++ /dev/null @@ -1 +0,0 @@ -package dmz diff --git a/libcontainer/dmz/dmz_linux.go b/libcontainer/dmz/dmz_linux.go deleted file mode 100644 index 29f2af08edf..00000000000 --- a/libcontainer/dmz/dmz_linux.go +++ /dev/null @@ -1,76 +0,0 @@ -//go:build !runc_nodmz - -package dmz - -import ( - "bytes" - "debug/elf" - "embed" - "fmt" - "os" - "strconv" - "sync" - - "github.com/sirupsen/logrus" -) - -// Try to build the runc-dmz binary on "go generate". If it fails (or -// libcontainer is being imported as a library), the embed.FS will not contain -// the file, which will then cause us to fall back to a clone of -// /proc/self/exe. -// -// There is an empty file called dummy-file.txt in libcontainer/dmz/binary in -// order to work around the restriction that go:embed requires at least one -// file to match the pattern. 
-// -//go:generate make -B binary/runc-dmz -//go:embed binary -var runcDmzFs embed.FS - -// A cached copy of the contents of runc-dmz. -var ( - runcDmzBinaryOnce sync.Once - runcDmzBinaryIsValid bool - runcDmzBinary []byte -) - -// Binary returns a cloned copy (see CloneBinary) of a very minimal C program -// that just does an execve() of its arguments. This is used in the final -// execution step of the container execution as an intermediate process before -// the container process is execve'd. This allows for protection against -// CVE-2019-5736 without requiring a complete copy of the runc binary. Each -// call to Binary will return a new copy. -// -// If the runc-dmz binary is not embedded into the runc binary, Binary will -// return ErrNoDmzBinary as the error. -func Binary(tmpDir string) (*os.File, error) { - // Only RUNC_DMZ=true enables runc_dmz. - runcDmz := os.Getenv("RUNC_DMZ") - if runcDmz == "" { - logrus.Debugf("RUNC_DMZ is not set -- switching back to classic /proc/self/exe cloning") - return nil, ErrNoDmzBinary - } - if dmzEnabled, err := strconv.ParseBool(runcDmz); err == nil && !dmzEnabled { - logrus.Debugf("RUNC_DMZ is false -- switching back to classic /proc/self/exe cloning") - return nil, ErrNoDmzBinary - } else if err != nil { - return nil, fmt.Errorf("parsing RUNC_DMZ: %w", err) - } - - runcDmzBinaryOnce.Do(func() { - runcDmzBinary, _ = runcDmzFs.ReadFile("binary/runc-dmz") - // Verify that our embedded binary has a standard ELF header. 
- if !bytes.HasPrefix(runcDmzBinary, []byte(elf.ELFMAG)) { - if len(runcDmzBinary) != 0 { - logrus.Infof("misconfigured build: embedded runc-dmz binary is non-empty but is missing a proper ELF header") - } - } else { - runcDmzBinaryIsValid = true - } - }) - if !runcDmzBinaryIsValid { - return nil, ErrNoDmzBinary - } - rdr := bytes.NewBuffer(runcDmzBinary) - return CloneBinary(rdr, int64(rdr.Len()), "runc-dmz", tmpDir) -} diff --git a/libcontainer/dmz/dmz_unsupported.go b/libcontainer/dmz/dmz_unsupported.go deleted file mode 100644 index 9e284f65394..00000000000 --- a/libcontainer/dmz/dmz_unsupported.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !linux || runc_nodmz - -package dmz - -import ( - "os" -) - -func Binary(_ string) (*os.File, error) { - return nil, ErrNoDmzBinary -} diff --git a/libcontainer/dmz/linux/README.md b/libcontainer/dmz/linux/README.md deleted file mode 100644 index d9838abb675..00000000000 --- a/libcontainer/dmz/linux/README.md +++ /dev/null @@ -1,5 +0,0 @@ -This directory contains some files copied from Linux's repo, from the uapi: - - tools/include/uapi/linux/ - -The linux repo was used at Linux 6.6.-rc3 tag (556fb7131e03b0283672fb40f6dc2d151752aaa7). 
diff --git a/libcontainer/dmz/linux/stat.h b/libcontainer/dmz/linux/stat.h deleted file mode 100644 index 7cab2c65d3d..00000000000 --- a/libcontainer/dmz/linux/stat.h +++ /dev/null @@ -1,194 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_STAT_H -#define _UAPI_LINUX_STAT_H - -#include - -#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) - -#define S_IFMT 00170000 -#define S_IFSOCK 0140000 -#define S_IFLNK 0120000 -#define S_IFREG 0100000 -#define S_IFBLK 0060000 -#define S_IFDIR 0040000 -#define S_IFCHR 0020000 -#define S_IFIFO 0010000 -#define S_ISUID 0004000 -#define S_ISGID 0002000 -#define S_ISVTX 0001000 - -#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) -#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) -#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) -#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) -#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) -#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) - -#define S_IRWXU 00700 -#define S_IRUSR 00400 -#define S_IWUSR 00200 -#define S_IXUSR 00100 - -#define S_IRWXG 00070 -#define S_IRGRP 00040 -#define S_IWGRP 00020 -#define S_IXGRP 00010 - -#define S_IRWXO 00007 -#define S_IROTH 00004 -#define S_IWOTH 00002 -#define S_IXOTH 00001 - -#endif - -/* - * Timestamp structure for the timestamps in struct statx. - * - * tv_sec holds the number of seconds before (negative) or after (positive) - * 00:00:00 1st January 1970 UTC. - * - * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time. - * - * __reserved is held in case we need a yet finer resolution. - */ -struct statx_timestamp { - __s64 tv_sec; - __u32 tv_nsec; - __s32 __reserved; -}; - -/* - * Structures for the extended file attribute retrieval system call - * (statx()). - * - * The caller passes a mask of what they're specifically interested in as a - * parameter to statx(). 
What statx() actually got will be indicated in - * st_mask upon return. - * - * For each bit in the mask argument: - * - * - if the datum is not supported: - * - * - the bit will be cleared, and - * - * - the datum will be set to an appropriate fabricated value if one is - * available (eg. CIFS can take a default uid and gid), otherwise - * - * - the field will be cleared; - * - * - otherwise, if explicitly requested: - * - * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is - * set or if the datum is considered out of date, and - * - * - the field will be filled in and the bit will be set; - * - * - otherwise, if not requested, but available in approximate form without any - * effort, it will be filled in anyway, and the bit will be set upon return - * (it might not be up to date, however, and no attempt will be made to - * synchronise the internal state first); - * - * - otherwise the field and the bit will be cleared before returning. - * - * Items in STATX_BASIC_STATS may be marked unavailable on return, but they - * will have values installed for compatibility purposes so that stat() and - * co. can be emulated in userspace. 
- */ -struct statx { - /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ - /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ - __u16 __spare0[1]; - /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ - /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ - /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ - __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ - __u32 stx_dev_minor; - /* 0x90 */ - __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - /* 0xa0 */ - __u64 __spare3[12]; /* Spare space for future expansion */ - /* 0x100 */ -}; - -/* - * Flags to be stx_mask - * - * Query request/result mask for statx() and struct statx::stx_mask. - * - * These bits should be set in the mask argument of statx() to request - * particular items when calling statx(). 
- */ -#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ -#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ -#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ -#define STATX_UID 0x00000008U /* Want/got stx_uid */ -#define STATX_GID 0x00000010U /* Want/got stx_gid */ -#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ -#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ -#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ -#define STATX_INO 0x00000100U /* Want/got stx_ino */ -#define STATX_SIZE 0x00000200U /* Want/got stx_size */ -#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ -#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ -#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ -#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ -#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ - -#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ - -#ifndef __KERNEL__ -/* - * This is deprecated, and shall remain the same value in the future. To avoid - * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME) - * instead. - */ -#define STATX_ALL 0x00000fffU -#endif - -/* - * Attributes to be found in stx_attributes and masked in stx_attributes_mask. - * - * These give information about the features or the state of a file that might - * be of use to ordinary userspace programs such as GUIs or ls rather than - * specialised tools. - * - * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags - * semantically. Where possible, the numerical value is picked to correspond - * also. Note that the DAX attribute indicates that the file is in the CPU - * direct access state. It does not correspond to the per-inode flag that - * some filesystems support. 
- * - */ -#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ -#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ -#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ -#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ -#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ -#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ -#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ -#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ -#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ - - -#endif /* _UAPI_LINUX_STAT_H */ diff --git a/libcontainer/dmz/nolibc/arch-aarch64.h b/libcontainer/dmz/nolibc/arch-aarch64.h deleted file mode 100644 index 6c33c46848e..00000000000 --- a/libcontainer/dmz/nolibc/arch-aarch64.h +++ /dev/null @@ -1,157 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * AARCH64 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_ARCH_AARCH64_H -#define _NOLIBC_ARCH_AARCH64_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for AARCH64 : - * - registers are 64-bit - * - stack is 16-byte aligned - * - syscall number is passed in x8 - * - arguments are in x0, x1, x2, x3, x4, x5 - * - the system call is performed by calling svc 0 - * - syscall return comes in x0. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - * On aarch64, select() is not implemented so we have to use pselect6(). 
- */ -#define __ARCH_WANT_SYS_PSELECT6 - -#define my_syscall0(num) \ -({ \ - register long _num __asm__ ("x8") = (num); \ - register long _arg1 __asm__ ("x0"); \ - \ - __asm__ volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num __asm__ ("x8") = (num); \ - register long _arg1 __asm__ ("x0") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num __asm__ ("x8") = (num); \ - register long _arg1 __asm__ ("x0") = (long)(arg1); \ - register long _arg2 __asm__ ("x1") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num __asm__ ("x8") = (num); \ - register long _arg1 __asm__ ("x0") = (long)(arg1); \ - register long _arg2 __asm__ ("x1") = (long)(arg2); \ - register long _arg3 __asm__ ("x2") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num __asm__ ("x8") = (num); \ - register long _arg1 __asm__ ("x0") = (long)(arg1); \ - register long _arg2 __asm__ ("x1") = (long)(arg2); \ - register long _arg3 __asm__ ("x2") = (long)(arg3); \ - register long _arg4 __asm__ ("x3") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "svc #0\n" \ - : "=r"(_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num __asm__ ("x8") = (num); \ - register long _arg1 __asm__ ("x0") = 
(long)(arg1); \ - register long _arg2 __asm__ ("x1") = (long)(arg2); \ - register long _arg3 __asm__ ("x2") = (long)(arg3); \ - register long _arg4 __asm__ ("x3") = (long)(arg4); \ - register long _arg5 __asm__ ("x4") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "svc #0\n" \ - : "=r" (_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num __asm__ ("x8") = (num); \ - register long _arg1 __asm__ ("x0") = (long)(arg1); \ - register long _arg2 __asm__ ("x1") = (long)(arg2); \ - register long _arg3 __asm__ ("x2") = (long)(arg3); \ - register long _arg4 __asm__ ("x3") = (long)(arg4); \ - register long _arg5 __asm__ ("x4") = (long)(arg5); \ - register long _arg6 __asm__ ("x5") = (long)(arg6); \ - \ - __asm__ volatile ( \ - "svc #0\n" \ - : "=r" (_arg1) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6), "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -/* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) -{ - __asm__ volatile ( - "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ - "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ - "bl _start_c\n" /* transfer to c runtime */ - ); - __builtin_unreachable(); -} -#endif /* _NOLIBC_ARCH_AARCH64_H */ diff --git a/libcontainer/dmz/nolibc/arch-arm.h b/libcontainer/dmz/nolibc/arch-arm.h deleted file mode 100644 index cae4afa7c1c..00000000000 --- a/libcontainer/dmz/nolibc/arch-arm.h +++ /dev/null @@ -1,199 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * ARM specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_ARCH_ARM_H -#define _NOLIBC_ARCH_ARM_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for ARM in ARM or Thumb modes : - * - registers are 
32-bit - * - stack is 8-byte aligned - * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) - * - syscall number is passed in r7 - * - arguments are in r0, r1, r2, r3, r4, r5 - * - the system call is performed by calling svc #0 - * - syscall return comes in r0. - * - only lr is clobbered. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - in thumb mode without -fomit-frame-pointer, r7 is also used to store the - * frame pointer, and we cannot directly assign it as a register variable, - * nor can we clobber it. Instead we assign the r6 register and swap it - * with r7 before calling svc, and r6 is marked as clobbered. - * We're just using any regular register which we assign to r7 after saving - * it. 
- * - * Also, ARM supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#if (defined(__THUMBEB__) || defined(__THUMBEL__)) && \ - !defined(NOLIBC_OMIT_FRAME_POINTER) -/* swap r6,r7 needed in Thumb mode since we can't use nor clobber r7 */ -#define _NOLIBC_SYSCALL_REG "r6" -#define _NOLIBC_THUMB_SET_R7 "eor r7, r6\neor r6, r7\neor r7, r6\n" -#define _NOLIBC_THUMB_RESTORE_R7 "mov r7, r6\n" - -#else /* we're in ARM mode */ -/* in Arm mode we can directly use r7 */ -#define _NOLIBC_SYSCALL_REG "r7" -#define _NOLIBC_THUMB_SET_R7 "" -#define _NOLIBC_THUMB_RESTORE_R7 "" - -#endif /* end THUMB */ - -#define my_syscall0(num) \ -({ \ - register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ - register long _arg1 __asm__ ("r0"); \ - \ - __asm__ volatile ( \ - _NOLIBC_THUMB_SET_R7 \ - "svc #0\n" \ - _NOLIBC_THUMB_RESTORE_R7 \ - : "=r"(_arg1), "=r"(_num) \ - : "r"(_arg1), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ - register long _arg1 __asm__ ("r0") = (long)(arg1); \ - \ - __asm__ volatile ( \ - _NOLIBC_THUMB_SET_R7 \ - "svc #0\n" \ - _NOLIBC_THUMB_RESTORE_R7 \ - : "=r"(_arg1), "=r" (_num) \ - : "r"(_arg1), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ - register long _arg1 __asm__ ("r0") = (long)(arg1); \ - register long _arg2 __asm__ ("r1") = (long)(arg2); \ - \ - __asm__ volatile ( \ - _NOLIBC_THUMB_SET_R7 \ - "svc #0\n" \ - _NOLIBC_THUMB_RESTORE_R7 \ - : "=r"(_arg1), "=r" (_num) \ - : "r"(_arg1), "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ - register long _arg1 __asm__ ("r0") = (long)(arg1); \ - register long _arg2 __asm__ 
("r1") = (long)(arg2); \ - register long _arg3 __asm__ ("r2") = (long)(arg3); \ - \ - __asm__ volatile ( \ - _NOLIBC_THUMB_SET_R7 \ - "svc #0\n" \ - _NOLIBC_THUMB_RESTORE_R7 \ - : "=r"(_arg1), "=r" (_num) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ - register long _arg1 __asm__ ("r0") = (long)(arg1); \ - register long _arg2 __asm__ ("r1") = (long)(arg2); \ - register long _arg3 __asm__ ("r2") = (long)(arg3); \ - register long _arg4 __asm__ ("r3") = (long)(arg4); \ - \ - __asm__ volatile ( \ - _NOLIBC_THUMB_SET_R7 \ - "svc #0\n" \ - _NOLIBC_THUMB_RESTORE_R7 \ - : "=r"(_arg1), "=r" (_num) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ - register long _arg1 __asm__ ("r0") = (long)(arg1); \ - register long _arg2 __asm__ ("r1") = (long)(arg2); \ - register long _arg3 __asm__ ("r2") = (long)(arg3); \ - register long _arg4 __asm__ ("r3") = (long)(arg4); \ - register long _arg5 __asm__ ("r4") = (long)(arg5); \ - \ - __asm__ volatile ( \ - _NOLIBC_THUMB_SET_R7 \ - "svc #0\n" \ - _NOLIBC_THUMB_RESTORE_R7 \ - : "=r"(_arg1), "=r" (_num) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \ - register long _arg1 __asm__ ("r0") = (long)(arg1); \ - register long _arg2 __asm__ ("r1") = (long)(arg2); \ - register long _arg3 __asm__ ("r2") = (long)(arg3); \ - register long _arg4 __asm__ ("r3") = (long)(arg4); \ - register long _arg5 __asm__ ("r4") = (long)(arg5); \ - register long _arg6 __asm__ ("r5") 
= (long)(arg6); \ - \ - __asm__ volatile ( \ - _NOLIBC_THUMB_SET_R7 \ - "svc #0\n" \ - _NOLIBC_THUMB_RESTORE_R7 \ - : "=r"(_arg1), "=r" (_num) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6), "r"(_num) \ - : "memory", "cc", "lr" \ - ); \ - _arg1; \ -}) - -/* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) -{ - __asm__ volatile ( - "mov %r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ - "and ip, %r0, #-8\n" /* sp must be 8-byte aligned in the callee */ - "mov sp, ip\n" - "bl _start_c\n" /* transfer to c runtime */ - ); - __builtin_unreachable(); -} - -#endif /* _NOLIBC_ARCH_ARM_H */ diff --git a/libcontainer/dmz/nolibc/arch-i386.h b/libcontainer/dmz/nolibc/arch-i386.h deleted file mode 100644 index 64415b9fac7..00000000000 --- a/libcontainer/dmz/nolibc/arch-i386.h +++ /dev/null @@ -1,178 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * i386 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_ARCH_I386_H -#define _NOLIBC_ARCH_I386_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for i386 : - * - mostly similar to x86_64 - * - registers are 32-bit - * - syscall number is passed in eax - * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively - * - all registers are preserved (except eax of course) - * - the system call is performed by calling int $0x80 - * - syscall return comes in eax - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). 
- * - * Also, i386 supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define 
my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - register long _arg5 __asm__ ("edi") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _eax = (long)(num); \ - long _arg6 = (long)(arg6); /* Always in memory */ \ - __asm__ volatile ( \ - "pushl %[_arg6]\n\t" \ - "pushl %%ebp\n\t" \ - "movl 4(%%esp),%%ebp\n\t" \ - "int $0x80\n\t" \ - "popl %%ebp\n\t" \ - "addl $4,%%esp\n\t" \ - : "+a"(_eax) /* %eax */ \ - : "b"(arg1), /* %ebx */ \ - "c"(arg2), /* %ecx */ \ - "d"(arg3), /* %edx */ \ - "S"(arg4), /* %esi */ \ - "D"(arg5), /* %edi */ \ - [_arg6]"m"(_arg6) /* memory */ \ - : "memory", "cc" \ - ); \ - _eax; \ -}) - -/* startup code */ -/* - * i386 System V ABI mandates: - * 1) last pushed argument must be 16-byte aligned. 
- * 2) The deepest stack frame should be set to zero - * - */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) -{ - __asm__ volatile ( - "xor %ebp, %ebp\n" /* zero the stack frame */ - "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "and $-16, %esp\n" /* last pushed argument must be 16-byte aligned */ - "push %eax\n" /* push arg1 on stack to support plain stack modes too */ - "call _start_c\n" /* transfer to c runtime */ - "hlt\n" /* ensure it does not return */ - ); - __builtin_unreachable(); -} - -#endif /* _NOLIBC_ARCH_I386_H */ diff --git a/libcontainer/dmz/nolibc/arch-loongarch.h b/libcontainer/dmz/nolibc/arch-loongarch.h deleted file mode 100644 index bf98f622019..00000000000 --- a/libcontainer/dmz/nolibc/arch-loongarch.h +++ /dev/null @@ -1,164 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * LoongArch specific definitions for NOLIBC - * Copyright (C) 2023 Loongson Technology Corporation Limited - */ - -#ifndef _NOLIBC_ARCH_LOONGARCH_H -#define _NOLIBC_ARCH_LOONGARCH_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for LoongArch : - * - stack is 16-byte aligned - * - syscall number is passed in a7 - * - arguments are in a0, a1, a2, a3, a4, a5 - * - the system call is performed by calling "syscall 0" - * - syscall return comes in a0 - * - the arguments are cast to long and assigned into the target - * registers which are then simply passed as registers to the asm code, - * so that we don't have to experience issues with register constraints. - * - * On LoongArch, select() is not implemented so we have to use pselect6(). 
- */ -#define __ARCH_WANT_SYS_PSELECT6 -#define _NOLIBC_SYSCALL_CLOBBERLIST \ - "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8" - -#define my_syscall0(num) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0"); \ - \ - __asm__ volatile ( \ - "syscall 0\n" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "syscall 0\n" \ - : "+r"(_arg1) \ - : "r"(_num) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "syscall 0\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), \ - "r"(_num) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "syscall 0\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "syscall 0\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg1; \ -}) - -#define 
my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - register long _arg5 __asm__ ("a4") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "syscall 0\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - register long _arg5 __asm__ ("a4") = (long)(arg5); \ - register long _arg6 __asm__ ("a5") = (long)(arg6); \ - \ - __asm__ volatile ( \ - "syscall 0\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ - "r"(_num) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg1; \ -}) - -#if __loongarch_grlen == 32 -#define LONG_BSTRINS "bstrins.w" -#else /* __loongarch_grlen == 64 */ -#define LONG_BSTRINS "bstrins.d" -#endif - -/* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) -{ - __asm__ volatile ( - "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ - "bl _start_c\n" /* transfer to c runtime */ - ); - __builtin_unreachable(); -} - -#endif /* _NOLIBC_ARCH_LOONGARCH_H */ diff --git a/libcontainer/dmz/nolibc/arch-mips.h b/libcontainer/dmz/nolibc/arch-mips.h deleted file mode 100644 index 4ab6fa54bee..00000000000 --- a/libcontainer/dmz/nolibc/arch-mips.h +++ /dev/null @@ -1,195 +0,0 @@ -/* 
SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * MIPS specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_ARCH_MIPS_H -#define _NOLIBC_ARCH_MIPS_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for MIPS ABI O32 : - * - WARNING! there's always a delayed slot! - * - WARNING again, the syntax is different, registers take a '$' and numbers - * do not. - * - registers are 32-bit - * - stack is 8-byte aligned - * - syscall number is passed in v0 (starts at 0xfa0). - * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to - * leave some room in the stack for the callee to save a0..a3 if needed. - * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are - * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as - * scall32-o32.S in the kernel sources. - * - the system call is performed by calling "syscall" - * - syscall return comes in v0, and register a3 needs to be checked to know - * if an error occurred, in which case errno is in v0. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - */ - -#define _NOLIBC_SYSCALL_CLOBBERLIST \ - "memory", "cc", "at", "v1", "hi", "lo", \ - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" - -#define my_syscall0(num) \ -({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg4 __asm__ ("a3"); \ - \ - __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "r"(_num) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg4 ? 
-_num : _num; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg4 __asm__ ("a3"); \ - \ - __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg4 __asm__ ("a3"); \ - \ - __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3"); \ - \ - __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r"(_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg4 ? 
-_num : _num; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r" (_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg4 ? -_num : _num; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - register long _arg5 = (long)(arg5); \ - \ - __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ - "sw %7, 16($sp)\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r" (_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg4 ? 
-_num : _num; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num __asm__ ("v0") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - register long _arg5 = (long)(arg5); \ - register long _arg6 = (long)(arg6); \ - \ - __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ - "sw %7, 16($sp)\n" \ - "sw %8, 20($sp)\n" \ - "syscall\n" \ - "addiu $sp, $sp, 32\n" \ - : "=r" (_num), "=r"(_arg4) \ - : "0"(_num), \ - "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _arg4 ? -_num : _num; \ -}) - -/* startup code, note that it's called __start on MIPS */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector __start(void) -{ - __asm__ volatile ( - ".set push\n" - ".set noreorder\n" - ".option pic0\n" - "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - "li $t0, -8\n" - "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ - "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ - "jal _start_c\n" /* transfer to c runtime */ - " nop\n" /* delayed slot */ - ".set pop\n" - ); - __builtin_unreachable(); -} - -#endif /* _NOLIBC_ARCH_MIPS_H */ diff --git a/libcontainer/dmz/nolibc/arch-powerpc.h b/libcontainer/dmz/nolibc/arch-powerpc.h deleted file mode 100644 index ac212e6185b..00000000000 --- a/libcontainer/dmz/nolibc/arch-powerpc.h +++ /dev/null @@ -1,221 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * PowerPC specific definitions for NOLIBC - * Copyright (C) 2023 Zhangjin Wu - */ - -#ifndef _NOLIBC_ARCH_POWERPC_H -#define _NOLIBC_ARCH_POWERPC_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for PowerPC : - * - stack is 16-byte aligned - * - syscall number is passed in r0 - * - arguments 
are in r3, r4, r5, r6, r7, r8, r9 - * - the system call is performed by calling "sc" - * - syscall return comes in r3, and the summary overflow bit is checked - * to know if an error occurred, in which case errno is in r3. - * - the arguments are cast to long and assigned into the target - * registers which are then simply passed as registers to the asm code, - * so that we don't have to experience issues with register constraints. - */ - -#define _NOLIBC_SYSCALL_CLOBBERLIST \ - "memory", "cr0", "r12", "r11", "r10", "r9" - -#define my_syscall0(num) \ -({ \ - register long _ret __asm__ ("r3"); \ - register long _num __asm__ ("r0") = (num); \ - \ - __asm__ volatile ( \ - " sc\n" \ - " bns+ 1f\n" \ - " neg %0, %0\n" \ - "1:\n" \ - : "=r"(_ret), "+r"(_num) \ - : \ - : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5", "r4" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _ret __asm__ ("r3"); \ - register long _num __asm__ ("r0") = (num); \ - register long _arg1 __asm__ ("r3") = (long)(arg1); \ - \ - __asm__ volatile ( \ - " sc\n" \ - " bns+ 1f\n" \ - " neg %0, %0\n" \ - "1:\n" \ - : "=r"(_ret), "+r"(_num) \ - : "0"(_arg1) \ - : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5", "r4" \ - ); \ - _ret; \ -}) - - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _ret __asm__ ("r3"); \ - register long _num __asm__ ("r0") = (num); \ - register long _arg1 __asm__ ("r3") = (long)(arg1); \ - register long _arg2 __asm__ ("r4") = (long)(arg2); \ - \ - __asm__ volatile ( \ - " sc\n" \ - " bns+ 1f\n" \ - " neg %0, %0\n" \ - "1:\n" \ - : "=r"(_ret), "+r"(_num), "+r"(_arg2) \ - : "0"(_arg1) \ - : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5" \ - ); \ - _ret; \ -}) - - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _ret __asm__ ("r3"); \ - register long _num __asm__ ("r0") = (num); \ - register long _arg1 __asm__ ("r3") = (long)(arg1); \ - register long _arg2 __asm__ ("r4") = (long)(arg2); \ - register long 
_arg3 __asm__ ("r5") = (long)(arg3); \ - \ - __asm__ volatile ( \ - " sc\n" \ - " bns+ 1f\n" \ - " neg %0, %0\n" \ - "1:\n" \ - : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3) \ - : "0"(_arg1) \ - : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6" \ - ); \ - _ret; \ -}) - - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _ret __asm__ ("r3"); \ - register long _num __asm__ ("r0") = (num); \ - register long _arg1 __asm__ ("r3") = (long)(arg1); \ - register long _arg2 __asm__ ("r4") = (long)(arg2); \ - register long _arg3 __asm__ ("r5") = (long)(arg3); \ - register long _arg4 __asm__ ("r6") = (long)(arg4); \ - \ - __asm__ volatile ( \ - " sc\n" \ - " bns+ 1f\n" \ - " neg %0, %0\n" \ - "1:\n" \ - : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \ - "+r"(_arg4) \ - : "0"(_arg1) \ - : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7" \ - ); \ - _ret; \ -}) - - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _ret __asm__ ("r3"); \ - register long _num __asm__ ("r0") = (num); \ - register long _arg1 __asm__ ("r3") = (long)(arg1); \ - register long _arg2 __asm__ ("r4") = (long)(arg2); \ - register long _arg3 __asm__ ("r5") = (long)(arg3); \ - register long _arg4 __asm__ ("r6") = (long)(arg4); \ - register long _arg5 __asm__ ("r7") = (long)(arg5); \ - \ - __asm__ volatile ( \ - " sc\n" \ - " bns+ 1f\n" \ - " neg %0, %0\n" \ - "1:\n" \ - : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \ - "+r"(_arg4), "+r"(_arg5) \ - : "0"(_arg1) \ - : _NOLIBC_SYSCALL_CLOBBERLIST, "r8" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _ret __asm__ ("r3"); \ - register long _num __asm__ ("r0") = (num); \ - register long _arg1 __asm__ ("r3") = (long)(arg1); \ - register long _arg2 __asm__ ("r4") = (long)(arg2); \ - register long _arg3 __asm__ ("r5") = (long)(arg3); \ - register long _arg4 __asm__ ("r6") = (long)(arg4); \ - register long _arg5 __asm__ ("r7") = (long)(arg5); \ - 
register long _arg6 __asm__ ("r8") = (long)(arg6); \ - \ - __asm__ volatile ( \ - " sc\n" \ - " bns+ 1f\n" \ - " neg %0, %0\n" \ - "1:\n" \ - : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \ - "+r"(_arg4), "+r"(_arg5), "+r"(_arg6) \ - : "0"(_arg1) \ - : _NOLIBC_SYSCALL_CLOBBERLIST \ - ); \ - _ret; \ -}) - -#ifndef __powerpc64__ -/* FIXME: For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0), - * "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but - * works with __attribute__((__optimize__("-fno-stack-protector"))) - */ -#ifdef __no_stack_protector -#undef __no_stack_protector -#define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) -#endif -#endif /* !__powerpc64__ */ - -/* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) -{ -#ifdef __powerpc64__ -#if _CALL_ELF == 2 - /* with -mabi=elfv2, save TOC/GOT pointer to r2 - * r12 is global entry pointer, we use it to compute TOC from r12 - * https://www.llvm.org/devmtg/2014-04/PDFs/Talks/Euro-LLVM-2014-Weigand.pdf - * https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.pdf - */ - __asm__ volatile ( - "addis 2, 12, .TOC. - _start@ha\n" - "addi 2, 2, .TOC. 
- _start@l\n" - ); -#endif /* _CALL_ELF == 2 */ - - __asm__ volatile ( - "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */ - "li 0, 0\n" /* zero the frame pointer */ - "stdu 1, -32(1)\n" /* the initial stack frame */ - "bl _start_c\n" /* transfer to c runtime */ - ); -#else - __asm__ volatile ( - "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */ - "li 0, 0\n" /* zero the frame pointer */ - "stwu 1, -16(1)\n" /* the initial stack frame */ - "bl _start_c\n" /* transfer to c runtime */ - ); -#endif - __builtin_unreachable(); -} - -#endif /* _NOLIBC_ARCH_POWERPC_H */ diff --git a/libcontainer/dmz/nolibc/arch-riscv.h b/libcontainer/dmz/nolibc/arch-riscv.h deleted file mode 100644 index 950cc2283fd..00000000000 --- a/libcontainer/dmz/nolibc/arch-riscv.h +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * RISCV (32 and 64) specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_ARCH_RISCV_H -#define _NOLIBC_ARCH_RISCV_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for RISCV : - * - stack is 16-byte aligned - * - syscall number is passed in a7 - * - arguments are in a0, a1, a2, a3, a4, a5 - * - the system call is performed by calling ecall - * - syscall return comes in a0 - * - the arguments are cast to long and assigned into the target - * registers which are then simply passed as registers to the asm code, - * so that we don't have to experience issues with register constraints. - * - * On riscv, select() is not implemented so we have to use pselect6(). 
- */ -#define __ARCH_WANT_SYS_PSELECT6 - -#define my_syscall0(num) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0"); \ - \ - __asm__ volatile ( \ - "ecall\n\t" \ - : "=r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "ecall\n\t" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = 
(long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - register long _arg5 __asm__ ("a4") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num __asm__ ("a7") = (num); \ - register long _arg1 __asm__ ("a0") = (long)(arg1); \ - register long _arg2 __asm__ ("a1") = (long)(arg2); \ - register long _arg3 __asm__ ("a2") = (long)(arg3); \ - register long _arg4 __asm__ ("a3") = (long)(arg4); \ - register long _arg5 __asm__ ("a4") = (long)(arg5); \ - register long _arg6 __asm__ ("a5") = (long)(arg6); \ - \ - __asm__ volatile ( \ - "ecall\n" \ - : "+r"(_arg1) \ - : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ - "r"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -/* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) -{ - __asm__ volatile ( - ".option push\n" - ".option norelax\n" - "lla gp, __global_pointer$\n" - ".option pop\n" - "mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */ - "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ - "call _start_c\n" /* transfer to c runtime */ - ); - __builtin_unreachable(); -} - -#endif /* _NOLIBC_ARCH_RISCV_H */ diff --git a/libcontainer/dmz/nolibc/arch-s390.h b/libcontainer/dmz/nolibc/arch-s390.h deleted file mode 100644 index 5d60fd43f88..00000000000 --- a/libcontainer/dmz/nolibc/arch-s390.h +++ /dev/null @@ -1,186 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * s390 specific definitions for NOLIBC - */ - -#ifndef _NOLIBC_ARCH_S390_H -#define _NOLIBC_ARCH_S390_H -#include -#include - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for s390: - * - registers are 64-bit - * - syscall number is passed in r1 
- * - arguments are in r2-r7 - * - the system call is performed by calling the svc instruction - * - syscall return value is in r2 - * - r1 and r2 are clobbered, others are preserved. - * - * Link s390 ABI: https://github.com/IBM/s390x-abi - * - */ - -#define my_syscall0(num) \ -({ \ - register long _num __asm__ ("1") = (num); \ - register long _rc __asm__ ("2"); \ - \ - __asm__ volatile ( \ - "svc 0\n" \ - : "=d"(_rc) \ - : "d"(_num) \ - : "memory", "cc" \ - ); \ - _rc; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - register long _num __asm__ ("1") = (num); \ - register long _arg1 __asm__ ("2") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "svc 0\n" \ - : "+d"(_arg1) \ - : "d"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - register long _num __asm__ ("1") = (num); \ - register long _arg1 __asm__ ("2") = (long)(arg1); \ - register long _arg2 __asm__ ("3") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "svc 0\n" \ - : "+d"(_arg1) \ - : "d"(_arg2), "d"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - register long _num __asm__ ("1") = (num); \ - register long _arg1 __asm__ ("2") = (long)(arg1); \ - register long _arg2 __asm__ ("3") = (long)(arg2); \ - register long _arg3 __asm__ ("4") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "svc 0\n" \ - : "+d"(_arg1) \ - : "d"(_arg2), "d"(_arg3), "d"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - register long _num __asm__ ("1") = (num); \ - register long _arg1 __asm__ ("2") = (long)(arg1); \ - register long _arg2 __asm__ ("3") = (long)(arg2); \ - register long _arg3 __asm__ ("4") = (long)(arg3); \ - register long _arg4 __asm__ ("5") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "svc 0\n" \ - : "+d"(_arg1) \ - : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, 
arg4, arg5) \ -({ \ - register long _num __asm__ ("1") = (num); \ - register long _arg1 __asm__ ("2") = (long)(arg1); \ - register long _arg2 __asm__ ("3") = (long)(arg2); \ - register long _arg3 __asm__ ("4") = (long)(arg3); \ - register long _arg4 __asm__ ("5") = (long)(arg4); \ - register long _arg5 __asm__ ("6") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "svc 0\n" \ - : "+d"(_arg1) \ - : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_arg5), \ - "d"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - register long _num __asm__ ("1") = (num); \ - register long _arg1 __asm__ ("2") = (long)(arg1); \ - register long _arg2 __asm__ ("3") = (long)(arg2); \ - register long _arg3 __asm__ ("4") = (long)(arg3); \ - register long _arg4 __asm__ ("5") = (long)(arg4); \ - register long _arg5 __asm__ ("6") = (long)(arg5); \ - register long _arg6 __asm__ ("7") = (long)(arg6); \ - \ - __asm__ volatile ( \ - "svc 0\n" \ - : "+d"(_arg1) \ - : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_arg5), \ - "d"(_arg6), "d"(_num) \ - : "memory", "cc" \ - ); \ - _arg1; \ -}) - -/* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) -{ - __asm__ volatile ( - "lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */ - "aghi %r15, -160\n" /* allocate new stackframe */ - "xc 0(8,%r15), 0(%r15)\n" /* clear backchain */ - "brasl %r14, _start_c\n" /* transfer to c runtime */ - ); - __builtin_unreachable(); -} - -struct s390_mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -static __attribute__((unused)) -void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, - off_t offset) -{ - struct s390_mmap_arg_struct args = { - .addr = (unsigned long)addr, - .len = (unsigned long)length, - .prot = prot, - .flags = flags, - .fd = fd, - 
.offset = (unsigned long)offset - }; - - return (void *)my_syscall1(__NR_mmap, &args); -} -#define sys_mmap sys_mmap - -static __attribute__((unused)) -pid_t sys_fork(void) -{ - return my_syscall5(__NR_clone, 0, SIGCHLD, 0, 0, 0); -} -#define sys_fork sys_fork - -#endif /* _NOLIBC_ARCH_S390_H */ diff --git a/libcontainer/dmz/nolibc/arch-x86_64.h b/libcontainer/dmz/nolibc/arch-x86_64.h deleted file mode 100644 index e5ccb926c90..00000000000 --- a/libcontainer/dmz/nolibc/arch-x86_64.h +++ /dev/null @@ -1,176 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * x86_64 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_ARCH_X86_64_H -#define _NOLIBC_ARCH_X86_64_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for x86_64 : - * - registers are 64-bit - * - syscall number is passed in rax - * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively - * - the system call is performed by calling the syscall instruction - * - syscall return comes in rax - * - rcx and r11 are clobbered, others are preserved. - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 - * Calling Conventions. 
- * - * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home - * - */ - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - register long _arg3 __asm__ ("rdx") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - register long _arg3 __asm__ ("rdx") = (long)(arg3); \ - register long _arg4 __asm__ ("r10") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) 
- -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - register long _arg3 __asm__ ("rdx") = (long)(arg3); \ - register long _arg4 __asm__ ("r10") = (long)(arg4); \ - register long _arg5 __asm__ ("r8") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _ret; \ - register long _num __asm__ ("rax") = (num); \ - register long _arg1 __asm__ ("rdi") = (long)(arg1); \ - register long _arg2 __asm__ ("rsi") = (long)(arg2); \ - register long _arg3 __asm__ ("rdx") = (long)(arg3); \ - register long _arg4 __asm__ ("r10") = (long)(arg4); \ - register long _arg5 __asm__ ("r8") = (long)(arg5); \ - register long _arg6 __asm__ ("r9") = (long)(arg6); \ - \ - __asm__ volatile ( \ - "syscall\n" \ - : "=a"(_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "r"(_arg6), "0"(_num) \ - : "rcx", "r11", "memory", "cc" \ - ); \ - _ret; \ -}) - -/* startup code */ -/* - * x86-64 System V ABI mandates: - * 1) %rsp must be 16-byte aligned right before the function call. - * 2) The deepest stack frame should be zero (the %rbp). 
- * - */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) -{ - __asm__ volatile ( - "xor %ebp, %ebp\n" /* zero the stack frame */ - "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */ - "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */ - "call _start_c\n" /* transfer to c runtime */ - "hlt\n" /* ensure it does not return */ - ); - __builtin_unreachable(); -} - -#endif /* _NOLIBC_ARCH_X86_64_H */ diff --git a/libcontainer/dmz/nolibc/arch.h b/libcontainer/dmz/nolibc/arch.h deleted file mode 100644 index e276fb0680a..00000000000 --- a/libcontainer/dmz/nolibc/arch.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * Copyright (C) 2017-2022 Willy Tarreau - */ - -/* Below comes the architecture-specific code. For each architecture, we have - * the syscall declarations and the _start code definition. This is the only - * global part. On all architectures the kernel puts everything in the stack - * before jumping to _start just above us, without any return address (_start - * is not a function but an entry point). So at the stack pointer we find argc. - * Then argv[] begins, and ends at the first NULL. Then we have envp which - * starts and ends with a NULL as well. So envp=argv+argc+1. 
- */ - -#ifndef _NOLIBC_ARCH_H -#define _NOLIBC_ARCH_H - -#if defined(__x86_64__) -#include "arch-x86_64.h" -#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) -#include "arch-i386.h" -#elif defined(__ARM_EABI__) -#include "arch-arm.h" -#elif defined(__aarch64__) -#include "arch-aarch64.h" -#elif defined(__mips__) && defined(_ABIO32) -#include "arch-mips.h" -#elif defined(__powerpc__) -#include "arch-powerpc.h" -#elif defined(__riscv) -#include "arch-riscv.h" -#elif defined(__s390x__) -#include "arch-s390.h" -#elif defined(__loongarch__) -#include "arch-loongarch.h" -#endif - -#endif /* _NOLIBC_ARCH_H */ diff --git a/libcontainer/dmz/nolibc/compiler.h b/libcontainer/dmz/nolibc/compiler.h deleted file mode 100644 index beddc3665d6..00000000000 --- a/libcontainer/dmz/nolibc/compiler.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * NOLIBC compiler support header - * Copyright (C) 2023 Thomas Weißschuh - */ -#ifndef _NOLIBC_COMPILER_H -#define _NOLIBC_COMPILER_H - -#if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__) - -#define _NOLIBC_STACKPROTECTOR - -#endif /* defined(__SSP__) ... 
*/ - -#if defined(__has_attribute) -# if __has_attribute(no_stack_protector) -# define __no_stack_protector __attribute__((no_stack_protector)) -# else -# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) -# endif -#else -# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) -#endif /* defined(__has_attribute) */ - -#endif /* _NOLIBC_COMPILER_H */ diff --git a/libcontainer/dmz/nolibc/crt.h b/libcontainer/dmz/nolibc/crt.h deleted file mode 100644 index a5f33fef167..00000000000 --- a/libcontainer/dmz/nolibc/crt.h +++ /dev/null @@ -1,61 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * C Run Time support for NOLIBC - * Copyright (C) 2023 Zhangjin Wu - */ - -#ifndef _NOLIBC_CRT_H -#define _NOLIBC_CRT_H - -char **environ __attribute__((weak)); -const unsigned long *_auxv __attribute__((weak)); - -static void __stack_chk_init(void); -static void exit(int); - -void _start_c(long *sp) -{ - long argc; - char **argv; - char **envp; - const unsigned long *auxv; - /* silence potential warning: conflicting types for 'main' */ - int _nolibc_main(int, char **, char **) __asm__ ("main"); - - /* initialize stack protector */ - __stack_chk_init(); - - /* - * sp : argc <-- argument count, required by main() - * argv: argv[0] <-- argument vector, required by main() - * argv[1] - * ... - * argv[argc-1] - * null - * environ: environ[0] <-- environment variables, required by main() and getenv() - * environ[1] - * ... - * null - * _auxv: _auxv[0] <-- auxiliary vector, required by getauxval() - * _auxv[1] - * ... 
- * null - */ - - /* assign argc and argv */ - argc = *sp; - argv = (void *)(sp + 1); - - /* find environ */ - environ = envp = argv + argc + 1; - - /* find _auxv */ - for (auxv = (void *)envp; *auxv++;) - ; - _auxv = auxv; - - /* go to application */ - exit(_nolibc_main(argc, argv, envp)); -} - -#endif /* _NOLIBC_CRT_H */ diff --git a/libcontainer/dmz/nolibc/ctype.h b/libcontainer/dmz/nolibc/ctype.h deleted file mode 100644 index 6f90706d064..00000000000 --- a/libcontainer/dmz/nolibc/ctype.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * ctype function definitions for NOLIBC - * Copyright (C) 2017-2021 Willy Tarreau - */ - -#ifndef _NOLIBC_CTYPE_H -#define _NOLIBC_CTYPE_H - -#include "std.h" - -/* - * As much as possible, please keep functions alphabetically sorted. - */ - -static __attribute__((unused)) -int isascii(int c) -{ - /* 0x00..0x7f */ - return (unsigned int)c <= 0x7f; -} - -static __attribute__((unused)) -int isblank(int c) -{ - return c == '\t' || c == ' '; -} - -static __attribute__((unused)) -int iscntrl(int c) -{ - /* 0x00..0x1f, 0x7f */ - return (unsigned int)c < 0x20 || c == 0x7f; -} - -static __attribute__((unused)) -int isdigit(int c) -{ - return (unsigned int)(c - '0') < 10; -} - -static __attribute__((unused)) -int isgraph(int c) -{ - /* 0x21..0x7e */ - return (unsigned int)(c - 0x21) < 0x5e; -} - -static __attribute__((unused)) -int islower(int c) -{ - return (unsigned int)(c - 'a') < 26; -} - -static __attribute__((unused)) -int isprint(int c) -{ - /* 0x20..0x7e */ - return (unsigned int)(c - 0x20) < 0x5f; -} - -static __attribute__((unused)) -int isspace(int c) -{ - /* \t is 0x9, \n is 0xA, \v is 0xB, \f is 0xC, \r is 0xD */ - return ((unsigned int)c == ' ') || (unsigned int)(c - 0x09) < 5; -} - -static __attribute__((unused)) -int isupper(int c) -{ - return (unsigned int)(c - 'A') < 26; -} - -static __attribute__((unused)) -int isxdigit(int c) -{ - return isdigit(c) || (unsigned int)(c - 'A') < 6 || 
(unsigned int)(c - 'a') < 6; -} - -static __attribute__((unused)) -int isalpha(int c) -{ - return islower(c) || isupper(c); -} - -static __attribute__((unused)) -int isalnum(int c) -{ - return isalpha(c) || isdigit(c); -} - -static __attribute__((unused)) -int ispunct(int c) -{ - return isgraph(c) && !isalnum(c); -} - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_CTYPE_H */ diff --git a/libcontainer/dmz/nolibc/errno.h b/libcontainer/dmz/nolibc/errno.h deleted file mode 100644 index a44486ff047..00000000000 --- a/libcontainer/dmz/nolibc/errno.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * Minimal errno definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_ERRNO_H -#define _NOLIBC_ERRNO_H - -#include - -#ifndef NOLIBC_IGNORE_ERRNO -#define SET_ERRNO(v) do { errno = (v); } while (0) -int errno __attribute__((weak)); -#else -#define SET_ERRNO(v) do { } while (0) -#endif - - -/* errno codes all ensure that they will not conflict with a valid pointer - * because they all correspond to the highest addressable memory page. - */ -#define MAX_ERRNO 4095 - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_ERRNO_H */ diff --git a/libcontainer/dmz/nolibc/nolibc.h b/libcontainer/dmz/nolibc/nolibc.h deleted file mode 100644 index 1f8d821000a..00000000000 --- a/libcontainer/dmz/nolibc/nolibc.h +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* nolibc.h - * Copyright (C) 2017-2018 Willy Tarreau - */ - -/* - * This file is designed to be used as a libc alternative for minimal programs - * with very limited requirements. It consists of a small number of syscall and - * type definitions, and the minimal startup code needed to call main(). - * All syscalls are declared as static functions so that they can be optimized - * away by the compiler when not used. 
- * - * Syscalls are split into 3 levels: - * - The lower level is the arch-specific syscall() definition, consisting in - * assembly code in compound expressions. These are called my_syscall0() to - * my_syscall6() depending on the number of arguments. All input arguments - * are castto a long stored in a register. These expressions always return - * the syscall's return value as a signed long value which is often either - * a pointer or the negated errno value. - * - * - The second level is mostly architecture-independent. It is made of - * static functions called sys_() which rely on my_syscallN() - * depending on the syscall definition. These functions are responsible - * for exposing the appropriate types for the syscall arguments (int, - * pointers, etc) and for setting the appropriate return type (often int). - * A few of them are architecture-specific because the syscalls are not all - * mapped exactly the same among architectures. For example, some archs do - * not implement select() and need pselect6() instead, so the sys_select() - * function will have to abstract this. - * - * - The third level is the libc call definition. It exposes the lower raw - * sys_() calls in a way that looks like what a libc usually does, - * takes care of specific input values, and of setting errno upon error. - * There can be minor variations compared to standard libc calls. For - * example the open() call always takes 3 args here. - * - * The errno variable is declared static and unused. This way it can be - * optimized away if not used. However this means that a program made of - * multiple C files may observe different errno values (one per C file). For - * the type of programs this project targets it usually is not a problem. The - * resulting program may even be reduced by defining the NOLIBC_IGNORE_ERRNO - * macro, in which case the errno value will never be assigned. - * - * Some stdint-like integer types are defined. 
These are valid on all currently - * supported architectures, because signs are enforced, ints are assumed to be - * 32 bits, longs the size of a pointer and long long 64 bits. If more - * architectures have to be supported, this may need to be adapted. - * - * Some macro definitions like the O_* values passed to open(), and some - * structures like the sys_stat struct depend on the architecture. - * - * The definitions start with the architecture-specific parts, which are picked - * based on what the compiler knows about the target architecture, and are - * completed with the generic code. Since it is the compiler which sets the - * target architecture, cross-compiling normally works out of the box without - * having to specify anything. - * - * Finally some very common libc-level functions are provided. It is the case - * for a few functions usually found in string.h, ctype.h, or stdlib.h. - * - * The nolibc.h file is only a convenient entry point which includes all other - * files. It also defines the NOLIBC macro, so that it is possible for a - * program to check this macro to know if it is being built against and decide - * to disable some features or simply not to include some standard libc files. 
- * - * A simple static executable may be built this way : - * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ - * -static -include nolibc.h -o hello hello.c -lgcc - * - * Simple programs meant to be reasonably portable to various libc and using - * only a few common includes, may also be built by simply making the include - * path point to the nolibc directory: - * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ - * -I../nolibc -o hello hello.c -lgcc - * - * The available standard (but limited) include files are: - * ctype.h, errno.h, signal.h, stdio.h, stdlib.h, string.h, time.h - * - * In addition, the following ones are expected to be provided by the compiler: - * float.h, stdarg.h, stddef.h - * - * The following ones which are part to the C standard are not provided: - * assert.h, locale.h, math.h, setjmp.h, limits.h - * - * A very useful calling convention table may be found here : - * http://man7.org/linux/man-pages/man2/syscall.2.html - * - * This doc is quite convenient though not necessarily up to date : - * https://w3challs.com/syscalls/ - * - */ -#ifndef _NOLIBC_H -#define _NOLIBC_H - -#include "std.h" -#include "arch.h" -#include "types.h" -#include "sys.h" -#include "ctype.h" -#include "signal.h" -#include "unistd.h" -#include "stdio.h" -#include "stdlib.h" -#include "string.h" -#include "time.h" -#include "stackprotector.h" - -/* Used by programs to avoid std includes */ -#define NOLIBC - -#endif /* _NOLIBC_H */ diff --git a/libcontainer/dmz/nolibc/signal.h b/libcontainer/dmz/nolibc/signal.h deleted file mode 100644 index 137552216e4..00000000000 --- a/libcontainer/dmz/nolibc/signal.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * signal function definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_SIGNAL_H -#define _NOLIBC_SIGNAL_H - -#include "std.h" -#include "arch.h" -#include "types.h" -#include "sys.h" - -/* This one is not marked 
static as it's needed by libgcc for divide by zero */ -__attribute__((weak,unused,section(".text.nolibc_raise"))) -int raise(int signal) -{ - return sys_kill(sys_getpid(), signal); -} - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_SIGNAL_H */ diff --git a/libcontainer/dmz/nolibc/stackprotector.h b/libcontainer/dmz/nolibc/stackprotector.h deleted file mode 100644 index 13f1d0e6038..00000000000 --- a/libcontainer/dmz/nolibc/stackprotector.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * Stack protector support for NOLIBC - * Copyright (C) 2023 Thomas Weißschuh - */ - -#ifndef _NOLIBC_STACKPROTECTOR_H -#define _NOLIBC_STACKPROTECTOR_H - -#include "compiler.h" - -#if defined(_NOLIBC_STACKPROTECTOR) - -#include "sys.h" -#include "stdlib.h" - -/* The functions in this header are using raw syscall macros to avoid - * triggering stack protector errors themselves - */ - -__attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) -void __stack_chk_fail(void) -{ - pid_t pid; - my_syscall3(__NR_write, STDERR_FILENO, "!!Stack smashing detected!!\n", 28); - pid = my_syscall0(__NR_getpid); - my_syscall2(__NR_kill, pid, SIGABRT); - for (;;); -} - -__attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) -void __stack_chk_fail_local(void) -{ - __stack_chk_fail(); -} - -__attribute__((weak,section(".data.nolibc_stack_chk"))) -uintptr_t __stack_chk_guard; - -static __no_stack_protector void __stack_chk_init(void) -{ - my_syscall3(__NR_getrandom, &__stack_chk_guard, sizeof(__stack_chk_guard), 0); - /* a bit more randomness in case getrandom() fails, ensure the guard is never 0 */ - if (__stack_chk_guard != (uintptr_t) &__stack_chk_guard) - __stack_chk_guard ^= (uintptr_t) &__stack_chk_guard; -} -#else /* !defined(_NOLIBC_STACKPROTECTOR) */ -static void __stack_chk_init(void) {} -#endif /* defined(_NOLIBC_STACKPROTECTOR) */ - -#endif /* _NOLIBC_STACKPROTECTOR_H */ diff --git 
a/libcontainer/dmz/nolibc/std.h b/libcontainer/dmz/nolibc/std.h deleted file mode 100644 index 933bc0be7e1..00000000000 --- a/libcontainer/dmz/nolibc/std.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * Standard definitions and types for NOLIBC - * Copyright (C) 2017-2021 Willy Tarreau - */ - -#ifndef _NOLIBC_STD_H -#define _NOLIBC_STD_H - -/* Declare a few quite common macros and types that usually are in stdlib.h, - * stdint.h, ctype.h, unistd.h and a few other common locations. Please place - * integer type definitions and generic macros here, but avoid OS-specific and - * syscall-specific stuff, as this file is expected to be included very early. - */ - -/* note: may already be defined */ -#ifndef NULL -#define NULL ((void *)0) -#endif - -#include "stdint.h" - -/* those are commonly provided by sys/types.h */ -typedef unsigned int dev_t; -typedef unsigned long ino_t; -typedef unsigned int mode_t; -typedef signed int pid_t; -typedef unsigned int uid_t; -typedef unsigned int gid_t; -typedef unsigned long nlink_t; -typedef signed long off_t; -typedef signed long blksize_t; -typedef signed long blkcnt_t; -typedef signed long time_t; - -#endif /* _NOLIBC_STD_H */ diff --git a/libcontainer/dmz/nolibc/stdint.h b/libcontainer/dmz/nolibc/stdint.h deleted file mode 100644 index 6665e272e21..00000000000 --- a/libcontainer/dmz/nolibc/stdint.h +++ /dev/null @@ -1,113 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * Standard definitions and types for NOLIBC - * Copyright (C) 2023 Vincent Dagonneau - */ - -#ifndef _NOLIBC_STDINT_H -#define _NOLIBC_STDINT_H - -typedef unsigned char uint8_t; -typedef signed char int8_t; -typedef unsigned short uint16_t; -typedef signed short int16_t; -typedef unsigned int uint32_t; -typedef signed int int32_t; -typedef unsigned long long uint64_t; -typedef signed long long int64_t; -typedef __SIZE_TYPE__ size_t; -typedef signed long ssize_t; -typedef unsigned long uintptr_t; -typedef signed 
long intptr_t; -typedef signed long ptrdiff_t; - -typedef int8_t int_least8_t; -typedef uint8_t uint_least8_t; -typedef int16_t int_least16_t; -typedef uint16_t uint_least16_t; -typedef int32_t int_least32_t; -typedef uint32_t uint_least32_t; -typedef int64_t int_least64_t; -typedef uint64_t uint_least64_t; - -typedef int8_t int_fast8_t; -typedef uint8_t uint_fast8_t; -typedef ssize_t int_fast16_t; -typedef size_t uint_fast16_t; -typedef ssize_t int_fast32_t; -typedef size_t uint_fast32_t; -typedef int64_t int_fast64_t; -typedef uint64_t uint_fast64_t; - -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; - -/* limits of integral types */ - -#define INT8_MIN (-128) -#define INT16_MIN (-32767-1) -#define INT32_MIN (-2147483647-1) -#define INT64_MIN (-9223372036854775807LL-1) - -#define INT8_MAX (127) -#define INT16_MAX (32767) -#define INT32_MAX (2147483647) -#define INT64_MAX (9223372036854775807LL) - -#define UINT8_MAX (255) -#define UINT16_MAX (65535) -#define UINT32_MAX (4294967295U) -#define UINT64_MAX (18446744073709551615ULL) - -#define INT_LEAST8_MIN INT8_MIN -#define INT_LEAST16_MIN INT16_MIN -#define INT_LEAST32_MIN INT32_MIN -#define INT_LEAST64_MIN INT64_MIN - -#define INT_LEAST8_MAX INT8_MAX -#define INT_LEAST16_MAX INT16_MAX -#define INT_LEAST32_MAX INT32_MAX -#define INT_LEAST64_MAX INT64_MAX - -#define UINT_LEAST8_MAX UINT8_MAX -#define UINT_LEAST16_MAX UINT16_MAX -#define UINT_LEAST32_MAX UINT32_MAX -#define UINT_LEAST64_MAX UINT64_MAX - -#define SIZE_MAX ((size_t)(__LONG_MAX__) * 2 + 1) -#define INTPTR_MIN (-__LONG_MAX__ - 1) -#define INTPTR_MAX __LONG_MAX__ -#define PTRDIFF_MIN INTPTR_MIN -#define PTRDIFF_MAX INTPTR_MAX -#define UINTPTR_MAX SIZE_MAX - -#define INT_FAST8_MIN INT8_MIN -#define INT_FAST16_MIN INTPTR_MIN -#define INT_FAST32_MIN INTPTR_MIN -#define INT_FAST64_MIN INT64_MIN - -#define INT_FAST8_MAX INT8_MAX -#define INT_FAST16_MAX INTPTR_MAX -#define INT_FAST32_MAX INTPTR_MAX -#define INT_FAST64_MAX INT64_MAX - -#define 
UINT_FAST8_MAX UINT8_MAX -#define UINT_FAST16_MAX SIZE_MAX -#define UINT_FAST32_MAX SIZE_MAX -#define UINT_FAST64_MAX UINT64_MAX - -#ifndef INT_MIN -#define INT_MIN (-__INT_MAX__ - 1) -#endif -#ifndef INT_MAX -#define INT_MAX __INT_MAX__ -#endif - -#ifndef LONG_MIN -#define LONG_MIN (-__LONG_MAX__ - 1) -#endif -#ifndef LONG_MAX -#define LONG_MAX __LONG_MAX__ -#endif - -#endif /* _NOLIBC_STDINT_H */ diff --git a/libcontainer/dmz/nolibc/stdio.h b/libcontainer/dmz/nolibc/stdio.h deleted file mode 100644 index cae402c11e5..00000000000 --- a/libcontainer/dmz/nolibc/stdio.h +++ /dev/null @@ -1,383 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * minimal stdio function definitions for NOLIBC - * Copyright (C) 2017-2021 Willy Tarreau - */ - -#ifndef _NOLIBC_STDIO_H -#define _NOLIBC_STDIO_H - -#include - -#include "std.h" -#include "arch.h" -#include "errno.h" -#include "types.h" -#include "sys.h" -#include "stdlib.h" -#include "string.h" - -#ifndef EOF -#define EOF (-1) -#endif - -/* Buffering mode used by setvbuf. */ -#define _IOFBF 0 /* Fully buffered. */ -#define _IOLBF 1 /* Line buffered. */ -#define _IONBF 2 /* No buffering. */ - -/* just define FILE as a non-empty type. The value of the pointer gives - * the FD: FILE=~fd for fd>=0 or NULL for fd<0. This way positive FILE - * are immediately identified as abnormal entries (i.e. possible copies - * of valid pointers to something else). - */ -typedef struct FILE { - char dummy[1]; -} FILE; - -static __attribute__((unused)) FILE* const stdin = (FILE*)(intptr_t)~STDIN_FILENO; -static __attribute__((unused)) FILE* const stdout = (FILE*)(intptr_t)~STDOUT_FILENO; -static __attribute__((unused)) FILE* const stderr = (FILE*)(intptr_t)~STDERR_FILENO; - -/* provides a FILE* equivalent of fd. The mode is ignored. 
*/ -static __attribute__((unused)) -FILE *fdopen(int fd, const char *mode __attribute__((unused))) -{ - if (fd < 0) { - SET_ERRNO(EBADF); - return NULL; - } - return (FILE*)(intptr_t)~fd; -} - -/* provides the fd of stream. */ -static __attribute__((unused)) -int fileno(FILE *stream) -{ - intptr_t i = (intptr_t)stream; - - if (i >= 0) { - SET_ERRNO(EBADF); - return -1; - } - return ~i; -} - -/* flush a stream. */ -static __attribute__((unused)) -int fflush(FILE *stream) -{ - intptr_t i = (intptr_t)stream; - - /* NULL is valid here. */ - if (i > 0) { - SET_ERRNO(EBADF); - return -1; - } - - /* Don't do anything, nolibc does not support buffering. */ - return 0; -} - -/* flush a stream. */ -static __attribute__((unused)) -int fclose(FILE *stream) -{ - intptr_t i = (intptr_t)stream; - - if (i >= 0) { - SET_ERRNO(EBADF); - return -1; - } - - if (close(~i)) - return EOF; - - return 0; -} - -/* getc(), fgetc(), getchar() */ - -#define getc(stream) fgetc(stream) - -static __attribute__((unused)) -int fgetc(FILE* stream) -{ - unsigned char ch; - - if (read(fileno(stream), &ch, 1) <= 0) - return EOF; - return ch; -} - -static __attribute__((unused)) -int getchar(void) -{ - return fgetc(stdin); -} - - -/* putc(), fputc(), putchar() */ - -#define putc(c, stream) fputc(c, stream) - -static __attribute__((unused)) -int fputc(int c, FILE* stream) -{ - unsigned char ch = c; - - if (write(fileno(stream), &ch, 1) <= 0) - return EOF; - return ch; -} - -static __attribute__((unused)) -int putchar(int c) -{ - return fputc(c, stdout); -} - - -/* fwrite(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */ - -/* internal fwrite()-like function which only takes a size and returns 0 on - * success or EOF on error. It automatically retries on short writes. 
- */ -static __attribute__((unused)) -int _fwrite(const void *buf, size_t size, FILE *stream) -{ - ssize_t ret; - int fd = fileno(stream); - - while (size) { - ret = write(fd, buf, size); - if (ret <= 0) - return EOF; - size -= ret; - buf += ret; - } - return 0; -} - -static __attribute__((unused)) -size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream) -{ - size_t written; - - for (written = 0; written < nmemb; written++) { - if (_fwrite(s, size, stream) != 0) - break; - s += size; - } - return written; -} - -static __attribute__((unused)) -int fputs(const char *s, FILE *stream) -{ - return _fwrite(s, strlen(s), stream); -} - -static __attribute__((unused)) -int puts(const char *s) -{ - if (fputs(s, stdout) == EOF) - return EOF; - return putchar('\n'); -} - - -/* fgets() */ -static __attribute__((unused)) -char *fgets(char *s, int size, FILE *stream) -{ - int ofs; - int c; - - for (ofs = 0; ofs + 1 < size;) { - c = fgetc(stream); - if (c == EOF) - break; - s[ofs++] = c; - if (c == '\n') - break; - } - if (ofs < size) - s[ofs] = 0; - return ofs ? s : NULL; -} - - -/* minimal vfprintf(). It supports the following formats: - * - %[l*]{d,u,c,x,p} - * - %s - * - unknown modifiers are ignored. 
- */ -static __attribute__((unused)) -int vfprintf(FILE *stream, const char *fmt, va_list args) -{ - char escape, lpref, c; - unsigned long long v; - unsigned int written; - size_t len, ofs; - char tmpbuf[21]; - const char *outstr; - - written = ofs = escape = lpref = 0; - while (1) { - c = fmt[ofs++]; - - if (escape) { - /* we're in an escape sequence, ofs == 1 */ - escape = 0; - if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') { - char *out = tmpbuf; - - if (c == 'p') - v = va_arg(args, unsigned long); - else if (lpref) { - if (lpref > 1) - v = va_arg(args, unsigned long long); - else - v = va_arg(args, unsigned long); - } else - v = va_arg(args, unsigned int); - - if (c == 'd') { - /* sign-extend the value */ - if (lpref == 0) - v = (long long)(int)v; - else if (lpref == 1) - v = (long long)(long)v; - } - - switch (c) { - case 'c': - out[0] = v; - out[1] = 0; - break; - case 'd': - i64toa_r(v, out); - break; - case 'u': - u64toa_r(v, out); - break; - case 'p': - *(out++) = '0'; - *(out++) = 'x'; - /* fall through */ - default: /* 'x' and 'p' above */ - u64toh_r(v, out); - break; - } - outstr = tmpbuf; - } - else if (c == 's') { - outstr = va_arg(args, char *); - if (!outstr) - outstr="(null)"; - } - else if (c == '%') { - /* queue it verbatim */ - continue; - } - else { - /* modifiers or final 0 */ - if (c == 'l') { - /* long format prefix, maintain the escape */ - lpref++; - } - escape = 1; - goto do_escape; - } - len = strlen(outstr); - goto flush_str; - } - - /* not an escape sequence */ - if (c == 0 || c == '%') { - /* flush pending data on escape or end */ - escape = 1; - lpref = 0; - outstr = fmt; - len = ofs - 1; - flush_str: - if (_fwrite(outstr, len, stream) != 0) - break; - - written += len; - do_escape: - if (c == 0) - break; - fmt += ofs; - ofs = 0; - continue; - } - - /* literal char, just queue it */ - } - return written; -} - -static __attribute__((unused)) -int vprintf(const char *fmt, va_list args) -{ - return vfprintf(stdout, fmt, 
args); -} - -static __attribute__((unused, format(printf, 2, 3))) -int fprintf(FILE *stream, const char *fmt, ...) -{ - va_list args; - int ret; - - va_start(args, fmt); - ret = vfprintf(stream, fmt, args); - va_end(args); - return ret; -} - -static __attribute__((unused, format(printf, 1, 2))) -int printf(const char *fmt, ...) -{ - va_list args; - int ret; - - va_start(args, fmt); - ret = vfprintf(stdout, fmt, args); - va_end(args); - return ret; -} - -static __attribute__((unused)) -void perror(const char *msg) -{ - fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno); -} - -static __attribute__((unused)) -int setvbuf(FILE *stream __attribute__((unused)), - char *buf __attribute__((unused)), - int mode, - size_t size __attribute__((unused))) -{ - /* - * nolibc does not support buffering so this is a nop. Just check mode - * is valid as required by the spec. - */ - switch (mode) { - case _IOFBF: - case _IOLBF: - case _IONBF: - break; - default: - return EOF; - } - - return 0; -} - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_STDIO_H */ diff --git a/libcontainer/dmz/nolibc/stdlib.h b/libcontainer/dmz/nolibc/stdlib.h deleted file mode 100644 index bacfd35c515..00000000000 --- a/libcontainer/dmz/nolibc/stdlib.h +++ /dev/null @@ -1,444 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * stdlib function definitions for NOLIBC - * Copyright (C) 2017-2021 Willy Tarreau - */ - -#ifndef _NOLIBC_STDLIB_H -#define _NOLIBC_STDLIB_H - -#include "std.h" -#include "arch.h" -#include "types.h" -#include "sys.h" -#include "string.h" -#include - -struct nolibc_heap { - size_t len; - char user_p[] __attribute__((__aligned__)); -}; - -/* Buffer used to store int-to-ASCII conversions. Will only be implemented if - * any of the related functions is implemented. The area is large enough to - * store "18446744073709551615" or "-9223372036854775808" and the final zero. 
- */ -static __attribute__((unused)) char itoa_buffer[21]; - -/* - * As much as possible, please keep functions alphabetically sorted. - */ - -/* must be exported, as it's used by libgcc for various divide functions */ -__attribute__((weak,unused,noreturn,section(".text.nolibc_abort"))) -void abort(void) -{ - sys_kill(sys_getpid(), SIGABRT); - for (;;); -} - -static __attribute__((unused)) -long atol(const char *s) -{ - unsigned long ret = 0; - unsigned long d; - int neg = 0; - - if (*s == '-') { - neg = 1; - s++; - } - - while (1) { - d = (*s++) - '0'; - if (d > 9) - break; - ret *= 10; - ret += d; - } - - return neg ? -ret : ret; -} - -static __attribute__((unused)) -int atoi(const char *s) -{ - return atol(s); -} - -static __attribute__((unused)) -void free(void *ptr) -{ - struct nolibc_heap *heap; - - if (!ptr) - return; - - heap = container_of(ptr, struct nolibc_heap, user_p); - munmap(heap, heap->len); -} - -/* getenv() tries to find the environment variable named in the - * environment array pointed to by global variable "environ" which must be - * declared as a char **, and must be terminated by a NULL (it is recommended - * to set this variable to the "envp" argument of main()). If the requested - * environment variable exists its value is returned otherwise NULL is - * returned. 
- */ -static __attribute__((unused)) -char *getenv(const char *name) -{ - int idx, i; - - if (environ) { - for (idx = 0; environ[idx]; idx++) { - for (i = 0; name[i] && name[i] == environ[idx][i];) - i++; - if (!name[i] && environ[idx][i] == '=') - return &environ[idx][i+1]; - } - } - return NULL; -} - -static __attribute__((unused)) -unsigned long getauxval(unsigned long type) -{ - const unsigned long *auxv = _auxv; - unsigned long ret; - - if (!auxv) - return 0; - - while (1) { - if (!auxv[0] && !auxv[1]) { - ret = 0; - break; - } - - if (auxv[0] == type) { - ret = auxv[1]; - break; - } - - auxv += 2; - } - - return ret; -} - -static __attribute__((unused)) -void *malloc(size_t len) -{ - struct nolibc_heap *heap; - - /* Always allocate memory with size multiple of 4096. */ - len = sizeof(*heap) + len; - len = (len + 4095UL) & -4096UL; - heap = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, - -1, 0); - if (__builtin_expect(heap == MAP_FAILED, 0)) - return NULL; - - heap->len = len; - return heap->user_p; -} - -static __attribute__((unused)) -void *calloc(size_t size, size_t nmemb) -{ - size_t x = size * nmemb; - - if (__builtin_expect(size && ((x / size) != nmemb), 0)) { - SET_ERRNO(ENOMEM); - return NULL; - } - - /* - * No need to zero the heap, the MAP_ANONYMOUS in malloc() - * already does it. - */ - return malloc(x); -} - -static __attribute__((unused)) -void *realloc(void *old_ptr, size_t new_size) -{ - struct nolibc_heap *heap; - size_t user_p_len; - void *ret; - - if (!old_ptr) - return malloc(new_size); - - heap = container_of(old_ptr, struct nolibc_heap, user_p); - user_p_len = heap->len - sizeof(*heap); - /* - * Don't realloc() if @user_p_len >= @new_size, this block of - * memory is still enough to handle the @new_size. Just return - * the same pointer. 
- */ - if (user_p_len >= new_size) - return old_ptr; - - ret = malloc(new_size); - if (__builtin_expect(!ret, 0)) - return NULL; - - memcpy(ret, heap->user_p, heap->len); - munmap(heap, heap->len); - return ret; -} - -/* Converts the unsigned long integer to its hex representation into - * buffer , which must be long enough to store the number and the - * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The - * buffer is filled from the first byte, and the number of characters emitted - * (not counting the trailing zero) is returned. The function is constructed - * in a way to optimize the code size and avoid any divide that could add a - * dependency on large external functions. - */ -static __attribute__((unused)) -int utoh_r(unsigned long in, char *buffer) -{ - signed char pos = (~0UL > 0xfffffffful) ? 60 : 28; - int digits = 0; - int dig; - - do { - dig = in >> pos; - in -= (uint64_t)dig << pos; - pos -= 4; - if (dig || digits || pos < 0) { - if (dig > 9) - dig += 'a' - '0' - 10; - buffer[digits++] = '0' + dig; - } - } while (pos >= 0); - - buffer[digits] = 0; - return digits; -} - -/* converts unsigned long to an hex string using the static itoa_buffer - * and returns the pointer to that string. - */ -static __inline__ __attribute__((unused)) -char *utoh(unsigned long in) -{ - utoh_r(in, itoa_buffer); - return itoa_buffer; -} - -/* Converts the unsigned long integer to its string representation into - * buffer , which must be long enough to store the number and the - * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for - * 4294967295 in 32-bit). The buffer is filled from the first byte, and the - * number of characters emitted (not counting the trailing zero) is returned. - * The function is constructed in a way to optimize the code size and avoid - * any divide that could add a dependency on large external functions. 
- */ -static __attribute__((unused)) -int utoa_r(unsigned long in, char *buffer) -{ - unsigned long lim; - int digits = 0; - int pos = (~0UL > 0xfffffffful) ? 19 : 9; - int dig; - - do { - for (dig = 0, lim = 1; dig < pos; dig++) - lim *= 10; - - if (digits || in >= lim || !pos) { - for (dig = 0; in >= lim; dig++) - in -= lim; - buffer[digits++] = '0' + dig; - } - } while (pos--); - - buffer[digits] = 0; - return digits; -} - -/* Converts the signed long integer to its string representation into - * buffer , which must be long enough to store the number and the - * trailing zero (21 bytes for -9223372036854775808 in 64-bit, 12 for - * -2147483648 in 32-bit). The buffer is filled from the first byte, and the - * number of characters emitted (not counting the trailing zero) is returned. - */ -static __attribute__((unused)) -int itoa_r(long in, char *buffer) -{ - char *ptr = buffer; - int len = 0; - - if (in < 0) { - in = -in; - *(ptr++) = '-'; - len++; - } - len += utoa_r(in, ptr); - return len; -} - -/* for historical compatibility, same as above but returns the pointer to the - * buffer. - */ -static __inline__ __attribute__((unused)) -char *ltoa_r(long in, char *buffer) -{ - itoa_r(in, buffer); - return buffer; -} - -/* converts long integer to a string using the static itoa_buffer and - * returns the pointer to that string. - */ -static __inline__ __attribute__((unused)) -char *itoa(long in) -{ - itoa_r(in, itoa_buffer); - return itoa_buffer; -} - -/* converts long integer to a string using the static itoa_buffer and - * returns the pointer to that string. Same as above, for compatibility. - */ -static __inline__ __attribute__((unused)) -char *ltoa(long in) -{ - itoa_r(in, itoa_buffer); - return itoa_buffer; -} - -/* converts unsigned long integer to a string using the static itoa_buffer - * and returns the pointer to that string. 
- */ -static __inline__ __attribute__((unused)) -char *utoa(unsigned long in) -{ - utoa_r(in, itoa_buffer); - return itoa_buffer; -} - -/* Converts the unsigned 64-bit integer to its hex representation into - * buffer , which must be long enough to store the number and the - * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from - * the first byte, and the number of characters emitted (not counting the - * trailing zero) is returned. The function is constructed in a way to optimize - * the code size and avoid any divide that could add a dependency on large - * external functions. - */ -static __attribute__((unused)) -int u64toh_r(uint64_t in, char *buffer) -{ - signed char pos = 60; - int digits = 0; - int dig; - - do { - if (sizeof(long) >= 8) { - dig = (in >> pos) & 0xF; - } else { - /* 32-bit platforms: avoid a 64-bit shift */ - uint32_t d = (pos >= 32) ? (in >> 32) : in; - dig = (d >> (pos & 31)) & 0xF; - } - if (dig > 9) - dig += 'a' - '0' - 10; - pos -= 4; - if (dig || digits || pos < 0) - buffer[digits++] = '0' + dig; - } while (pos >= 0); - - buffer[digits] = 0; - return digits; -} - -/* converts uint64_t to an hex string using the static itoa_buffer and - * returns the pointer to that string. - */ -static __inline__ __attribute__((unused)) -char *u64toh(uint64_t in) -{ - u64toh_r(in, itoa_buffer); - return itoa_buffer; -} - -/* Converts the unsigned 64-bit integer to its string representation into - * buffer , which must be long enough to store the number and the - * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from - * the first byte, and the number of characters emitted (not counting the - * trailing zero) is returned. The function is constructed in a way to optimize - * the code size and avoid any divide that could add a dependency on large - * external functions. 
- */ -static __attribute__((unused)) -int u64toa_r(uint64_t in, char *buffer) -{ - unsigned long long lim; - int digits = 0; - int pos = 19; /* start with the highest possible digit */ - int dig; - - do { - for (dig = 0, lim = 1; dig < pos; dig++) - lim *= 10; - - if (digits || in >= lim || !pos) { - for (dig = 0; in >= lim; dig++) - in -= lim; - buffer[digits++] = '0' + dig; - } - } while (pos--); - - buffer[digits] = 0; - return digits; -} - -/* Converts the signed 64-bit integer to its string representation into - * buffer , which must be long enough to store the number and the - * trailing zero (21 bytes for -9223372036854775808). The buffer is filled from - * the first byte, and the number of characters emitted (not counting the - * trailing zero) is returned. - */ -static __attribute__((unused)) -int i64toa_r(int64_t in, char *buffer) -{ - char *ptr = buffer; - int len = 0; - - if (in < 0) { - in = -in; - *(ptr++) = '-'; - len++; - } - len += u64toa_r(in, ptr); - return len; -} - -/* converts int64_t to a string using the static itoa_buffer and returns - * the pointer to that string. - */ -static __inline__ __attribute__((unused)) -char *i64toa(int64_t in) -{ - i64toa_r(in, itoa_buffer); - return itoa_buffer; -} - -/* converts uint64_t to a string using the static itoa_buffer and returns - * the pointer to that string. 
- */ -static __inline__ __attribute__((unused)) -char *u64toa(uint64_t in) -{ - u64toa_r(in, itoa_buffer); - return itoa_buffer; -} - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_STDLIB_H */ diff --git a/libcontainer/dmz/nolibc/string.h b/libcontainer/dmz/nolibc/string.h deleted file mode 100644 index 0c2e06c7c47..00000000000 --- a/libcontainer/dmz/nolibc/string.h +++ /dev/null @@ -1,294 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * string function definitions for NOLIBC - * Copyright (C) 2017-2021 Willy Tarreau - */ - -#ifndef _NOLIBC_STRING_H -#define _NOLIBC_STRING_H - -#include "std.h" - -static void *malloc(size_t len); - -/* - * As much as possible, please keep functions alphabetically sorted. - */ - -static __attribute__((unused)) -int memcmp(const void *s1, const void *s2, size_t n) -{ - size_t ofs = 0; - int c1 = 0; - - while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) { - ofs++; - } - return c1; -} - -static __attribute__((unused)) -void *_nolibc_memcpy_up(void *dst, const void *src, size_t len) -{ - size_t pos = 0; - - while (pos < len) { - ((char *)dst)[pos] = ((const char *)src)[pos]; - pos++; - } - return dst; -} - -static __attribute__((unused)) -void *_nolibc_memcpy_down(void *dst, const void *src, size_t len) -{ - while (len) { - len--; - ((char *)dst)[len] = ((const char *)src)[len]; - } - return dst; -} - -/* might be ignored by the compiler without -ffreestanding, then found as - * missing. 
- */ -__attribute__((weak,unused,section(".text.nolibc_memmove"))) -void *memmove(void *dst, const void *src, size_t len) -{ - size_t dir, pos; - - pos = len; - dir = -1; - - if (dst < src) { - pos = -1; - dir = 1; - } - - while (len) { - pos += dir; - ((char *)dst)[pos] = ((const char *)src)[pos]; - len--; - } - return dst; -} - -/* must be exported, as it's used by libgcc on ARM */ -__attribute__((weak,unused,section(".text.nolibc_memcpy"))) -void *memcpy(void *dst, const void *src, size_t len) -{ - return _nolibc_memcpy_up(dst, src, len); -} - -/* might be ignored by the compiler without -ffreestanding, then found as - * missing. - */ -__attribute__((weak,unused,section(".text.nolibc_memset"))) -void *memset(void *dst, int b, size_t len) -{ - char *p = dst; - - while (len--) { - /* prevent gcc from recognizing memset() here */ - __asm__ volatile(""); - *(p++) = b; - } - return dst; -} - -static __attribute__((unused)) -char *strchr(const char *s, int c) -{ - while (*s) { - if (*s == (char)c) - return (char *)s; - s++; - } - return NULL; -} - -static __attribute__((unused)) -int strcmp(const char *a, const char *b) -{ - unsigned int c; - int diff; - - while (!(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c) - ; - return diff; -} - -static __attribute__((unused)) -char *strcpy(char *dst, const char *src) -{ - char *ret = dst; - - while ((*dst++ = *src++)); - return ret; -} - -/* this function is only used with arguments that are not constants or when - * it's not known because optimizations are disabled. Note that gcc 12 - * recognizes an strlen() pattern and replaces it with a jump to strlen(), - * thus itself, hence the asm() statement below that's meant to disable this - * confusing practice. 
- */ -static __attribute__((unused)) -size_t strlen(const char *str) -{ - size_t len; - - for (len = 0; str[len]; len++) - __asm__(""); - return len; -} - -/* do not trust __builtin_constant_p() at -O0, as clang will emit a test and - * the two branches, then will rely on an external definition of strlen(). - */ -#if defined(__OPTIMIZE__) -#define nolibc_strlen(x) strlen(x) -#define strlen(str) ({ \ - __builtin_constant_p((str)) ? \ - __builtin_strlen((str)) : \ - nolibc_strlen((str)); \ -}) -#endif - -static __attribute__((unused)) -size_t strnlen(const char *str, size_t maxlen) -{ - size_t len; - - for (len = 0; (len < maxlen) && str[len]; len++); - return len; -} - -static __attribute__((unused)) -char *strdup(const char *str) -{ - size_t len; - char *ret; - - len = strlen(str); - ret = malloc(len + 1); - if (__builtin_expect(ret != NULL, 1)) - memcpy(ret, str, len + 1); - - return ret; -} - -static __attribute__((unused)) -char *strndup(const char *str, size_t maxlen) -{ - size_t len; - char *ret; - - len = strnlen(str, maxlen); - ret = malloc(len + 1); - if (__builtin_expect(ret != NULL, 1)) { - memcpy(ret, str, len); - ret[len] = '\0'; - } - - return ret; -} - -static __attribute__((unused)) -size_t strlcat(char *dst, const char *src, size_t size) -{ - size_t len; - char c; - - for (len = 0; dst[len]; len++) - ; - - for (;;) { - c = *src; - if (len < size) - dst[len] = c; - if (!c) - break; - len++; - src++; - } - - return len; -} - -static __attribute__((unused)) -size_t strlcpy(char *dst, const char *src, size_t size) -{ - size_t len; - char c; - - for (len = 0;;) { - c = src[len]; - if (len < size) - dst[len] = c; - if (!c) - break; - len++; - } - return len; -} - -static __attribute__((unused)) -char *strncat(char *dst, const char *src, size_t size) -{ - char *orig = dst; - - while (*dst) - dst++; - - while (size && (*dst = *src)) { - src++; - dst++; - size--; - } - - *dst = 0; - return orig; -} - -static __attribute__((unused)) -int strncmp(const char 
*a, const char *b, size_t size) -{ - unsigned int c; - int diff = 0; - - while (size-- && - !(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c) - ; - - return diff; -} - -static __attribute__((unused)) -char *strncpy(char *dst, const char *src, size_t size) -{ - size_t len; - - for (len = 0; len < size; len++) - if ((dst[len] = *src)) - src++; - return dst; -} - -static __attribute__((unused)) -char *strrchr(const char *s, int c) -{ - const char *ret = NULL; - - while (*s) { - if (*s == (char)c) - ret = s; - s++; - } - return (char *)ret; -} - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_STRING_H */ diff --git a/libcontainer/dmz/nolibc/sys.h b/libcontainer/dmz/nolibc/sys.h deleted file mode 100644 index 3b89433e2fd..00000000000 --- a/libcontainer/dmz/nolibc/sys.h +++ /dev/null @@ -1,1189 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * Syscall definitions for NOLIBC (those in man(2)) - * Copyright (C) 2017-2021 Willy Tarreau - */ - -#ifndef _NOLIBC_SYS_H -#define _NOLIBC_SYS_H - -#include -#include "std.h" - -/* system includes */ -#include -#include /* for SIGCHLD */ -#include -#include -#include -#include -#include -#include -#include /* for O_* and AT_* */ -#include /* for statx() */ -#include - -#include "arch.h" -#include "errno.h" -#include "types.h" - -/* Syscall return helper: takes the syscall value in argument and checks for an - * error in it. This may only be used with signed returns (int or long), but - * not with pointers. An error is any value < 0. When an error is encountered, - * -ret is set into errno and -1 is returned. Otherwise the returned value is - * passed as-is with its type preserved. - */ - -#define __sysret(arg) \ -({ \ - __typeof__(arg) __sysret_arg = (arg); \ - (__sysret_arg < 0) /* error ? */ \ - ? 
(({ SET_ERRNO(-__sysret_arg); }), -1) /* ret -1 with errno = -arg */ \ - : __sysret_arg; /* return original value */ \ -}) - - -/* Functions in this file only describe syscalls. They're declared static so - * that the compiler usually decides to inline them while still being allowed - * to pass a pointer to one of their instances. Each syscall exists in two - * versions: - * - the "internal" ones, which matches the raw syscall interface at the - * kernel level, which may sometimes slightly differ from the documented - * libc-level ones. For example most of them return either a valid value - * or -errno. All of these are prefixed with "sys_". They may be called - * by non-portable applications if desired. - * - * - the "exported" ones, whose interface must closely match the one - * documented in man(2), that applications are supposed to expect. These - * ones rely on the internal ones, and set errno. - * - * Each syscall will be defined with the two functions, sorted in alphabetical - * order applied to the exported names. - * - * In case of doubt about the relevance of a function here, only those which - * set errno should be defined here. Wrappers like those appearing in man(3) - * should not be placed here. 
- */ - - -/* - * int brk(void *addr); - * void *sbrk(intptr_t inc) - */ - -static __attribute__((unused)) -void *sys_brk(void *addr) -{ - return (void *)my_syscall1(__NR_brk, addr); -} - -static __attribute__((unused)) -int brk(void *addr) -{ - void *ret = sys_brk(addr); - - if (!ret) { - SET_ERRNO(ENOMEM); - return -1; - } - return 0; -} - -static __attribute__((unused)) -void *sbrk(intptr_t inc) -{ - /* first call to find current end */ - void *ret = sys_brk(0); - - if (ret && sys_brk(ret + inc) == ret + inc) - return ret + inc; - - SET_ERRNO(ENOMEM); - return (void *)-1; -} - - -/* - * int chdir(const char *path); - */ - -static __attribute__((unused)) -int sys_chdir(const char *path) -{ - return my_syscall1(__NR_chdir, path); -} - -static __attribute__((unused)) -int chdir(const char *path) -{ - return __sysret(sys_chdir(path)); -} - - -/* - * int chmod(const char *path, mode_t mode); - */ - -static __attribute__((unused)) -int sys_chmod(const char *path, mode_t mode) -{ -#ifdef __NR_fchmodat - return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#elif defined(__NR_chmod) - return my_syscall2(__NR_chmod, path, mode); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int chmod(const char *path, mode_t mode) -{ - return __sysret(sys_chmod(path, mode)); -} - - -/* - * int chown(const char *path, uid_t owner, gid_t group); - */ - -static __attribute__((unused)) -int sys_chown(const char *path, uid_t owner, gid_t group) -{ -#ifdef __NR_fchownat - return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#elif defined(__NR_chown) - return my_syscall3(__NR_chown, path, owner, group); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int chown(const char *path, uid_t owner, gid_t group) -{ - return __sysret(sys_chown(path, owner, group)); -} - - -/* - * int chroot(const char *path); - */ - -static __attribute__((unused)) -int sys_chroot(const char *path) -{ - return my_syscall1(__NR_chroot, path); -} - 
-static __attribute__((unused)) -int chroot(const char *path) -{ - return __sysret(sys_chroot(path)); -} - - -/* - * int close(int fd); - */ - -static __attribute__((unused)) -int sys_close(int fd) -{ - return my_syscall1(__NR_close, fd); -} - -static __attribute__((unused)) -int close(int fd) -{ - return __sysret(sys_close(fd)); -} - - -/* - * int dup(int fd); - */ - -static __attribute__((unused)) -int sys_dup(int fd) -{ - return my_syscall1(__NR_dup, fd); -} - -static __attribute__((unused)) -int dup(int fd) -{ - return __sysret(sys_dup(fd)); -} - - -/* - * int dup2(int old, int new); - */ - -static __attribute__((unused)) -int sys_dup2(int old, int new) -{ -#ifdef __NR_dup3 - return my_syscall3(__NR_dup3, old, new, 0); -#elif defined(__NR_dup2) - return my_syscall2(__NR_dup2, old, new); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int dup2(int old, int new) -{ - return __sysret(sys_dup2(old, new)); -} - - -/* - * int dup3(int old, int new, int flags); - */ - -#ifdef __NR_dup3 -static __attribute__((unused)) -int sys_dup3(int old, int new, int flags) -{ - return my_syscall3(__NR_dup3, old, new, flags); -} - -static __attribute__((unused)) -int dup3(int old, int new, int flags) -{ - return __sysret(sys_dup3(old, new, flags)); -} -#endif - - -/* - * int execve(const char *filename, char *const argv[], char *const envp[]); - */ - -static __attribute__((unused)) -int sys_execve(const char *filename, char *const argv[], char *const envp[]) -{ - return my_syscall3(__NR_execve, filename, argv, envp); -} - -static __attribute__((unused)) -int execve(const char *filename, char *const argv[], char *const envp[]) -{ - return __sysret(sys_execve(filename, argv, envp)); -} - - -/* - * void exit(int status); - */ - -static __attribute__((noreturn,unused)) -void sys_exit(int status) -{ - my_syscall1(__NR_exit, status & 255); - while(1); /* shut the "noreturn" warnings. 
*/ -} - -static __attribute__((noreturn,unused)) -void exit(int status) -{ - sys_exit(status); -} - - -/* - * pid_t fork(void); - */ - -#ifndef sys_fork -static __attribute__((unused)) -pid_t sys_fork(void) -{ -#ifdef __NR_clone - /* note: some archs only have clone() and not fork(). Different archs - * have a different API, but most archs have the flags on first arg and - * will not use the rest with no other flag. - */ - return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); -#elif defined(__NR_fork) - return my_syscall0(__NR_fork); -#else - return -ENOSYS; -#endif -} -#endif - -static __attribute__((unused)) -pid_t fork(void) -{ - return __sysret(sys_fork()); -} - - -/* - * int fsync(int fd); - */ - -static __attribute__((unused)) -int sys_fsync(int fd) -{ - return my_syscall1(__NR_fsync, fd); -} - -static __attribute__((unused)) -int fsync(int fd) -{ - return __sysret(sys_fsync(fd)); -} - - -/* - * int getdents64(int fd, struct linux_dirent64 *dirp, int count); - */ - -static __attribute__((unused)) -int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) -{ - return my_syscall3(__NR_getdents64, fd, dirp, count); -} - -static __attribute__((unused)) -int getdents64(int fd, struct linux_dirent64 *dirp, int count) -{ - return __sysret(sys_getdents64(fd, dirp, count)); -} - - -/* - * uid_t geteuid(void); - */ - -static __attribute__((unused)) -uid_t sys_geteuid(void) -{ -#ifdef __NR_geteuid32 - return my_syscall0(__NR_geteuid32); -#else - return my_syscall0(__NR_geteuid); -#endif -} - -static __attribute__((unused)) -uid_t geteuid(void) -{ - return sys_geteuid(); -} - - -/* - * pid_t getpgid(pid_t pid); - */ - -static __attribute__((unused)) -pid_t sys_getpgid(pid_t pid) -{ - return my_syscall1(__NR_getpgid, pid); -} - -static __attribute__((unused)) -pid_t getpgid(pid_t pid) -{ - return __sysret(sys_getpgid(pid)); -} - - -/* - * pid_t getpgrp(void); - */ - -static __attribute__((unused)) -pid_t sys_getpgrp(void) -{ - return sys_getpgid(0); -} - -static 
__attribute__((unused)) -pid_t getpgrp(void) -{ - return sys_getpgrp(); -} - - -/* - * pid_t getpid(void); - */ - -static __attribute__((unused)) -pid_t sys_getpid(void) -{ - return my_syscall0(__NR_getpid); -} - -static __attribute__((unused)) -pid_t getpid(void) -{ - return sys_getpid(); -} - - -/* - * pid_t getppid(void); - */ - -static __attribute__((unused)) -pid_t sys_getppid(void) -{ - return my_syscall0(__NR_getppid); -} - -static __attribute__((unused)) -pid_t getppid(void) -{ - return sys_getppid(); -} - - -/* - * pid_t gettid(void); - */ - -static __attribute__((unused)) -pid_t sys_gettid(void) -{ - return my_syscall0(__NR_gettid); -} - -static __attribute__((unused)) -pid_t gettid(void) -{ - return sys_gettid(); -} - -static unsigned long getauxval(unsigned long key); - -/* - * int getpagesize(void); - */ - -static __attribute__((unused)) -int getpagesize(void) -{ - return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT); -} - - -/* - * int gettimeofday(struct timeval *tv, struct timezone *tz); - */ - -static __attribute__((unused)) -int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ -#ifdef __NR_gettimeofday - return my_syscall2(__NR_gettimeofday, tv, tz); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return __sysret(sys_gettimeofday(tv, tz)); -} - - -/* - * uid_t getuid(void); - */ - -static __attribute__((unused)) -uid_t sys_getuid(void) -{ -#ifdef __NR_getuid32 - return my_syscall0(__NR_getuid32); -#else - return my_syscall0(__NR_getuid); -#endif -} - -static __attribute__((unused)) -uid_t getuid(void) -{ - return sys_getuid(); -} - - -/* - * int ioctl(int fd, unsigned long req, void *value); - */ - -static __attribute__((unused)) -int sys_ioctl(int fd, unsigned long req, void *value) -{ - return my_syscall3(__NR_ioctl, fd, req, value); -} - -static __attribute__((unused)) -int ioctl(int fd, unsigned long req, void *value) -{ - return 
__sysret(sys_ioctl(fd, req, value)); -} - -/* - * int kill(pid_t pid, int signal); - */ - -static __attribute__((unused)) -int sys_kill(pid_t pid, int signal) -{ - return my_syscall2(__NR_kill, pid, signal); -} - -static __attribute__((unused)) -int kill(pid_t pid, int signal) -{ - return __sysret(sys_kill(pid, signal)); -} - - -/* - * int link(const char *old, const char *new); - */ - -static __attribute__((unused)) -int sys_link(const char *old, const char *new) -{ -#ifdef __NR_linkat - return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#elif defined(__NR_link) - return my_syscall2(__NR_link, old, new); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int link(const char *old, const char *new) -{ - return __sysret(sys_link(old, new)); -} - - -/* - * off_t lseek(int fd, off_t offset, int whence); - */ - -static __attribute__((unused)) -off_t sys_lseek(int fd, off_t offset, int whence) -{ -#ifdef __NR_lseek - return my_syscall3(__NR_lseek, fd, offset, whence); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -off_t lseek(int fd, off_t offset, int whence) -{ - return __sysret(sys_lseek(fd, offset, whence)); -} - - -/* - * int mkdir(const char *path, mode_t mode); - */ - -static __attribute__((unused)) -int sys_mkdir(const char *path, mode_t mode) -{ -#ifdef __NR_mkdirat - return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#elif defined(__NR_mkdir) - return my_syscall2(__NR_mkdir, path, mode); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int mkdir(const char *path, mode_t mode) -{ - return __sysret(sys_mkdir(path, mode)); -} - -/* - * int rmdir(const char *path); - */ - -static __attribute__((unused)) -int sys_rmdir(const char *path) -{ -#ifdef __NR_rmdir - return my_syscall1(__NR_rmdir, path); -#elif defined(__NR_unlinkat) - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int rmdir(const 
char *path) -{ - return __sysret(sys_rmdir(path)); -} - - -/* - * int mknod(const char *path, mode_t mode, dev_t dev); - */ - -static __attribute__((unused)) -long sys_mknod(const char *path, mode_t mode, dev_t dev) -{ -#ifdef __NR_mknodat - return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#elif defined(__NR_mknod) - return my_syscall3(__NR_mknod, path, mode, dev); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int mknod(const char *path, mode_t mode, dev_t dev) -{ - return __sysret(sys_mknod(path, mode, dev)); -} - -#ifndef sys_mmap -static __attribute__((unused)) -void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, - off_t offset) -{ - int n; - -#if defined(__NR_mmap2) - n = __NR_mmap2; - offset >>= 12; -#else - n = __NR_mmap; -#endif - - return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); -} -#endif - -/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() - * which returns -1 upon error and still satisfy user land that checks for - * MAP_FAILED. 
- */ - -static __attribute__((unused)) -void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) -{ - void *ret = sys_mmap(addr, length, prot, flags, fd, offset); - - if ((unsigned long)ret >= -4095UL) { - SET_ERRNO(-(long)ret); - ret = MAP_FAILED; - } - return ret; -} - -static __attribute__((unused)) -int sys_munmap(void *addr, size_t length) -{ - return my_syscall2(__NR_munmap, addr, length); -} - -static __attribute__((unused)) -int munmap(void *addr, size_t length) -{ - return __sysret(sys_munmap(addr, length)); -} - -/* - * int mount(const char *source, const char *target, - * const char *fstype, unsigned long flags, - * const void *data); - */ -static __attribute__((unused)) -int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return my_syscall5(__NR_mount, src, tgt, fst, flags, data); -} - -static __attribute__((unused)) -int mount(const char *src, const char *tgt, - const char *fst, unsigned long flags, - const void *data) -{ - return __sysret(sys_mount(src, tgt, fst, flags, data)); -} - - -/* - * int open(const char *path, int flags[, mode_t mode]); - */ - -static __attribute__((unused)) -int sys_open(const char *path, int flags, mode_t mode) -{ -#ifdef __NR_openat - return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -#elif defined(__NR_open) - return my_syscall3(__NR_open, path, flags, mode); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int open(const char *path, int flags, ...) 
-{ - mode_t mode = 0; - - if (flags & O_CREAT) { - va_list args; - - va_start(args, flags); - mode = va_arg(args, int); - va_end(args); - } - - return __sysret(sys_open(path, flags, mode)); -} - - -/* - * int pipe2(int pipefd[2], int flags); - * int pipe(int pipefd[2]); - */ - -static __attribute__((unused)) -int sys_pipe2(int pipefd[2], int flags) -{ - return my_syscall2(__NR_pipe2, pipefd, flags); -} - -static __attribute__((unused)) -int pipe2(int pipefd[2], int flags) -{ - return __sysret(sys_pipe2(pipefd, flags)); -} - -static __attribute__((unused)) -int pipe(int pipefd[2]) -{ - return pipe2(pipefd, 0); -} - - -/* - * int prctl(int option, unsigned long arg2, unsigned long arg3, - * unsigned long arg4, unsigned long arg5); - */ - -static __attribute__((unused)) -int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); -} - -static __attribute__((unused)) -int prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); -} - - -/* - * int pivot_root(const char *new, const char *old); - */ - -static __attribute__((unused)) -int sys_pivot_root(const char *new, const char *old) -{ - return my_syscall2(__NR_pivot_root, new, old); -} - -static __attribute__((unused)) -int pivot_root(const char *new, const char *old) -{ - return __sysret(sys_pivot_root(new, old)); -} - - -/* - * int poll(struct pollfd *fds, int nfds, int timeout); - */ - -static __attribute__((unused)) -int sys_poll(struct pollfd *fds, int nfds, int timeout) -{ -#if defined(__NR_ppoll) - struct timespec t; - - if (timeout >= 0) { - t.tv_sec = timeout / 1000; - t.tv_nsec = (timeout % 1000) * 1000000; - } - return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? 
&t : NULL, NULL, 0); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int poll(struct pollfd *fds, int nfds, int timeout) -{ - return __sysret(sys_poll(fds, nfds, timeout)); -} - - -/* - * ssize_t read(int fd, void *buf, size_t count); - */ - -static __attribute__((unused)) -ssize_t sys_read(int fd, void *buf, size_t count) -{ - return my_syscall3(__NR_read, fd, buf, count); -} - -static __attribute__((unused)) -ssize_t read(int fd, void *buf, size_t count) -{ - return __sysret(sys_read(fd, buf, count)); -} - - -/* - * int reboot(int cmd); - * is among LINUX_REBOOT_CMD_* - */ - -static __attribute__((unused)) -ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) -{ - return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); -} - -static __attribute__((unused)) -int reboot(int cmd) -{ - return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); -} - - -/* - * int sched_yield(void); - */ - -static __attribute__((unused)) -int sys_sched_yield(void) -{ - return my_syscall0(__NR_sched_yield); -} - -static __attribute__((unused)) -int sched_yield(void) -{ - return __sysret(sys_sched_yield()); -} - - -/* - * int select(int nfds, fd_set *read_fds, fd_set *write_fds, - * fd_set *except_fds, struct timeval *timeout); - */ - -static __attribute__((unused)) -int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ -#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) - struct sel_arg_struct { - unsigned long n; - fd_set *r, *w, *e; - struct timeval *t; - } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; - return my_syscall1(__NR_select, &arg); -#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6) - struct timespec t; - - if (timeout) { - t.tv_sec = timeout->tv_sec; - t.tv_nsec = timeout->tv_usec * 1000; - } - return my_syscall6(__NR_pselect6, nfds, rfds, wfds, 
efds, timeout ? &t : NULL, NULL); -#elif defined(__NR__newselect) || defined(__NR_select) -#ifndef __NR__newselect -#define __NR__newselect __NR_select -#endif - return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ - return __sysret(sys_select(nfds, rfds, wfds, efds, timeout)); -} - - -/* - * int setpgid(pid_t pid, pid_t pgid); - */ - -static __attribute__((unused)) -int sys_setpgid(pid_t pid, pid_t pgid) -{ - return my_syscall2(__NR_setpgid, pid, pgid); -} - -static __attribute__((unused)) -int setpgid(pid_t pid, pid_t pgid) -{ - return __sysret(sys_setpgid(pid, pgid)); -} - - -/* - * pid_t setsid(void); - */ - -static __attribute__((unused)) -pid_t sys_setsid(void) -{ - return my_syscall0(__NR_setsid); -} - -static __attribute__((unused)) -pid_t setsid(void) -{ - return __sysret(sys_setsid()); -} - -/* - * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); - * int stat(const char *path, struct stat *buf); - */ - -static __attribute__((unused)) -int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) -{ -#ifdef __NR_statx - return my_syscall5(__NR_statx, fd, path, flags, mask, buf); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) -{ - return __sysret(sys_statx(fd, path, flags, mask, buf)); -} - - -static __attribute__((unused)) -int stat(const char *path, struct stat *buf) -{ - struct statx statx; - long ret; - - ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); - if (ret == -1) - return ret; - - buf->st_dev = ((statx.stx_dev_minor & 0xff) - | (statx.stx_dev_major << 8) - | ((statx.stx_dev_minor & ~0xff) << 12)); - buf->st_ino = statx.stx_ino; - buf->st_mode = statx.stx_mode; - 
buf->st_nlink = statx.stx_nlink; - buf->st_uid = statx.stx_uid; - buf->st_gid = statx.stx_gid; - buf->st_rdev = ((statx.stx_rdev_minor & 0xff) - | (statx.stx_rdev_major << 8) - | ((statx.stx_rdev_minor & ~0xff) << 12)); - buf->st_size = statx.stx_size; - buf->st_blksize = statx.stx_blksize; - buf->st_blocks = statx.stx_blocks; - buf->st_atim.tv_sec = statx.stx_atime.tv_sec; - buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; - buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; - buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; - buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; - buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; - - return 0; -} - - -/* - * int symlink(const char *old, const char *new); - */ - -static __attribute__((unused)) -int sys_symlink(const char *old, const char *new) -{ -#ifdef __NR_symlinkat - return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#elif defined(__NR_symlink) - return my_syscall2(__NR_symlink, old, new); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -int symlink(const char *old, const char *new) -{ - return __sysret(sys_symlink(old, new)); -} - - -/* - * mode_t umask(mode_t mode); - */ - -static __attribute__((unused)) -mode_t sys_umask(mode_t mode) -{ - return my_syscall1(__NR_umask, mode); -} - -static __attribute__((unused)) -mode_t umask(mode_t mode) -{ - return sys_umask(mode); -} - - -/* - * int umount2(const char *path, int flags); - */ - -static __attribute__((unused)) -int sys_umount2(const char *path, int flags) -{ - return my_syscall2(__NR_umount2, path, flags); -} - -static __attribute__((unused)) -int umount2(const char *path, int flags) -{ - return __sysret(sys_umount2(path, flags)); -} - - -/* - * int unlink(const char *path); - */ - -static __attribute__((unused)) -int sys_unlink(const char *path) -{ -#ifdef __NR_unlinkat - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#elif defined(__NR_unlink) - return my_syscall1(__NR_unlink, path); -#else - return -ENOSYS; -#endif -} - -static 
__attribute__((unused)) -int unlink(const char *path) -{ - return __sysret(sys_unlink(path)); -} - - -/* - * pid_t wait(int *status); - * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); - * pid_t waitpid(pid_t pid, int *status, int options); - */ - -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ -#ifdef __NR_wait4 - return my_syscall4(__NR_wait4, pid, status, options, rusage); -#else - return -ENOSYS; -#endif -} - -static __attribute__((unused)) -pid_t wait(int *status) -{ - return __sysret(sys_wait4(-1, status, 0, NULL)); -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return __sysret(sys_wait4(pid, status, options, rusage)); -} - - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - return __sysret(sys_wait4(pid, status, options, NULL)); -} - - -/* - * ssize_t write(int fd, const void *buf, size_t count); - */ - -static __attribute__((unused)) -ssize_t sys_write(int fd, const void *buf, size_t count) -{ - return my_syscall3(__NR_write, fd, buf, count); -} - -static __attribute__((unused)) -ssize_t write(int fd, const void *buf, size_t count) -{ - return __sysret(sys_write(fd, buf, count)); -} - - -/* - * int memfd_create(const char *name, unsigned int flags); - */ - -static __attribute__((unused)) -int sys_memfd_create(const char *name, unsigned int flags) -{ - return my_syscall2(__NR_memfd_create, name, flags); -} - -static __attribute__((unused)) -int memfd_create(const char *name, unsigned int flags) -{ - return __sysret(sys_memfd_create(name, flags)); -} - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_SYS_H */ diff --git a/libcontainer/dmz/nolibc/time.h b/libcontainer/dmz/nolibc/time.h deleted file mode 100644 index 84655361b9a..00000000000 --- a/libcontainer/dmz/nolibc/time.h +++ /dev/null @@ -1,31 +0,0 @@ -/* 
SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * time function definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_TIME_H -#define _NOLIBC_TIME_H - -#include "std.h" -#include "arch.h" -#include "types.h" -#include "sys.h" - -static __attribute__((unused)) -time_t time(time_t *tptr) -{ - struct timeval tv; - - /* note, cannot fail here */ - sys_gettimeofday(&tv, NULL); - - if (tptr) - *tptr = tv.tv_sec; - return tv.tv_sec; -} - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_TIME_H */ diff --git a/libcontainer/dmz/nolibc/types.h b/libcontainer/dmz/nolibc/types.h deleted file mode 100644 index 8cfc4c860fa..00000000000 --- a/libcontainer/dmz/nolibc/types.h +++ /dev/null @@ -1,241 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * Special types used by various syscalls for NOLIBC - * Copyright (C) 2017-2021 Willy Tarreau - */ - -#ifndef _NOLIBC_TYPES_H -#define _NOLIBC_TYPES_H - -#include "std.h" -#include -#include /* for LINUX_REBOOT_* */ -#include -#include - - -/* Only the generic macros and types may be defined here. The arch-specific - * ones such as the O_RDONLY and related macros used by fcntl() and open() - * must not be defined here. - */ - -/* stat flags (WARNING, octal here). We need to check for an existing - * definition because linux/stat.h may omit to define those if it finds - * that any glibc header was already included. 
- */ -#if !defined(S_IFMT) -#define S_IFDIR 0040000 -#define S_IFCHR 0020000 -#define S_IFBLK 0060000 -#define S_IFREG 0100000 -#define S_IFIFO 0010000 -#define S_IFLNK 0120000 -#define S_IFSOCK 0140000 -#define S_IFMT 0170000 - -#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) -#define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) -#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) -#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) -#define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) -#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) -#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) - -#define S_IRWXU 00700 -#define S_IRUSR 00400 -#define S_IWUSR 00200 -#define S_IXUSR 00100 - -#define S_IRWXG 00070 -#define S_IRGRP 00040 -#define S_IWGRP 00020 -#define S_IXGRP 00010 - -#define S_IRWXO 00007 -#define S_IROTH 00004 -#define S_IWOTH 00002 -#define S_IXOTH 00001 -#endif - -/* dirent types */ -#define DT_UNKNOWN 0x0 -#define DT_FIFO 0x1 -#define DT_CHR 0x2 -#define DT_DIR 0x4 -#define DT_BLK 0x6 -#define DT_REG 0x8 -#define DT_LNK 0xa -#define DT_SOCK 0xc - -/* commonly an fd_set represents 256 FDs */ -#ifndef FD_SETSIZE -#define FD_SETSIZE 256 -#endif - -/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different - * values. 
- */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -#ifndef MAXPATHLEN -#define MAXPATHLEN (PATH_MAX) -#endif - -/* flags for mmap */ -#ifndef MAP_FAILED -#define MAP_FAILED ((void *)-1) -#endif - -/* whence values for lseek() */ -#define SEEK_SET 0 -#define SEEK_CUR 1 -#define SEEK_END 2 - -/* flags for reboot */ -#define RB_AUTOBOOT LINUX_REBOOT_CMD_RESTART -#define RB_HALT_SYSTEM LINUX_REBOOT_CMD_HALT -#define RB_ENABLE_CAD LINUX_REBOOT_CMD_CAD_ON -#define RB_DISABLE_CAD LINUX_REBOOT_CMD_CAD_OFF -#define RB_POWER_OFF LINUX_REBOOT_CMD_POWER_OFF -#define RB_SW_SUSPEND LINUX_REBOOT_CMD_SW_SUSPEND -#define RB_KEXEC LINUX_REBOOT_CMD_KEXEC - -/* Macros used on waitpid()'s return status */ -#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) -#define WIFEXITED(status) (((status) & 0x7f) == 0) -#define WTERMSIG(status) ((status) & 0x7f) -#define WIFSIGNALED(status) ((status) - 1 < 0xff) - -/* waitpid() flags */ -#define WNOHANG 1 - -/* standard exit() codes */ -#define EXIT_SUCCESS 0 -#define EXIT_FAILURE 1 - -#define FD_SETIDXMASK (8 * sizeof(unsigned long)) -#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) - -/* for select() */ -typedef struct { - unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; -} fd_set; - -#define FD_CLR(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fds[__fd / FD_SETIDXMASK] &= \ - ~(1U << (__fd & FX_SETBITMASK)); \ - } while (0) - -#define FD_SET(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fds[__fd / FD_SETIDXMASK] |= \ - 1 << (__fd & FD_SETBITMASK); \ - } while (0) - -#define FD_ISSET(fd, set) ({ \ - fd_set *__set = (set); \ - int __fd = (fd); \ - int __r = 0; \ - if (__fd >= 0) \ - __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ -1U << (__fd & FD_SET_BITMASK)); \ - __r; \ - }) - -#define FD_ZERO(set) do { \ - fd_set *__set = (set); \ - int __idx; \ - int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ - for (__idx = 0; 
__idx < __size; __idx++) \ - __set->fds[__idx] = 0; \ - } while (0) - -/* for poll() */ -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 - -struct pollfd { - int fd; - short int events; - short int revents; -}; - -/* for getdents64() */ -struct linux_dirent64 { - uint64_t d_ino; - int64_t d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[]; -}; - -/* needed by wait4() */ -struct rusage { - struct timeval ru_utime; - struct timeval ru_stime; - long ru_maxrss; - long ru_ixrss; - long ru_idrss; - long ru_isrss; - long ru_minflt; - long ru_majflt; - long ru_nswap; - long ru_inblock; - long ru_oublock; - long ru_msgsnd; - long ru_msgrcv; - long ru_nsignals; - long ru_nvcsw; - long ru_nivcsw; -}; - -/* The format of the struct as returned by the libc to the application, which - * significantly differs from the format returned by the stat() syscall flavours. - */ -struct stat { - dev_t st_dev; /* ID of device containing file */ - ino_t st_ino; /* inode number */ - mode_t st_mode; /* protection */ - nlink_t st_nlink; /* number of hard links */ - uid_t st_uid; /* user ID of owner */ - gid_t st_gid; /* group ID of owner */ - dev_t st_rdev; /* device ID (if special file) */ - off_t st_size; /* total size, in bytes */ - blksize_t st_blksize; /* blocksize for file system I/O */ - blkcnt_t st_blocks; /* number of 512B blocks allocated */ - union { time_t st_atime; struct timespec st_atim; }; /* time of last access */ - union { time_t st_mtime; struct timespec st_mtim; }; /* time of last modification */ - union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */ -}; - -/* WARNING, it only deals with the 4096 first majors and 256 first minors */ -#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) -#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) -#define minor(dev) ((unsigned int)(((dev) & 0xff)) 
- -#ifndef offsetof -#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) -#endif - -#ifndef container_of -#define container_of(PTR, TYPE, FIELD) ({ \ - __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR); \ - (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \ -}) -#endif - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_TYPES_H */ diff --git a/libcontainer/dmz/nolibc/unistd.h b/libcontainer/dmz/nolibc/unistd.h deleted file mode 100644 index e38f3660c05..00000000000 --- a/libcontainer/dmz/nolibc/unistd.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * unistd function definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau - */ - -#ifndef _NOLIBC_UNISTD_H -#define _NOLIBC_UNISTD_H - -#include "std.h" -#include "arch.h" -#include "types.h" -#include "sys.h" - - -#define STDIN_FILENO 0 -#define STDOUT_FILENO 1 -#define STDERR_FILENO 2 - - -static __attribute__((unused)) -int msleep(unsigned int msecs) -{ - struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 }; - - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) - return (my_timeval.tv_sec * 1000) + - (my_timeval.tv_usec / 1000) + - !!(my_timeval.tv_usec % 1000); - else - return 0; -} - -static __attribute__((unused)) -unsigned int sleep(unsigned int seconds) -{ - struct timeval my_timeval = { seconds, 0 }; - - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) - return my_timeval.tv_sec + !!my_timeval.tv_usec; - else - return 0; -} - -static __attribute__((unused)) -int usleep(unsigned int usecs) -{ - struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 }; - - return sys_select(0, 0, 0, 0, &my_timeval); -} - -static __attribute__((unused)) -int tcsetpgrp(int fd, pid_t pid) -{ - return ioctl(fd, TIOCSPGRP, &pid); -} - -#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N -#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) -#define _syscall(N, ...) 
__sysret(my_syscall##N(__VA_ARGS__)) -#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) -#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) - -/* make sure to include all global symbols */ -#include "nolibc.h" - -#endif /* _NOLIBC_UNISTD_H */ diff --git a/libcontainer/dmz/xstat.h b/libcontainer/dmz/xstat.h deleted file mode 100644 index 4acef77c21c..00000000000 --- a/libcontainer/dmz/xstat.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef XSTAT_H -#define XSTAT_H - -// Some old-kernels (like centos-7) don't have statx() defined in linux/stat.h. We can't include -// sys/stat.h because it creates conflicts, so let's just define what we need here and be done with -// this. -// TODO (rata): I'll probably submit a patch to nolibc upstream so we can remove this hack in the -// future. -#include /* for statx() */ - -#ifndef STATX_BASIC_STATS -#include "linux/stat.h" -#endif // STATX_BASIC_STATS - -#endif // XSTAT_H diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 176a4cdc12b..1eb0279d9e0 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -185,17 +185,6 @@ func startInitialization() (retErr error) { defer pidfdSocket.Close() } - // Get runc-dmz fds. - var dmzExe *os.File - if dmzFdStr := os.Getenv("_LIBCONTAINER_DMZEXEFD"); dmzFdStr != "" { - dmzFd, err := strconv.Atoi(dmzFdStr) - if err != nil { - return fmt.Errorf("unable to convert _LIBCONTAINER_DMZEXEFD: %w", err) - } - unix.CloseOnExec(dmzFd) - dmzExe = os.NewFile(uintptr(dmzFd), "runc-dmz") - } - // clear the current process's environment to clean any libcontainer // specific env vars. os.Clearenv() @@ -216,10 +205,10 @@ func startInitialization() (retErr error) { } // If init succeeds, it will not return, hence none of the defers will be called. 
- return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe) + return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe) } -func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe *os.File) error { +func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket, fifoFile, logPipe *os.File) error { if err := populateProcessEnvironment(config.Env); err != nil { return err } @@ -236,7 +225,6 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock pidfdSocket: pidfdSocket, config: config, logPipe: logPipe, - dmzExe: dmzExe, } return i.Init() case initStandard: @@ -248,7 +236,6 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock config: config, fifoFile: fifoFile, logPipe: logPipe, - dmzExe: dmzExe, } return i.Init() } diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index d14198772aa..e03ab634b2d 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -25,7 +25,6 @@ type linuxSetnsInit struct { pidfdSocket *os.File config *initConfig logPipe *os.File - dmzExe *os.File } func (l *linuxSetnsInit) getSessionRingName() string { @@ -141,10 +140,6 @@ func (l *linuxSetnsInit) Init() error { return fmt.Errorf("close log pipe: %w", err) } - if l.dmzExe != nil { - l.config.Args[0] = name - return system.Fexecve(l.dmzExe.Fd(), l.config.Args, os.Environ()) - } // Close all file descriptors we are not passing to the container. 
This is // necessary because the execve target could use internal runc fds as the // execve path, potentially giving access to binary files from the host diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index ec2e814370a..4631f249ee2 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -26,7 +26,6 @@ type linuxStandardInit struct { parentPid int fifoFile *os.File logPipe *os.File - dmzExe *os.File config *initConfig } @@ -275,10 +274,6 @@ func (l *linuxStandardInit) Init() error { return err } - if l.dmzExe != nil { - l.config.Args[0] = name - return system.Fexecve(l.dmzExe.Fd(), l.config.Args, os.Environ()) - } // Close all file descriptors we are not passing to the container. This is // necessary because the execve target could use internal runc fds as the // execve path, potentially giving access to binary files from the host diff --git a/tests/integration/run.bats b/tests/integration/run.bats index 390a69bf083..c6e30709402 100644 --- a/tests/integration/run.bats +++ b/tests/integration/run.bats @@ -127,33 +127,6 @@ function teardown() { [ "${lines[0]}" = "410" ] } -@test "RUNC_DMZ=true runc run [runc-dmz]" { - RUNC_DMZ=true runc --debug run test_hello - [ "$status" -eq 0 ] - [[ "$output" = *"Hello World"* ]] - # We use runc-dmz if we can. - [[ "$output" = *"runc-dmz: using runc-dmz"* ]] -} - -@test "RUNC_DMZ=true runc run [cap_sys_ptrace -> /proc/self/exe clone]" { - # Add CAP_SYS_PTRACE to the bounding set, the minimum needed to indicate a - # container process _could_ get CAP_SYS_PTRACE. - update_config '.process.capabilities.bounding += ["CAP_SYS_PTRACE"]' - - RUNC_DMZ=true runc --debug run test_hello - [ "$status" -eq 0 ] - [[ "$output" = *"Hello World"* ]] - if [ "$EUID" -ne 0 ] && is_kernel_gte 4.10; then - # For Linux 4.10 and later, rootless containers will use runc-dmz - # because they are running in a user namespace. See isDmzBinarySafe(). 
- [[ "$output" = *"runc-dmz: using runc-dmz"* ]] - else - # If the container has CAP_SYS_PTRACE and is not rootless, we use - # /proc/self/exe cloning. - [[ "$output" = *"runc-dmz: using /proc/self/exe clone"* ]] - fi -} - @test "runc run [/proc/self/exe clone]" { runc --debug run test_hello [ "$status" -eq 0 ] @@ -235,23 +208,6 @@ function teardown() { grep -E '^boottime\s+1337\s+3141519$' <<<"$output" } -@test "RUNC_DMZ=true runc run [exec error]" { - cat <rootfs/run.sh -#!/mmnnttbb foo bar -sh -EOF - chmod +x rootfs/run.sh - update_config '.process.args = [ "/run.sh" ]' - RUNC_DMZ=true runc run test_hello - - # Ensure that the output contains the right error message. For runc-dmz, both - # nolibc and libc have the same formatting string (but libc will print the - # errno description rather than just the number), and for runc_nodmz the error - # message from Go starts with the same string. - [ "$status" -ne 0 ] - [[ "$output" = *"exec /run.sh: "* ]] -} - @test "runc run [execve error]" { cat <rootfs/run.sh #!/mmnnttbb foo bar diff --git a/tests/integration/selinux.bats b/tests/integration/selinux.bats index 19bc9d2070e..84b2368b936 100644 --- a/tests/integration/selinux.bats +++ b/tests/integration/selinux.bats @@ -38,14 +38,6 @@ function teardown() { [ "$status" -eq 0 ] } -# https://github.com/opencontainers/runc/issues/4057 -@test "runc run (custom selinux label, RUNC_DMZ=true)" { - update_config ' .process.selinuxLabel |= "system_u:system_r:container_t:s0:c4,c5" - | .process.args = ["/bin/true"]' - RUNC_DMZ=true runc run tst - [ "$status" -eq 0 ] -} - @test "runc run (custom selinux label)" { update_config ' .process.selinuxLabel |= "system_u:system_r:container_t:s0:c4,c5" | .process.args = ["/bin/true"]'