From d0f9aa03cb19664a8a8913d129fa73c0eaa8c877 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Tue, 6 Nov 2018 09:05:07 -0500 Subject: [PATCH 1/7] cmd-build: Silence `ostree rev-parse` Makes for a scary message otherwise in the `ENOENT` case. --- src/cmd-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd-build b/src/cmd-build index 8841dc5206..839e8bbcd0 100755 --- a/src/cmd-build +++ b/src/cmd-build @@ -69,7 +69,7 @@ fi previous_commit= if [ -n "${ref:-}" ]; then - previous_commit=$(ostree --repo=${workdir}/repo rev-parse ${ref} || true) + previous_commit=$(ostree --repo=${workdir}/repo rev-parse ${ref} 2>/dev/null || true) fi # If the ref was unset or missing, look at the previous build if [ -z "${previous_commit}" ] && [ -n "${previous_build}" ]; then From d909410bdfd5784d27162b319b911fabb23fa007 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Tue, 6 Nov 2018 09:05:08 -0500 Subject: [PATCH 2/7] cmd-build: Handle resuming builds on first build On the very first build, if the treecompose worked, but image generation failed, we would hit `${previous_image_genver}` not being defined (since there wasn't a completed previous build yet) while trying to come up with a new buildid. --- src/cmd-build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cmd-build b/src/cmd-build index 839e8bbcd0..c2caee80a8 100755 --- a/src/cmd-build +++ b/src/cmd-build @@ -151,7 +151,8 @@ fi image_input_checksum=$((echo ${commit} && echo ${kickstart_checksum}) | sha256sum_str) echo "New image input checksum: ${image_input_checksum}" version=$(ostree --repo=${workdir}/repo-build show --print-metadata-key=version ${commit} | sed -e "s,',,g") -if [ "${previous_commit}" = "${commit}" ]; then +# Add 1 to previous image genver if there was a completed build for the same commit +if [ "${previous_commit}" = "${commit}" ] && [ -n "${previous_image_genver:-}" ]; then image_genver=$((${previous_image_genver} + 1)) buildid=${version}-${image_genver} else From c44458571a672d0fa3f7b9947b3d0e2eb814dbf4 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Tue, 6 Nov 2018 09:05:10 -0500 Subject: [PATCH 3/7] Use supermin in unprivileged environments This is a rebased rework of #124 with some modifications: - We auto-detect if we have CAP_SYS_ADMIN and if not, fall back to using supermin. My position is that both approaches will be in use in CI contexts and that the privileged case is faster for local dev, where iterating fast on the content will matter. I've also hopefully implemented things in a way that maintains almost the exact same logic build-wise between the two flows so there's not too much divergence. Anyway, totally open to revisiting this if needed! - In the virtualized path, `fetch` now directly populates the qcow2 cache so that the split `fetch`/`build` approach keeps working as expected. - We drop the repo-build/ repo since it's essentially also a cache and duplicates content from the archive repo. This is also needed to ensure that the pkgcache repo and the repo we commit into are both on the same file system. - The supermin appliance is reused if already generated; the `runvm` command just takes the command you want to run verbatim and plops it into a file the appliance is already coded to check from. Some other minor fixes: - We handle symlinked repos. - Split out supermin packages into a separate file. - Capture rc and bubble that up to the `runvm` caller. - Add virtio-rng device. Originally based on a patch by: Dusty Mabe --- Dockerfile | 2 +- src/cmd-build | 14 +--- src/cmd-clean | 5 +- src/cmd-init | 12 ++- src/cmdlib.sh | 139 +++++++++++++++++++++++++++++------ src/deps.txt | 3 + src/prune_builds | 1 - src/supermin-init-prelude.sh | 27 +++++++ src/vmdeps.txt | 16 ++++ vmdeps.txt | 1 + 10 files changed, 178 insertions(+), 42 deletions(-) create mode 100644 src/supermin-init-prelude.sh create mode 100644 src/vmdeps.txt create mode 120000 vmdeps.txt diff --git a/Dockerfile b/Dockerfile index cc7f0d977b..158ca6e3ef 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM registry.fedoraproject.org/fedora:28 WORKDIR /root/containerbuild # Only need a few of our scripts for the first few steps -COPY ./build.sh ./deps.txt ./build-deps.txt /root/containerbuild/ +COPY ./build.sh ./deps.txt ./vmdeps.txt ./build-deps.txt /root/containerbuild/ RUN ./build.sh configure_yum_repos RUN ./build.sh install_rpms diff --git a/src/cmd-build b/src/cmd-build index c2caee80a8..eff7f5f881 100755 --- a/src/cmd-build +++ b/src/cmd-build @@ -145,13 +145,12 @@ else fi if [ -n "${previous_build}" ]; then - rpm-ostree --repo=${workdir}/repo-build db diff ${previous_commit} ${commit} + rpm-ostree --repo=${workdir}/repo db diff ${previous_commit} ${commit} fi image_input_checksum=$((echo ${commit} && echo ${kickstart_checksum}) | sha256sum_str) echo "New image input checksum: ${image_input_checksum}" -version=$(ostree --repo=${workdir}/repo-build show --print-metadata-key=version ${commit} | sed -e "s,',,g") -# Add 1 to previous image genver if there was a completed build for the same commit +version=$(ostree --repo=${workdir}/repo show --print-metadata-key=version ${commit} | sed -e "s,',,g") if [ "${previous_commit}" = "${commit}" ] && [ -n "${previous_image_genver:-}" ]; then image_genver=$((${previous_image_genver} + 1)) buildid=${version}-${image_genver} @@ -161,15 +160,6 @@ else fi echo "New build ID: ${buildid}" -# https://github.com/ostreedev/ostree/issues/1562#issuecomment-385393872 -# The passwd files (among others) don't have world readability. This won't -# actually corrupt the repository as the *canonical* permissions are stored -# as xattrs. Probably what we should do is have an ostree option to specify -# a permission mask for objects. -sudo chmod -R a+rX ${workdir}/repo-build/objects -ostree --repo=${workdir}/repo pull-local ${workdir}/repo-build "${ref:-${commit}}" -ostree --repo=${workdir}/repo summary -u - # Generate JSON if [ -n "${previous_commit}" ]; then previous_commit_json='"'"${previous_commit}"'"' diff --git a/src/cmd-clean b/src/cmd-clean index 2690c02d5b..9d312c0997 100755 --- a/src/cmd-clean +++ b/src/cmd-clean @@ -49,8 +49,7 @@ prepare_build # But go back to the toplevel cd ${workdir} -# Note we don't prune the cache/ dir or the objects -# in the repos. If you want that, just rm -rf them. +# Note we don't prune the cache.qcow2 or the objects +# in the repo. If you want that, just rm -rf them. rm -rf repo/refs/heads/* builds/* tmp/* ostree --repo=repo summary -u -sudo rm -rf repo-build/refs/heads/* diff --git a/src/cmd-init b/src/cmd-init index 60e868b762..7cbc13f1d0 100755 --- a/src/cmd-init +++ b/src/cmd-init @@ -66,13 +66,16 @@ if [ "$FORCE" != "1" -a ! -z "$(ls -A ./)" ]; then fatal "init: current directory is not empty, override with --force" fi -set -x source=$1; shift subdir=${1:-} preflight -sudo chown $USER: . +if has_privileges; then + sudo chown $USER: . +elif [ ! -w . ]; then + fatal "init: running unprivileged, and current directory not writable" +fi INSTALLER=https://download.fedoraproject.org/pub/fedora/linux/releases/28/Everything/x86_64/iso/Fedora-Everything-netinst-x86_64-28-1.1.iso INSTALLER_CHECKSUM=https://download.fedoraproject.org/pub/fedora/linux/releases/28/Everything/x86_64/iso/Fedora-Everything-28-1.1-x86_64-CHECKSUM @@ -116,4 +119,7 @@ mkdir -p cache mkdir -p builds mkdir -p tmp ostree --repo=repo init --mode=archive -ostree --repo=repo-build init --mode=bare-user +if ! has_privileges; then + qemu-img create -f qcow2 cache/cache.qcow2 10G + LIBGUESTFS_BACKEND=direct virt-format --filesystem=xfs -a cache/cache.qcow2 +fi diff --git a/src/cmdlib.sh b/src/cmdlib.sh index e5571b652c..e60ee47eae 100755 --- a/src/cmdlib.sh +++ b/src/cmdlib.sh @@ -1,7 +1,32 @@ # Shared shell script library +DIR=$(dirname $0) + +info() { + echo "info: $@" 1>&2 +} + fatal() { - echo "error: $@" 1>&2; exit 1 + info "$@"; exit 1 +} + +_privileged= +has_privileges() { + if [ -z "${_privileged:-}" ]; then + if [ -n "${FORCE_UNPRIVILEGED:-}" ]; then + info "Detected FORCE_UNPRIVILEGED; using virt" + _privileged=0 + elif ! capsh --print | grep -q 'Current.*cap_sys_admin'; then + info "Missing CAP_SYS_ADMIN; using virt" + _privileged=0 + elif ! sudo true; then + info "Missing sudo privs; using virt" + _privileged=0 + else + _privileged=1 + fi + fi + [ ${_privileged} == 1 ] } preflight() { @@ -28,20 +53,16 @@ preflight() { fatal "Unable to find /dev/kvm" fi - if ! capsh --print | grep -q 'Current.*cap_sys_admin'; then - fatal "This container must currently be run with --privileged" - fi - - if ! sudo true; then - fatal "The user must currently have sudo privileges" - fi - # permissions on /dev/kvm vary by (host) distro. If it's # not writable, recreate it. if ! [ -w /dev/kvm ]; then - sudo rm -f /dev/kvm - sudo mknod /dev/kvm c 10 232 - sudo setfacl -m u:$USER:rw /dev/kvm + if ! has_privileges; then + fatal "running unprivileged, and /dev/kvm not writable" + else + sudo rm -f /dev/kvm + sudo mknod /dev/kvm c 10 232 + sudo setfacl -m u:$USER:rw /dev/kvm + fi fi } @@ -49,6 +70,8 @@ prepare_build() { preflight if ! [ -d repo ]; then fatal "No $(pwd)/repo found; did you run coreos-assembler init?" + elif ! has_privileges && [ ! -f cache/cache.qcow2 ]; then + fatal "No cache.qcow2 found; did you run coreos-assembler init?" fi export workdir=$(pwd) @@ -95,10 +118,6 @@ prepare_build() { } runcompose() { - local treecompose_args="" - if ! grep -q '^# disable-unified-core' "${manifest}"; then - treecompose_args="${treecompose_args} --unified-core" - fi # Implement support for automatic local overrides: # https://github.com/coreos/coreos-assembler/issues/118 local overridesdir=${workdir}/overrides/ @@ -126,10 +145,86 @@ EOF fi rm -f ${changed_stamp} - set -x - sudo rpm-ostree compose tree --repo=${workdir}/repo-build --cachedir=${workdir}/cache \ - --touch-if-changed "${changed_stamp}" \ - ${treecompose_args} \ - ${TREECOMPOSE_FLAGS:-} ${manifest} "$@" - set +x + + set - rpm-ostree compose tree --repo=${workdir}/repo \ + --cachedir=${workdir}/cache --touch-if-changed "${changed_stamp}" \ + ${manifest} "$@" + + if ! grep -q '^# disable-unified-core' "${manifest}"; then + set - "$@" --unified-core + fi + + echo "Running: $@" + + # this is the heart of the privs vs no privs dual path + if has_privileges; then + sudo "$@" + else + runvm "$@" + fi +} + +runvm() { + local vmpreparedir=${workdir}/tmp/supermin.prepare + local vmbuilddir=${workdir}/tmp/supermin.build + + # use REBUILDVM=1 if e.g. hacking on rpm-ostree/ostree and wanting to get + # the new bits in the VM + if [ ! -f ${vmbuilddir}/.done ] || [ -n "${REBUILDVM:-}" ]; then + rm -rf ${vmpreparedir} ${vmbuilddir} + mkdir -p ${vmpreparedir} ${vmbuilddir} + + local rpms= + # then add all the base deps + for dep in $(grep -v '^#' ${DIR}/vmdeps.txt); do + rpms+="$dep " + done + + supermin --prepare --use-installed $rpms -o "${vmpreparedir}" + + # the reason we do a heredoc here is so that the var substition takes + # place immediately instead of having to proxy them through to the VM + cat > "${vmpreparedir}/init" < ${workdir}/tmp/rc +/sbin/fstrim -v ${workdir}/cache +/sbin/reboot -f +EOF + chmod a+x ${vmpreparedir}/init + (cd ${vmpreparedir} && tar -czf init.tar.gz --remove-files init) + supermin --build "${vmpreparedir}" --size 5G -f ext2 -o "${vmbuilddir}" + touch "${vmbuilddir}/.done" + fi + + echo "$@" > ${TMPDIR}/cmd.sh + + # support local dev cases where src/config is a symlink + srcvirtfs= + if [ -L "${workdir}/src/config" ]; then + # qemu follows symlinks + srcvirtfs="-virtfs local,id=source,path=${workdir}/src/config,security_model=none,mount_tag=source" + fi + + qemu-kvm -nodefaults -nographic -m 2048 -no-reboot \ + -kernel "${vmbuilddir}/kernel" \ + -initrd "${vmbuilddir}/initrd" \ + -netdev user,id=eth0,hostname=supermin \ + -device virtio-net-pci,netdev=eth0 \ + -device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x3 \ + -drive if=none,id=drive-scsi0-0-0-0,snapshot=on,file="${vmbuilddir}/root" \ + -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=1 \ + -drive if=none,id=drive-scsi0-0-0-1,discard=unmap,file="${workdir}/cache/cache.qcow2" \ + -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=1,drive=drive-scsi0-0-0-1,id=scsi0-0-0-1 \ + -virtfs local,id=workdir,path="${workdir}",security_model=none,mount_tag=workdir \ + ${srcvirtfs} -serial stdio -append "root=/dev/sda console=ttyS0 selinux=1 enforcing=0 autorelabel=1" + + if [ ! -f ${workdir}/tmp/rc ]; then + fatal "Couldn't find rc file, something went terribly wrong!" + fi + return $(cat ${workdir}/tmp/rc) } diff --git a/src/deps.txt b/src/deps.txt index 7c1580f941..9514c5dac4 100644 --- a/src/deps.txt +++ b/src/deps.txt @@ -1,3 +1,6 @@ +# For privileged ops +supermin + # We default to builder user, but sudo where necessary sudo diff --git a/src/prune_builds b/src/prune_builds index bf9b486ed7..2a4dfd4bf4 100755 --- a/src/prune_builds +++ b/src/prune_builds @@ -102,4 +102,3 @@ def ostree_prune(repo_name, sudo=False): subprocess.run(argv, check=True) ostree_prune('repo') -ostree_prune('repo-build', sudo=True) diff --git a/src/supermin-init-prelude.sh b/src/supermin-init-prelude.sh new file mode 100644 index 0000000000..d7ed5b40bd --- /dev/null +++ b/src/supermin-init-prelude.sh @@ -0,0 +1,27 @@ +mount -t proc /proc /proc +mount -t sysfs /sys /sys +mount -t devtmpfs devtmpfs /dev + +# load selinux policy +LANG=C /sbin/load_policy -i + +# load kernel module for 9pnet_virtio for 9pfs mount +/sbin/modprobe 9pnet_virtio + +# need fuse module for rofiles-fuse/bwrap during post scripts run +/sbin/modprobe fuse + +# set up networking +/usr/sbin/dhclient eth0 + +# set up workdir +mkdir -p ${workdir} +mount -t 9p -o rw,trans=virtio,version=9p2000.L workdir ${workdir} +if [ -L ${workdir}/src/config ]; then + mkdir -p $(readlink ${workdir}/src/config) + mount -t 9p -o rw,trans=virtio,version=9p2000.L source ${workdir}/src/config +fi +mkdir -p ${workdir}/cache +mount /dev/sdb1 ${workdir}/cache + +cd ${workdir} diff --git a/src/vmdeps.txt b/src/vmdeps.txt new file mode 100644 index 0000000000..ed77186200 --- /dev/null +++ b/src/vmdeps.txt @@ -0,0 +1,16 @@ +# Base deps for a viable VM environment. + +# bare essentials +bash vim-minimal coreutils util-linux procps-ng kmod kernel-modules + +# for composes +rpm-ostree distribution-gpg-keys jq + +# for clean reboot +systemd + +# networking +dhcp-client bind-export-libs iproute + +# SELinux +selinux-policy selinux-policy-targeted policycoreutils diff --git a/vmdeps.txt b/vmdeps.txt new file mode 120000 index 0000000000..bfb8a4a5b9 --- /dev/null +++ b/vmdeps.txt @@ -0,0 +1 @@ +./src/vmdeps.txt \ No newline at end of file From 8c380d0e0342c5c4061c1f9b768ec8f5a10247f8 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Tue, 6 Nov 2018 09:05:11 -0500 Subject: [PATCH 4/7] Support arbitrary UIDs for OpenShift --- Dockerfile | 4 ++++ coreos-assembler | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/Dockerfile b/Dockerfile index 158ca6e3ef..0599f8c8a3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,6 +15,10 @@ RUN ./build.sh configure_user WORKDIR /srv/ RUN rm -rf /root/containerbuild +# allow writing to /etc/passwd from arbitrary UID +# https://docs.openshift.com/container-platform/3.10/creating_images/guidelines.html +RUN chmod g=u /etc/passwd + # run as `builder` user USER builder ENTRYPOINT ["/usr/bin/dumb-init", "/usr/bin/coreos-assembler"] diff --git a/coreos-assembler b/coreos-assembler index 7ea5c5eb65..7aa6943d74 100755 --- a/coreos-assembler +++ b/coreos-assembler @@ -7,6 +7,16 @@ set -euo pipefail # docker/podman don't run through PAM, but we want this set export USER=${USER:-$(id -nu)} +# When trying to connect to libvirt we get "Failed to find user record +# for uid" errors if there is no entry for our UID in /etc/passwd. +# This was taken from 'Support Arbitrary User IDs' section of: +# https://docs.openshift.com/container-platform/3.10/creating_images/guidelines.html +if ! whoami &> /dev/null; then + if [ -w /etc/passwd ]; then + echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd + fi +fi + # Ensure we've unshared our mount namespace so # the later umount doesn't affect the host potentially if [ -e /sys/fs/selinux/status ]; then From f302c82abd03bfba4b7becff21619eba4cc4a8c9 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Tue, 6 Nov 2018 09:05:12 -0500 Subject: [PATCH 5/7] Set $HOME path in /etc/passwd on openshift If you don't you get errors like this from libvirt: error: Cannot create user runtime directory '//.cache/libvirt': Permission denied --- coreos-assembler | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/coreos-assembler b/coreos-assembler index 7aa6943d74..2d1b3d5a9a 100755 --- a/coreos-assembler +++ b/coreos-assembler @@ -4,7 +4,8 @@ set -euo pipefail # Currently this just wraps the two binaries we have today # under a global entrypoint with subcommands. -# docker/podman don't run through PAM, but we want this set +# docker/podman don't run through PAM, but we want this set for the privileged +# (non-virtualized) path export USER=${USER:-$(id -nu)} # When trying to connect to libvirt we get "Failed to find user record @@ -12,6 +13,10 @@ export USER=${USER:-$(id -nu)} # This was taken from 'Support Arbitrary User IDs' section of: # https://docs.openshift.com/container-platform/3.10/creating_images/guidelines.html if ! whoami &> /dev/null; then + # We need to make sure we set $HOME in the /etc/passwd file because + # if we don't libvirt will try to use `/` and we will get permission + # issues + export HOME="/var/tmp/${USER_NAME:-default}" && mkdir -p $HOME if [ -w /etc/passwd ]; then echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd fi From 91d8f2a5e24bd77c354dd81c1704e1cc09a3d10a Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Tue, 6 Nov 2018 09:05:13 -0500 Subject: [PATCH 6/7] Don't export TMPDIR variable We don't want to do this because then subprocesses (like anything with libguestfs) will possibly fail on openshift with errors like: libguestfs: error: security: cached appliance # /srv/tmp/build/tmp/.guestfs-1000240000 is not owned by UID # 1000240000 --- src/cmdlib.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cmdlib.sh b/src/cmdlib.sh index e60ee47eae..e4042cfbd3 100755 --- a/src/cmdlib.sh +++ b/src/cmdlib.sh @@ -111,10 +111,9 @@ prepare_build() { cd ${tmp_builddir} # *This* tmp directory is truly temporary to this build, and # contains artifacts we definitely don't want to outlive it, unlike - # other things in ${workdir}/tmp. We also export it as an environment - # variable for child processes like gf-oemid. - mkdir tmp - export TMPDIR=$(pwd)/tmp + # other things in ${workdir}/tmp. But we don't export it since e.g. if it's + # over an NFS mount (like a PVC in OCP), some apps might error out. + mkdir tmp && TMPDIR=$(pwd)/tmp } runcompose() { From 18a0752b5cad99883332e60d068a8ab191942b19 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Tue, 6 Nov 2018 09:07:39 -0500 Subject: [PATCH 7/7] Drop support for non-unified core mode Even though rpm-ostree now transparently supports both modes without having to do the bare-user dance, let's just drop it anyway so we can concentrate on a single way of doing things. --- src/cmdlib.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/cmdlib.sh b/src/cmdlib.sh index e4042cfbd3..a7bf4245f0 100755 --- a/src/cmdlib.sh +++ b/src/cmdlib.sh @@ -147,11 +147,7 @@ EOF set - rpm-ostree compose tree --repo=${workdir}/repo \ --cachedir=${workdir}/cache --touch-if-changed "${changed_stamp}" \ - ${manifest} "$@" - - if ! grep -q '^# disable-unified-core' "${manifest}"; then - set - "$@" --unified-core - fi + --unified-core ${manifest} "$@" echo "Running: $@"