diff --git a/docs/working.md b/docs/working.md index 71eb29d950..2dce7e9bb2 100644 --- a/docs/working.md +++ b/docs/working.md @@ -164,9 +164,6 @@ allowing you to directly execute binaries from there. You can also use e.g. `rpm-ostree usroverlay` and then copy binaries from your host `/run/workdir` into the VM's rootfs. -(This currently only works on Fedora CoreOS which ships `9p`, not RHCOS. A future version - will use https://virtio-fs.gitlab.io/ ) - ## Using host binaries Another related trick is: diff --git a/mantle/cmd/kola/qemuexec.go b/mantle/cmd/kola/qemuexec.go index 66b0cf8133..658d9687e9 100644 --- a/mantle/cmd/kola/qemuexec.go +++ b/mantle/cmd/kola/qemuexec.go @@ -20,7 +20,6 @@ import ( "context" "fmt" "os" - "path/filepath" "strconv" "strings" @@ -91,7 +90,7 @@ func init() { cmdQemuExec.Flags().BoolVarP(&devshellConsole, "devshell-console", "c", false, "Connect directly to serial console in devshell mode") cmdQemuExec.Flags().StringVarP(&ignition, "ignition", "i", "", "Path to Ignition config") cmdQemuExec.Flags().StringVarP(&butane, "butane", "B", "", "Path to Butane config") - cmdQemuExec.Flags().StringArrayVar(&bindro, "bind-ro", nil, "Mount readonly via 9pfs a host directory (use --bind-ro=/path/to/host,/var/mnt/guest") + cmdQemuExec.Flags().StringArrayVar(&bindro, "bind-ro", nil, "Mount readonly via virtiofs a host directory (use --bind-ro=/path/to/host,/var/mnt/guest") cmdQemuExec.Flags().StringArrayVar(&bindrw, "bind-rw", nil, "Same as above, but writable") cmdQemuExec.Flags().BoolVarP(&forceConfigInjection, "inject-ignition", "", false, "Force injecting Ignition config using guestfs") cmdQemuExec.Flags().BoolVar(&propagateInitramfsFailure, "propagate-initramfs-failure", false, "Error out if the system fails in the initramfs") @@ -178,14 +177,11 @@ func runQemuExec(cmd *cobra.Command, args []string) error { ignitionFragments = append(ignitionFragments, "autologin") cpuCountHost = true usernet = true - // Can't use 9p on RHEL8, need https://virtio-fs.gitlab.io/ instead in the future - if kola.Options.CosaWorkdir != "" && !strings.HasPrefix(filepath.Base(kola.QEMUOptions.DiskImage), "rhcos") && !strings.HasPrefix(filepath.Base(kola.QEMUOptions.DiskImage), "scos") && kola.Options.Distribution != "rhcos" && kola.Options.Distribution != "scos" { - // Conservatively bind readonly to avoid anything in the guest (stray tests, whatever) - // from destroying stuff - bindro = append(bindro, fmt.Sprintf("%s,/var/mnt/workdir", kola.Options.CosaWorkdir)) - // But provide the tempdir so it's easy to pass stuff back - bindrw = append(bindrw, fmt.Sprintf("%s,/var/mnt/workdir-tmp", kola.Options.CosaWorkdir+"/tmp")) - } + // Conservatively bind readonly to avoid anything in the guest (stray tests, whatever) + // from destroying stuff + bindro = append(bindro, fmt.Sprintf("%s,/var/mnt/workdir", kola.Options.CosaWorkdir)) + // But provide the tempdir so it's easy to pass stuff back + bindrw = append(bindrw, fmt.Sprintf("%s,/var/mnt/workdir-tmp", kola.Options.CosaWorkdir+"/tmp")) if hostname == "" { hostname = devshellHostname } @@ -261,18 +257,18 @@ func runQemuExec(cmd *cobra.Command, args []string) error { if err != nil { return err } - builder.Mount9p(src, dest, true) + builder.MountVirtiofs(src, dest) ensureConfig() - config.Mount9p(dest, true) + config.MountVirtiofs(dest, true) } for _, b := range bindrw { src, dest, err := parseBindOpt(b) if err != nil { return err } - builder.Mount9p(src, dest, false) + builder.MountVirtiofs(src, dest) ensureConfig() - config.Mount9p(dest, false) + config.MountVirtiofs(dest, false) } builder.ForceConfigInjection = forceConfigInjection if len(firstbootkargs) > 0 { diff --git a/mantle/platform/conf/conf.go b/mantle/platform/conf/conf.go index 3070e65a3a..13a24636b5 100644 --- a/mantle/platform/conf/conf.go +++ b/mantle/platform/conf/conf.go @@ -1362,11 +1362,11 @@ resize_terminal() { PROMPT_COMMAND+=(resize_terminal)`, 0644) } -// Mount9p adds an Ignition config to mount an folder with 9p -func (c *Conf) Mount9p(dest string, readonly bool) { +// MountVirtiofs adds an Ignition config to mount an folder with 9p +func (c *Conf) MountVirtiofs(dest string, readonly bool) { readonlyStr := "" if readonly { - readonlyStr = ",ro" + readonlyStr = "ro" } content := fmt.Sprintf(`[Unit] DefaultDependencies=no @@ -1375,8 +1375,8 @@ Before=basic.target [Mount] What=%s Where=%s -Type=9p -Options=trans=virtio,version=9p2000.L%s,msize=10485760 +Type=virtiofs +Options=%s [Install] WantedBy=multi-user.target `, dest, dest, readonlyStr) diff --git a/mantle/platform/qemu.go b/mantle/platform/qemu.go index 305c76049a..a54602f4d7 100644 --- a/mantle/platform/qemu.go +++ b/mantle/platform/qemu.go @@ -17,7 +17,7 @@ // Why not libvirt? // Two main reasons. First, we really do want to use qemu, and not // something else. We rely on qemu features/APIs and there's a general -// assumption that the qemu process is local (e.g. we expose 9p filesystem +// assumption that the qemu process is local (e.g. we expose virtiofs filesystem // sharing). Second, libvirt runs as a daemon, but we want the // VMs "lifecycle bound" to their creating process (e.g. kola), // so that e.g. Ctrl-C (SIGINT) kills both reliably. @@ -154,11 +154,12 @@ type bootIso struct { // QemuInstance holds an instantiated VM through its lifecycle. type QemuInstance struct { - qemu exec.Cmd - architecture string - tempdir string - swtpm exec.Cmd - nbdServers []exec.Cmd + qemu exec.Cmd + architecture string + tempdir string + swtpm exec.Cmd + // Helpers are child processes such as nbd or virtiofsd that should be lifecycle bound to qemu + helpers []exec.Cmd hostForwardedPorts []HostForwardPort journalPipe *os.File @@ -296,12 +297,12 @@ func (inst *QemuInstance) Destroy() { inst.swtpm.Kill() //nolint // Ignore errors inst.swtpm = nil } - for _, nbdServ := range inst.nbdServers { - if nbdServ != nil { - nbdServ.Kill() //nolint // Ignore errors + for _, p := range inst.helpers { + if p != nil { + p.Kill() //nolint // Ignore errors } } - inst.nbdServers = nil + inst.helpers = nil if inst.tempdir != "" { if err := os.RemoveAll(inst.tempdir); err != nil { @@ -430,6 +431,11 @@ func (inst *QemuInstance) RemovePrimaryBlockDevice() (err2 error) { return nil } +type VirtiofsMount struct { + src string + tag string +} + // QemuBuilder is a configurator that can then create a qemu instance type QemuBuilder struct { // ConfigFile is a path to Ignition configuration @@ -486,7 +492,8 @@ type QemuBuilder struct { finalized bool diskID uint disks []*Disk - fs9pID uint + // virtiofs is an array of virtiofs mounts + virtiofs []VirtiofsMount // virtioSerialID is incremented for each device virtioSerialID uint // fds is file descriptors we own to pass to qemu @@ -711,16 +718,10 @@ func (builder *QemuBuilder) encryptIgnitionConfig() error { return nil } -// Mount9p sets up a mount point from the host to guest. To be replaced -// with https://virtio-fs.gitlab.io/ once it lands everywhere. -func (builder *QemuBuilder) Mount9p(source, destHint string, readonly bool) { - builder.fs9pID++ - readonlyStr := "" - if readonly { - readonlyStr = ",readonly=on" - } - builder.Append("--fsdev", fmt.Sprintf("local,id=fs%d,path=%s,security_model=mapped%s", builder.fs9pID, source, readonlyStr)) - builder.Append("-device", virtio(builder.architecture, "9p", fmt.Sprintf("fsdev=fs%d,mount_tag=%s", builder.fs9pID, destHint))) +// MountVirtiofs sets up a mount point from the host to guest. +// Note that virtiofs does not currently support read-only mounts (which is really surprising!) +func (builder *QemuBuilder) MountVirtiofs(source, dest string) { + builder.virtiofs = append(builder.virtiofs, VirtiofsMount{src: source, tag: dest}) } // supportsFwCfg if the target system supports injecting @@ -1636,7 +1637,7 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) { if err := cmd.Start(); err != nil { return nil, errors.Wrapf(err, "spawing nbd server") } - inst.nbdServers = append(inst.nbdServers, cmd) + inst.helpers = append(inst.helpers, cmd) } } @@ -1716,6 +1717,31 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) { return nil, err } + // Process virtiofs mounts + if len(builder.virtiofs) > 0 { + if err := builder.ensureTempdir(); err != nil { + return nil, err + } + + // Allocate a shared memory object and set up NUMA, which is needed for virtiofs + builder.Append("-object", fmt.Sprintf("memory-backend-file,id=mem,size=%dM,mem-path=/dev/shm,share=on", builder.Memory), "-numa", "node,memdev=mem") + + // We assume we're already running in a container + isolation := "none" + + for i, virtiofs := range builder.virtiofs { + virtiofsChar := fmt.Sprintf("virtiofschar%d", i) + virtiofsdSocket := filepath.Join(builder.tempdir, fmt.Sprintf("virtiofsd-%d.sock", i)) + builder.Append("-chardev", fmt.Sprintf("socket,id=%s,path=%s", virtiofsChar, virtiofsdSocket)) + builder.Append("-device", fmt.Sprintf("vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=%s", virtiofsChar, virtiofs.tag)) + p := exec.Command("/usr/libexec/virtiofsd", "--sandbox", isolation, "--socket-path", virtiofsdSocket, "--shared-dir", virtiofs.src) + if err := p.Start(); err != nil { + return nil, fmt.Errorf("Failed to start virtiofsd") + } + inst.helpers = append(inst.helpers, p) + } + } + fdnum := 3 // first additional file starts at position 3 for i := range builder.fds { fdset := i + 1 // Start at 1 diff --git a/src/buildextend-legacy-oscontainer.sh b/src/buildextend-legacy-oscontainer.sh index 368a6f7b9e..fbb86a4058 100755 --- a/src/buildextend-legacy-oscontainer.sh +++ b/src/buildextend-legacy-oscontainer.sh @@ -5,10 +5,6 @@ set -euo pipefail # Start VM and call buildah final_outfile=$(realpath "$1"); shift -if [ "$(uname -m)" == 'ppc64le' ]; then - # helps with 9p 'cannot allocate memory' errors on ppc64le - COSA_SUPERMIN_MEMORY=6144 -fi IMAGE_TYPE=legacy-oscontainer prepare_build tmp_outfile=${tmp_builddir}/legacy-oscontainer.ociarchive diff --git a/src/cmd-buildextend-metal b/src/cmd-buildextend-metal index c376c07b5b..c62c2bade3 100755 --- a/src/cmd-buildextend-metal +++ b/src/cmd-buildextend-metal @@ -172,7 +172,7 @@ if jq -re '.["deploy-via-container"]' < "${image_json}"; then deploy_via_container="true" fi container_imgref=$(jq -r '.["container-imgref"]//""' < "${image_json}") -# Nowadays we pull the container across 9p rather than accessing the repo, see +# Nowadays we pull the container across virtiofs rather than accessing the repo, see # https://github.com/openshift/os/issues/594 ostree_container=ostree-unverified-image:oci-archive:$builddir/$(meta_key images.ostree.path) diff --git a/src/cmd-run b/src/cmd-run index a26b4325a2..2351516049 100755 --- a/src/cmd-run +++ b/src/cmd-run @@ -9,7 +9,7 @@ set -euo pipefail # You can exit via either exiting the login session cleanly # in SSH (ctrl-d/exit in bash), or via `poweroff`. # -# If 9p is available and this is started from a coreos-assembler +# If this is started from a coreos-assembler # working directory, the build directory will be mounted readonly # at /var/mnt/workdir, and the tmp/ directory will be read *write* # at /var/mnt/workdir-tmp. This allows you to easily exchange diff --git a/src/cmdlib.sh b/src/cmdlib.sh index 3eb82b2542..7692e6fe5e 100755 --- a/src/cmdlib.sh +++ b/src/cmdlib.sh @@ -676,7 +676,7 @@ meta_key() { } # runvm generates and runs a minimal VM which we use to "bootstrap" our build -# process. It mounts the workdir via 9p. If you need to add new packages into +# process. It mounts the workdir via virtiofs. If you need to add new packages into # the vm, update `vmdeps.txt`. # If you need to debug it, one trick is to change the `-serial file` below # into `-serial stdio`, drop the <&- and virtio-serial stuff and then e.g. add @@ -790,28 +790,16 @@ EOF esac kola_args=(kola qemuexec -m "${COSA_SUPERMIN_MEMORY:-${memory_default}}" --auto-cpus -U --workdir none \ - --console-to-file "${runvm_console}") + --console-to-file "${runvm_console}" --bind-rw "${workdir},workdir") base_qemu_args=(-drive 'if=none,id=root,format=raw,snapshot=on,file='"${vmbuilddir}"'/root,index=1' \ -device 'virtio-blk,drive=root' \ -kernel "${vmbuilddir}/kernel" -initrd "${vmbuilddir}/initrd" \ -no-reboot -nodefaults \ -device virtio-serial \ - -virtfs 'local,id=workdir,path='"${workdir}"',security_model=none,mount_tag=workdir' \ -append "root=UUID=${superminrootfsuuid} console=${DEFAULT_TERMINAL} selinux=1 enforcing=0 autorelabel=1" \ ) - # support local dev cases where src/config is a symlink. Note if you change or extend to this set, - # you also need to update supermin-init-prelude.sh to mount it inside the VM. - for maybe_symlink in "${workdir}"/{src/config,src/yumrepos,builds}; do - if [ -L "${maybe_symlink}" ]; then - # qemu follows symlinks - local bn - bn=$(basename "${maybe_symlink}") - base_qemu_args+=("-virtfs" "local,id=${bn},path=${maybe_symlink},security_model=none,mount_tag=${bn}") - fi - done - if [ -z "${RUNVM_SHELL:-}" ]; then if ! "${kola_args[@]}" -- "${base_qemu_args[@]}" \ -device virtserialport,chardev=virtioserial0,name=cosa-cmdout \ diff --git a/src/compose.sh b/src/compose.sh index 2159f23196..a1dd66e2b4 100644 --- a/src/compose.sh +++ b/src/compose.sh @@ -9,14 +9,14 @@ composejson=cache/repo/compose.json rm -rf "${repo}" ostree init --repo="${repo}" --mode=archive-z2 -# we do need to pull at least the overlay bits over 9p, but it shouldn't be that +# we do need to pull at least the overlay bits over virtiofs, but it shouldn't be that # many files ostree refs --repo tmp/repo overlay --list | \ xargs -r ostree pull-local --repo "${repo}" tmp/repo # And import commit metadata for all refs; this will bring in the previous # build if there is one. Ideally, we'd import the SELinux policy too to take # advantage of https://github.com/coreos/rpm-ostree/pull/1704 but that's yet -# more files over 9p and our pipelines don't have cache anyway (and devs likely +# more files over virtiofs and our pipelines don't have cache anyway (and devs likely # use the privileged path). ostree refs --repo tmp/repo | \ xargs -r ostree pull-local --commit-metadata-only --repo "${repo}" tmp/repo diff --git a/src/create_disk.sh b/src/create_disk.sh index 090b5e7470..0ac8a549df 100755 --- a/src/create_disk.sh +++ b/src/create_disk.sh @@ -77,7 +77,7 @@ if [ -z "$platforms_json" ]; then echo "Missing --platforms-json" >&2 exit 1 fi -# just copy it over to /tmp and work from there to minimize 9p I/O +# just copy it over to /tmp and work from there to minimize virtiofs I/O cp "${platforms_json}" /tmp/platforms.json platforms_json=/tmp/platforms.json platform_grub_cmds=$(jq -r ".${arch}.${platform}.grub_commands // [] | join(\"\\\\n\")" < "${platforms_json}") diff --git a/src/deps.txt b/src/deps.txt index 53e3e8111f..bb5d64e85a 100644 --- a/src/deps.txt +++ b/src/deps.txt @@ -29,7 +29,7 @@ genisoimage make git rpm-build # virt dependencies -libguestfs-tools libguestfs-tools-c /usr/bin/qemu-img qemu-kvm swtpm +libguestfs-tools libguestfs-tools-c virtiofsd /usr/bin/qemu-img qemu-kvm swtpm # And the main arch emulators for cross-arch testing qemu-system-aarch64-core qemu-system-ppc-core qemu-system-s390x-core qemu-system-x86-core diff --git a/src/supermin-init-prelude.sh b/src/supermin-init-prelude.sh index 0ef11ca7a5..fc6bf1d2d8 100644 --- a/src/supermin-init-prelude.sh +++ b/src/supermin-init-prelude.sh @@ -15,8 +15,6 @@ mount -t tmpfs tmpfs /dev/shm # load selinux policy LANG=C /sbin/load_policy -i -# load kernel module for 9pnet_virtio for 9pfs mount -/sbin/modprobe 9pnet_virtio # need fuse module for rofiles-fuse/bwrap during post scripts run /sbin/modprobe fuse @@ -36,19 +34,9 @@ fi umask 002 # set up workdir -# For 9p mounts set msize to 100MiB # https://github.com/coreos/coreos-assembler/issues/2171 mkdir -p "${workdir:?}" -mount -t 9p -o rw,trans=virtio,version=9p2000.L,msize=10485760 workdir "${workdir}" - -# This loop pairs with virtfs setups for qemu in cmdlib.sh. Keep them in sync. -for maybe_symlink in "${workdir}"/{src/config,src/yumrepos,builds}; do - if [ -L "${maybe_symlink}" ]; then - bn=$(basename "${maybe_symlink}") - mkdir -p "$(readlink "${maybe_symlink}")" - mount -t 9p -o rw,trans=virtio,version=9p2000.L,msize=10485760 "${bn}" "${maybe_symlink}" - fi -done +mount -t virtiofs -o rw workdir "${workdir}" mkdir -p "${workdir}"/cache cachedev=$(blkid -lt LABEL=cosa-cache -o device || true)