Skip to content

Commit

Permalink
qemu: use x-ignore-shared to implement vm template
Browse files Browse the repository at this point in the history
Upstream QEMU has an x-ignore-shared migration capability that works
similarly to our private bypass-shared-memory capability. We can use it to
implement the VM template feature.

Fixes: kata-containers#1798
Depends-on: github.com/kata-containers/packaging#641
Signed-off-by: Peng Tao <[email protected]>
  • Loading branch information
bergwolf authored and Eric Ernst committed Jul 18, 2019
1 parent c192343 commit 0e873b6
Show file tree
Hide file tree
Showing 7 changed files with 189 additions and 91 deletions.
206 changes: 130 additions & 76 deletions virtcontainers/qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,7 @@ func (q *qemu) setupTemplate(knobs *govmmQemu.Knobs, memory *govmmQemu.Memory) g
}

if q.config.BootFromTemplate {
incoming.MigrationType = govmmQemu.MigrationExec
incoming.Exec = "cat " + q.config.DevicesStatePath
incoming.MigrationType = govmmQemu.MigrationDefer
}
}

Expand Down Expand Up @@ -585,6 +584,98 @@ func (q *qemu) vhostFSSocketPath(id string) (string, error) {
return utils.BuildSocketPath(store.RunVMStoragePath, id, vhostFSSocket)
}

// virtiofsdArgs returns the command line arguments used to launch virtiofsd
// for this sandbox.
//
// sockPath is the vhost-user socket path handed to the daemon. The exported
// source directory is the sandbox's own directory under kataHostSharedDir and
// the cache mode is taken from the hypervisor configuration. The final
// argument is "-d" when debug is enabled in the configuration and "-f"
// otherwise.
//
// Note carried over from the original start-up code: the daemon terminates
// when the vhost-user socket connection with QEMU closes, which is why the
// child process is not tracked once start-up has finished.
func (q *qemu) virtiofsdArgs(sockPath string) []string {
	sharedDir := filepath.Join(kataHostSharedDir, q.id)

	// Exactly one of the two mode flags is always passed.
	modeFlag := "-f"
	if q.config.Debug {
		modeFlag = "-d"
	}

	return []string{
		"-o", "vhost_user_socket=" + sockPath,
		"-o", "source=" + sharedDir,
		"-o", "cache=" + q.config.VirtioFSCache,
		modeFlag,
	}
}

// setupVirtiofsd starts the virtiofsd daemon for this sandbox and waits until
// the daemon announces on stderr that it is waiting for the vhost-user socket
// connection.
//
// timeout is the number of seconds allowed for the daemon to become ready. On
// success the returned value is what is left of that budget, in seconds, so
// the caller can apply it to the remaining start-up steps. On failure 0 and
// the error are returned; if the daemon had already been started it is killed.
//
// A background goroutine keeps reading the daemon's stderr for as long as the
// pipe stays open. When reading ends it stops the sandbox and then waits on
// the child process so that it is reaped and its resources are released.
func (q *qemu) setupVirtiofsd(timeout int) (remain int, err error) {
	sockPath, err := q.vhostFSSocketPath(q.id)
	if err != nil {
		return 0, err
	}

	cmd := exec.Command(q.config.VirtioFSDaemon, q.virtiofsdArgs(sockPath)...)
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return 0, err
	}

	if err = cmd.Start(); err != nil {
		return 0, err
	}
	// err is the named result, so this observes the error finally returned
	// by the wait below (including a timeout).
	defer func() {
		if err != nil {
			cmd.Process.Kill()
		}
	}()

	// Wait for socket to become available. The channel is buffered so the
	// reader goroutine never blocks on its single send, even if the waiter
	// has already given up.
	sockReady := make(chan error, 1)
	timeStart := time.Now()
	go func() {
		scanner := bufio.NewScanner(stderr)
		var sent bool
		for scanner.Scan() {
			line := scanner.Text()
			if q.config.Debug {
				q.Logger().WithField("source", "virtiofsd").Debug(line)
			}
			if !sent && strings.Contains(line, "Waiting for vhost-user socket connection...") {
				sockReady <- nil
				sent = true
			}
		}
		if !sent {
			if err := scanner.Err(); err != nil {
				sockReady <- err
			} else {
				sockReady <- fmt.Errorf("virtiofsd did not announce socket connection")
			}
		}
		q.Logger().Info("virtiofsd quits")
		q.stopSandbox()

		// Reap the child. Without a Wait the exited daemon stays a zombie
		// and the resources held by cmd are never released. All reads from
		// the stderr pipe are finished at this point, which is the
		// precondition for calling Wait after StderrPipe.
		if waitErr := cmd.Wait(); waitErr != nil {
			// A non-zero or signalled exit is expected when the daemon
			// was killed; record it for debugging only.
			q.Logger().WithError(waitErr).Debug("virtiofsd exit status")
		}
	}()

	return q.waitVirtiofsd(timeStart, timeout, sockReady,
		fmt.Sprintf("virtiofsd (pid=%d) socket %s", cmd.Process.Pid, sockPath))
}

// waitVirtiofsd waits for a result on ready, giving up after timeout seconds.
//
// start is the instant from which the time budget is measured, and errMsg
// describes what is being waited for; it is embedded in the timeout error.
//
// If ready delivers a non-nil error, or the timeout fires first, 0 and the
// error are returned. Otherwise the number of whole seconds of the budget
// still remaining (never negative) is returned with a nil error.
func (q *qemu) waitVirtiofsd(start time.Time, timeout int, ready chan error, errMsg string) (int, error) {
	budget := time.Duration(timeout) * time.Second

	select {
	case readyErr := <-ready:
		if readyErr != nil {
			return 0, readyErr
		}
	case <-time.After(budget):
		return 0, fmt.Errorf("timed out waiting for %s", errMsg)
	}

	// Charge the time already spent since start against the budget.
	spent := time.Since(start)
	if spent >= budget {
		return 0, nil
	}
	return timeout - int(spent.Seconds()), nil
}

// startSandbox will start the Sandbox's VM.
func (q *qemu) startSandbox(timeout int) error {
span, _ := q.trace("startSandbox")
Expand Down Expand Up @@ -625,81 +716,10 @@ func (q *qemu) startSandbox(timeout int) error {
}()

if q.config.SharedFS == config.VirtioFS {
sockPath, err := q.vhostFSSocketPath(q.id)
timeout, err = q.setupVirtiofsd(timeout)
if err != nil {
return err
}

// The daemon will terminate when the vhost-user socket
// connection with QEMU closes. Therefore we do not keep track
// of this child process after returning from this function.
sourcePath := filepath.Join(kataHostSharedDir, q.id)
args := []string{
"-o", "vhost_user_socket=" + sockPath,
"-o", "source=" + sourcePath,
"-o", "cache=" + q.config.VirtioFSCache}
if q.config.Debug {
args = append(args, "-d")
} else {
args = append(args, "-f")
}
cmd := exec.Command(q.config.VirtioFSDaemon, args...)
stderr, err := cmd.StderrPipe()
if err != nil {
return err
}

if err = cmd.Start(); err != nil {
return err
}
defer func() {
if err != nil {
cmd.Process.Kill()
}
}()

// Wait for socket to become available
sockReady := make(chan error, 1)
timeStart := time.Now()
go func() {
scanner := bufio.NewScanner(stderr)
var sent bool
for scanner.Scan() {
if q.config.Debug {
q.Logger().WithField("source", "virtiofsd").Debug(scanner.Text())
}
if !sent && strings.Contains(scanner.Text(), "Waiting for vhost-user socket connection...") {
sockReady <- nil
sent = true
}
}
if !sent {
if err := scanner.Err(); err != nil {
sockReady <- err
} else {
sockReady <- fmt.Errorf("virtiofsd did not announce socket connection")
}
}
q.Logger().Info("virtiofsd quits")
q.stopSandbox()
}()
timeoutDuration := time.Duration(timeout) * time.Second
select {
case err = <-sockReady:
case <-time.After(timeoutDuration):
err = fmt.Errorf("timed out waiting for virtiofsd (pid=%d) socket %s", cmd.Process.Pid, sockPath)
}
if err != nil {
return err
}

// Now reduce timeout by the elapsed time
elapsed := time.Since(timeStart)
if elapsed < timeoutDuration {
timeout = timeout - int(elapsed.Seconds())
} else {
timeout = 0
}
}

var strErr string
Expand All @@ -709,9 +729,39 @@ func (q *qemu) startSandbox(timeout int) error {
}

err = q.waitSandbox(timeout) // the virtiofsd deferred checks err's value
if err != nil {
return err
}

if q.config.BootFromTemplate {
if err = q.bootFromTemplate(); err != nil {
return err
}
}

return err
}

// bootFromTemplate loads the saved template device state into a VM that was
// started with deferred incoming migration (see setupTemplate).
//
// It opens a QMP session, asks the architecture layer to enable the
// ignore-shared-memory migration capability, starts an incoming migration
// whose source is "cat" of the configured device state file, and then waits
// for the migration to finish. The QMP session is shut down on return.
// The first error encountered is returned unchanged.
func (q *qemu) bootFromTemplate() error {
	if setupErr := q.qmpSetup(); setupErr != nil {
		return setupErr
	}
	defer q.qmpShutdown()

	// The capability has to be set before the incoming migration is started.
	if capErr := q.arch.setIgnoreSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp); capErr != nil {
		q.Logger().WithError(capErr).Error("set migration ignore shared memory")
		return capErr
	}

	incomingURI := fmt.Sprintf("exec:cat %s", q.config.DevicesStatePath)
	if migErr := q.qmpMonitorCh.qmp.ExecuteMigrationIncoming(q.qmpMonitorCh.ctx, incomingURI); migErr != nil {
		return migErr
	}

	return q.waitMigration()
}

// waitSandbox will wait for the Sandbox's VM to be up and running.
func (q *qemu) waitSandbox(timeout int) error {
span, _ := q.trace("waitSandbox")
Expand Down Expand Up @@ -1498,9 +1548,9 @@ func (q *qemu) saveSandbox() error {
// BootToBeTemplate sets the VM to be a template that other VMs can clone from. We would want to
// bypass shared memory when saving the VM to a local file through migration exec.
if q.config.BootToBeTemplate {
err := q.arch.setBypassSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp)
err := q.arch.setIgnoreSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp)
if err != nil {
q.Logger().WithError(err).Error("set migration bypass shared memory")
q.Logger().WithError(err).Error("set migration ignore shared memory")
return err
}
}
Expand All @@ -1511,6 +1561,10 @@ func (q *qemu) saveSandbox() error {
return err
}

return q.waitMigration()
}

func (q *qemu) waitMigration() error {
t := time.NewTimer(qmpMigrationWaitTimeout)
defer t.Stop()
for {
Expand Down
2 changes: 0 additions & 2 deletions virtcontainers/qemu_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ const defaultQemuMachineType = QemuPC

const defaultQemuMachineOptions = "accel=kvm,kernel_irqchip,nvdimm"

const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory"

const qmpMigrationWaitTimeout = 5 * time.Second

var qemuPaths = map[string]string{
Expand Down
10 changes: 6 additions & 4 deletions virtcontainers/qemu_arch_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ type qemuArch interface {
// supportGuestMemoryHotplug returns if the guest supports memory hotplug
supportGuestMemoryHotplug() bool

// setBypassSharedMemoryMigrationCaps set bypass-shared-memory capability for migration
setBypassSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error
// setIgnoreSharedMemoryMigrationCaps sets the x-ignore-shared capability for migration
setIgnoreSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error
}

type qemuArchBase struct {
Expand Down Expand Up @@ -153,6 +153,8 @@ const (

// QemuCCWVirtio is a QEMU virt machine type for s390x
QemuCCWVirtio = "s390-ccw-virtio"

qmpCapMigrationIgnoreShared = "x-ignore-shared"
)

// kernelParamsNonDebug is a list of the default kernel
Expand Down Expand Up @@ -579,10 +581,10 @@ func (q *qemuArchBase) supportGuestMemoryHotplug() bool {
return true
}

func (q *qemuArchBase) setBypassSharedMemoryMigrationCaps(ctx context.Context, qmp *govmmQemu.QMP) error {
func (q *qemuArchBase) setIgnoreSharedMemoryMigrationCaps(ctx context.Context, qmp *govmmQemu.QMP) error {
err := qmp.ExecSetMigrationCaps(ctx, []map[string]interface{}{
{
"capability": qmpCapMigrationBypassSharedMemory,
"capability": qmpCapMigrationIgnoreShared,
"state": true,
},
})
Expand Down
6 changes: 2 additions & 4 deletions virtcontainers/qemu_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ const defaultQemuMachineType = QemuVirt

const qmpMigrationWaitTimeout = 10 * time.Second

const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory"

var defaultQemuMachineOptions = "usb=off,accel=kvm,nvdimm,gic-version=" + getGuestGICVersion()

var qemuPaths = map[string]string{
Expand Down Expand Up @@ -199,7 +197,7 @@ func (q *qemuArm64) appendImage(devices []govmmQemu.Device, path string) ([]govm
return devices, nil
}

func (q *qemuArm64) setBypassSharedMemoryMigrationCaps(_ context.Context, _ *govmmQemu.QMP) error {
// bypass-shared-memory not support in arm64 for now
func (q *qemuArm64) setIgnoreSharedMemoryMigrationCaps(_ context.Context, _ *govmmQemu.QMP) error {
// x-ignore-shared is not supported on arm64 for now
return nil
}
2 changes: 0 additions & 2 deletions virtcontainers/qemu_ppc64le.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ const defaultQemuMachineOptions = "accel=kvm,usb=off,cap-cfpc=broken,cap-sbbc=br

const defaultMemMaxPPC64le = 32256 // Restrict MemMax to 32Gb on PPC64le

const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory"

const qmpMigrationWaitTimeout = 5 * time.Second

var qemuPaths = map[string]string{
Expand Down
5 changes: 2 additions & 3 deletions virtcontainers/qemu_s390x.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ package virtcontainers

import (
"fmt"
"time"

govmmQemu "github.com/intel/govmm/qemu"
"github.com/kata-containers/runtime/virtcontainers/device/config"
"github.com/kata-containers/runtime/virtcontainers/types"
"time"
)

type qemuS390x struct {
Expand All @@ -26,8 +27,6 @@ const defaultQemuMachineOptions = "accel=kvm"

const virtioSerialCCW = "virtio-serial-ccw"

const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory"

const qmpMigrationWaitTimeout = 5 * time.Second

var qemuPaths = map[string]string{
Expand Down
Loading

0 comments on commit 0e873b6

Please sign in to comment.