diff --git a/mantle/cmd/kola/qemuexec.go b/mantle/cmd/kola/qemuexec.go index d3a19431db..72f957d9a8 100644 --- a/mantle/cmd/kola/qemuexec.go +++ b/mantle/cmd/kola/qemuexec.go @@ -54,8 +54,9 @@ var ( bindro []string bindrw []string - directIgnition bool - forceConfigInjection bool + directIgnition bool + forceConfigInjection bool + propagateInitramfsFailure bool ) func init() { @@ -72,6 +73,8 @@ func init() { cmdQemuExec.Flags().StringArrayVar(&bindro, "bind-ro", nil, "Mount readonly via 9pfs a host directory (use --bind-ro=/path/to/host,/var/mnt/guest") cmdQemuExec.Flags().StringArrayVar(&bindrw, "bind-rw", nil, "Same as above, but writable") cmdQemuExec.Flags().BoolVarP(&forceConfigInjection, "inject-ignition", "", false, "Force injecting Ignition config using guestfs") + cmdQemuExec.Flags().BoolVar(&propagateInitramfsFailure, "propagate-initramfs-failure", false, "Error out if the system fails in the initramfs") + } func renderFragments(config v3types.Config) (*v3types.Config, error) { @@ -202,8 +205,13 @@ func runQemuExec(cmd *cobra.Command, args []string) error { } defer inst.Destroy() - // Ignore errors - _ = inst.Wait() - - return nil + if propagateInitramfsFailure { + err = inst.WaitAll() + if err != nil { + return err + } + return nil + } else { + return inst.Wait() + } } diff --git a/mantle/cmd/kola/testiso.go b/mantle/cmd/kola/testiso.go index 5455ea873d..beaaaf1059 100644 --- a/mantle/cmd/kola/testiso.go +++ b/mantle/cmd/kola/testiso.go @@ -193,7 +193,7 @@ func runTestIso(cmd *cobra.Command, args []string) error { func awaitCompletion(inst *platform.QemuInstance, qchan *os.File, expected []string) error { errchan := make(chan error) go func() { - if err := inst.Wait(); err != nil { + if err := inst.WaitAll(); err != nil { errchan <- err } time.Sleep(1 * time.Minute) diff --git a/mantle/kola/tests/ignition/qemufailure.go b/mantle/kola/tests/ignition/qemufailure.go new file mode 100644 index 0000000000..e2c6c18eb2 --- /dev/null +++ b/mantle/kola/tests/ignition/qemufailure.go @@ -0,0 +1,105 @@ +// Copyright 2020 Red Hat, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ignition + +import ( + "fmt" + "os" + "time" + + ignv3types "github.com/coreos/ignition/v2/config/v3_0/types" + "github.com/pkg/errors" + + "github.com/coreos/mantle/kola" + "github.com/coreos/mantle/kola/cluster" + "github.com/coreos/mantle/kola/register" + "github.com/coreos/mantle/platform" + "github.com/coreos/mantle/util" +) + +func init() { + register.RegisterTest(®ister.Test{ + Name: "coreos.ignition.failure", + Run: runIgnitionFailure, + ClusterSize: 0, + Platforms: []string{"qemu-unpriv"}, + }) +} + +func runIgnitionFailure(c cluster.TestCluster) { + if err := ignitionFailure(c); err != nil { + c.Fatal(err.Error()) + } +} + +func ignitionFailure(c cluster.TestCluster) error { + // TODO remove this once the feature lands + c.H.Skip("Needs https://github.com/coreos/ignition-dracut/pull/146") + // We can't create files in / due to the immutable bit OSTree creates, so + // this is a convenient way to test Ignition failure. + failConfig := ignv3types.Config{ + Ignition: ignv3types.Ignition{ + Version: "3.0.0", + }, + Storage: ignv3types.Storage{ + Files: []ignv3types.File{ + { + Node: ignv3types.Node{ + Path: "/notwritable.txt", + }, + FileEmbedded1: ignv3types.FileEmbedded1{ + Contents: ignv3types.FileContents{ + Source: util.StrToPtr("data:,hello%20world%0A"), + }, + Mode: util.IntToPtr(420), + }, + }, + }, + }, + } + builder := platform.NewBuilder() + builder.SetConfig(failConfig, kola.Options.IgnitionVersion == "v2") + builder.AddPrimaryDisk(&platform.Disk{ + BackingFile: kola.QEMUOptions.DiskImage, + }) + builder.Memory = 1024 + inst, err := builder.Exec() + if err != nil { + return err + } + defer inst.Destroy() + errchan := make(chan error) + go func() { + err := inst.WaitAll() + if err == nil { + err = fmt.Errorf("Ignition unexpectedly succeeded") + } else if err == platform.ErrInitramfsEmergency { + // The expected case + err = nil + } else { + err = errors.Wrapf(err, "expected initramfs emergency.target error") + } + errchan <- err + }() + go func() { + time.Sleep(2 * time.Minute) + proc := os.Process{ + Pid: inst.Pid(), + } + proc.Kill() + errchan <- errors.New("timed out waiting for initramfs error") + }() + return <-errchan +} diff --git a/mantle/platform/machine/aws/machine.go b/mantle/platform/machine/aws/machine.go index f97705d7a4..e178917605 100644 --- a/mantle/platform/machine/aws/machine.go +++ b/mantle/platform/machine/aws/machine.go @@ -64,6 +64,10 @@ func (am *machine) SSH(cmd string) ([]byte, []byte, error) { return am.cluster.SSH(am, cmd) } +func (am *machine) IgnitionError() error { + return nil +} + func (am *machine) Reboot() error { return platform.RebootMachine(am, am.journal) } diff --git a/mantle/platform/machine/azure/machine.go b/mantle/platform/machine/azure/machine.go index b9ef0002dd..3d58847a41 100644 --- a/mantle/platform/machine/azure/machine.go +++ b/mantle/platform/machine/azure/machine.go @@ -74,6 +74,10 @@ func (am *machine) SSH(cmd string) ([]byte, []byte, error) { return am.cluster.SSH(am, cmd) } +func (am *machine) IgnitionError() error { + return nil +} + // Re-fetch the Public & Private IP address for the event that it's changed during the reboot func (am *machine) refetchIPs() error { var err error diff --git a/mantle/platform/machine/do/machine.go b/mantle/platform/machine/do/machine.go index beff9df18f..24e413e609 100644 --- a/mantle/platform/machine/do/machine.go +++ b/mantle/platform/machine/do/machine.go @@ -61,6 +61,10 @@ func (dm *machine) SSH(cmd string) ([]byte, []byte, error) { return dm.cluster.SSH(dm, cmd) } +func (dm *machine) IgnitionError() error { + return nil +} + func (dm *machine) Reboot() error { return platform.RebootMachine(dm, dm.journal) } diff --git a/mantle/platform/machine/esx/machine.go b/mantle/platform/machine/esx/machine.go index 793fff998d..b6811c0adb 100644 --- a/mantle/platform/machine/esx/machine.go +++ b/mantle/platform/machine/esx/machine.go @@ -62,6 +62,10 @@ func (em *machine) SSH(cmd string) ([]byte, []byte, error) { return em.cluster.SSH(em, cmd) } +func (em *machine) IgnitionError() error { + return nil +} + func (em *machine) Reboot() error { return platform.RebootMachine(em, em.journal) } diff --git a/mantle/platform/machine/gcloud/machine.go b/mantle/platform/machine/gcloud/machine.go index d57bcc7c8f..89f32d8358 100644 --- a/mantle/platform/machine/gcloud/machine.go +++ b/mantle/platform/machine/gcloud/machine.go @@ -62,6 +62,10 @@ func (gm *machine) SSH(cmd string) ([]byte, []byte, error) { return gm.gc.SSH(gm, cmd) } +func (gm *machine) IgnitionError() error { + return nil +} + func (gm *machine) Reboot() error { return platform.RebootMachine(gm, gm.journal) } diff --git a/mantle/platform/machine/openstack/machine.go b/mantle/platform/machine/openstack/machine.go index 7d19b47062..33655251ad 100644 --- a/mantle/platform/machine/openstack/machine.go +++ b/mantle/platform/machine/openstack/machine.go @@ -83,6 +83,10 @@ func (om *machine) SSH(cmd string) ([]byte, []byte, error) { return om.cluster.SSH(om, cmd) } +func (om *machine) IgnitionError() error { + return nil +} + func (om *machine) Reboot() error { return platform.RebootMachine(om, om.journal) } diff --git a/mantle/platform/machine/packet/machine.go b/mantle/platform/machine/packet/machine.go index 10768b1300..de6bb1384f 100644 --- a/mantle/platform/machine/packet/machine.go +++ b/mantle/platform/machine/packet/machine.go @@ -61,6 +61,10 @@ func (pm *machine) SSH(cmd string) ([]byte, []byte, error) { return pm.cluster.SSH(pm, cmd) } +func (pm *machine) IgnitionError() error { + return nil +} + func (pm *machine) Reboot() error { return platform.RebootMachine(pm, pm.journal) } diff --git a/mantle/platform/machine/unprivqemu/machine.go b/mantle/platform/machine/unprivqemu/machine.go index 7fb8a9b093..d9aeeec2b5 100644 --- a/mantle/platform/machine/unprivqemu/machine.go +++ b/mantle/platform/machine/unprivqemu/machine.go @@ -61,6 +61,15 @@ func (m *machine) SSH(cmd string) ([]byte, []byte, error) { return m.qc.SSH(m, cmd) } +func (m *machine) IgnitionError() error { + _, err := m.inst.WaitIgnitionError() + if err != nil { + return err + } + // TODO render buf + return platform.ErrInitramfsEmergency +} + func (m *machine) Reboot() error { return platform.RebootMachine(m, m.journal) } diff --git a/mantle/platform/platform.go b/mantle/platform/platform.go index a723d0a447..aca335fc3a 100644 --- a/mantle/platform/platform.go +++ b/mantle/platform/platform.go @@ -51,6 +51,9 @@ type Machine interface { // ID returns the plaform-specific machine identifier. ID() string + // IgnitionError returns an error if the machine failed in Ignition + IgnitionError() error + // IP returns the machine's public IP. IP() string diff --git a/mantle/platform/qemu.go b/mantle/platform/qemu.go index b50fc4ab66..3a07bd9e4a 100644 --- a/mantle/platform/qemu.go +++ b/mantle/platform/qemu.go @@ -15,6 +15,7 @@ package platform import ( + "bufio" "encoding/json" "fmt" "io" @@ -37,6 +38,10 @@ import ( "github.com/pkg/errors" ) +var ( + ErrInitramfsEmergency = errors.New("entered emergency.target in initramfs") +) + type MachineOptions struct { AdditionalDisks []Disk } @@ -63,6 +68,8 @@ type QemuInstance struct { swtpm exec.Cmd tmpFiles []string nbdServers []exec.Cmd + + journalPipe *os.File } func (inst *QemuInstance) Pid() int { @@ -127,6 +134,7 @@ func (inst *QemuInstance) SSHAddress() (string, error) { return "", fmt.Errorf("didn't find an address") } +// Wait for the qemu process to exit func (inst *QemuInstance) Wait() error { if inst.qemu != nil { err := inst.qemu.Wait() @@ -136,10 +144,69 @@ func (inst *QemuInstance) Wait() error { return nil } +// WaitIgnitionError will only return if the instance +// failed inside the initramfs. The resulting string will +// be a newline-delimited stream of JSON strings, as returned +// by `journalctl -o json`. +func (inst *QemuInstance) WaitIgnitionError() (string, error) { + b := bufio.NewReaderSize(inst.journalPipe, 64768) + var r strings.Builder + iscorrupted := false + _, err := b.Peek(1) + if err != nil { + if err == io.EOF { + return "", nil + } + return "", errors.Wrapf(err, "Reading from journal") + } + for { + line, prefix, err := b.ReadLine() + if err != nil { + return r.String(), errors.Wrapf(err, "Reading from journal channel") + } + if prefix { + iscorrupted = true + } + if len(line) == 0 || string(line) == "{}" { + break + } + r.Write(line) + } + if iscorrupted { + return r.String(), fmt.Errorf("journal was truncated due to overly long line") + } + return r.String(), nil +} + +// WaitAll wraps the process exit as well as WaitIgnitionError, +// returning an error if either fail. +func (inst *QemuInstance) WaitAll() error { + c := make(chan error) + go func() { + buf, err := inst.WaitIgnitionError() + if err != nil { + c <- err + } else { + // TODO parse buf and try to nicely render something + if buf != "" { + c <- ErrInitramfsEmergency + } + } + }() + go func() { + c <- inst.Wait() + }() + return <-c +} + func (inst *QemuInstance) Destroy() { for _, fN := range inst.tmpFiles { os.RemoveAll(fN) } + if inst.journalPipe != nil { + inst.journalPipe.Close() + inst.journalPipe = nil + } // check if qemu is dead before trying to kill it if inst.qemu != nil { if err := inst.qemu.Kill(); err != nil { @@ -885,6 +952,17 @@ func (builder *QemuBuilder) Exec() (*QemuInstance, error) { "-tpmdev", "emulator,id=tpm0,chardev=chrtpm", "-device", "tpm-tis,tpmdev=tpm0") } + // Set up the virtio channel to get Ignition failures by default + journalPipeR, journalPipeW, err := os.Pipe() + if err != nil { + return nil, errors.Wrapf(err, "creating journal pipe") + } + inst.journalPipe = journalPipeR + argv = append(argv, "-device", "virtio-serial") + // https://www.redhat.com/archives/libvir-list/2015-December/msg00305.html + argv = append(argv, "-chardev", fmt.Sprintf("file,id=ignition-dracut,path=%s,append=on", builder.AddFd(journalPipeW))) + argv = append(argv, "-device", "virtserialport,chardev=ignition-dracut,name=com.coreos.ignition.journal") + fdnum := 3 // first additional file starts at position 3 for i, _ := range builder.fds { fdset := i + 1 // Start at 1 diff --git a/mantle/platform/util.go b/mantle/platform/util.go index f7c97dd272..4b48d37877 100644 --- a/mantle/platform/util.go +++ b/mantle/platform/util.go @@ -183,7 +183,19 @@ func StartMachineAfterReboot(m Machine, j *Journal, oldBootId string) error { // StartMachine will start a given machine, provided the machine's journal. func StartMachine(m Machine, j *Journal) error { - return StartMachineAfterReboot(m, j, "") + errchan := make(chan error) + go func() { + err := m.IgnitionError() + if err != nil { + plog.Infof("machine %s entered emergency.target in initramfs: %v", m.ID(), err) + errchan <- err + } + }() + go func() { + // This one ends up connecting to the journal via ssh + errchan <- StartMachineAfterReboot(m, j, "") + }() + return <-errchan } func GetMachineBootId(m Machine) (string, error) {