Skip to content
This repository has been archived by the owner on May 7, 2021. It is now read-only.

Commit

Permalink
kola: Add --max-machines
Browse files Browse the repository at this point in the history
This is only implemented for qemu at the moment, though it'd
be a mostly mechanical change to propagate it to the other
providers.

For our pipeline testing, we need to have a hard cap on the number
of qemu instances we spawn, otherwise we can go over the RAM
allocated to the pod.

Actually the FCOS pipeline today doesn't impose a hard cap, and
my test pipeline in the coreosci (nested GCP virt) ended up bringing
down the node via the OOM killer.

There were a few bugs here; first we were leaking the spawned
qemu instance.  We also need to invoke `Wait()` synchronously in
destruction.

Then, add a dependency on the `golang/x/semaphore` library, and
use it to implement a max limit.

Closes: https://github.com/coreos/mantle/issues/1157
  • Loading branch information
cgwalters committed Jan 14, 2020
1 parent ebb3426 commit f1a758f
Show file tree
Hide file tree
Showing 14 changed files with 244 additions and 12 deletions.
5 changes: 5 additions & 0 deletions cmd/kola/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func init() {
root.PersistentFlags().StringVarP(&kolaPlatform, "platform", "p", "qemu", "VM platform: "+strings.Join(kolaPlatforms, ", "))
root.PersistentFlags().StringVarP(&kola.Options.Distribution, "distro", "b", kolaDistros[0], "Distribution: "+strings.Join(kolaDistros, ", "))
root.PersistentFlags().IntVarP(&kola.TestParallelism, "parallel", "j", 1, "number of tests to run in parallel")
root.PersistentFlags().IntVarP(&kola.Options.MaxMachines, "max-machines", "", -1, "Maximum number of machines (-1 = 'same as parallel', 0 = unlimited)")
sv(&kola.TAPFile, "tapfile", "", "file to write TAP results to")
root.PersistentFlags().BoolVarP(&kola.Options.NoTestExitError, "no-test-exit-error", "T", false, "Don't exit with non-zero if tests fail")
sv(&kola.Options.BaseName, "basename", "kola", "Cluster name prefix")
Expand Down Expand Up @@ -176,6 +177,10 @@ func syncOptions() error {
return fmt.Errorf("unsupport board %q", kola.QEMUOptions.Board)
}

if kola.Options.MaxMachines == -1 {
kola.Options.MaxMachines = kola.TestParallelism
}

if kola.QEMUOptions.DiskImage == "" {
kola.QEMUOptions.DiskImage = image
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ require (
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2
golang.org/x/net v0.0.0-20190311183353-d8887717615a
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
golang.org/x/sys v0.0.0-20190610200419-93c9922d18ae
golang.org/x/text v0.3.2
google.golang.org/api v0.1.0
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,10 @@ golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7O
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
Expand Down
37 changes: 31 additions & 6 deletions platform/flight.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
package platform

import (
"context"
"fmt"
"sync"

"github.com/pborman/uuid"
"golang.org/x/crypto/ssh/agent"
"golang.org/x/sync/semaphore"

"github.com/coreos/mantle/network"
)
Expand All @@ -29,6 +31,8 @@ type BaseFlight struct {
clusterlock sync.Mutex
clustermap map[string]Cluster

machineLock *semaphore.Weighted

name string
platform Name
ctPlatform string
Expand All @@ -47,13 +51,19 @@ func NewBaseFlightWithDialer(opts *Options, platform Name, ctPlatform string, di
return nil, err
}

var machineLock *semaphore.Weighted
if opts.MaxMachines != 0 {
machineLock = semaphore.NewWeighted(int64(opts.MaxMachines))
}

bf := &BaseFlight{
clustermap: make(map[string]Cluster),
name: fmt.Sprintf("%s-%s", opts.BaseName, uuid.New()),
platform: platform,
ctPlatform: ctPlatform,
baseopts: opts,
agent: agent,
clustermap: make(map[string]Cluster),
name: fmt.Sprintf("%s-%s", opts.BaseName, uuid.New()),
machineLock: machineLock,
platform: platform,
ctPlatform: ctPlatform,
baseopts: opts,
agent: agent,
}

return bf, nil
Expand Down Expand Up @@ -89,6 +99,21 @@ func (bf *BaseFlight) DelCluster(c Cluster) {
delete(bf.clustermap, c.Name())
}

func (bf *BaseFlight) AcquireMachineReservation() {
if bf.machineLock == nil {
return
}
bf.machineLock.Acquire(context.TODO(), 1)
}

func (bf *BaseFlight) ReleaseMachineReservation() {
if bf.machineLock == nil {
return
}
bf.machineLock.Release(1)

}

func (bf *BaseFlight) Keys() ([]*agent.Key, error) {
return bf.agent.List()
}
Expand Down
10 changes: 9 additions & 1 deletion platform/machine/unprivqemu/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,18 @@ func (qc *Cluster) NewMachineWithOptions(userdata *conf.UserData, options platfo
}
builder.EnableUsermodeNetworking(22)

qc.flight.AcquireMachineReservation()
destroyMachine := true
defer func() {
if destroyMachine {
qm.Destroy()
}
}()
inst, err := builder.Exec()
if err != nil {
return nil, err
}
qm.inst = *inst

err = util.Retry(6, 5*time.Second, func() error {
var err error
Expand All @@ -128,10 +136,10 @@ func (qc *Cluster) NewMachineWithOptions(userdata *conf.UserData, options platfo
}

if err := platform.StartMachine(qm, qm.journal); err != nil {
qm.Destroy()
return nil, err
}

destroyMachine = false
qc.AddMach(qm)

return qm, nil
Expand Down
1 change: 1 addition & 0 deletions platform/machine/unprivqemu/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ func (m *machine) Destroy() {
}

m.qc.DelMach(m)
m.qc.flight.ReleaseMachineReservation()
}

func (m *machine) ConsoleOutput() string {
Expand Down
2 changes: 2 additions & 0 deletions platform/platform.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ type Options struct {

NoTestExitError bool

MaxMachines int

// OSContainer is an image pull spec that can be given to the pivot service
// in RHCOS machines to perform machine content upgrades.
// When specified additional files & units will be automatically generated
Expand Down
13 changes: 8 additions & 5 deletions platform/qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,19 +111,22 @@ func (inst *QemuInstance) SSHAddress() (string, error) {
}

func (inst *QemuInstance) Destroy() {
if inst.qemu != nil {
if err := inst.qemu.Kill(); err != nil {
plog.Errorf("Error killing qemu instance %v: %v", inst.Pid(), err)
}
inst.qemu.Wait() // Ignore errors
}
if inst.swtpmTmpd != "" {
if inst.swtpm != nil {
inst.swtpm.Kill() // Ignore errors
}
// And ensure it's cleaned up
inst.swtpm.Wait()
if err := os.RemoveAll(inst.swtpmTmpd); err != nil {
plog.Errorf("Error removing swtpm dir: %v", err)
}
}
if inst.qemu != nil {
if err := inst.qemu.Kill(); err != nil {
plog.Errorf("Error killing qemu instance %v: %v", inst.Pid(), err)
}
}
}

// QemuBuilder is a configurator that can then create a qemu instance
Expand Down
3 changes: 3 additions & 0 deletions vendor/golang.org/x/sync/AUTHORS

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions vendor/golang.org/x/sync/CONTRIBUTORS

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions vendor/golang.org/x/sync/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions vendor/golang.org/x/sync/PATENTS

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

127 changes: 127 additions & 0 deletions vendor/golang.org/x/sync/semaphore/semaphore.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions vendor/modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,8 @@ golang.org/x/oauth2/google
golang.org/x/oauth2/internal
golang.org/x/oauth2/jws
golang.org/x/oauth2/jwt
# golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
golang.org/x/sync/semaphore
# golang.org/x/sys v0.0.0-20190610200419-93c9922d18ae
golang.org/x/sys/cpu
golang.org/x/sys/unix
Expand Down

0 comments on commit f1a758f

Please sign in to comment.