Added a very simple stress test which scales fleets up/down repeatedly, and a basic stress test harness.

The same test is used during regular e2e tests, except it runs just a few iterations on smaller fleets.

To run the stress test, invoke `make stress-test-e2e`, optionally passing `STRESS_TEST_LEVEL`, which controls the fleet sizes used (1..100, defaults to 20), e.g. `make stress-test-e2e STRESS_TEST_LEVEL=50`. Depending on the stress test level, you may need a cluster with a lot of capacity: level n requires room for up to 100*n simple-udp game servers.

By convention, `make stress-test-e2e` runs all test cases whose names include 'StressTest' and ignores everything else.
jkowalski committed Feb 7, 2019
1 parent 7b6cf3b commit f3ed5f7
Showing 5 changed files with 186 additions and 3 deletions.
18 changes: 18 additions & 0 deletions build/Makefile
@@ -48,6 +48,10 @@ GCP_BUCKET_CHARTS ?= agones-chart
MINIKUBE_PROFILE ?= agones
GO_BUILD_TAGS ?= none

# Specify the stress test level, 1..100.
# STRESS_TEST_LEVEL=n requires capacity for between 50*n and 100*n simple-udp
# Game Servers (the stress test creates 10 fleets of 10*n replicas each).
STRESS_TEST_LEVEL=20

# kind cluster name to use
KIND_PROFILE ?= agones
KIND_CONTAINER_NAME=kind-$(KIND_PROFILE)-control-plane
@@ -217,6 +221,16 @@ test-e2e: $(ensure-build-image)
--gameserver-image=$(GS_TEST_IMAGE) \
--pullsecret=$(IMAGE_PULL_SECRET)

# Runs end-to-end stress tests on the currently configured cluster.
# For minikube, use the minikube-stress-test-e2e target instead.
stress-test-e2e: $(ensure-build-image)
$(GO_TEST) $(agones_package)/test/e2e $(ARGS) $(GO_E2E_TEST_ARGS) \
-timeout 1h \
-run '.*StressTest.*' \
--gameserver-image=$(GS_TEST_IMAGE) \
--pullsecret=$(IMAGE_PULL_SECRET) \
--stress $(STRESS_TEST_LEVEL)

# Run test on install yaml - make sure there is no change
# mostly this is for CI
test-install-yaml:
@@ -610,6 +624,10 @@ minikube-transfer-image:
minikube-test-e2e: DOCKER_RUN_ARGS=--network=host -v $(minikube_cert_mount)
minikube-test-e2e: minikube-agones-profile test-e2e

# Runs stress tests against our minikube
minikube-stress-test-e2e: DOCKER_RUN_ARGS=--network=host -v $(minikube_cert_mount)
minikube-stress-test-e2e: minikube-agones-profile stress-test-e2e

# prometheus on minikube
# we have to disable PVC as it's not supported on minikube.
minikube-setup-prometheus:
97 changes: 97 additions & 0 deletions test/e2e/fleet_test.go
@@ -640,6 +640,103 @@ func TestCreateFleetAndUpdateScaleSubresource(t *testing.T) {
framework.WaitForFleetCondition(t, flt, e2e.FleetReadyCount(initialReplicas))
}

// TestScaleUpAndDownInParallelStressTest creates N fleets, half of which start at fleetSize
// replicas and the other half at zero, then scales them up/down 3 times in parallel, expecting
// each fleet to reach the desired number of ready replicas every time.
// This test is also used as a stress test with 'make stress-test-e2e', in which case it creates
// many more fleets of bigger sizes and runs many more repetitions.
func TestScaleUpAndDownInParallelStressTest(t *testing.T) {
t.Parallel()

alpha1 := framework.AgonesClient.StableV1alpha1()
fleetCount := 2
fleetSize := int32(10)
repeatCount := 3
deadline := time.Now().Add(1 * time.Minute)

logrus.WithField("fleetCount", fleetCount).
WithField("fleetSize", fleetSize).
WithField("repeatCount", repeatCount).
WithField("deadline", deadline).
Info("starting scale up/down test")

if framework.StressTestLevel > 0 {
fleetSize = 10 * int32(framework.StressTestLevel)
repeatCount = 10
fleetCount = 10
deadline = time.Now().Add(45 * time.Minute)
}

var fleets []*v1alpha1.Fleet

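// AddSample is mutex-protected (see perf.go), so both collectors can safely be
// shared by the scaling goroutines spawned below.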
var scaleUpResults e2e.PerfResults
var scaleDownResults e2e.PerfResults

for fleetNumber := 0; fleetNumber < fleetCount; fleetNumber++ {
flt := defaultFleet()
flt.ObjectMeta.GenerateName = fmt.Sprintf("scale-fleet-%v-", fleetNumber)
if fleetNumber%2 == 0 {
// even-numbered fleets start at fleetSize and are scaled down to zero and back.
flt.Spec.Replicas = fleetSize
} else {
// odd-numbered fleets start at zero and are scaled up to fleetSize and back.
flt.Spec.Replicas = 0
}

flt, err := alpha1.Fleets(defaultNs).Create(flt)
if assert.Nil(t, err) {
defer alpha1.Fleets(defaultNs).Delete(flt.ObjectMeta.Name, nil) // nolint:errcheck
}
fleets = append(fleets, flt)
}

// wait for initial fleet conditions.
for fleetNumber, flt := range fleets {
if fleetNumber%2 == 0 {
framework.WaitForFleetCondition(t, flt, e2e.FleetReadyCount(fleetSize))
} else {
framework.WaitForFleetCondition(t, flt, e2e.FleetReadyCount(0))
}
}

var wg sync.WaitGroup

for fleetNumber, flt := range fleets {
wg.Add(1)
go func(fleetNumber int, flt *v1alpha1.Fleet) {
defer wg.Done()
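// Turn a panic inside this goroutine into a test failure instead of
// crashing the whole test binary.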
defer func() {
if err := recover(); err != nil {
t.Errorf("recovered panic: %v", err)
}
}()

if fleetNumber%2 == 0 {
scaleDownResults.AddSample(scaleAndWait(t, flt, 0))
}
for i := 0; i < repeatCount; i++ {
if time.Now().After(deadline) {
break
}
scaleUpResults.AddSample(scaleAndWait(t, flt, fleetSize))
scaleDownResults.AddSample(scaleAndWait(t, flt, 0))
}
}(fleetNumber, flt)
}

wg.Wait()

scaleUpResults.Report(fmt.Sprintf("scale up 0 to %v with %v fleets", fleetSize, fleetCount))
scaleDownResults.Report(fmt.Sprintf("scale down %v to 0 with %v fleets", fleetSize, fleetCount))
}

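// scaleAndWait scales the fleet to fleetSize, waits until that many replicas
// are ready, and returns the elapsed time.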
func scaleAndWait(t *testing.T, flt *v1alpha1.Fleet, fleetSize int32) time.Duration {
t0 := time.Now()
scaleFleetSubresource(t, flt, fleetSize)
framework.WaitForFleetCondition(t, flt, e2e.FleetReadyCount(fleetSize))
return time.Since(t0)
}

// scaleFleetPatch creates a patch to apply to a Fleet.
// Easier for testing, as it removes object generational issues.
func scaleFleetPatch(t *testing.T, f *v1alpha1.Fleet, scale int32) *v1alpha1.Fleet {
6 changes: 4 additions & 2 deletions test/e2e/framework/framework.go
@@ -48,10 +48,11 @@ type Framework struct {
AgonesClient versioned.Interface
GameServerImage string
PullSecret string
StressTestLevel int
}

// New sets up a testing framework using a kubeconfig path and the game server image to use for testing.
func New(kubeconfig, gsimage string, pullSecret string) (*Framework, error) {
func New(kubeconfig, gsimage string, pullSecret string, stressTestLevel int) (*Framework, error) {
config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
if err != nil {
return nil, errors.Wrap(err, "build config from flags failed")
@@ -72,6 +73,7 @@ func New(kubeconfig, gsimage string, pullSecret string) (*Framework, error) {
AgonesClient: agonesClient,
GameServerImage: gsimage,
PullSecret: pullSecret,
StressTestLevel: stressTestLevel,
}, nil
}

@@ -137,7 +139,7 @@ func (f *Framework) WaitForFleetCondition(t *testing.T, flt *v1alpha1.Fleet, con
})
if err != nil {
logrus.WithField("fleet", flt.Name).WithError(err).Info("error waiting for fleet condition")
t.Fatal("error waiting for fleet condition")
t.Fatalf("error waiting for fleet condition on fleet %v", flt.Name)
}
}

65 changes: 65 additions & 0 deletions test/e2e/framework/perf.go
@@ -0,0 +1,65 @@
package framework

import (
"sort"
"sync"
"time"

"github.com/sirupsen/logrus"
)

// PerfResults aggregates performance test results.
// The AddSample() method is safe for concurrent use by multiple goroutines.
type PerfResults struct {
mu sync.Mutex
samples []time.Duration

firstSampleTime time.Time
lastSampleTime time.Time
}

// AddSample adds a single time measurement.
func (p *PerfResults) AddSample(d time.Duration) {
p.mu.Lock()
defer p.mu.Unlock()

n := time.Now()
if len(p.samples) == 0 {
p.firstSampleTime = n
}
p.lastSampleTime = n
p.samples = append(p.samples, d)
}

// Report outputs a performance report to the log.
func (p *PerfResults) Report(name string) {
if len(p.samples) == 0 {
return
}

sort.Slice(p.samples, func(i, j int) bool {
return p.samples[i] < p.samples[j]
})

var sum time.Duration
for _, s := range p.samples {
sum += s
}

avg := time.Duration(int64(sum) / int64(len(p.samples)))
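// Percentile indices are computed as len*permille/1001; since 999 < 1001,
// every index stays strictly below len(p.samples), even for tiny sample counts.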
logrus.
WithField("avg", avg).
WithField("count", len(p.samples)).
WithField("min", p.samples[0].Seconds()).
WithField("max", p.samples[len(p.samples)-1].Seconds()).
WithField("p50", p.samples[len(p.samples)*500/1001].Seconds()).
WithField("p90", p.samples[len(p.samples)*900/1001].Seconds()).
WithField("p95", p.samples[len(p.samples)*950/1001].Seconds()).
WithField("p99", p.samples[len(p.samples)*990/1001].Seconds()).
WithField("p999", p.samples[len(p.samples)*999/1001].Seconds()).
WithField("duration", p.lastSampleTime.Sub(p.firstSampleTime).Seconds()).
Info(name)

// TODO - use something like Fortio ("fortio.org/fortio/stats") to
// generate histogram for long-term storage and analysis.
}
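For context, here is a minimal sketch of how `PerfResults` is meant to be consumed. The import path and the helper function are assumptions for illustration; only `PerfResults`, `AddSample`, and `Report` come from the diff above.

```go
package framework_test

import (
	"sync"
	"time"

	e2e "agones.dev/agones/test/e2e/framework" // assumed import path
)

// timeConcurrentWork is a hypothetical helper: it times n concurrent
// operations and logs their aggregate latency percentiles.
func timeConcurrentWork(n int) {
	var results e2e.PerfResults
	var wg sync.WaitGroup

	for i := 0; i < n; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			t0 := time.Now()
			time.Sleep(10 * time.Millisecond) // stand-in for the operation being measured
			results.AddSample(time.Since(t0)) // safe to call from many goroutines
		}()
	}

	wg.Wait()
	results.Report("example operation") // logs count, avg, min/max, and percentiles
}
```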
3 changes: 2 additions & 1 deletion test/e2e/main_test.go
@@ -37,6 +37,7 @@ func TestMain(m *testing.M) {
"gameserver image to use for those tests, gcr.io/agones-images/udp-server:0.6")
pullSecret := flag.String("pullsecret", "",
"optional secret to be used for pulling the gameserver and/or Agones SDK sidecar images")
stressTestLevel := flag.Int("stress", 0, "stress test level 1-100 (0 disables stress tests)")

flag.Parse()

@@ -45,7 +46,7 @@
exitCode int
)

if framework, err = e2eframework.New(*kubeconfig, *gsimage, *pullSecret); err != nil {
if framework, err = e2eframework.New(*kubeconfig, *gsimage, *pullSecret, *stressTestLevel); err != nil {
log.Printf("failed to setup framework: %v\n", err)
os.Exit(1)
}
