Skip to content

Commit

Permalink
Merge pull request vmware-tanzu#7090 from qiuming-best/perf-test-0
Browse files Browse the repository at this point in the history
Enhance perf test
  • Loading branch information
qiuming-best authored Nov 27, 2023
2 parents 7320bb7 + 507157f commit ccd3f22
Show file tree
Hide file tree
Showing 16 changed files with 261 additions and 63 deletions.
7 changes: 7 additions & 0 deletions pkg/cmd/cli/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type Options struct {
UseVolumeSnapshots bool
DefaultRepoMaintenanceFrequency time.Duration
GarbageCollectionFrequency time.Duration
PodVolumeOperationTimeout time.Duration
Plugins flag.StringArray
NoDefaultBackupLocation bool
CRDsOnly bool
Expand Down Expand Up @@ -116,6 +117,7 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.Wait, "wait", o.Wait, "Wait for Velero deployment to be ready. Optional.")
flags.DurationVar(&o.DefaultRepoMaintenanceFrequency, "default-repo-maintain-frequency", o.DefaultRepoMaintenanceFrequency, "How often 'maintain' is run for backup repositories by default. Optional.")
flags.DurationVar(&o.GarbageCollectionFrequency, "garbage-collection-frequency", o.GarbageCollectionFrequency, "How often the garbage collection runs for expired backups.(default 1h)")
flags.DurationVar(&o.PodVolumeOperationTimeout, "pod-volume-operation-timeout", o.PodVolumeOperationTimeout, "How long to wait for pod volume operations to complete before timing out(default 4h). Optional.")
flags.Var(&o.Plugins, "plugins", "Plugin container images to install into the Velero Deployment")
flags.BoolVar(&o.CRDsOnly, "crds-only", o.CRDsOnly, "Only generate CustomResourceDefinition resources. Useful for updating CRDs for an existing Velero install.")
flags.StringVar(&o.CACertFile, "cacert", o.CACertFile, "File containing a certificate bundle to use when verifying TLS connections to the object store. Optional.")
Expand Down Expand Up @@ -209,6 +211,7 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
VSLConfig: o.VolumeSnapshotConfig.Data(),
DefaultRepoMaintenanceFrequency: o.DefaultRepoMaintenanceFrequency,
GarbageCollectionFrequency: o.GarbageCollectionFrequency,
PodVolumeOperationTimeout: o.PodVolumeOperationTimeout,
Plugins: o.Plugins,
NoDefaultBackupLocation: o.NoDefaultBackupLocation,
CACertData: caCertData,
Expand Down Expand Up @@ -426,5 +429,9 @@ func (o *Options) Validate(c *cobra.Command, args []string, f client.Factory) er
return errors.New("--garbage-collection-frequency must be non-negative")
}

if o.PodVolumeOperationTimeout < 0 {
return errors.New("--pod-volume-operation-timeout must be non-negative")
}

return nil
}
11 changes: 11 additions & 0 deletions pkg/install/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type podTemplateConfig struct {
withSecret bool
defaultRepoMaintenanceFrequency time.Duration
garbageCollectionFrequency time.Duration
podVolumeOperationTimeout time.Duration
plugins []string
features []string
defaultVolumesToFsBackup bool
Expand Down Expand Up @@ -115,6 +116,12 @@ func WithGarbageCollectionFrequency(val time.Duration) podTemplateOption {
}
}

func WithPodVolumeOperationTimeout(val time.Duration) podTemplateOption {
return func(c *podTemplateConfig) {
c.podVolumeOperationTimeout = val
}
}

func WithPlugins(plugins []string) podTemplateOption {
return func(c *podTemplateConfig) {
c.plugins = plugins
Expand Down Expand Up @@ -212,6 +219,10 @@ func Deployment(namespace string, opts ...podTemplateOption) *appsv1.Deployment
args = append(args, fmt.Sprintf("--garbage-collection-frequency=%v", c.garbageCollectionFrequency))
}

if c.podVolumeOperationTimeout > 0 {
args = append(args, fmt.Sprintf("--fs-backup-timeout=%v", c.podVolumeOperationTimeout))
}

deployment := &appsv1.Deployment{
ObjectMeta: objectMeta(namespace, "velero"),
TypeMeta: metav1.TypeMeta{
Expand Down
2 changes: 2 additions & 0 deletions pkg/install/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ type VeleroOptions struct {
VSLConfig map[string]string
DefaultRepoMaintenanceFrequency time.Duration
GarbageCollectionFrequency time.Duration
PodVolumeOperationTimeout time.Duration
Plugins []string
NoDefaultBackupLocation bool
CACertData []byte
Expand Down Expand Up @@ -335,6 +336,7 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList {
WithDefaultRepoMaintenanceFrequency(o.DefaultRepoMaintenanceFrequency),
WithServiceAccountName(serviceAccountName),
WithGarbageCollectionFrequency(o.GarbageCollectionFrequency),
WithPodVolumeOperationTimeout(o.PodVolumeOperationTimeout),
WithUploaderType(o.UploaderType),
}

Expand Down
2 changes: 2 additions & 0 deletions test/e2e/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/onsi/ginkgo/reporters"
. "github.com/onsi/gomega"

"github.com/vmware-tanzu/velero/pkg/cmd/cli/install"
. "github.com/vmware-tanzu/velero/test"
. "github.com/vmware-tanzu/velero/test/e2e/backup"
. "github.com/vmware-tanzu/velero/test/e2e/backups"
Expand All @@ -49,6 +50,7 @@ import (
)

func init() {
VeleroCfg.Options = &install.Options{}
flag.StringVar(&VeleroCfg.CloudProvider, "cloud-provider", "", "cloud that Velero will be installed into. Required.")
flag.StringVar(&VeleroCfg.ObjectStoreProvider, "object-store-provider", "", "provider of object store plugin. Required if cloud-provider is kind, otherwise ignored.")
flag.StringVar(&VeleroCfg.BSLBucket, "bucket", "", "name of the object storage bucket where backups from e2e tests should be stored. Required.")
Expand Down
24 changes: 23 additions & 1 deletion test/perf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,17 @@ NFS_SERVER_PATH ?=
UPLOADER_TYPE ?=
TEST_CASE_DESCRIBE ?= 'velero performance test'
BACKUP_FOR_RESTORE ?=
Delete_Cluster_Resource ?= false
Debug_Velero_Pod_Restart ?= false
NODE_AGENT_POD_CPU_LIMIT ?= 4
NODE_AGENT_POD_MEM_LIMIT ?= 4Gi
NODE_AGENT_POD_CPU_REQUEST ?= 2
NODE_AGENT_POD_MEM_REQUEST ?= 2Gi
VELERO_POD_CPU_LIMIT ?= 4
VELERO_POD_MEM_LIMIT ?= 4Gi
VELERO_POD_CPU_REQUEST ?= 2
VELERO_POD_MEM_REQUEST ?= 2Gi
POD_VOLUME_OPERATION_TIMEOUT ?= 6h

.PHONY:ginkgo
ginkgo: # Make sure ginkgo is in $GOPATH/bin
Expand Down Expand Up @@ -110,7 +121,18 @@ run: ginkgo
-uploader-type=$(UPLOADER_TYPE) \
-nfs-server-path=$(NFS_SERVER_PATH) \
-test-case-describe=$(TEST_CASE_DESCRIBE) \
-backup-for-restore=$(BACKUP_FOR_RESTORE)
-backup-for-restore=$(BACKUP_FOR_RESTORE) \
-delete-cluster-resource=$(Delete_Cluster_Resource) \
-debug-velero-pod-restart=$(Debug_Velero_Pod_Restart) \
-node-agent-pod-cpu-limit=$(NODE_AGENT_POD_CPU_LIMIT) \
-node-agent-pod-mem-limit=$(NODE_AGENT_POD_MEM_LIMIT) \
-node-agent-pod-cpu-request=$(NODE_AGENT_POD_CPU_REQUEST) \
-node-agent-pod-mem-request=$(NODE_AGENT_POD_MEM_REQUEST) \
-velero-pod-cpu-limit=$(VELERO_POD_CPU_LIMIT) \
-velero-pod-mem-limit=$(VELERO_POD_MEM_LIMIT) \
-velero-pod-cpu-request=$(VELERO_POD_CPU_REQUEST) \
-velero-pod-mem-request=$(VELERO_POD_MEM_REQUEST) \
-pod-volume-operation-timeout=$(POD_VOLUME_OPERATION_TIMEOUT)

build: ginkgo
mkdir -p $(OUTPUT_DIR)
Expand Down
2 changes: 1 addition & 1 deletion test/perf/backup/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type BackupTest struct {

func (b *BackupTest) Init() error {
b.TestCase.Init()
b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour)
b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour)
b.CaseBaseName = "backup"
b.BackupName = "backup-" + b.CaseBaseName + "-" + b.UUIDgen

Expand Down
18 changes: 15 additions & 3 deletions test/perf/basic/basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ package basic

import (
"context"
"fmt"
"strings"
"time"

"github.com/pkg/errors"

. "github.com/vmware-tanzu/velero/test"
. "github.com/vmware-tanzu/velero/test/perf/test"
"github.com/vmware-tanzu/velero/test/util/k8s"
)

type BasicTest struct {
Expand All @@ -32,7 +34,7 @@ type BasicTest struct {

func (b *BasicTest) Init() error {
b.TestCase.Init()
b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour)
b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour)
b.CaseBaseName = "backuprestore"
b.BackupName = "backup-" + b.CaseBaseName + "-" + b.UUIDgen
b.RestoreName = "restore-" + b.CaseBaseName + "-" + b.UUIDgen
Expand All @@ -49,10 +51,20 @@ func (b *BasicTest) Init() error {
"--from-backup", b.BackupName, "--wait",
}

if !VeleroCfg.DeleteClusterResource {
joinedNsMapping, err := k8s.GetMappingNamespaces(b.Ctx, b.Client, *b.NSExcluded)
if err != nil {
return errors.Wrapf(err, "failed to get mapping namespaces in init")
}

b.RestoreArgs = append(b.RestoreArgs, "--namespace-mappings")
b.RestoreArgs = append(b.RestoreArgs, joinedNsMapping)
}

b.TestMsg = &TestMSG{
Desc: "Do backup and restore resources for performance test",
FailedMSG: "Failed to backup and restore resources",
Text: fmt.Sprintf("Should backup and restore resources success"),
Text: "Should backup and restore resources success",
}
return nil
}
14 changes: 14 additions & 0 deletions test/perf/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ import (
"flag"
"fmt"
"testing"
"time"

. "github.com/onsi/ginkgo"
"github.com/onsi/ginkgo/reporters"
. "github.com/onsi/gomega"
"github.com/pkg/errors"

"github.com/vmware-tanzu/velero/pkg/cmd/cli/install"
. "github.com/vmware-tanzu/velero/test"

"github.com/vmware-tanzu/velero/test/perf/backup"
Expand All @@ -39,6 +41,7 @@ import (
)

func init() {
VeleroCfg.Options = &install.Options{}
flag.StringVar(&VeleroCfg.CloudProvider, "cloud-provider", "", "cloud that Velero will be installed into. Required.")
flag.StringVar(&VeleroCfg.ObjectStoreProvider, "object-store-provider", "", "provider of object store plugin. Required if cloud-provider is kind, otherwise ignored.")
flag.StringVar(&VeleroCfg.BSLBucket, "bucket", "", "name of the object storage bucket where backups from e2e tests should be stored. Required.")
Expand All @@ -56,6 +59,15 @@ func init() {
flag.BoolVar(&VeleroCfg.InstallVelero, "install-velero", true, "install/uninstall velero during the test. Optional.")
flag.BoolVar(&VeleroCfg.UseNodeAgent, "use-node-agent", true, "whether deploy node agent daemonset velero during the test. Optional.")
flag.StringVar(&VeleroCfg.RegistryCredentialFile, "registry-credential-file", "", "file containing credential for the image registry, follows the same format rules as the ~/.docker/config.json file. Optional.")
flag.StringVar(&VeleroCfg.NodeAgentPodCPULimit, "node-agent-pod-cpu-limit", "4", "CPU limit for node agent pod. Optional.")
flag.StringVar(&VeleroCfg.NodeAgentPodMemLimit, "node-agent-pod-mem-limit", "4Gi", "Memory limit for node agent pod. Optional.")
flag.StringVar(&VeleroCfg.NodeAgentPodCPURequest, "node-agent-pod-cpu-request", "2", "CPU request for node agent pod. Optional.")
flag.StringVar(&VeleroCfg.NodeAgentPodMemRequest, "node-agent-pod-mem-request", "2Gi", "Memory request for node agent pod. Optional.")
flag.StringVar(&VeleroCfg.VeleroPodCPULimit, "velero-pod-cpu-limit", "4", "CPU limit for velero pod. Optional.")
flag.StringVar(&VeleroCfg.VeleroPodMemLimit, "velero-pod-mem-limit", "4Gi", "Memory limit for velero pod. Optional.")
flag.StringVar(&VeleroCfg.VeleroPodCPURequest, "velero-pod-cpu-request", "2", "CPU request for velero pod. Optional.")
flag.StringVar(&VeleroCfg.VeleroPodMemRequest, "velero-pod-mem-request", "2Gi", "Memory request for velero pod. Optional.")
flag.DurationVar(&VeleroCfg.PodVolumeOperationTimeout, "pod-volume-operation-timeout", 360*time.Minute, "Timeout for pod volume operations. Optional.")
//vmware-tanzu-experiments
flag.StringVar(&VeleroCfg.Features, "features", "", "Comma-separated list of features to enable for this Velero process.")
flag.StringVar(&VeleroCfg.DefaultCluster, "default-cluster-context", "", "Default cluster context for migration test.")
Expand All @@ -65,6 +77,8 @@ func init() {
flag.StringVar(&VeleroCfg.NFSServerPath, "nfs-server-path", "", "the path of nfs server")
flag.StringVar(&VeleroCfg.TestCaseDescribe, "test-case-describe", "velero performance test", "the description for the current test")
flag.StringVar(&VeleroCfg.BackupForRestore, "backup-for-restore", "", "the name of backup for restore")
flag.BoolVar(&VeleroCfg.DeleteClusterResource, "delete-cluster-resource", false, "delete cluster resource after test")
flag.BoolVar(&VeleroCfg.DebugVeleroPodRestart, "debug-velero-pod-restart", false, "Switch for debugging velero pod restart.")
}

func initConfig() error {
Expand Down
42 changes: 22 additions & 20 deletions test/perf/metrics/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"strings"
"time"

"github.com/pkg/errors"

Expand All @@ -29,6 +30,7 @@ import (
)

const PodResourceDesc = "Resource consumption"
const PodMetricsTimeout = 5 * time.Minute

type PodMetrics struct {
Client *metricsclientset.Clientset
Expand All @@ -39,31 +41,31 @@ type PodMetrics struct {
}

func (p *PodMetrics) Update() error {
cpu, mem, err := metrics.GetPodUsageMetrics(p.Ctx, p.Client, p.PodName, p.Namespace)
cpu, mem, err := metrics.GetPodUsageMetrics(p.Ctx, p.Client, p.PodName, p.Namespace, PodMetricsTimeout)
if err != nil {
return errors.WithStack(err)
} else {
keyMaxCPU := p.PodName + ":MaxCPU"
curCPU := cpu.MilliValue()
if curCPU > p.Metrics[keyMaxCPU] {
p.Metrics[keyMaxCPU] = curCPU
}
}
keyMaxCPU := p.PodName + ":MaxCPU"
curCPU := cpu.MilliValue()
if curCPU > p.Metrics[keyMaxCPU] {
p.Metrics[keyMaxCPU] = curCPU
}

keyMaxMem := p.PodName + ":MaxMemory"
curMem := mem.MilliValue()
if curMem > p.Metrics[keyMaxMem] {
p.Metrics[keyMaxMem] = curMem
}
keyMaxMem := p.PodName + ":MaxMemory"
curMem := mem.MilliValue()
if curMem > p.Metrics[keyMaxMem] {
p.Metrics[keyMaxMem] = curMem
}

keyAvgCPU := p.PodName + ":AverageCPU"
preAvgCPU := p.Metrics[keyAvgCPU]
p.Metrics[keyAvgCPU] = (preAvgCPU*p.count + curCPU) / (p.count + 1)
keyAvgCPU := p.PodName + ":AverageCPU"
preAvgCPU := p.Metrics[keyAvgCPU]
p.Metrics[keyAvgCPU] = (preAvgCPU*p.count + curCPU) / (p.count + 1)

keyAvgMem := p.PodName + ":AverageMemory"
preAvgMem := p.Metrics[keyAvgMem]
p.Metrics[keyAvgMem] = (preAvgMem*p.count + curMem) / (p.count + 1)
p.count++

keyAvgMem := p.PodName + ":AverageMemory"
preAvgMem := p.Metrics[keyAvgMem]
p.Metrics[keyAvgMem] = (preAvgMem*p.count + curMem) / (p.count + 1)
p.count++
}
return nil
}

Expand Down
39 changes: 26 additions & 13 deletions test/perf/metrics/time.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,40 +16,53 @@ limitations under the License.

package metrics

import "time"
import (
"fmt"
"time"
)

const TimeCaseDesc = "Time cost"

type TimeSpan struct {
Start time.Time
End time.Time
}

type TimeMetrics struct {
Name string
TimeInfo map[string]time.Time // metric name : start timestamp
Metrics map[string]float64 // metric name : time duration
TimeInfo map[string]TimeSpan // metric name : start timestamp
}

func (t *TimeMetrics) GetMetrics() map[string]string {
tmpMetrics := make(map[string]string)
for k, v := range t.Metrics {
duration := time.Duration(v) * time.Second
tmpMetrics[k] = duration.String()
for k, v := range t.TimeInfo {
duration := v.End.Sub(v.Start)
if duration < time.Second {
// For those too shoter time difference we should ignored
// as it may not really execute the logic
continue
}
tmpMetrics[k] = duration.String() + fmt.Sprintf(" (%s - %s)", v.Start.Format(time.RFC3339), v.End.Format(time.RFC3339))
}
return tmpMetrics
}

func (t *TimeMetrics) Start(name string) {
t.TimeInfo[name] = time.Now()
t.TimeInfo[name] = TimeSpan{
Start: time.Now(),
}
}

func (t *TimeMetrics) End(name string) {
t.Metrics[name] = time.Now().Sub(t.TimeInfo[name]).Seconds()
if t.Metrics[name] < 1 {
// For those too shoter time difference we should ignored
// as it may not really execute the logic
delete(t.Metrics, name)
if _, ok := t.TimeInfo[name]; !ok {
return
}
timeSpan := t.TimeInfo[name]
timeSpan.End = time.Now()
t.TimeInfo[name] = timeSpan
}

func (t *TimeMetrics) Update() error {
t.Metrics[t.Name] = time.Now().Sub(t.TimeInfo[t.Name]).Seconds()
return nil
}

Expand Down
Loading

0 comments on commit ccd3f22

Please sign in to comment.