From 507157f8126ecc7965fd9a3ea88b0e66335c1d0b Mon Sep 17 00:00:00 2001 From: Ming Date: Tue, 7 Nov 2023 06:25:13 +0000 Subject: [PATCH] Add perf test namespace mapping when restore Signed-off-by: Ming --- pkg/cmd/cli/install/install.go | 7 ++++++ pkg/install/deployment.go | 11 +++++++++ pkg/install/resources.go | 2 ++ test/e2e/e2e_suite_test.go | 2 ++ test/perf/Makefile | 24 +++++++++++++++++- test/perf/backup/backup.go | 2 +- test/perf/basic/basic.go | 18 +++++++++++--- test/perf/e2e_suite_test.go | 14 +++++++++++ test/perf/metrics/pod.go | 42 ++++++++++++++++--------------- test/perf/metrics/time.go | 39 +++++++++++++++++++---------- test/perf/restore/restore.go | 20 +++++++++++++-- test/perf/test/test.go | 24 +++++++++--------- test/types.go | 15 +++++------- test/util/k8s/namespace.go | 39 +++++++++++++++++++++++++++++ test/util/metrics/pod.go | 20 ++++++++++++--- test/util/velero/install.go | 45 ++++++++++++++++++++++++++++++++++ 16 files changed, 261 insertions(+), 63 deletions(-) diff --git a/pkg/cmd/cli/install/install.go b/pkg/cmd/cli/install/install.go index 9b8d835f24..fc5784082d 100644 --- a/pkg/cmd/cli/install/install.go +++ b/pkg/cmd/cli/install/install.go @@ -73,6 +73,7 @@ type Options struct { UseVolumeSnapshots bool DefaultRepoMaintenanceFrequency time.Duration GarbageCollectionFrequency time.Duration + PodVolumeOperationTimeout time.Duration Plugins flag.StringArray NoDefaultBackupLocation bool CRDsOnly bool @@ -116,6 +117,7 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) { flags.BoolVar(&o.Wait, "wait", o.Wait, "Wait for Velero deployment to be ready. Optional.") flags.DurationVar(&o.DefaultRepoMaintenanceFrequency, "default-repo-maintain-frequency", o.DefaultRepoMaintenanceFrequency, "How often 'maintain' is run for backup repositories by default. 
Optional.") flags.DurationVar(&o.GarbageCollectionFrequency, "garbage-collection-frequency", o.GarbageCollectionFrequency, "How often the garbage collection runs for expired backups.(default 1h)") + flags.DurationVar(&o.PodVolumeOperationTimeout, "pod-volume-operation-timeout", o.PodVolumeOperationTimeout, "How long to wait for pod volume operations to complete before timing out(default 4h). Optional.") flags.Var(&o.Plugins, "plugins", "Plugin container images to install into the Velero Deployment") flags.BoolVar(&o.CRDsOnly, "crds-only", o.CRDsOnly, "Only generate CustomResourceDefinition resources. Useful for updating CRDs for an existing Velero install.") flags.StringVar(&o.CACertFile, "cacert", o.CACertFile, "File containing a certificate bundle to use when verifying TLS connections to the object store. Optional.") @@ -209,6 +211,7 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) { VSLConfig: o.VolumeSnapshotConfig.Data(), DefaultRepoMaintenanceFrequency: o.DefaultRepoMaintenanceFrequency, GarbageCollectionFrequency: o.GarbageCollectionFrequency, + PodVolumeOperationTimeout: o.PodVolumeOperationTimeout, Plugins: o.Plugins, NoDefaultBackupLocation: o.NoDefaultBackupLocation, CACertData: caCertData, @@ -426,5 +429,9 @@ func (o *Options) Validate(c *cobra.Command, args []string, f client.Factory) er return errors.New("--garbage-collection-frequency must be non-negative") } + if o.PodVolumeOperationTimeout < 0 { + return errors.New("--pod-volume-operation-timeout must be non-negative") + } + return nil } diff --git a/pkg/install/deployment.go b/pkg/install/deployment.go index 5ea680dc16..7c1bd7a81f 100644 --- a/pkg/install/deployment.go +++ b/pkg/install/deployment.go @@ -41,6 +41,7 @@ type podTemplateConfig struct { withSecret bool defaultRepoMaintenanceFrequency time.Duration garbageCollectionFrequency time.Duration + podVolumeOperationTimeout time.Duration plugins []string features []string defaultVolumesToFsBackup bool @@ -115,6 +116,12 @@ 
func WithGarbageCollectionFrequency(val time.Duration) podTemplateOption { } } +func WithPodVolumeOperationTimeout(val time.Duration) podTemplateOption { + return func(c *podTemplateConfig) { + c.podVolumeOperationTimeout = val + } +} + func WithPlugins(plugins []string) podTemplateOption { return func(c *podTemplateConfig) { c.plugins = plugins @@ -212,6 +219,10 @@ func Deployment(namespace string, opts ...podTemplateOption) *appsv1.Deployment args = append(args, fmt.Sprintf("--garbage-collection-frequency=%v", c.garbageCollectionFrequency)) } + if c.podVolumeOperationTimeout > 0 { + args = append(args, fmt.Sprintf("--fs-backup-timeout=%v", c.podVolumeOperationTimeout)) + } + deployment := &appsv1.Deployment{ ObjectMeta: objectMeta(namespace, "velero"), TypeMeta: metav1.TypeMeta{ diff --git a/pkg/install/resources.go b/pkg/install/resources.go index 21aa83ff65..2e9e1bc3e2 100644 --- a/pkg/install/resources.go +++ b/pkg/install/resources.go @@ -246,6 +246,7 @@ type VeleroOptions struct { VSLConfig map[string]string DefaultRepoMaintenanceFrequency time.Duration GarbageCollectionFrequency time.Duration + PodVolumeOperationTimeout time.Duration Plugins []string NoDefaultBackupLocation bool CACertData []byte @@ -335,6 +336,7 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList { WithDefaultRepoMaintenanceFrequency(o.DefaultRepoMaintenanceFrequency), WithServiceAccountName(serviceAccountName), WithGarbageCollectionFrequency(o.GarbageCollectionFrequency), + WithPodVolumeOperationTimeout(o.PodVolumeOperationTimeout), WithUploaderType(o.UploaderType), } diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index b4cb6b22a3..76ef04100d 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -28,6 +28,7 @@ import ( "github.com/onsi/ginkgo/reporters" . "github.com/onsi/gomega" + "github.com/vmware-tanzu/velero/pkg/cmd/cli/install" . "github.com/vmware-tanzu/velero/test" . "github.com/vmware-tanzu/velero/test/e2e/backup" . 
"github.com/vmware-tanzu/velero/test/e2e/backups" @@ -49,6 +50,7 @@ import ( ) func init() { + VeleroCfg.Options = &install.Options{} flag.StringVar(&VeleroCfg.CloudProvider, "cloud-provider", "", "cloud that Velero will be installed into. Required.") flag.StringVar(&VeleroCfg.ObjectStoreProvider, "object-store-provider", "", "provider of object store plugin. Required if cloud-provider is kind, otherwise ignored.") flag.StringVar(&VeleroCfg.BSLBucket, "bucket", "", "name of the object storage bucket where backups from e2e tests should be stored. Required.") diff --git a/test/perf/Makefile b/test/perf/Makefile index f30ee5b995..843ccab87e 100644 --- a/test/perf/Makefile +++ b/test/perf/Makefile @@ -76,6 +76,17 @@ NFS_SERVER_PATH ?= UPLOADER_TYPE ?= TEST_CASE_DESCRIBE ?= 'velero performance test' BACKUP_FOR_RESTORE ?= +Delete_Cluster_Resource ?= false +Debug_Velero_Pod_Restart ?= false +NODE_AGENT_POD_CPU_LIMIT ?= 4 +NODE_AGENT_POD_MEM_LIMIT ?= 4Gi +NODE_AGENT_POD_CPU_REQUEST ?= 2 +NODE_AGENT_POD_MEM_REQUEST ?= 2Gi +VELERO_POD_CPU_LIMIT ?= 4 +VELERO_POD_MEM_LIMIT ?= 4Gi +VELERO_POD_CPU_REQUEST ?= 2 +VELERO_POD_MEM_REQUEST ?= 2Gi +POD_VOLUME_OPERATION_TIMEOUT ?= 6h .PHONY:ginkgo ginkgo: # Make sure ginkgo is in $GOPATH/bin @@ -110,7 +121,18 @@ run: ginkgo -uploader-type=$(UPLOADER_TYPE) \ -nfs-server-path=$(NFS_SERVER_PATH) \ -test-case-describe=$(TEST_CASE_DESCRIBE) \ - -backup-for-restore=$(BACKUP_FOR_RESTORE) + -backup-for-restore=$(BACKUP_FOR_RESTORE) \ + -delete-cluster-resource=$(Delete_Cluster_Resource) \ + -debug-velero-pod-restart=$(Debug_Velero_Pod_Restart) \ + -node-agent-pod-cpu-limit=$(NODE_AGENT_POD_CPU_LIMIT) \ + -node-agent-pod-mem-limit=$(NODE_AGENT_POD_MEM_LIMIT) \ + -node-agent-pod-cpu-request=$(NODE_AGENT_POD_CPU_REQUEST) \ + -node-agent-pod-mem-request=$(NODE_AGENT_POD_MEM_REQUEST) \ + -velero-pod-cpu-limit=$(VELERO_POD_CPU_LIMIT) \ + -velero-pod-mem-limit=$(VELERO_POD_MEM_LIMIT) \ + -velero-pod-cpu-request=$(VELERO_POD_CPU_REQUEST) \ + 
-velero-pod-mem-request=$(VELERO_POD_MEM_REQUEST) \ + -pod-volume-operation-timeout=$(POD_VOLUME_OPERATION_TIMEOUT) build: ginkgo mkdir -p $(OUTPUT_DIR) diff --git a/test/perf/backup/backup.go b/test/perf/backup/backup.go index 7f9f35de08..3a3c059a56 100644 --- a/test/perf/backup/backup.go +++ b/test/perf/backup/backup.go @@ -32,7 +32,7 @@ type BackupTest struct { func (b *BackupTest) Init() error { b.TestCase.Init() - b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour) + b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour) b.CaseBaseName = "backup" b.BackupName = "backup-" + b.CaseBaseName + "-" + b.UUIDgen diff --git a/test/perf/basic/basic.go b/test/perf/basic/basic.go index 80c6b02185..76bf605a68 100644 --- a/test/perf/basic/basic.go +++ b/test/perf/basic/basic.go @@ -18,12 +18,14 @@ package basic import ( "context" - "fmt" "strings" "time" + "github.com/pkg/errors" + . "github.com/vmware-tanzu/velero/test" . "github.com/vmware-tanzu/velero/test/perf/test" + "github.com/vmware-tanzu/velero/test/util/k8s" ) type BasicTest struct { @@ -32,7 +34,7 @@ type BasicTest struct { func (b *BasicTest) Init() error { b.TestCase.Init() - b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour) + b.Ctx, b.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour) b.CaseBaseName = "backuprestore" b.BackupName = "backup-" + b.CaseBaseName + "-" + b.UUIDgen b.RestoreName = "restore-" + b.CaseBaseName + "-" + b.UUIDgen @@ -49,10 +51,20 @@ func (b *BasicTest) Init() error { "--from-backup", b.BackupName, "--wait", } + if !VeleroCfg.DeleteClusterResource { + joinedNsMapping, err := k8s.GetMappingNamespaces(b.Ctx, b.Client, *b.NSExcluded) + if err != nil { + return errors.Wrapf(err, "failed to get mapping namespaces in init") + } + + b.RestoreArgs = append(b.RestoreArgs, "--namespace-mappings") + b.RestoreArgs = append(b.RestoreArgs, joinedNsMapping) + } + b.TestMsg = &TestMSG{ Desc: "Do backup and restore 
resources for performance test", FailedMSG: "Failed to backup and restore resources", - Text: fmt.Sprintf("Should backup and restore resources success"), + Text: "Should backup and restore resources success", } return nil } diff --git a/test/perf/e2e_suite_test.go b/test/perf/e2e_suite_test.go index 4d3275dec1..57599ec364 100644 --- a/test/perf/e2e_suite_test.go +++ b/test/perf/e2e_suite_test.go @@ -21,12 +21,14 @@ import ( "flag" "fmt" "testing" + "time" . "github.com/onsi/ginkgo" "github.com/onsi/ginkgo/reporters" . "github.com/onsi/gomega" "github.com/pkg/errors" + "github.com/vmware-tanzu/velero/pkg/cmd/cli/install" . "github.com/vmware-tanzu/velero/test" "github.com/vmware-tanzu/velero/test/perf/backup" @@ -39,6 +41,7 @@ import ( ) func init() { + VeleroCfg.Options = &install.Options{} flag.StringVar(&VeleroCfg.CloudProvider, "cloud-provider", "", "cloud that Velero will be installed into. Required.") flag.StringVar(&VeleroCfg.ObjectStoreProvider, "object-store-provider", "", "provider of object store plugin. Required if cloud-provider is kind, otherwise ignored.") flag.StringVar(&VeleroCfg.BSLBucket, "bucket", "", "name of the object storage bucket where backups from e2e tests should be stored. Required.") @@ -56,6 +59,15 @@ func init() { flag.BoolVar(&VeleroCfg.InstallVelero, "install-velero", true, "install/uninstall velero during the test. Optional.") flag.BoolVar(&VeleroCfg.UseNodeAgent, "use-node-agent", true, "whether deploy node agent daemonset velero during the test. Optional.") flag.StringVar(&VeleroCfg.RegistryCredentialFile, "registry-credential-file", "", "file containing credential for the image registry, follows the same format rules as the ~/.docker/config.json file. Optional.") + flag.StringVar(&VeleroCfg.NodeAgentPodCPULimit, "node-agent-pod-cpu-limit", "4", "CPU limit for node agent pod. Optional.") + flag.StringVar(&VeleroCfg.NodeAgentPodMemLimit, "node-agent-pod-mem-limit", "4Gi", "Memory limit for node agent pod. 
Optional.") + flag.StringVar(&VeleroCfg.NodeAgentPodCPURequest, "node-agent-pod-cpu-request", "2", "CPU request for node agent pod. Optional.") + flag.StringVar(&VeleroCfg.NodeAgentPodMemRequest, "node-agent-pod-mem-request", "2Gi", "Memory request for node agent pod. Optional.") + flag.StringVar(&VeleroCfg.VeleroPodCPULimit, "velero-pod-cpu-limit", "4", "CPU limit for velero pod. Optional.") + flag.StringVar(&VeleroCfg.VeleroPodMemLimit, "velero-pod-mem-limit", "4Gi", "Memory limit for velero pod. Optional.") + flag.StringVar(&VeleroCfg.VeleroPodCPURequest, "velero-pod-cpu-request", "2", "CPU request for velero pod. Optional.") + flag.StringVar(&VeleroCfg.VeleroPodMemRequest, "velero-pod-mem-request", "2Gi", "Memory request for velero pod. Optional.") + flag.DurationVar(&VeleroCfg.PodVolumeOperationTimeout, "pod-volume-operation-timeout", 360*time.Minute, "Timeout for pod volume operations. Optional.") //vmware-tanzu-experiments flag.StringVar(&VeleroCfg.Features, "features", "", "Comma-separated list of features to enable for this Velero process.") flag.StringVar(&VeleroCfg.DefaultCluster, "default-cluster-context", "", "Default cluster context for migration test.") @@ -65,6 +77,8 @@ func init() { flag.StringVar(&VeleroCfg.NFSServerPath, "nfs-server-path", "", "the path of nfs server") flag.StringVar(&VeleroCfg.TestCaseDescribe, "test-case-describe", "velero performance test", "the description for the current test") flag.StringVar(&VeleroCfg.BackupForRestore, "backup-for-restore", "", "the name of backup for restore") + flag.BoolVar(&VeleroCfg.DeleteClusterResource, "delete-cluster-resource", false, "delete cluster resource after test") + flag.BoolVar(&VeleroCfg.DebugVeleroPodRestart, "debug-velero-pod-restart", false, "Switch for debugging velero pod restart.") } func initConfig() error { diff --git a/test/perf/metrics/pod.go b/test/perf/metrics/pod.go index f341fe918a..56572f6728 100644 --- a/test/perf/metrics/pod.go +++ b/test/perf/metrics/pod.go @@ -20,6 
+20,7 @@ import ( "context" "fmt" "strings" + "time" "github.com/pkg/errors" @@ -29,6 +30,7 @@ import ( ) const PodResourceDesc = "Resource consumption" +const PodMetricsTimeout = 5 * time.Minute type PodMetrics struct { Client *metricsclientset.Clientset @@ -39,31 +41,31 @@ type PodMetrics struct { } func (p *PodMetrics) Update() error { - cpu, mem, err := metrics.GetPodUsageMetrics(p.Ctx, p.Client, p.PodName, p.Namespace) + cpu, mem, err := metrics.GetPodUsageMetrics(p.Ctx, p.Client, p.PodName, p.Namespace, PodMetricsTimeout) if err != nil { return errors.WithStack(err) - } else { - keyMaxCPU := p.PodName + ":MaxCPU" - curCPU := cpu.MilliValue() - if curCPU > p.Metrics[keyMaxCPU] { - p.Metrics[keyMaxCPU] = curCPU - } + } + keyMaxCPU := p.PodName + ":MaxCPU" + curCPU := cpu.MilliValue() + if curCPU > p.Metrics[keyMaxCPU] { + p.Metrics[keyMaxCPU] = curCPU + } - keyMaxMem := p.PodName + ":MaxMemory" - curMem := mem.MilliValue() - if curMem > p.Metrics[keyMaxMem] { - p.Metrics[keyMaxMem] = curMem - } + keyMaxMem := p.PodName + ":MaxMemory" + curMem := mem.MilliValue() + if curMem > p.Metrics[keyMaxMem] { + p.Metrics[keyMaxMem] = curMem + } - keyAvgCPU := p.PodName + ":AverageCPU" - preAvgCPU := p.Metrics[keyAvgCPU] - p.Metrics[keyAvgCPU] = (preAvgCPU*p.count + curCPU) / (p.count + 1) + keyAvgCPU := p.PodName + ":AverageCPU" + preAvgCPU := p.Metrics[keyAvgCPU] + p.Metrics[keyAvgCPU] = (preAvgCPU*p.count + curCPU) / (p.count + 1) + + keyAvgMem := p.PodName + ":AverageMemory" + preAvgMem := p.Metrics[keyAvgMem] + p.Metrics[keyAvgMem] = (preAvgMem*p.count + curMem) / (p.count + 1) + p.count++ - keyAvgMem := p.PodName + ":AverageMemory" - preAvgMem := p.Metrics[keyAvgMem] - p.Metrics[keyAvgMem] = (preAvgMem*p.count + curMem) / (p.count + 1) - p.count++ - } return nil } diff --git a/test/perf/metrics/time.go b/test/perf/metrics/time.go index 3334cbb297..aa760389d2 100644 --- a/test/perf/metrics/time.go +++ b/test/perf/metrics/time.go @@ -16,40 +16,53 @@ limitations under 
the License. package metrics -import "time" +import ( + "fmt" + "time" +) const TimeCaseDesc = "Time cost" +type TimeSpan struct { + Start time.Time + End time.Time +} + type TimeMetrics struct { Name string - TimeInfo map[string]time.Time // metric name : start timestamp - Metrics map[string]float64 // metric name : time duration + TimeInfo map[string]TimeSpan // metric name : start timestamp } func (t *TimeMetrics) GetMetrics() map[string]string { tmpMetrics := make(map[string]string) - for k, v := range t.Metrics { - duration := time.Duration(v) * time.Second - tmpMetrics[k] = duration.String() + for k, v := range t.TimeInfo { + duration := v.End.Sub(v.Start) + if duration < time.Second { + // For those too shoter time difference we should ignored + // as it may not really execute the logic + continue + } + tmpMetrics[k] = duration.String() + fmt.Sprintf(" (%s - %s)", v.Start.Format(time.RFC3339), v.End.Format(time.RFC3339)) } return tmpMetrics } func (t *TimeMetrics) Start(name string) { - t.TimeInfo[name] = time.Now() + t.TimeInfo[name] = TimeSpan{ + Start: time.Now(), + } } func (t *TimeMetrics) End(name string) { - t.Metrics[name] = time.Now().Sub(t.TimeInfo[name]).Seconds() - if t.Metrics[name] < 1 { - // For those too shoter time difference we should ignored - // as it may not really execute the logic - delete(t.Metrics, name) + if _, ok := t.TimeInfo[name]; !ok { + return } + timeSpan := t.TimeInfo[name] + timeSpan.End = time.Now() + t.TimeInfo[name] = timeSpan } func (t *TimeMetrics) Update() error { - t.Metrics[t.Name] = time.Now().Sub(t.TimeInfo[t.Name]).Seconds() return nil } diff --git a/test/perf/restore/restore.go b/test/perf/restore/restore.go index 025ef49865..f07d5df4d6 100644 --- a/test/perf/restore/restore.go +++ b/test/perf/restore/restore.go @@ -25,6 +25,7 @@ import ( . "github.com/vmware-tanzu/velero/test" . "github.com/vmware-tanzu/velero/test/perf/test" + "github.com/vmware-tanzu/velero/test/util/k8s" . 
"github.com/vmware-tanzu/velero/test/util/velero" ) @@ -34,7 +35,7 @@ type RestoreTest struct { func (r *RestoreTest) Init() error { r.TestCase.Init() - r.Ctx, r.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour) + r.Ctx, r.CtxCancel = context.WithTimeout(context.Background(), 6*time.Hour) r.CaseBaseName = "restore" r.RestoreName = "restore-" + r.CaseBaseName + "-" + r.UUIDgen @@ -43,7 +44,7 @@ func (r *RestoreTest) Init() error { FailedMSG: "Failed to restore resources", Text: fmt.Sprintf("Should restore resources success"), } - return r.clearUpResourcesBeforRestore() + return nil } func (r *RestoreTest) clearUpResourcesBeforRestore() error { @@ -52,6 +53,11 @@ func (r *RestoreTest) clearUpResourcesBeforRestore() error { } func (r *RestoreTest) Restore() error { + // we need to clear up all resources before do the restore test + err := r.clearUpResourcesBeforRestore() + if err != nil { + return errors.Wrapf(err, "failed to clear up resources before do the restore test") + } var backupName string if VeleroCfg.BackupForRestore != "" { backupName = VeleroCfg.BackupForRestore @@ -71,6 +77,16 @@ func (r *RestoreTest) Restore() error { "--from-backup", r.BackupName, "--wait", } + if !VeleroCfg.DeleteClusterResource { + joinedNsMapping, err := k8s.GetMappingNamespaces(r.Ctx, r.Client, *r.NSExcluded) + if err != nil { + return errors.Wrapf(err, "failed to get mapping namespaces in init") + } + + r.RestoreArgs = append(r.RestoreArgs, "--namespace-mappings") + r.RestoreArgs = append(r.RestoreArgs, joinedNsMapping) + } + return r.TestCase.Restore() } func (r *RestoreTest) Destroy() error { diff --git a/test/perf/test/test.go b/test/perf/test/test.go index 9aed01bb27..c7f80e3fea 100644 --- a/test/perf/test/test.go +++ b/test/perf/test/test.go @@ -97,14 +97,15 @@ func TestFunc(test VeleroBackupRestoreTest) func() { } func (t *TestCase) Init() error { - t.Ctx, t.CtxCancel = context.WithTimeout(context.Background(), 1*time.Hour) + t.Ctx, t.CtxCancel = 
context.WithTimeout(context.Background(), 6*time.Hour) t.NSExcluded = &[]string{"kube-system", "velero", "default", "kube-public", "kube-node-lease"} t.UUIDgen = t.GenerateUUID() t.Client = *VeleroCfg.DefaultClient t.timer = &metrics.TimeMetrics{ - Name: "Total time cost", - TimeInfo: map[string]time.Time{"Total time cost": time.Now()}, - Metrics: make(map[string]float64), + Name: "Total time cost", + TimeInfo: map[string]metrics.TimeSpan{"Total time cost": { + Start: time.Now(), + }}, } return nil } @@ -131,10 +132,12 @@ func (t *TestCase) Backup() error { } func (t *TestCase) Destroy() error { - By(fmt.Sprintf("Start to destroy namespace %s......", t.CaseBaseName), func() { - Expect(CleanupNamespacesFiterdByExcludes(t.GetTestCase().Ctx, t.Client, *t.NSExcluded)).To(Succeed(), "Could cleanup retrieve namespaces") - Expect(ClearClaimRefForFailedPVs(t.Ctx, t.Client)).To(Succeed(), "Failed to make PV status become to available") - }) + if VeleroCfg.DeleteClusterResource { + By(fmt.Sprintf("Start to destroy namespace %s......", t.CaseBaseName), func() { + Expect(CleanupNamespacesFiterdByExcludes(t.GetTestCase().Ctx, t.Client, *t.NSExcluded)).To(Succeed(), "Could cleanup retrieve namespaces") + Expect(ClearClaimRefForFailedPVs(t.Ctx, t.Client)).To(Succeed(), "Failed to make PV status become to available") + }) + } return nil } @@ -160,7 +163,7 @@ func (t *TestCase) Verify() error { } func (t *TestCase) Clean() error { - if !VeleroCfg.Debug { + if !VeleroCfg.Debug || VeleroCfg.DeleteClusterResource { By("Clean backups and restore after test", func() { if len(t.BackupArgs) != 0 { if err := VeleroBackupDelete(t.Ctx, VeleroCfg.VeleroCLI, VeleroCfg.VeleroNamespace, t.BackupName); err != nil { @@ -269,8 +272,7 @@ func (t *TestCase) MonitorMetircs(ctx context.Context, collectors *metrics.Metri timeMetrics := &metrics.TimeMetrics{ Name: t.CaseBaseName, - TimeInfo: make(map[string]time.Time), - Metrics: make(map[string]float64), + TimeInfo: make(map[string]metrics.TimeSpan), } 
collectors.RegisterOneTimeMetric(timeMetrics) diff --git a/test/types.go b/test/types.go index 327139f35a..360c904735 100644 --- a/test/types.go +++ b/test/types.go @@ -21,6 +21,7 @@ import ( "github.com/google/uuid" + "github.com/vmware-tanzu/velero/pkg/cmd/cli/install" . "github.com/vmware-tanzu/velero/test/util/k8s" ) @@ -40,6 +41,7 @@ var ReportData *Report type VeleroConfig struct { VeleroCfgInPerf + *install.Options VeleroCLI string VeleroImage string VeleroVersion string @@ -66,7 +68,6 @@ type VeleroConfig struct { AddBSLPlugins string InstallVelero bool KibishiiDirectory string - Features string Debug bool GCFrequency string DefaultCluster string @@ -74,12 +75,7 @@ type VeleroConfig struct { ClientToInstallVelero *TestClient DefaultClient *TestClient StandbyClient *TestClient - UploaderType string - UseNodeAgent bool - UseRestic bool ProvideSnapshotsVolumeParam bool - DefaultVolumesToFsBackup bool - UseVolumeSnapshots bool VeleroServerDebugMode bool SnapshotMoveData bool DataMoverPlugin string @@ -90,9 +86,10 @@ type VeleroConfig struct { } type VeleroCfgInPerf struct { - NFSServerPath string - TestCaseDescribe string - BackupForRestore string + NFSServerPath string + TestCaseDescribe string + BackupForRestore string + DeleteClusterResource bool } type SnapshotCheckPoint struct { diff --git a/test/util/k8s/namespace.go b/test/util/k8s/namespace.go index e056dc9905..3c76867560 100644 --- a/test/util/k8s/namespace.go +++ b/test/util/k8s/namespace.go @@ -194,3 +194,42 @@ func NamespaceShouldNotExist(ctx context.Context, client TestClient, namespace s } return nil } + +func GetBackupNamespaces(ctx context.Context, client TestClient, excludeNS []string) ([]string, error) { + namespaces, err := client.ClientGo.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, errors.Wrap(err, "Could not retrieve namespaces") + } + var backupNamespaces []string + for _, checkNamespace := range namespaces.Items { + isExclude := false + for k := 
range excludeNS { + if checkNamespace.Name == excludeNS[k] { + isExclude = true + } + } + if !isExclude { + backupNamespaces = append(backupNamespaces, checkNamespace.Name) + } + } + return backupNamespaces, nil +} + +func GetMappingNamespaces(ctx context.Context, client TestClient, excludeNS []string) (string, error) { + ns, err := GetBackupNamespaces(ctx, client, excludeNS) + if err != nil { + return "", errors.Wrap(err, "Could not retrieve namespaces") + } else if len(ns) == 0 { + return "", errors.New("got empty namespaces in backup") + } + + nsMapping := []string{} + for _, n := range ns { + nsMapping = append(nsMapping, n+":mapping-"+n) + } + joinedNsMapping := strings.Join(nsMapping, ",") + if len(joinedNsMapping) > 0 { + joinedNsMapping = strings.TrimSuffix(joinedNsMapping, ",") + } + return joinedNsMapping, nil +} diff --git a/test/util/metrics/pod.go b/test/util/metrics/pod.go index 331211bb0e..d31f6a481a 100644 --- a/test/util/metrics/pod.go +++ b/test/util/metrics/pod.go @@ -18,21 +18,35 @@ package metrics import ( "context" + "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/metrics/pkg/apis/metrics/v1beta1" metricsclientset "k8s.io/metrics/pkg/client/clientset/versioned" ) -func GetPodUsageMetrics(ctx context.Context, metricsClient *metricsclientset.Clientset, podName, namespace string) (cpuUsage, memoryUsage resource.Quantity, err error) { +func GetPodUsageMetrics(ctx context.Context, metricsClient *metricsclientset.Clientset, podName, namespace string, podMetricsTimeout time.Duration) (cpuUsage, memoryUsage resource.Quantity, err error) { + ctx, cancel := context.WithTimeout(ctx, podMetricsTimeout) + defer cancel() + var podMetrics *v1beta1.PodMetrics - podMetrics, err = metricsClient.MetricsV1beta1().PodMetricses(namespace).Get(ctx, podName, metav1.GetOptions{}) + err = wait.PollImmediateUntil(time.Second,
func() (bool, error) { + var err error + podMetrics, err = metricsClient.MetricsV1beta1().PodMetricses(namespace).Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + return false, nil + } + return true, nil + }, ctx.Done()) + if err != nil { return + } else if podMetrics == nil { + return cpuUsage, memoryUsage, nil } - // Variables to store the max and sum of CPU and memory usage // For velero pod we only return the main container for _, container := range podMetrics.Containers { diff --git a/test/util/velero/install.go b/test/util/velero/install.go index b0bbcf7ff1..9427e19eed 100644 --- a/test/util/velero/install.go +++ b/test/util/velero/install.go @@ -120,6 +120,15 @@ func VeleroInstall(ctx context.Context, veleroCfg *VeleroConfig, isStandbyCluste veleroInstallOptions.UploaderType = veleroCfg.UploaderType GCFrequency, _ := time.ParseDuration(veleroCfg.GCFrequency) veleroInstallOptions.GarbageCollectionFrequency = GCFrequency + veleroInstallOptions.PodVolumeOperationTimeout = veleroCfg.PodVolumeOperationTimeout + veleroInstallOptions.NodeAgentPodCPULimit = veleroCfg.NodeAgentPodCPULimit + veleroInstallOptions.NodeAgentPodCPURequest = veleroCfg.NodeAgentPodCPURequest + veleroInstallOptions.NodeAgentPodMemLimit = veleroCfg.NodeAgentPodMemLimit + veleroInstallOptions.NodeAgentPodMemRequest = veleroCfg.NodeAgentPodMemRequest + veleroInstallOptions.VeleroPodCPULimit = veleroCfg.VeleroPodCPULimit + veleroInstallOptions.VeleroPodCPURequest = veleroCfg.VeleroPodCPURequest + veleroInstallOptions.VeleroPodMemLimit = veleroCfg.VeleroPodMemLimit + veleroInstallOptions.VeleroPodMemRequest = veleroCfg.VeleroPodMemRequest err = installVeleroServer(ctx, veleroCfg.VeleroCLI, veleroCfg.CloudProvider, &installOptions{ Options: veleroInstallOptions, @@ -251,6 +260,42 @@ func installVeleroServer(ctx context.Context, cli, cloudProvider string, options args = append(args, fmt.Sprintf("--garbage-collection-frequency=%v", options.GarbageCollectionFrequency)) } + if 
options.PodVolumeOperationTimeout > 0 { + args = append(args, fmt.Sprintf("--pod-volume-operation-timeout=%v", options.PodVolumeOperationTimeout)) + } + + if options.NodeAgentPodCPULimit != "" { + args = append(args, fmt.Sprintf("--node-agent-pod-cpu-limit=%v", options.NodeAgentPodCPULimit)) + } + + if options.NodeAgentPodCPURequest != "" { + args = append(args, fmt.Sprintf("--node-agent-pod-cpu-request=%v", options.NodeAgentPodCPURequest)) + } + + if options.NodeAgentPodMemLimit != "" { + args = append(args, fmt.Sprintf("--node-agent-pod-mem-limit=%v", options.NodeAgentPodMemLimit)) + } + + if options.NodeAgentPodMemRequest != "" { + args = append(args, fmt.Sprintf("--node-agent-pod-mem-request=%v", options.NodeAgentPodMemRequest)) + } + + if options.VeleroPodCPULimit != "" { + args = append(args, fmt.Sprintf("--velero-pod-cpu-limit=%v", options.VeleroPodCPULimit)) + } + + if options.VeleroPodCPURequest != "" { + args = append(args, fmt.Sprintf("--velero-pod-cpu-request=%v", options.VeleroPodCPURequest)) + } + + if options.VeleroPodMemLimit != "" { + args = append(args, fmt.Sprintf("--velero-pod-mem-limit=%v", options.VeleroPodMemLimit)) + } + + if options.VeleroPodMemRequest != "" { + args = append(args, fmt.Sprintf("--velero-pod-mem-request=%v", options.VeleroPodMemRequest)) + } + if len(options.UploaderType) > 0 { args = append(args, fmt.Sprintf("--uploader-type=%v", options.UploaderType)) }