Skip to content

Commit

Permalink
Pass Velero server command args to the plugins
Browse files Browse the repository at this point in the history
Pass Velero server command args to the plugins

Fixes #7806

Signed-off-by: Wenkai Yin(尹文开) <[email protected]>
  • Loading branch information
ywk253100 committed Sep 4, 2024
1 parent 981f30c commit dc6eeaf
Show file tree
Hide file tree
Showing 22 changed files with 503 additions and 486 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/8166-ywk253100
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pass Velero server command args to the plugins
3 changes: 2 additions & 1 deletion pkg/cmd/cli/nodeagent/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd"
"github.com/vmware-tanzu/velero/pkg/cmd/util/signals"
"github.com/vmware-tanzu/velero/pkg/constant"
"github.com/vmware-tanzu/velero/pkg/controller"
"github.com/vmware-tanzu/velero/pkg/datapath"
"github.com/vmware-tanzu/velero/pkg/metrics"
Expand Down Expand Up @@ -285,7 +286,7 @@ func (s *nodeAgentServer) run() {
credentialGetter, s.nodeName, s.mgr.GetScheme(), s.metrics, s.logger)

if err := pvbReconciler.SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", controller.PodVolumeBackup)
s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup)

Check warning on line 289 in pkg/cmd/cli/nodeagent/server.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/cli/nodeagent/server.go#L289

Added line #L289 was not covered by tests
}

if err = controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.dataPathMgr, repoEnsurer, credentialGetter, s.logger).SetupWithManager(s.mgr); err != nil {
Expand Down
248 changes: 248 additions & 0 deletions pkg/cmd/server/config/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
package config

import (
"fmt"
"strings"
"time"

"github.com/sirupsen/logrus"
"github.com/spf13/pflag"

"github.com/vmware-tanzu/velero/pkg/cmd/util/flag"
"github.com/vmware-tanzu/velero/pkg/constant"
"github.com/vmware-tanzu/velero/pkg/podvolume"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/types"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)

const (
// the port where prometheus metrics are exposed
defaultMetricsAddress = ":8085"

defaultBackupSyncPeriod = time.Minute
defaultStoreValidationFrequency = time.Minute
defaultPodVolumeOperationTimeout = 240 * time.Minute
defaultResourceTerminatingTimeout = 10 * time.Minute

// server's client default qps and burst
defaultClientQPS float32 = 100.0
defaultClientBurst int = 100
defaultClientPageSize int = 500

defaultProfilerAddress = "localhost:6060"

// the default TTL for a backup
defaultBackupTTL = 30 * 24 * time.Hour

defaultCSISnapshotTimeout = 10 * time.Minute
defaultItemOperationTimeout = 4 * time.Hour

resourceTimeout = 10 * time.Minute

defaultMaxConcurrentK8SConnections = 30
defaultDisableInformerCache = false

// defaultCredentialsDirectory is the path on disk where credential
// files will be written to
defaultCredentialsDirectory = "/tmp/credentials"
)

var (
// DisableableControllers is a list of controllers that can be disabled
DisableableControllers = []string{
constant.ControllerBackup,
constant.ControllerBackupOperations,
constant.ControllerBackupDeletion,
constant.ControllerBackupFinalizer,
constant.ControllerBackupSync,
constant.ControllerDownloadRequest,
constant.ControllerGarbageCollection,
constant.ControllerBackupRepo,
constant.ControllerRestore,
constant.ControllerRestoreOperations,
constant.ControllerSchedule,
constant.ControllerServerStatusRequest,
constant.ControllerRestoreFinalizer,
}

/*
High priorities:
- Custom Resource Definitions come before Custom Resource so that they can be
restored with their corresponding CRD.
- Namespaces go second because all namespaced resources depend on them.
- Storage Classes are needed to create PVs and PVCs correctly.
- VolumeSnapshotClasses are needed to provision volumes using volumesnapshots
- VolumeSnapshotContents are needed as they contain the handle to the volume snapshot in the
storage provider
- VolumeSnapshots are needed to create PVCs using the VolumeSnapshot as their data source.
- DataUploads need to restore before PVC for Snapshot DataMover to work, because PVC needs the DataUploadResults to create DataDownloads.
- PVs go before PVCs because PVCs depend on them.
- PVCs go before pods or controllers so they can be mounted as volumes.
- Service accounts go before secrets so service account token secrets can be filled automatically.
- Secrets and ConfigMaps go before pods or controllers so they can be mounted
as volumes.
- Limit ranges go before pods or controllers so pods can use them.
- Pods go before controllers so they can be explicitly restored and potentially
have pod volume restores run before controllers adopt the pods.
- Replica sets go before deployments/other controllers so they can be explicitly
restored and be adopted by controllers.
- CAPI ClusterClasses go before Clusters.
- Endpoints go before Services so no new Endpoints will be created
- Services go before Clusters so they can be adopted by AKO-operator and no new Services will be created
for the same clusters
Low priorities:
- Tanzu ClusterBootstraps go last as it can reference any other kind of resources.
- ClusterBootstraps go before CAPI Clusters otherwise a new default ClusterBootstrap object is created for the cluster
- CAPI Clusters come before ClusterResourceSets because failing to do so means the CAPI controller-manager will panic.
Both Clusters and ClusterResourceSets need to come before ClusterResourceSetBinding in order to properly restore workload clusters.
See https://github.com/kubernetes-sigs/cluster-api/issues/4105
*/
defaultRestorePriorities = types.Priorities{
HighPriorities: []string{
"customresourcedefinitions",
"namespaces",
"storageclasses",
"volumesnapshotclass.snapshot.storage.k8s.io",
"volumesnapshotcontents.snapshot.storage.k8s.io",
"volumesnapshots.snapshot.storage.k8s.io",
"datauploads.velero.io",
"persistentvolumes",
"persistentvolumeclaims",
"serviceaccounts",
"secrets",
"configmaps",
"limitranges",
"pods",
// we fully qualify replicasets.apps because prior to Kubernetes 1.16, replicasets also
// existed in the extensions API group, but we back up replicasets from "apps" so we want
// to ensure that we prioritize restoring from "apps" too, since this is how they're stored
// in the backup.
"replicasets.apps",
"clusterclasses.cluster.x-k8s.io",
"endpoints",
"services",
},
LowPriorities: []string{
"clusterbootstraps.run.tanzu.vmware.com",
"clusters.cluster.x-k8s.io",
"clusterresourcesets.addons.cluster.x-k8s.io",
},
}
)

type Config struct {
PluginDir string
MetricsAddress string
DefaultBackupLocation string // TODO(2.0) Deprecate defaultBackupLocation
BackupSyncPeriod time.Duration
PodVolumeOperationTimeout time.Duration
ResourceTerminatingTimeout time.Duration
DefaultBackupTTL time.Duration
StoreValidationFrequency time.Duration
DefaultCSISnapshotTimeout time.Duration
DefaultItemOperationTimeout time.Duration
ResourceTimeout time.Duration
RestoreResourcePriorities types.Priorities
DefaultVolumeSnapshotLocations flag.Map
RestoreOnly bool
DisabledControllers []string
ClientQPS float32
ClientBurst int
ClientPageSize int
ProfilerAddress string
LogLevel *logging.LevelFlag
LogFormat *logging.FormatFlag
RepoMaintenanceFrequency time.Duration
GarbageCollectionFrequency time.Duration
ItemOperationSyncFrequency time.Duration
DefaultVolumesToFsBackup bool
UploaderType string
MaxConcurrentK8SConnections int
DefaultSnapshotMoveData bool
DisableInformerCache bool
ScheduleSkipImmediately bool
MaintenanceCfg repository.MaintenanceConfig
BackukpRepoConfig string
CredentialsDirectory string
}

func GetDefaultConfig() *Config {
config := &Config{
PluginDir: "/plugins",
MetricsAddress: defaultMetricsAddress,
DefaultBackupLocation: "default",
DefaultVolumeSnapshotLocations: flag.NewMap().WithKeyValueDelimiter(':'),
BackupSyncPeriod: defaultBackupSyncPeriod,
DefaultBackupTTL: defaultBackupTTL,
DefaultCSISnapshotTimeout: defaultCSISnapshotTimeout,
DefaultItemOperationTimeout: defaultItemOperationTimeout,
ResourceTimeout: resourceTimeout,
StoreValidationFrequency: defaultStoreValidationFrequency,
PodVolumeOperationTimeout: defaultPodVolumeOperationTimeout,
RestoreResourcePriorities: defaultRestorePriorities,
ClientQPS: defaultClientQPS,
ClientBurst: defaultClientBurst,
ClientPageSize: defaultClientPageSize,
ProfilerAddress: defaultProfilerAddress,
ResourceTerminatingTimeout: defaultResourceTerminatingTimeout,
LogLevel: logging.LogLevelFlag(logrus.InfoLevel),
LogFormat: logging.NewFormatFlag(),
DefaultVolumesToFsBackup: podvolume.DefaultVolumesToFsBackup,
UploaderType: uploader.ResticType,
MaxConcurrentK8SConnections: defaultMaxConcurrentK8SConnections,
DefaultSnapshotMoveData: false,
DisableInformerCache: defaultDisableInformerCache,
ScheduleSkipImmediately: false,
CredentialsDirectory: defaultCredentialsDirectory,
}

config.MaintenanceCfg = repository.MaintenanceConfig{
KeepLatestMaitenanceJobs: repository.DefaultKeepLatestMaitenanceJobs,
// maintenance job log setting inherited from velero server
FormatFlag: config.LogFormat,
LogLevelFlag: config.LogLevel,
}

return config
}

func (c *Config) BindFlags(flags *pflag.FlagSet) {
flags.Var(c.LogLevel, "log-level", fmt.Sprintf("The level at which to log. Valid values are %s.", strings.Join(c.LogLevel.AllowedValues(), ", ")))
flags.Var(c.LogFormat, "log-format", fmt.Sprintf("The format for log output. Valid values are %s.", strings.Join(c.LogFormat.AllowedValues(), ", ")))
flags.StringVar(&c.PluginDir, "plugin-dir", c.PluginDir, "Directory containing Velero plugins")
flags.StringVar(&c.MetricsAddress, "metrics-address", c.MetricsAddress, "The address to expose prometheus metrics")
flags.DurationVar(&c.BackupSyncPeriod, "backup-sync-period", c.BackupSyncPeriod, "How often to ensure all Velero backups in object storage exist as Backup API objects in the cluster. This is the default sync period if none is explicitly specified for a backup storage location.")
flags.DurationVar(&c.PodVolumeOperationTimeout, "fs-backup-timeout", c.PodVolumeOperationTimeout, "How long pod volume file system backups/restores should be allowed to run before timing out.")
flags.BoolVar(&c.RestoreOnly, "restore-only", c.RestoreOnly, "Run in a mode where only restores are allowed; backups, schedules, and garbage-collection are all disabled. DEPRECATED: this flag will be removed in v2.0. Use read-only backup storage locations instead.")
flags.StringSliceVar(&c.DisabledControllers, "disable-controllers", c.DisabledControllers, fmt.Sprintf("List of controllers to disable on startup. Valid values are %s", strings.Join(DisableableControllers, ",")))
flags.Var(&c.RestoreResourcePriorities, "restore-resource-priorities", "Desired order of resource restores, the priority list contains two parts which are split by \"-\" element. The resources before \"-\" element are restored first as high priorities, the resources after \"-\" element are restored last as low priorities, and any resource not in the list will be restored alphabetically between the high and low priorities.")
flags.StringVar(&c.DefaultBackupLocation, "default-backup-storage-location", c.DefaultBackupLocation, "Name of the default backup storage location. DEPRECATED: this flag will be removed in v2.0. Use \"velero backup-location set --default\" instead.")
flags.DurationVar(&c.StoreValidationFrequency, "store-validation-frequency", c.StoreValidationFrequency, "How often to verify if the storage is valid. Optional. Set this to `0s` to disable sync. Default 1 minute.")
flags.Float32Var(&c.ClientQPS, "client-qps", c.ClientQPS, "Maximum number of requests per second by the server to the Kubernetes API once the burst limit has been reached.")
flags.IntVar(&c.ClientBurst, "client-burst", c.ClientBurst, "Maximum number of requests by the server to the Kubernetes API in a short period of time.")
flags.IntVar(&c.ClientPageSize, "client-page-size", c.ClientPageSize, "Page size of requests by the server to the Kubernetes API when listing objects during a backup. Set to 0 to disable paging.")
flags.StringVar(&c.ProfilerAddress, "profiler-address", c.ProfilerAddress, "The address to expose the pprof profiler.")
flags.DurationVar(&c.ResourceTerminatingTimeout, "terminating-resource-timeout", c.ResourceTerminatingTimeout, "How long to wait on persistent volumes and namespaces to terminate during a restore before timing out.")
flags.DurationVar(&c.DefaultBackupTTL, "default-backup-ttl", c.DefaultBackupTTL, "How long to wait by default before backups can be garbage collected.")
flags.DurationVar(&c.RepoMaintenanceFrequency, "default-repo-maintain-frequency", c.RepoMaintenanceFrequency, "How often 'maintain' is run for backup repositories by default.")
flags.DurationVar(&c.GarbageCollectionFrequency, "garbage-collection-frequency", c.GarbageCollectionFrequency, "How often garbage collection is run for expired backups.")
flags.DurationVar(&c.ItemOperationSyncFrequency, "item-operation-sync-frequency", c.ItemOperationSyncFrequency, "How often to check status on backup/restore operations after backup/restore processing. Default is 10 seconds")
flags.BoolVar(&c.DefaultVolumesToFsBackup, "default-volumes-to-fs-backup", c.DefaultVolumesToFsBackup, "Backup all volumes with pod volume file system backup by default.")
flags.StringVar(&c.UploaderType, "uploader-type", c.UploaderType, "Type of uploader to handle the transfer of data of pod volumes")
flags.DurationVar(&c.DefaultItemOperationTimeout, "default-item-operation-timeout", c.DefaultItemOperationTimeout, "How long to wait on asynchronous BackupItemActions and RestoreItemActions to complete before timing out. Default is 4 hours")
flags.DurationVar(&c.ResourceTimeout, "resource-timeout", c.ResourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
flags.IntVar(&c.MaxConcurrentK8SConnections, "max-concurrent-k8s-connections", c.MaxConcurrentK8SConnections, "Max concurrent connections number that Velero can create with kube-apiserver. Default is 30.")
flags.BoolVar(&c.DefaultSnapshotMoveData, "default-snapshot-move-data", c.DefaultSnapshotMoveData, "Move data by default for all snapshots supporting data movement.")
flags.BoolVar(&c.DisableInformerCache, "disable-informer-cache", c.DisableInformerCache, "Disable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable).")
flags.BoolVar(&c.ScheduleSkipImmediately, "schedule-skip-immediately", c.ScheduleSkipImmediately, "Skip the first scheduled backup immediately after creating a schedule. Default is false (don't skip).")
flags.IntVar(&c.MaintenanceCfg.KeepLatestMaitenanceJobs, "keep-latest-maintenance-jobs", c.MaintenanceCfg.KeepLatestMaitenanceJobs, "Number of latest maintenance jobs to keep each repository. Optional.")
flags.StringVar(&c.MaintenanceCfg.CPURequest, "maintenance-job-cpu-request", c.MaintenanceCfg.CPURequest, "CPU request for maintenance job. Default is no limit.")
flags.StringVar(&c.MaintenanceCfg.MemRequest, "maintenance-job-mem-request", c.MaintenanceCfg.MemRequest, "Memory request for maintenance job. Default is no limit.")
flags.StringVar(&c.MaintenanceCfg.CPULimit, "maintenance-job-cpu-limit", c.MaintenanceCfg.CPULimit, "CPU limit for maintenance job. Default is no limit.")
flags.StringVar(&c.MaintenanceCfg.MemLimit, "maintenance-job-mem-limit", c.MaintenanceCfg.MemLimit, "Memory limit for maintenance job. Default is no limit.")
flags.StringVar(&c.BackukpRepoConfig, "backup-repository-config", c.BackukpRepoConfig, "The name of configMap containing backup repository configurations.")
flags.Var(&c.DefaultVolumeSnapshotLocations, "default-volume-snapshot-locations", "List of unique volume providers and default volume snapshot location (provider1:location-01,provider2:location-02,...)")
}
19 changes: 19 additions & 0 deletions pkg/cmd/server/config/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package config

import (
"testing"

"github.com/spf13/pflag"
"github.com/stretchr/testify/assert"
)

func TestGetDefaultConfig(t *testing.T) {
config := GetDefaultConfig()
assert.Equal(t, "info", config.MaintenanceCfg.LogLevelFlag.String())
}

func TestBindFlags(t *testing.T) {
config := GetDefaultConfig()
config.BindFlags(pflag.CommandLine)
assert.Equal(t, "info", config.MaintenanceCfg.LogLevelFlag.String())
}
6 changes: 6 additions & 0 deletions pkg/cmd/server/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ func NewCommand(f client.Factory) *cobra.Command {
Hidden: true,
Short: "INTERNAL COMMAND ONLY - not intended to be run directly by users",
Run: func(c *cobra.Command, args []string) {
config := pluginServer.GetConfig()
f.SetClientQPS(config.ClientQPS)
f.SetClientBurst(config.ClientBurst)

Check warning on line 50 in pkg/cmd/server/plugin/plugin.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/plugin/plugin.go#L48-L50

Added lines #L48 - L50 were not covered by tests
pluginServer = pluginServer.
RegisterBackupItemAction(
"velero.io/pv",
Expand Down Expand Up @@ -197,6 +200,9 @@ func NewCommand(f client.Factory) *cobra.Command {
}
pluginServer.Serve()
},
FParseErrWhitelist: cobra.FParseErrWhitelist{ // Velero.io word list : ignore
UnknownFlags: true,
},
}
pluginServer.BindFlags(c.Flags())
return c
Expand Down
Loading

0 comments on commit dc6eeaf

Please sign in to comment.