Skip to content

Commit

Permalink
Perf improvements for existing resource restore
Browse files Browse the repository at this point in the history
Use informer cache with dynamic client for Get calls on restore
When enabled, also make the Get call before create.

Add server and install parameter to allow disabling this feature,
but enable by default

Signed-off-by: Scott Seago <[email protected]>
  • Loading branch information
sseago committed Aug 30, 2023
1 parent db6784a commit 76b72c1
Show file tree
Hide file tree
Showing 12 changed files with 231 additions and 27 deletions.
8 changes: 8 additions & 0 deletions pkg/client/dynamic.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@ package client

import (
"context"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/dynamic/dynamicinformer"
)

// DynamicFactory contains methods for retrieving dynamic clients for GroupVersionResources and
Expand All @@ -33,6 +35,8 @@ type DynamicFactory interface {
// ClientForGroupVersionResource returns a Dynamic client for the given group/version
// and resource for the given namespace.
ClientForGroupVersionResource(gv schema.GroupVersion, resource metav1.APIResource, namespace string) (Dynamic, error)
// DynamicSharedInformerFactoryForNamespace returns a DynamicSharedInformerFactory for the given namespace.
DynamicSharedInformerFactoryForNamespace(namespace string) dynamicinformer.DynamicSharedInformerFactory
}

// dynamicFactory implements DynamicFactory.
Expand All @@ -51,6 +55,10 @@ func (f *dynamicFactory) ClientForGroupVersionResource(gv schema.GroupVersion, r
}, nil
}

func (f *dynamicFactory) DynamicSharedInformerFactoryForNamespace(namespace string) dynamicinformer.DynamicSharedInformerFactory {
return dynamicinformer.NewFilteredDynamicSharedInformerFactory(f.dynamicClient, time.Minute, namespace, nil)
}

// Creator creates an object.
type Creator interface {
// Create creates an object.
Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/cli/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type Options struct {
Features string
DefaultVolumesToFsBackup bool
UploaderType string
UseInformerCacheForGet bool
}

// BindFlags adds command line values to the options struct.
Expand Down Expand Up @@ -118,6 +119,7 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
flags.StringVar(&o.Features, "features", o.Features, "Comma separated list of Velero feature flags to be set on the Velero deployment and the node-agent daemonset, if node-agent is enabled")
flags.BoolVar(&o.DefaultVolumesToFsBackup, "default-volumes-to-fs-backup", o.DefaultVolumesToFsBackup, "Bool flag to configure Velero server to use pod volume file system backup by default for all volumes on all backups. Optional.")
flags.StringVar(&o.UploaderType, "uploader-type", o.UploaderType, fmt.Sprintf("The type of uploader to transfer the data of pod volumes, the supported values are '%s', '%s'", uploader.ResticType, uploader.KopiaType))
flags.BoolVar(&o.UseInformerCacheForGet, "use-informer-cache-for-get", o.UseInformerCacheForGet, "Use informer cache for Get calls on restore. This will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is true. Optional.")
}

// NewInstallOptions instantiates a new, default InstallOptions struct.
Expand All @@ -144,6 +146,7 @@ func NewInstallOptions() *Options {
CRDsOnly: false,
DefaultVolumesToFsBackup: false,
UploaderType: uploader.KopiaType,
UseInformerCacheForGet: true,
}
}

Expand Down Expand Up @@ -206,6 +209,7 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
Features: strings.Split(o.Features, ","),
DefaultVolumesToFsBackup: o.DefaultVolumesToFsBackup,
UploaderType: o.UploaderType,
UseInformerCacheForGet: o.UseInformerCacheForGet,
}, nil
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ type serverConfig struct {
defaultVolumesToFsBackup bool
uploaderType string
maxConcurrentK8SConnections int
useInformerCacheForGet bool
}

func NewCommand(f client.Factory) *cobra.Command {
Expand Down Expand Up @@ -163,6 +164,7 @@ func NewCommand(f client.Factory) *cobra.Command {
defaultVolumesToFsBackup: podvolume.DefaultVolumesToFsBackup,
uploaderType: uploader.ResticType,
maxConcurrentK8SConnections: defaultMaxConcurrentK8SConnections,
useInformerCacheForGet: true,
}
)

Expand Down Expand Up @@ -233,6 +235,7 @@ func NewCommand(f client.Factory) *cobra.Command {
command.Flags().DurationVar(&config.defaultItemOperationTimeout, "default-item-operation-timeout", config.defaultItemOperationTimeout, "How long to wait on asynchronous BackupItemActions and RestoreItemActions to complete before timing out. Default is 4 hours")
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
command.Flags().IntVar(&config.maxConcurrentK8SConnections, "max-concurrent-k8s-connections", config.maxConcurrentK8SConnections, "Max concurrent connections number that Velero can create with kube-apiserver. Default is 30.")
command.Flags().BoolVar(&config.useInformerCacheForGet, "use-informer-cache-for-get", config.useInformerCacheForGet, "Use informer cache for Get calls on restore. This will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is true")

return command
}
Expand Down Expand Up @@ -933,6 +936,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.metrics,
s.config.formatFlag.Parse(),
s.config.defaultItemOperationTimeout,
s.config.useInformerCacheForGet,
)

if err = r.SetupWithManager(s.mgr); err != nil {
Expand Down
18 changes: 11 additions & 7 deletions pkg/controller/restore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ type restoreReconciler struct {
logFormat logging.Format
clock clock.WithTickerAndDelayedExecution
defaultItemOperationTimeout time.Duration
useInformerCacheForGet bool

newPluginManager func(logger logrus.FieldLogger) clientmgmt.Manager
backupStoreGetter persistence.ObjectBackupStoreGetter
Expand All @@ -123,6 +124,7 @@ func NewRestoreReconciler(
metrics *metrics.ServerMetrics,
logFormat logging.Format,
defaultItemOperationTimeout time.Duration,
useInformerCacheForGet bool,
) *restoreReconciler {
r := &restoreReconciler{
ctx: ctx,
Expand All @@ -135,6 +137,7 @@ func NewRestoreReconciler(
logFormat: logFormat,
clock: &clock.RealClock{},
defaultItemOperationTimeout: defaultItemOperationTimeout,
useInformerCacheForGet: useInformerCacheForGet,

// use variables to refer to these functions so they can be
// replaced with fakes for testing.
Expand Down Expand Up @@ -519,13 +522,14 @@ func (r *restoreReconciler) runValidatedRestore(restore *api.Restore, info backu
}

restoreReq := &pkgrestore.Request{
Log: restoreLog,
Restore: restore,
Backup: info.backup,
PodVolumeBackups: podVolumeBackups,
VolumeSnapshots: volumeSnapshots,
BackupReader: backupFile,
ResourceModifiers: resourceModifiers,
Log: restoreLog,
Restore: restore,
Backup: info.backup,
PodVolumeBackups: podVolumeBackups,
VolumeSnapshots: volumeSnapshots,
BackupReader: backupFile,
ResourceModifiers: resourceModifiers,
UseInformerCacheForGet: r.useInformerCacheForGet,
}
restoreWarnings, restoreErrors := r.restorer.RestoreWithResolvers(restoreReq, actionsResolver, pluginManager)

Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/restore_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ func TestFetchBackupInfo(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
true,
)

if test.backupStoreError == nil {
Expand Down Expand Up @@ -191,6 +192,7 @@ func TestProcessQueueItemSkips(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
true,
)

_, err := r.Reconcile(context.Background(), ctrl.Request{NamespacedName: types.NamespacedName{
Expand Down Expand Up @@ -445,6 +447,7 @@ func TestRestoreReconcile(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
true,
)

r.clock = clocktesting.NewFakeClock(now)
Expand Down Expand Up @@ -616,6 +619,7 @@ func TestValidateAndCompleteWhenScheduleNameSpecified(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
true,
)

restore := &velerov1api.Restore{
Expand Down Expand Up @@ -708,6 +712,7 @@ func TestValidateAndCompleteWithResourceModifierSpecified(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
true,
)

restore := &velerov1api.Restore{
Expand Down
15 changes: 14 additions & 1 deletion pkg/install/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type podTemplateConfig struct {
defaultVolumesToFsBackup bool
serviceAccountName string
uploaderType string
useInformerCacheForGet bool
}

func WithImage(image string) podTemplateOption {
Expand Down Expand Up @@ -136,6 +137,12 @@ func WithDefaultVolumesToFsBackup() podTemplateOption {
}
}

func WithUseInformerCacheForGet(useCache bool) podTemplateOption {
return func(c *podTemplateConfig) {
c.useInformerCacheForGet = useCache
}
}

func WithServiceAccountName(sa string) podTemplateOption {
return func(c *podTemplateConfig) {
c.serviceAccountName = sa
Expand All @@ -145,7 +152,8 @@ func WithServiceAccountName(sa string) podTemplateOption {
func Deployment(namespace string, opts ...podTemplateOption) *appsv1.Deployment {
// TODO: Add support for server args
c := &podTemplateConfig{
image: velero.DefaultVeleroImage(),
image: velero.DefaultVeleroImage(),
useInformerCacheForGet: true,
}

for _, opt := range opts {
Expand All @@ -167,6 +175,11 @@ func Deployment(namespace string, opts ...podTemplateOption) *appsv1.Deployment
args = append(args, "--default-volumes-to-fs-backup=true")
}

// default is true, so set if false
if !c.useInformerCacheForGet {
args = append(args, "--use-informer-cache-for-get=false")
}

if len(c.uploaderType) > 0 {
args = append(args, fmt.Sprintf("--uploader-type=%s", c.uploaderType))
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/install/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,11 @@ func TestDeployment(t *testing.T) {

deploy = Deployment("velero", WithServiceAccountName("test-sa"))
assert.Equal(t, "test-sa", deploy.Spec.Template.Spec.ServiceAccountName)

deploy = Deployment("velero", WithUseInformerCacheForGet(false))
assert.Len(t, deploy.Spec.Template.Spec.Containers[0].Args, 2)
assert.Equal(t, "--use-informer-cache-for-get=false", deploy.Spec.Template.Spec.Containers[0].Args[1])

deploy = Deployment("velero", WithUseInformerCacheForGet(true))
assert.Len(t, deploy.Spec.Template.Spec.Containers[0].Args, 1)
}
3 changes: 3 additions & 0 deletions pkg/install/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ type VeleroOptions struct {
Features []string
DefaultVolumesToFsBackup bool
UploaderType string
UseInformerCacheForGet bool
}

func AllCRDs() *unstructured.UnstructuredList {
Expand Down Expand Up @@ -343,6 +344,8 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList {
deployOpts = append(deployOpts, WithDefaultVolumesToFsBackup())
}

deployOpts = append(deployOpts, WithUseInformerCacheForGet(o.UseInformerCacheForGet))

deploy := Deployment(o.Namespace, deployOpts...)

if err := appendUnstructured(resources, deploy); err != nil {
Expand Down
17 changes: 9 additions & 8 deletions pkg/restore/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,15 @@ func resourceKey(obj runtime.Object) string {
type Request struct {
*velerov1api.Restore

Log logrus.FieldLogger
Backup *velerov1api.Backup
PodVolumeBackups []*velerov1api.PodVolumeBackup
VolumeSnapshots []*volume.Snapshot
BackupReader io.Reader
RestoredItems map[itemKey]restoredItemStatus
itemOperationsList *[]*itemoperation.RestoreOperation
ResourceModifiers *resourcemodifiers.ResourceModifiers
Log logrus.FieldLogger
Backup *velerov1api.Backup
PodVolumeBackups []*velerov1api.PodVolumeBackup
VolumeSnapshots []*volume.Snapshot
BackupReader io.Reader
RestoredItems map[itemKey]restoredItemStatus
itemOperationsList *[]*itemoperation.RestoreOperation
ResourceModifiers *resourcemodifiers.ResourceModifiers
UseInformerCacheForGet bool
}

type restoredItemStatus struct {
Expand Down
Loading

0 comments on commit 76b72c1

Please sign in to comment.