Skip to content

Commit

Permalink
Make informer cache disabled by default
Browse files Browse the repository at this point in the history
Make informer cache disabled by default

Fixes vmware-tanzu#7264

Signed-off-by: Wenkai Yin(尹文开) <[email protected]>
  • Loading branch information
ywk253100 committed Jan 8, 2024
1 parent 72f2da9 commit 4cf8812
Show file tree
Hide file tree
Showing 16 changed files with 81 additions and 83 deletions.
8 changes: 4 additions & 4 deletions pkg/cmd/cli/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ type Options struct {
DefaultVolumesToFsBackup bool
UploaderType string
DefaultSnapshotMoveData bool
DisableInformerCache bool
EnableInformerCache bool
ScheduleSkipImmediately bool
}

Expand Down Expand Up @@ -126,7 +126,7 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.DefaultVolumesToFsBackup, "default-volumes-to-fs-backup", o.DefaultVolumesToFsBackup, "Bool flag to configure Velero server to use pod volume file system backup by default for all volumes on all backups. Optional.")
flags.StringVar(&o.UploaderType, "uploader-type", o.UploaderType, fmt.Sprintf("The type of uploader to transfer the data of pod volumes, the supported values are '%s', '%s'", uploader.ResticType, uploader.KopiaType))
flags.BoolVar(&o.DefaultSnapshotMoveData, "default-snapshot-move-data", o.DefaultSnapshotMoveData, "Bool flag to configure Velero server to move data by default for all snapshots supporting data movement. Optional.")
flags.BoolVar(&o.DisableInformerCache, "disable-informer-cache", o.DisableInformerCache, "Disable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable). Optional.")
flags.BoolVar(&o.EnableInformerCache, "enable-informer-cache", o.EnableInformerCache, "Enable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (disable). Optional.")
flags.BoolVar(&o.ScheduleSkipImmediately, "schedule-skip-immediately", o.ScheduleSkipImmediately, "Skip the first scheduled backup immediately after creating a schedule. Default is false (don't skip).")
}

Expand Down Expand Up @@ -155,7 +155,7 @@ func NewInstallOptions() *Options {
DefaultVolumesToFsBackup: false,
UploaderType: uploader.KopiaType,
DefaultSnapshotMoveData: false,
DisableInformerCache: true,
EnableInformerCache: false,
ScheduleSkipImmediately: false,
}
}
Expand Down Expand Up @@ -222,7 +222,7 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
DefaultVolumesToFsBackup: o.DefaultVolumesToFsBackup,
UploaderType: o.UploaderType,
DefaultSnapshotMoveData: o.DefaultSnapshotMoveData,
DisableInformerCache: o.DisableInformerCache,
EnableInformerCache: o.EnableInformerCache,
ScheduleSkipImmediately: o.ScheduleSkipImmediately,
}, nil
}
Expand Down
8 changes: 3 additions & 5 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ const (
defaultCredentialsDirectory = "/tmp/credentials"

defaultMaxConcurrentK8SConnections = 30
defaultDisableInformerCache = false
)

type serverConfig struct {
Expand All @@ -132,7 +131,7 @@ type serverConfig struct {
uploaderType string
maxConcurrentK8SConnections int
defaultSnapshotMoveData bool
disableInformerCache bool
enableInformerCache bool
scheduleSkipImmediately bool
}

Expand Down Expand Up @@ -163,7 +162,6 @@ func NewCommand(f client.Factory) *cobra.Command {
uploaderType: uploader.ResticType,
maxConcurrentK8SConnections: defaultMaxConcurrentK8SConnections,
defaultSnapshotMoveData: false,
disableInformerCache: defaultDisableInformerCache,
scheduleSkipImmediately: false,
}
)
Expand Down Expand Up @@ -236,7 +234,7 @@ func NewCommand(f client.Factory) *cobra.Command {
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
command.Flags().IntVar(&config.maxConcurrentK8SConnections, "max-concurrent-k8s-connections", config.maxConcurrentK8SConnections, "Max concurrent connections number that Velero can create with kube-apiserver. Default is 30.")
command.Flags().BoolVar(&config.defaultSnapshotMoveData, "default-snapshot-move-data", config.defaultSnapshotMoveData, "Move data by default for all snapshots supporting data movement.")
command.Flags().BoolVar(&config.disableInformerCache, "disable-informer-cache", config.disableInformerCache, "Disable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable).")
command.Flags().BoolVar(&config.enableInformerCache, "enable-informer-cache", config.enableInformerCache, "Enable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (disable).")
command.Flags().BoolVar(&config.scheduleSkipImmediately, "schedule-skip-immediately", config.scheduleSkipImmediately, "Skip the first scheduled backup immediately after creating a schedule. Default is false (don't skip).")

return command
Expand Down Expand Up @@ -951,7 +949,7 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.metrics,
s.config.formatFlag.Parse(),
s.config.defaultItemOperationTimeout,
s.config.disableInformerCache,
s.config.enableInformerCache,
)

if err = r.SetupWithManager(s.mgr); err != nil {
Expand Down
26 changes: 13 additions & 13 deletions pkg/controller/restore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ type restoreReconciler struct {
logFormat logging.Format
clock clock.WithTickerAndDelayedExecution
defaultItemOperationTimeout time.Duration
disableInformerCache bool
enableInformerCache bool

newPluginManager func(logger logrus.FieldLogger) clientmgmt.Manager
backupStoreGetter persistence.ObjectBackupStoreGetter
Expand All @@ -126,7 +126,7 @@ func NewRestoreReconciler(
metrics *metrics.ServerMetrics,
logFormat logging.Format,
defaultItemOperationTimeout time.Duration,
disableInformerCache bool,
enableInformerCache bool,
) *restoreReconciler {
r := &restoreReconciler{
ctx: ctx,
Expand All @@ -139,7 +139,7 @@ func NewRestoreReconciler(
logFormat: logFormat,
clock: &clock.RealClock{},
defaultItemOperationTimeout: defaultItemOperationTimeout,
disableInformerCache: disableInformerCache,
enableInformerCache: enableInformerCache,

// use variables to refer to these functions so they can be
// replaced with fakes for testing.
Expand Down Expand Up @@ -540,16 +540,16 @@ func (r *restoreReconciler) runValidatedRestore(restore *api.Restore, info backu
}

restoreReq := &pkgrestore.Request{
Log: restoreLog,
Restore: restore,
Backup: info.backup,
PodVolumeBackups: podVolumeBackups,
VolumeSnapshots: volumeSnapshots,
BackupReader: backupFile,
ResourceModifiers: resourceModifiers,
DisableInformerCache: r.disableInformerCache,
CSIVolumeSnapshots: csiVolumeSnapshots,
VolumeInfoMap: backupVolumeInfoMap,
Log: restoreLog,
Restore: restore,
Backup: info.backup,
PodVolumeBackups: podVolumeBackups,
VolumeSnapshots: volumeSnapshots,
BackupReader: backupFile,
ResourceModifiers: resourceModifiers,
EnableInformerCache: r.enableInformerCache,
CSIVolumeSnapshots: csiVolumeSnapshots,
VolumeInfoMap: backupVolumeInfoMap,
}
restoreWarnings, restoreErrors := r.restorer.RestoreWithResolvers(restoreReq, actionsResolver, pluginManager)

Expand Down
10 changes: 5 additions & 5 deletions pkg/controller/restore_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func TestFetchBackupInfo(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
false,
true,
)

if test.backupStoreError == nil {
Expand Down Expand Up @@ -193,7 +193,7 @@ func TestProcessQueueItemSkips(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
false,
true,
)

_, err := r.Reconcile(context.Background(), ctrl.Request{NamespacedName: types.NamespacedName{
Expand Down Expand Up @@ -461,7 +461,7 @@ func TestRestoreReconcile(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
false,
true,
)

r.clock = clocktesting.NewFakeClock(now)
Expand Down Expand Up @@ -639,7 +639,7 @@ func TestValidateAndCompleteWhenScheduleNameSpecified(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
false,
true,
)

restore := &velerov1api.Restore{
Expand Down Expand Up @@ -732,7 +732,7 @@ func TestValidateAndCompleteWithResourceModifierSpecified(t *testing.T) {
metrics.NewServerMetrics(),
formatFlag,
60*time.Minute,
false,
true,
)

restore := &velerov1api.Restore{
Expand Down
10 changes: 5 additions & 5 deletions pkg/install/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ type podTemplateConfig struct {
uploaderType string
defaultSnapshotMoveData bool
privilegedNodeAgent bool
disableInformerCache bool
enableInformerCache bool
scheduleSkipImmediately bool
}

Expand Down Expand Up @@ -153,9 +153,9 @@ func WithDefaultSnapshotMoveData() podTemplateOption {
}
}

func WithDisableInformerCache() podTemplateOption {
func WithEnableInformerCache() podTemplateOption {
return func(c *podTemplateConfig) {
c.disableInformerCache = true
c.enableInformerCache = true
}
}

Expand Down Expand Up @@ -206,8 +206,8 @@ func Deployment(namespace string, opts ...podTemplateOption) *appsv1.Deployment
args = append(args, "--default-snapshot-move-data=true")
}

if c.disableInformerCache {
args = append(args, "--disable-informer-cache=true")
if c.enableInformerCache {
args = append(args, "--enable-informer-cache=true")
}

if c.scheduleSkipImmediately {
Expand Down
4 changes: 2 additions & 2 deletions pkg/install/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func TestDeployment(t *testing.T) {
deploy = Deployment("velero", WithServiceAccountName("test-sa"))
assert.Equal(t, "test-sa", deploy.Spec.Template.Spec.ServiceAccountName)

deploy = Deployment("velero", WithDisableInformerCache())
deploy = Deployment("velero", WithEnableInformerCache())
assert.Len(t, deploy.Spec.Template.Spec.Containers[0].Args, 2)
assert.Equal(t, "--disable-informer-cache=true", deploy.Spec.Template.Spec.Containers[0].Args[1])
assert.Equal(t, "--enable-informer-cache=true", deploy.Spec.Template.Spec.Containers[0].Args[1])
}
6 changes: 3 additions & 3 deletions pkg/install/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ type VeleroOptions struct {
DefaultVolumesToFsBackup bool
UploaderType string
DefaultSnapshotMoveData bool
DisableInformerCache bool
EnableInformerCache bool
ScheduleSkipImmediately bool
}

Expand Down Expand Up @@ -362,8 +362,8 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList {
deployOpts = append(deployOpts, WithDefaultSnapshotMoveData())
}

if o.DisableInformerCache {
deployOpts = append(deployOpts, WithDisableInformerCache())
if o.EnableInformerCache {
deployOpts = append(deployOpts, WithEnableInformerCache())
}

deploy := Deployment(o.Namespace, deployOpts...)
Expand Down
22 changes: 11 additions & 11 deletions pkg/restore/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,17 @@ func resourceKey(obj runtime.Object) string {
type Request struct {
*velerov1api.Restore

Log logrus.FieldLogger
Backup *velerov1api.Backup
PodVolumeBackups []*velerov1api.PodVolumeBackup
VolumeSnapshots []*volume.Snapshot
BackupReader io.Reader
RestoredItems map[itemKey]restoredItemStatus
itemOperationsList *[]*itemoperation.RestoreOperation
ResourceModifiers *resourcemodifiers.ResourceModifiers
DisableInformerCache bool
CSIVolumeSnapshots []*snapshotv1api.VolumeSnapshot
VolumeInfoMap map[string]internalVolume.VolumeInfo
Log logrus.FieldLogger
Backup *velerov1api.Backup
PodVolumeBackups []*velerov1api.PodVolumeBackup
VolumeSnapshots []*volume.Snapshot
BackupReader io.Reader
RestoredItems map[itemKey]restoredItemStatus
itemOperationsList *[]*itemoperation.RestoreOperation
ResourceModifiers *resourcemodifiers.ResourceModifiers
EnableInformerCache bool
CSIVolumeSnapshots []*snapshotv1api.VolumeSnapshot
VolumeInfoMap map[string]internalVolume.VolumeInfo
}

type restoredItemStatus struct {
Expand Down
12 changes: 6 additions & 6 deletions pkg/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ func (kr *kubernetesRestorer) RestoreWithResolvers(
kbClient: kr.kbClient,
itemOperationsList: req.GetItemOperationsList(),
resourceModifiers: req.ResourceModifiers,
disableInformerCache: req.DisableInformerCache,
enableInformerCache: req.EnableInformerCache,
featureVerifier: kr.featureVerifier,
hookTracker: hook.NewHookTracker(),
volumeInfoMap: req.VolumeInfoMap,
Expand Down Expand Up @@ -378,7 +378,7 @@ type restoreContext struct {
kbClient crclient.Client
itemOperationsList *[]*itemoperation.RestoreOperation
resourceModifiers *resourcemodifiers.ResourceModifiers
disableInformerCache bool
enableInformerCache bool
featureVerifier features.Verifier
hookTracker *hook.HookTracker
volumeInfoMap map[string]internalVolume.VolumeInfo
Expand Down Expand Up @@ -446,7 +446,7 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) {
}()

// Need to stop all informers if enabled
if !ctx.disableInformerCache {
if ctx.enableInformerCache {
defer func() {
// Call the cancel func to close the channel for each started informer
for _, factory := range ctx.dynamicInformerFactories {
Expand Down Expand Up @@ -578,7 +578,7 @@ func (ctx *restoreContext) execute() (results.Result, results.Result) {
errs.Merge(&e)

// initialize informer caches for selected resources if enabled
if !ctx.disableInformerCache {
if ctx.enableInformerCache {
// CRD informer will have already been initialized if any CRDs were created,
// but already-initialized informers aren't re-initialized because getGenericInformer
// looks for an existing one first.
Expand Down Expand Up @@ -1537,7 +1537,7 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso

// only attempt Get before Create if using informer cache, otherwise this will slow down restore into
// new namespace
if !ctx.disableInformerCache {
if ctx.enableInformerCache {
ctx.log.Debugf("Checking for existence %s: %v", obj.GroupVersionKind().Kind, name)
fromCluster, err = ctx.getResource(groupResource, obj, namespace, name)
}
Expand All @@ -1561,7 +1561,7 @@ func (ctx *restoreContext) restoreItem(obj *unstructured.Unstructured, groupReso
// check for the existence of the object that failed creation due to alreadyExist in cluster, if no error then it implies that object exists.
// and if err then itemExists remains false as we were not able to confirm the existence of the object via Get call or creation call.
// We return the get error as a warning to notify the user that the object could exist in cluster and we were not able to confirm it.
if !ctx.disableInformerCache {
if ctx.enableInformerCache {
fromCluster, err = ctx.getResource(groupResource, obj, namespace, name)
} else {
fromCluster, err = resourceClient.Get(name, metav1.GetOptions{})
Expand Down
22 changes: 11 additions & 11 deletions pkg/restore/restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ func TestRestoreItems(t *testing.T) {
tarball io.Reader
want []*test.APIResource
expectedRestoreItems map[itemKey]restoredItemStatus
disableInformer bool
enableInformer bool
}{
{
name: "metadata uid/resourceVersion/etc. gets removed",
Expand Down Expand Up @@ -1216,7 +1216,7 @@ func TestRestoreItems(t *testing.T) {
apiResources: []*test.APIResource{
test.Secrets(builder.ForSecret("ns-1", "sa-1").Data(map[string][]byte{"foo": []byte("bar")}).Result()),
},
disableInformer: true,
enableInformer: false,
want: []*test.APIResource{
test.Secrets(builder.ForSecret("ns-1", "sa-1").ObjectMeta(builder.WithLabels("velero.io/backup-name", "backup-1", "velero.io/restore-name", "restore-1")).Data(map[string][]byte{"key-1": []byte("value-1")}).Result()),
},
Expand All @@ -1235,7 +1235,7 @@ func TestRestoreItems(t *testing.T) {
apiResources: []*test.APIResource{
test.Secrets(builder.ForSecret("ns-1", "sa-1").Data(map[string][]byte{"foo": []byte("bar")}).Result()),
},
disableInformer: false,
enableInformer: true,
want: []*test.APIResource{
test.Secrets(builder.ForSecret("ns-1", "sa-1").ObjectMeta(builder.WithLabels("velero.io/backup-name", "backup-1", "velero.io/restore-name", "restore-1")).Data(map[string][]byte{"key-1": []byte("value-1")}).Result()),
},
Expand Down Expand Up @@ -1394,14 +1394,14 @@ func TestRestoreItems(t *testing.T) {
}

data := &Request{
Log: h.log,
Restore: tc.restore,
Backup: tc.backup,
PodVolumeBackups: nil,
VolumeSnapshots: nil,
BackupReader: tc.tarball,
RestoredItems: map[itemKey]restoredItemStatus{},
DisableInformerCache: tc.disableInformer,
Log: h.log,
Restore: tc.restore,
Backup: tc.backup,
PodVolumeBackups: nil,
VolumeSnapshots: nil,
BackupReader: tc.tarball,
RestoredItems: map[itemKey]restoredItemStatus{},
EnableInformerCache: tc.enableInformer,
}
warnings, errs := h.restorer.Restore(
data,
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ UPLOADER_TYPE ?=

SNAPSHOT_MOVE_DATA ?= false
DATA_MOVER_PLUGIN ?=
DISABLE_INFORMER_CACHE ?= false
ENABLE_INFORMER_CACHE ?= true


.PHONY:ginkgo
Expand Down Expand Up @@ -157,7 +157,7 @@ run: ginkgo
-standby-cluster-plugins=$(STANDBY_CLUSTER_PLUGINS) \
-standby-cluster-object-store-provider=$(STANDBY_CLUSTER_OBJECT_STORE_PROVIDER) \
-debug-velero-pod-restart=$(DEBUG_VELERO_POD_RESTART) \
-disable-informer-cache=$(DISABLE_INFORMER_CACHE)
-enable-informer-cache=$(ENABLE_INFORMER_CACHE)


build: ginkgo
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func init() {
flag.StringVar(&VeleroCfg.StandbyClusterPlugins, "standby-cluster-plugins", "", "plugins provider for standby cluster.")
flag.StringVar(&VeleroCfg.StandbyClusterOjbectStoreProvider, "standby-cluster-object-store-provider", "", "object store provider for standby cluster.")
flag.BoolVar(&VeleroCfg.DebugVeleroPodRestart, "debug-velero-pod-restart", false, "a switch for debugging velero pod restart.")
flag.BoolVar(&VeleroCfg.DisableInformerCache, "disable-informer-cache", false, "a switch for disable informer cache.")
flag.BoolVar(&VeleroCfg.EnableInformerCache, "enable-informer-cache", true, "a switch for enable informer cache.")
}

var _ = Describe("[APIGroup][APIVersion] Velero tests with various CRD API group versions", APIGropuVersionsTest)
Expand Down
Loading

0 comments on commit 4cf8812

Please sign in to comment.