From d480f23ea99e9e9f61a2a440235ba05ce8308101 Mon Sep 17 00:00:00 2001
From: ishan tyagi
Date: Thu, 12 Jan 2023 15:29:32 +0530
Subject: [PATCH] Enhances the decision to take full snapshot during startup to avoid missing of any full-snapshot.

---
 pkg/miscellaneous/miscellaneous.go      | 13 ++++++++++++
 pkg/server/backuprestoreserver.go       |  6 +-----
 pkg/snapshot/snapshotter/snapshotter.go | 27 ++++++++++++++++++++++---
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/pkg/miscellaneous/miscellaneous.go b/pkg/miscellaneous/miscellaneous.go
index 5917314e0..c2ec8c7b0 100644
--- a/pkg/miscellaneous/miscellaneous.go
+++ b/pkg/miscellaneous/miscellaneous.go
@@ -520,3 +520,16 @@ func ParsePeerURL(initialAdvertisePeerURLs, podName string) (string, error) {
 	domaiName := fmt.Sprintf("%s.%s.%s", tokens[1], tokens[2], "svc")
 	return fmt.Sprintf("%s://%s.%s:%s", tokens[0], podName, domaiName, tokens[3]), nil
 }
+
+// GetPrevDayScheduledSnapTime returns the time exactly one calendar day
+// before nextSnapSchedule, keeping its wall-clock time and location.
+//
+// nextSnapSchedule is meant to be the next firing time of the full-snapshot
+// schedule. The result is the previous firing time only when that schedule
+// fires once a day.
+//
+// Time.AddDate normalizes dates the same way time.Date does, so the first
+// day of a month maps to the last day of the preceding month.
+func GetPrevDayScheduledSnapTime(nextSnapSchedule time.Time) time.Time {
+	return nextSnapSchedule.AddDate(0, 0, -1)
+}
diff --git a/pkg/server/backuprestoreserver.go b/pkg/server/backuprestoreserver.go
index 6a06717fd..eaade0e68 100644
--- a/pkg/server/backuprestoreserver.go
+++ b/pkg/server/backuprestoreserver.go
@@ -362,16 +362,12 @@ func (b *BackupRestoreServer) runEtcdProbeLoopWithSnapshotter(ctx context.Contex
 		// the delta snapshot memory limit), after which a full snapshot
 		// is taken and the regular snapshot schedule comes into effect.
 
-		// TODO: write code to find out if prev full snapshot is older than it is
-		// supposed to be, according to the given cron schedule, instead of the
-		// hard-coded "24 hours" full snapshot interval
-
 		// Temporary fix for missing alternate full snapshots for Gardener shoots
 		// with hibernation schedule set: change value from 24 ot 23.5 to
 		// accommodate for slight pod spin-up delays on shoot wake-up
 		const recentFullSnapshotPeriodInHours = 23.5
 		initialDeltaSnapshotTaken = false
-		if ssr.PrevFullSnapshot != nil && !ssr.PrevFullSnapshot.IsFinal && time.Since(ssr.PrevFullSnapshot.CreatedOn).Hours() <= recentFullSnapshotPeriodInHours {
+		if ssr.PrevFullSnapshot != nil && !ssr.PrevFullSnapshot.IsFinal && !ssr.IsScheduledFullSnapshotMissed() {
 			ssrStopped, err := ssr.CollectEventsSincePrevSnapshot(ssrStopCh)
 			if ssrStopped {
 				b.logger.Info("Snapshotter stopped.")
diff --git a/pkg/snapshot/snapshotter/snapshotter.go b/pkg/snapshot/snapshotter/snapshotter.go
index 5343a21b0..2bb394a9c 100644
--- a/pkg/snapshot/snapshotter/snapshotter.go
+++ b/pkg/snapshot/snapshotter/snapshotter.go
@@ -42,6 +42,10 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
+const (
+	recentFullSnapshotPeriodInHours = 23.5 // age in hours above which IsScheduledFullSnapshotMissed treats the previous full snapshot as missed
+)
+
 var (
 	emptyStruct   struct{}
 	snapstoreHash = make(map[string]interface{})
@@ -104,7 +108,7 @@ func NewSnapshotter(logger *logrus.Entry, config *brtypes.SnapshotterConfig, sto
 	sdl, err := cron.ParseStandard(config.FullSnapshotSchedule)
 	if err != nil {
 		// Ideally this should be validated before.
-		return nil, fmt.Errorf("invalid schedule provied %s : %v", config.FullSnapshotSchedule, err)
+		return nil, fmt.Errorf("invalid full snapshot schedule provided %s : %v", config.FullSnapshotSchedule, err)
 	}
 
 	var prevSnapshot *brtypes.Snapshot
@@ -473,12 +477,12 @@ func (ssr *Snapshotter) TakeDeltaSnapshot() (*brtypes.Snapshot, error) {
 	defer rc.Close()
 
 	if err := ssr.store.Save(*snap, rc); err != nil {
-		timeTaken := time.Now().Sub(startTime).Seconds()
+		timeTaken := time.Since(startTime).Seconds()
 		metrics.SnapshotDurationSeconds.With(prometheus.Labels{metrics.LabelKind: brtypes.SnapshotKindDelta, metrics.LabelSucceeded: metrics.ValueSucceededFalse}).Observe(timeTaken)
 		ssr.logger.Errorf("Error saving delta snapshots. %v", err)
 		return nil, err
 	}
-	timeTaken := time.Now().Sub(startTime).Seconds()
+	timeTaken := time.Since(startTime).Seconds()
 	metrics.SnapshotDurationSeconds.With(prometheus.Labels{metrics.LabelKind: brtypes.SnapshotKindDelta, metrics.LabelSucceeded: metrics.ValueSucceededTrue}).Observe(timeTaken)
 	logrus.Infof("Total time to save delta snapshot: %f seconds.", timeTaken)
 	ssr.prevSnapshot = snap
@@ -740,3 +744,20 @@ func (ssr *Snapshotter) checkSnapstoreSecretUpdate() bool {
 	snapstoreHash[ssr.snapstoreConfig.Provider] = newSnapstoreSecretHash
 	return true
 }
+
+// IsScheduledFullSnapshotMissed reports whether the last scheduled full
+// snapshot was missed: there is no previous full snapshot, it is older than
+// recentFullSnapshotPeriodInHours, or it will be by the next scheduled run.
+// A snapshot created exactly one day before that run is never missed.
+func (ssr *Snapshotter) IsScheduledFullSnapshotMissed() bool {
+	prev := ssr.PrevFullSnapshot
+	if prev == nil || time.Since(prev.CreatedOn).Hours() > recentFullSnapshotPeriodInHours {
+		return true
+	}
+	next := ssr.schedule.Next(time.Now())
+	// Equal compares instants; == would also compare Location and the monotonic reading.
+	if miscellaneous.GetPrevDayScheduledSnapTime(next).Equal(prev.CreatedOn) {
+		return false
+	}
+	return next.Sub(prev.CreatedOn).Hours() > recentFullSnapshotPeriodInHours
+}