Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use full snapshot interval to compute Backup Ready conditions #906

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ require (
go.uber.org/mock v0.4.0
)

anveshreddy18 marked this conversation as resolved.
Show resolved Hide resolved
require github.com/robfig/cron/v3 v3.0.1

require (
cloud.google.com/go v0.112.0 // indirect
cloud.google.com/go/compute/metadata v0.3.0 // indirect
Expand Down Expand Up @@ -111,7 +113,6 @@ require (
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/common v0.45.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/spf13/afero v1.11.0 // indirect
Expand Down
21 changes: 15 additions & 6 deletions internal/health/condition/check_backup_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"time"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
"github.com/gardener/etcd-druid/internal/utils"

coordinationv1 "k8s.io/api/coordination/v1"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -48,13 +49,21 @@ func (a *backupReadyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) R

//Fetch snapshot leases
var (
fullSnapErr, incrSnapErr error
fullSnapLease = &coordinationv1.Lease{}
deltaSnapLease = &coordinationv1.Lease{}
fullSnapErr, incrSnapErr, err error
fullSnapLease = &coordinationv1.Lease{}
fullSnapshotInterval = 24 * time.Hour
deltaSnapLease = &coordinationv1.Lease{}
)
fullSnapErr = a.cl.Get(ctx, types.NamespacedName{Name: getFullSnapLeaseName(&etcd), Namespace: etcd.ObjectMeta.Namespace}, fullSnapLease)
incrSnapErr = a.cl.Get(ctx, types.NamespacedName{Name: getDeltaSnapLeaseName(&etcd), Namespace: etcd.ObjectMeta.Namespace}, deltaSnapLease)

// Compute the full snapshot interval if full snapshot schedule is set
if etcd.Spec.Backup.FullSnapshotSchedule != nil {
if fullSnapshotInterval, err = utils.ComputeScheduleInterval(*etcd.Spec.Backup.FullSnapshotSchedule); err != nil {
return result
}
}

//Set status to Unknown if errors in fetching snapshot leases or lease never renewed
if fullSnapErr != nil || incrSnapErr != nil || (fullSnapLease.Spec.RenewTime == nil && deltaSnapLease.Spec.RenewTime == nil) {
return result
Expand All @@ -66,8 +75,8 @@ func (a *backupReadyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) R

if fullLeaseRenewTime == nil && deltaLeaseRenewTime != nil {
// Most probable during reconcile of existing clusters if fresh leases are created
// Treat backup as succeeded if delta snap lease renewal happens in the required time window and full snap lease is not older than 24h.
if time.Since(deltaLeaseRenewTime.Time) < 2*etcd.Spec.Backup.DeltaSnapshotPeriod.Duration && time.Since(fullLeaseCreateTime.Time) < 24*time.Hour {
// Treat backup as succeeded if delta snap lease renewal happens in the required time window and full snap lease is not older than fullSnapshotInterval
if time.Since(deltaLeaseRenewTime.Time) < 2*etcd.Spec.Backup.DeltaSnapshotPeriod.Duration && time.Since(fullLeaseCreateTime.Time) < fullSnapshotInterval {
result.reason = BackupSucceeded
result.message = "Delta snapshot backup succeeded"
result.status = druidv1alpha1.ConditionTrue
Expand All @@ -82,7 +91,7 @@ func (a *backupReadyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) R
}
} else if deltaLeaseRenewTime != nil && fullLeaseRenewTime != nil {
//Both snap leases are maintained. Both are expected to be renewed periodically
if time.Since(deltaLeaseRenewTime.Time) < 2*etcd.Spec.Backup.DeltaSnapshotPeriod.Duration && time.Since(fullLeaseRenewTime.Time) < 24*time.Hour {
if time.Since(deltaLeaseRenewTime.Time) < 2*etcd.Spec.Backup.DeltaSnapshotPeriod.Duration && time.Since(fullLeaseRenewTime.Time) < fullSnapshotInterval {
result.reason = BackupSucceeded
result.message = "Snapshot backup succeeded"
result.status = druidv1alpha1.ConditionTrue
Expand Down
18 changes: 18 additions & 0 deletions internal/utils/miscellaneous.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import (
"fmt"
"maps"
"strings"
"time"

cron "github.com/robfig/cron/v3"
anveshreddy18 marked this conversation as resolved.
Show resolved Hide resolved
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand Down Expand Up @@ -63,3 +65,19 @@ func IfConditionOr[T any](condition bool, trueVal, falseVal T) T {
}
return falseVal
}

// ComputeScheduleInterval computes the interval between two consecutive activations of the given
// cron schedule. The interval is measured between the next activation after the current time and
// the activation that follows it.
//
// The schedule string is parsed with cron.ParseStandard. An error is returned if the schedule
// cannot be parsed, or if the schedule yields no upcoming activation (reported by the cron
// library as a zero time), since no meaningful interval exists in that case.
//
// Assumes that every cron activation is at equal intervals apart, based on cron schedules such as
// "once every X hours", "once every Y days", "at 1:00pm on every Tuesday", etc.
// TODO: write a new function to accurately compute the previous activation time from the cron schedule
// in order to compute when the previous activation of the cron schedule was supposed to have occurred,
// instead of relying on the assumption that all the cron activations are evenly spaced.
func ComputeScheduleInterval(cronSchedule string) (time.Duration, error) {
	schedule, err := cron.ParseStandard(cronSchedule)
	if err != nil {
		return 0, fmt.Errorf("parsing cron schedule %q: %w", cronSchedule, err)
	}

	// A syntactically valid schedule may still never fire (for example a day-of-month that
	// does not exist in the selected month). Next then returns the zero time; without this
	// guard the function would silently return an interval of 0.
	nextScheduledTime := schedule.Next(time.Now())
	if nextScheduledTime.IsZero() {
		return 0, fmt.Errorf("cron schedule %q has no upcoming activation", cronSchedule)
	}

	nextNextScheduledTime := schedule.Next(nextScheduledTime)
	if nextNextScheduledTime.IsZero() {
		return 0, fmt.Errorf("cron schedule %q has no activation after %s", cronSchedule, nextScheduledTime)
	}

	return nextNextScheduledTime.Sub(nextScheduledTime), nil
}