diff --git a/chart/etcd-backup-restore/templates/etcd-bootstrap-configmap.yaml b/chart/etcd-backup-restore/templates/etcd-bootstrap-configmap.yaml index f9ba123a1..685437c4d 100644 --- a/chart/etcd-backup-restore/templates/etcd-bootstrap-configmap.yaml +++ b/chart/etcd-backup-restore/templates/etcd-bootstrap-configmap.yaml @@ -35,11 +35,11 @@ data: check_and_start_etcd(){ while true; do - wget http://localhost:{{ .Values.servicePorts.backupRestore }}/initialization/status -S -O status; + wget "http://localhost:{{ .Values.servicePorts.backupRestore }}/initialization/status" -S -O status; STATUS=`cat status`; case $STATUS in "New") - wget http://localhost:{{ .Values.servicePorts.backupRestore }}/initialization/start?mode=$1 -S -O - ;; + wget "http://localhost:{{ .Values.servicePorts.backupRestore }}/initialization/start?mode=$1{{- if .Values.backup.failBelowRevision }}&failbelowrevision={{ int $.Values.backup.failBelowRevision }}{{- end }}" -S -O - ;; "Progress") sleep 1; continue;; diff --git a/chart/etcd-backup-restore/values.yaml b/chart/etcd-backup-restore/values.yaml index 7a40295db..55e2e5163 100644 --- a/chart/etcd-backup-restore/values.yaml +++ b/chart/etcd-backup-restore/values.yaml @@ -7,7 +7,7 @@ images: # etcd-backup-restore image to use etcdBackupRestore: repository: eu.gcr.io/gardener-project/gardener/etcdbrctl - tag: 0.6.4 + tag: 0.7.0 pullPolicy: IfNotPresent resources: @@ -65,6 +65,10 @@ backup: # Supported values are ABS,GCS,S3,Swift,OSS,Local, empty means no backup. storageProvider: "Local" + # failBelowRevision indicates the revision below which the validation of etcd will fail and restore will not be triggered in case + # there is no snapshot on configured backup bucket. + # failBelowRevision: 100000 + # Please uncomment the following section based on the storage provider. 
# s3: # region: region-where-bucket-exists diff --git a/cmd/initializer.go b/cmd/initializer.go index 810a9832d..4489fdab3 100644 --- a/cmd/initializer.go +++ b/cmd/initializer.go @@ -81,7 +81,7 @@ func NewInitializeCommand(stopCh <-chan struct{}) *cobra.Command { } } etcdInitializer := initializer.NewInitializer(options, snapstoreConfig, logger) - err = etcdInitializer.Initialize(mode) + err = etcdInitializer.Initialize(mode, failBelowRevision) if err != nil { logger.Fatalf("initializer failed. %v", err) } @@ -90,6 +90,7 @@ func NewInitializeCommand(stopCh <-chan struct{}) *cobra.Command { initializeEtcdFlags(initializeCmd) initializeSnapstoreFlags(initializeCmd) initializeValidatorFlags(initializeCmd) + initializeCmd.Flags().Int64Var(&failBelowRevision, "experimental-fail-below-revision", 0, "minimum required etcd revision, below which validation fails") return initializeCmd } diff --git a/cmd/server.go b/cmd/server.go index c35de5434..339e26105 100644 --- a/cmd/server.go +++ b/cmd/server.go @@ -156,12 +156,12 @@ func NewServerCommand(stopCh <-chan struct{}) *cobra.Command { // startHTTPServer creates and starts the HTTP handler // with status 503 (Service Unavailable) -func startHTTPServer(etcdInitializer *initializer.EtcdInitializer, ssr *snapshotter.Snapshotter) *server.HTTPHandler { +func startHTTPServer(initializer initializer.Initializer, ssr *snapshotter.Snapshotter) *server.HTTPHandler { // Start http handler with Error state and wait till snapshotter is up // and running before setting the status to OK. 
handler := &server.HTTPHandler{ Port: port, - EtcdInitializer: *etcdInitializer, + Initializer: initializer, Snapshotter: ssr, Logger: logger, StopCh: make(chan struct{}), diff --git a/cmd/types.go b/cmd/types.go index d9299d2c6..1189b7cae 100644 --- a/cmd/types.go +++ b/cmd/types.go @@ -68,7 +68,8 @@ var ( snapstoreTempDir string //initializer flags - validationMode string + validationMode string + failBelowRevision int64 ) var emptyStruct struct{} diff --git a/pkg/initializer/initializer.go b/pkg/initializer/initializer.go index 797cb6296..e8c8fa732 100644 --- a/pkg/initializer/initializer.go +++ b/pkg/initializer/initializer.go @@ -42,15 +42,20 @@ const ( // * Check if Latest snapshot available. // - Try to perform an Etcd data restoration from the latest snapshot. // - No snapshots are available, start etcd as a fresh installation. -func (e *EtcdInitializer) Initialize(mode validator.Mode) error { +func (e *EtcdInitializer) Initialize(mode validator.Mode, failBelowRevision int64) error { start := time.Now() - dataDirStatus, err := e.Validator.Validate(mode) + dataDirStatus, err := e.Validator.Validate(mode, failBelowRevision) if err != nil && dataDirStatus != validator.DataDirectoryNotExist { metrics.ValidationDurationSeconds.With(prometheus.Labels{metrics.LabelSucceeded: metrics.ValueSucceededFalse}).Observe(time.Now().Sub(start).Seconds()) - err = fmt.Errorf("error while initializing: %v", err) - return err + return fmt.Errorf("error while initializing: %v", err) + } + + if dataDirStatus == validator.FailBelowRevisionConsistencyError { + metrics.ValidationDurationSeconds.With(prometheus.Labels{metrics.LabelSucceeded: metrics.ValueSucceededFalse}).Observe(time.Now().Sub(start).Seconds()) + return fmt.Errorf("failed to initialize since fail below revision check failed") } metrics.ValidationDurationSeconds.With(prometheus.Labels{metrics.LabelSucceeded: metrics.ValueSucceededTrue}).Observe(time.Now().Sub(start).Seconds()) + if dataDirStatus != 
validator.DataDirectoryValid { start := time.Now() if err := e.restoreCorruptData(); err != nil { diff --git a/pkg/initializer/types.go b/pkg/initializer/types.go index f93b782c7..4bcfc2f90 100644 --- a/pkg/initializer/types.go +++ b/pkg/initializer/types.go @@ -38,5 +38,5 @@ type EtcdInitializer struct { // Initializer is the interface for etcd initialization actions. type Initializer interface { - Initialize(validator.Mode) error + Initialize(validator.Mode, int64) error } diff --git a/pkg/initializer/validator/datavalidator.go b/pkg/initializer/validator/datavalidator.go index 6fc9520a2..3968ce1f6 100644 --- a/pkg/initializer/validator/datavalidator.go +++ b/pkg/initializer/validator/datavalidator.go @@ -76,7 +76,7 @@ func (d *DataValidator) backendPath() string { return filepath.Join(d.snapDir(), // - If data directory structure is invalid return DataDirectoryInvStruct status. // * Check for data corruption. // - return data directory corruption status. -func (d *DataValidator) Validate(mode Mode) (DataDirStatus, error) { +func (d *DataValidator) Validate(mode Mode, failBelowRevision int64) (DataDirStatus, error) { dataDir := d.Config.DataDir dirExists, err := directoryExist(dataDir) if err != nil { @@ -98,8 +98,12 @@ func (d *DataValidator) Validate(mode Mode) (DataDirStatus, error) { if d.Config.SnapstoreConfig != nil { d.Logger.Info("Checking for revision consistency...") - if err = checkRevisionConsistency(d.backendPath(), *d.Config.SnapstoreConfig); err != nil { + failBelowRevisionCheck, err := checkRevisionConsistency(d.backendPath(), *d.Config.SnapstoreConfig, failBelowRevision) + if err != nil { d.Logger.Infof("Etcd revision inconsistent with latest snapshot revision: %v", err) + if failBelowRevisionCheck { + return FailBelowRevisionConsistencyError, nil + } return RevisionConsistencyError, nil } } else { @@ -338,36 +342,40 @@ func verifyDB(path string) error { } // checkRevisionConsistency compares the latest revisions on the etcd db file and the 
latest snapshot to verify that the etcd revision is not lesser than snapshot revision. -func checkRevisionConsistency(dbPath string, config snapstore.Config) error { +// The returned bool is true only when the error is caused by the failBelowRevision check, i.e. no snapshot exists in the snapstore and the etcd revision is below failBelowRevision; it is false for all other errors and on success. +func checkRevisionConsistency(dbPath string, config snapstore.Config, failBelowRevision int64) (bool, error) { etcdRevision, err := getLatestEtcdRevision(dbPath) if err != nil { - return fmt.Errorf("unable to get current etcd revision from backend db file: %v", err) + return false, fmt.Errorf("unable to get current etcd revision from backend db file: %v", err) } store, err := snapstore.GetSnapstore(&config) if err != nil { - return fmt.Errorf("unable to fetch snapstore: %v", err) + return false, fmt.Errorf("unable to fetch snapstore: %v", err) } var latestSnapshotRevision int64 fullSnap, deltaSnaps, err := miscellaneous.GetLatestFullSnapshotAndDeltaSnapList(store) if err != nil { - return fmt.Errorf("unable to get snapshots from store: %v", err) + return false, fmt.Errorf("unable to get snapshots from store: %v", err) } - if fullSnap == nil { - logger.Infof("No snapshot found.") - return nil - } else if len(deltaSnaps) == 0 { + if len(deltaSnaps) != 0 { + latestSnapshotRevision = deltaSnaps[len(deltaSnaps)-1].LastRevision + } else if fullSnap != nil { latestSnapshotRevision = fullSnap.LastRevision } else { - latestSnapshotRevision = deltaSnaps[len(deltaSnaps)-1].LastRevision + logger.Infof("No snapshot found.") + if etcdRevision < failBelowRevision { + return true, fmt.Errorf("current etcd revision (%d) is less than fail below revision (%d): possible data loss", etcdRevision, failBelowRevision) + } + return false, nil } if etcdRevision < latestSnapshotRevision { - return fmt.Errorf("current etcd revision (%d) is less than latest snapshot revision (%d): possible data loss", etcdRevision, latestSnapshotRevision) + return false, fmt.Errorf("current etcd revision (%d) is less than latest 
snapshot revision (%d): possible data loss", etcdRevision, latestSnapshotRevision) } - return nil + return false, nil } // getLatestEtcdRevision finds out the latest revision on the etcd db file without starting etcd server or an embedded etcd server. diff --git a/pkg/initializer/validator/datavalidator_test.go b/pkg/initializer/validator/datavalidator_test.go index 2bbdb37d6..a4756f611 100644 --- a/pkg/initializer/validator/datavalidator_test.go +++ b/pkg/initializer/validator/datavalidator_test.go @@ -45,7 +45,7 @@ var _ = Describe("Running Datavalidator", func() { tempDir := fmt.Sprintf("%s.%s", restoreDataDir, "temp") err = os.Rename(restoreDataDir, tempDir) Expect(err).ShouldNot(HaveOccurred()) - dataDirStatus, err := validator.Validate(Full) + dataDirStatus, err := validator.Validate(Full, 0) Expect(err).Should(HaveOccurred()) Expect(int(dataDirStatus)).Should(SatisfyAny(Equal(DataDirectoryNotExist), Equal(DataDirectoryError))) err = os.Rename(tempDir, restoreDataDir) @@ -61,7 +61,7 @@ var _ = Describe("Running Datavalidator", func() { tempDir := fmt.Sprintf("%s.%s", memberDir, "temp") err = os.Rename(memberDir, tempDir) Expect(err).ShouldNot(HaveOccurred()) - dataDirStatus, err := validator.Validate(Full) + dataDirStatus, err := validator.Validate(Full, 0) Expect(err).ShouldNot(HaveOccurred()) Expect(int(dataDirStatus)).Should(SatisfyAny(Equal(DataDirectoryInvStruct), Equal(DataDirectoryError))) err = os.Rename(tempDir, memberDir) @@ -75,7 +75,7 @@ var _ = Describe("Running Datavalidator", func() { tempDir := fmt.Sprintf("%s.%s", snapDir, "temp") err = os.Rename(snapDir, tempDir) Expect(err).ShouldNot(HaveOccurred()) - dataDirStatus, err := validator.Validate(Full) + dataDirStatus, err := validator.Validate(Full, 0) Expect(err).ShouldNot(HaveOccurred()) Expect(int(dataDirStatus)).Should(SatisfyAny(Equal(DataDirectoryInvStruct), Equal(DataDirectoryError))) err = os.Rename(tempDir, snapDir) @@ -88,7 +88,7 @@ var _ = Describe("Running Datavalidator", func() { 
tempDir := fmt.Sprintf("%s.%s", walDir, "temp") err = os.Rename(walDir, tempDir) Expect(err).ShouldNot(HaveOccurred()) - dataDirStatus, err := validator.Validate(Full) + dataDirStatus, err := validator.Validate(Full, 0) Expect(err).ShouldNot(HaveOccurred()) Expect(int(dataDirStatus)).Should(SatisfyAny(Equal(DataDirectoryInvStruct), Equal(DataDirectoryError))) err = os.Rename(tempDir, walDir) @@ -104,7 +104,7 @@ var _ = Describe("Running Datavalidator", func() { Expect(err).ShouldNot(HaveOccurred()) err = os.Mkdir(walDir, 0700) Expect(err).ShouldNot(HaveOccurred()) - dataDirStatus, err := validator.Validate(Sanity) + dataDirStatus, err := validator.Validate(Sanity, 0) Expect(err).ShouldNot(HaveOccurred()) Expect(int(dataDirStatus)).Should(Equal(DataDirectoryValid)) err = os.RemoveAll(walDir) @@ -138,7 +138,7 @@ var _ = Describe("Running Datavalidator", func() { // newEtcdRevision: current revision number on etcd db Expect(etcdRevision).To(BeNumerically(">=", newEtcdRevision)) - dataDirStatus, err := validator.Validate(Full) + dataDirStatus, err := validator.Validate(Full, 0) Expect(err).ShouldNot(HaveOccurred()) Expect(int(dataDirStatus)).Should(SatisfyAny(Equal(RevisionConsistencyError), Equal(DataDirectoryError))) @@ -174,7 +174,7 @@ var _ = Describe("Running Datavalidator", func() { _, err = file.Write(byteSlice) Expect(err).ShouldNot(HaveOccurred()) - dataDirStatus, err := validator.Validate(Full) + dataDirStatus, err := validator.Validate(Full, 0) Expect(err).ShouldNot(HaveOccurred()) Expect(int(dataDirStatus)).Should(SatisfyAny(Equal(DataDirectoryCorrupt), Equal(DataDirectoryError), Equal(RevisionConsistencyError))) @@ -187,10 +187,21 @@ var _ = Describe("Running Datavalidator", func() { }) }) Context("with clean data directory", func() { - It("should return DataDirStatus as DataDirectoryValid, and nil error", func() { - dataDirStatus, err := validator.Validate(Full) - Expect(err).ShouldNot(HaveOccurred()) - 
Expect(int(dataDirStatus)).Should(Equal(DataDirectoryValid)) + Context("with fail below revision configured to low value", func() { + It("should return DataDirStatus as DataDirectoryValid, and nil error", func() { + dataDirStatus, err := validator.Validate(Full, 0) + Expect(err).ShouldNot(HaveOccurred()) + Expect(int(dataDirStatus)).Should(Equal(DataDirectoryValid)) + }) + }) + + Context("with fail below revision configured to high value", func() { + It("should return DataDirStatus as FailBelowRevisionConsistencyError and nil error", func() { + validator.Config.SnapstoreConfig.Container = path.Join(snapstoreBackupDir, "tmp") + dataDirStatus, err := validator.Validate(Full, 1000000) + Expect(err).ShouldNot(HaveOccurred()) + Expect(int(dataDirStatus)).Should(Equal(FailBelowRevisionConsistencyError)) + }) }) }) }) diff --git a/pkg/initializer/validator/types.go b/pkg/initializer/validator/types.go index 56e069b1d..e3b47b24d 100644 --- a/pkg/initializer/validator/types.go +++ b/pkg/initializer/validator/types.go @@ -35,6 +35,8 @@ const ( DataDirectoryError // RevisionConsistencyError indicates current etcd revision is inconsistent with latest snapshot revision. RevisionConsistencyError + // FailBelowRevisionConsistencyError indicates that the current etcd revision is lower than failBelowRevision while the snapstore holds no snapshot. + FailBelowRevisionConsistencyError ) const ( @@ -65,5 +67,5 @@ type DataValidator struct { // Validator is the interface for data validation actions. 
type Validator interface { - Validate(Mode) error + Validate(Mode, int64) (DataDirStatus, error) } diff --git a/pkg/server/httpAPI.go b/pkg/server/httpAPI.go index e493a3acf..8cdba7b30 100644 --- a/pkg/server/httpAPI.go +++ b/pkg/server/httpAPI.go @@ -18,6 +18,7 @@ import ( "fmt" "net/http" "net/http/pprof" + "strconv" "sync" "sync/atomic" @@ -57,7 +58,7 @@ const ( // HTTPHandler is implementation to handle HTTP API exposed by server type HTTPHandler struct { - EtcdInitializer initializer.EtcdInitializer + Initializer initializer.Initializer Snapshotter *snapshotter.Snapshotter Port int server *http.Server @@ -163,6 +164,20 @@ func (h *HTTPHandler) serveInitialize(rw http.ResponseWriter, req *http.Request) <-h.AckCh } + failBelowRevisionStr := req.URL.Query().Get("failbelowrevision") + h.Logger.Infof("Validation failBelowRevision: %s", failBelowRevisionStr) + var failBelowRevision int64 + if len(failBelowRevisionStr) != 0 { + var err error + failBelowRevision, err = strconv.ParseInt(failBelowRevisionStr, 10, 64) + if err != nil { + h.initializationStatusMutex.Lock() + defer h.initializationStatusMutex.Unlock() + h.Logger.Errorf("Failed initialization due wrong parameter value `failbelowrevision`: %v", err) + h.initializationStatus = initializationStatusFailed + return + } + } switch modeVal := req.URL.Query().Get("mode"); modeVal { case string(validator.Full): mode = validator.Full @@ -172,7 +187,7 @@ func (h *HTTPHandler) serveInitialize(rw http.ResponseWriter, req *http.Request) mode = validator.Full } h.Logger.Infof("Validation mode: %s", mode) - err := h.EtcdInitializer.Initialize(mode) + err := h.Initializer.Initialize(mode, failBelowRevision) h.initializationStatusMutex.Lock() defer h.initializationStatusMutex.Unlock() if err != nil { @@ -180,7 +195,7 @@ func (h *HTTPHandler) serveInitialize(rw http.ResponseWriter, req *http.Request) h.initializationStatus = initializationStatusFailed return } - h.Logger.Infof("Successfully initialized data directory \"%s\" for etcd.", 
h.EtcdInitializer.Validator.Config.DataDir) + h.Logger.Info("Successfully initialized data directory for etcd.") h.initializationStatus = initializationStatusSuccessful }() } diff --git a/test/e2e/integration/cloud_backup_test.go b/test/e2e/integration/cloud_backup_test.go index c08098cc7..c0c2cdb67 100644 --- a/test/e2e/integration/cloud_backup_test.go +++ b/test/e2e/integration/cloud_backup_test.go @@ -216,7 +216,7 @@ var _ = Describe("CloudBackup", func() { SnapstoreConfig: snapstoreConfig, }, } - dataDirStatus, err := dataValidator.Validate(validator.Full) + dataDirStatus, err := dataValidator.Validate(validator.Full, 0) Expect(err).ShouldNot(HaveOccurred()) Expect(dataDirStatus).Should(Equal(validator.DataDirStatus(validator.DataDirectoryValid))) }) @@ -248,7 +248,7 @@ var _ = Describe("CloudBackup", func() { SnapstoreConfig: snapstoreConfig, }, } - dataDirStatus, err := dataValidator.Validate(validator.Full) + dataDirStatus, err := dataValidator.Validate(validator.Full, 0) Expect(err).ShouldNot(HaveOccurred()) Expect(dataDirStatus).Should(SatisfyAny(Equal(validator.DataDirStatus(validator.DataDirectoryCorrupt)), Equal(validator.DataDirStatus(validator.RevisionConsistencyError)))) })