Skip to content

Commit

Permalink
Add sanity check for revision numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
shreyas-s-rao committed Dec 26, 2018
1 parent 1e1758e commit 4440e3d
Show file tree
Hide file tree
Showing 11 changed files with 651 additions and 31 deletions.
2 changes: 1 addition & 1 deletion cmd/initializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func NewInitializeCommand(stopCh <-chan struct{}) *cobra.Command {
}

options := &restorer.RestoreOptions{
RestoreDataDir: restoreDataDir,
RestoreDataDir: path.Clean(restoreDataDir),
Name: restoreName,
ClusterURLs: clusterUrlsMap,
PeerURLs: peerUrls,
Expand Down
2 changes: 1 addition & 1 deletion cmd/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func NewRestoreCommand(stopCh <-chan struct{}) *cobra.Command {
rs := restorer.NewRestorer(store, logger)

options := &restorer.RestoreOptions{
RestoreDataDir: restoreDataDir,
RestoreDataDir: path.Clean(restoreDataDir),
Name: restoreName,
BaseSnapshot: *baseSnap,
DeltaSnapList: deltaSnapList,
Expand Down
6 changes: 3 additions & 3 deletions cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func NewServerCommand(stopCh <-chan struct{}) *cobra.Command {
var serverCmd = &cobra.Command{
Use: "server",
Short: "start the http server with backup scheduler.",
Long: `Server will keep listening for http request to deliver its functionality through http endpoins.`,
Long: `Server will keep listening for http request to deliver its functionality through http endpoints.`,
Run: func(cmd *cobra.Command, args []string) {
var (
snapstoreConfig *snapstore.Config
Expand All @@ -56,7 +56,7 @@ func NewServerCommand(stopCh <-chan struct{}) *cobra.Command {
}

options := &restorer.RestoreOptions{
RestoreDataDir: restoreDataDir,
RestoreDataDir: path.Clean(restoreDataDir),
Name: restoreName,
ClusterURLs: clusterUrlsMap,
PeerURLs: peerUrls,
Expand Down Expand Up @@ -238,7 +238,7 @@ func handleNoSsrRequest(handler *server.HTTPHandler) {
}
}

// handleSsrRequest responds to handlers reqeust and stop interrupt.
// handleSsrRequest responds to handlers request and stop interrupt.
func handleSsrRequest(handler *server.HTTPHandler, ssr *snapshotter.Snapshotter, ackCh, ssrStopCh chan struct{}, stopCh <-chan struct{}) {
for {
var ok bool
Expand Down
3 changes: 2 additions & 1 deletion pkg/initializer/initializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ func NewInitializer(options *restorer.RestoreOptions, snapstoreConfig *snapstore
},
Validator: &validator.DataValidator{
Config: &validator.Config{
DataDir: options.RestoreDataDir,
DataDir: options.RestoreDataDir,
SnapstoreConfig: snapstoreConfig,
},
Logger: logger,
},
Expand Down
103 changes: 81 additions & 22 deletions pkg/initializer/validator/datavalidator.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package validator

import (
"encoding/binary"
"errors"
"fmt"
"hash/crc32"
Expand All @@ -31,26 +32,10 @@ import (
"github.com/coreos/etcd/snap/snappb"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
"github.com/gardener/etcd-backup-restore/pkg/snapstore"
"github.com/sirupsen/logrus"
)

const (
// DataDirectoryValid indicates data directory is valid.
DataDirectoryValid = iota
// DataDirectoryNotExist indicates data directory is non-existent.
DataDirectoryNotExist
// DataDirectoryInvStruct indicates data directory has invalid structure.
DataDirectoryInvStruct
// DataDirectoryCorrupt indicates data directory is corrupt.
DataDirectoryCorrupt
// DataDirectoryError indicates unknown error while validation.
DataDirectoryError
)

const (
snapSuffix = ".snap"
)

var (
// A map of valid files that can be present in the snap folder.
validFiles = map[string]bool{
Expand All @@ -70,9 +55,6 @@ var (
logger *logrus.Logger
)

// DataDirStatus represents the status of the etcd data directory.
type DataDirStatus int

// init assigns a freshly created logrus logger to the package-level logger
// variable when the package is loaded.
func init() {
logger = logrus.New()
}
Expand All @@ -85,7 +67,7 @@ func (d *DataValidator) snapDir() string { return filepath.Join(d.memberDir(), "

// backendPath returns the path of the "db" file located inside the
// validator's snap directory.
func (d *DataValidator) backendPath() string {
	snapDir := d.snapDir()
	return filepath.Join(snapDir, "db")
}

//Validate performs the steps required to validate data for Etcd instance.
// Validate performs the steps required to validate data for Etcd instance.
// The steps involved are:
// * Check if data directory exists.
// - If data directory exists
Expand All @@ -112,12 +94,27 @@ func (d *DataValidator) Validate() (DataDirStatus, error) {
d.Logger.Infof("Data directory structure invalid.")
return DataDirectoryInvStruct, nil
}

d.Logger.Info("Checking for revision consistency...")
revisionConsistencyCheckStatus, err := d.CheckRevisionConsistency()
if revisionConsistencyCheckStatus == RevisionConsistencyCheckError {
d.Logger.Infof("Unable to check revision consistency. %v", err)
return RevisionConsistencyError, nil
} else if revisionConsistencyCheckStatus == RevisionConsistencyCheckFailure {
d.Logger.Infof("Inconsistent revision numbers between db file and latest snapshot. %v", err)
return RevisionConsistencyError, nil
} else if err != nil {
d.Logger.Infof("Unknown error in revision consistency check. %v", err)
return RevisionConsistencyError, nil
}

d.Logger.Info("Checking for data directory files corruption...")
err = d.checkForDataCorruption()
if err != nil {
d.Logger.Infof("Data directory corrupt. %v", err)
return DataDirectoryCorrupt, nil
}

d.Logger.Info("Data directory valid.")
return DataDirectoryValid, nil
}
Expand Down Expand Up @@ -273,7 +270,7 @@ func checkSuffix(names []string) []string {
snaps = append(snaps, names[i])
} else {
// If we find a file which is not a snapshot then check if it's
// a vaild file. If not throw out a warning.
// a valid file. If not throw out a warning.
if _, ok := validFiles[names[i]]; !ok {
fmt.Printf("skipped unexpected non snapshot file %v", names[i])
}
Expand Down Expand Up @@ -347,3 +344,65 @@ func verifyDB(path string) error {
return nil
})
}

// CheckRevisionConsistency compares the latest revision found in the etcd
// backend db file with the last revision of the most recent snapshot in the
// snapstore, to verify that the etcd revision is not lower than the snapshot
// revision.
//
// It returns:
//   - RevisionConsistencyCheckError and an error when the db revision cannot
//     be read, the snapstore cannot be obtained, or its snapshots cannot be
//     listed;
//   - RevisionConsistencyCheckFailure and an error when the db revision is
//     lower than the latest snapshot revision;
//   - RevisionConsistencyCheckSuccess and a nil error otherwise, including
//     when the store contains no snapshots to compare against.
func (d *DataValidator) CheckRevisionConsistency() (RevisionConsistencyCheckStatus, error) {
	etcdRevision, err := getRevision(d.backendPath())
	if err != nil {
		return RevisionConsistencyCheckError, fmt.Errorf("unable to get current etcd revision from backend db file: %v", err)
	}

	// The error must be checked before store is used; previously it was
	// overwritten by the List call below and a failed store was dereferenced.
	store, err := snapstore.GetSnapstore(d.Config.SnapstoreConfig)
	if err != nil {
		return RevisionConsistencyCheckError, fmt.Errorf("unable to get snapstore from configuration: %v", err)
	}

	snapList, err := store.List()
	if err != nil {
		return RevisionConsistencyCheckError, fmt.Errorf("unable to list snapshots from store: %v", err)
	}

	// With no snapshots there is no revision to fall behind of; indexing the
	// empty list would panic.
	if len(snapList) == 0 {
		return RevisionConsistencyCheckSuccess, nil
	}

	// NOTE(review): assumes List returns snapshots ordered oldest to newest,
	// so the last entry is the latest one — confirm against the snapstore.
	latestSnapshotRevision := snapList[len(snapList)-1].LastRevision

	if etcdRevision < latestSnapshotRevision {
		return RevisionConsistencyCheckFailure, fmt.Errorf("current etcd revision (%d) is less than latest snapshot revision (%d): possible data loss", etcdRevision, latestSnapshotRevision)
	}

	return RevisionConsistencyCheckSuccess, nil
}

// getRevision finds out the latest revision on the etcd db file without
// starting etcd server or an embedded etcd server.
//
// It opens the bolt db at path read-only and decodes the first 8 bytes of the
// last key in the "key" bucket as a big-endian integer. It returns 0 when the
// "key" bucket is missing or empty, and -1 with an error when the file cannot
// be stat'ed or opened, or when the last key is too short to hold a revision.
func getRevision(path string) (int64, error) {
	if _, err := os.Stat(path); err != nil {
		return -1, fmt.Errorf("unable to stat backend db file: %v", err)
	}

	db, err := bolt.Open(path, 0400, &bolt.Options{ReadOnly: true})
	if err != nil {
		return -1, fmt.Errorf("unable to open backend boltdb file: %v", err)
	}
	defer db.Close()

	var rev int64

	err = db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket([]byte("key"))
		if b == nil {
			// No "key" bucket: leave rev at 0, as the previous full scan did.
			return nil
		}

		// Bolt keeps keys in byte order, so the last key is the one the
		// previous ForEach scan ended on; no need to visit every entry.
		k, _ := b.Cursor().Last()
		if k == nil {
			// Empty bucket: no revision has been written yet.
			return nil
		}
		if len(k) < 8 {
			// Guard the slice below, which would otherwise panic.
			return fmt.Errorf("malformed key of length %d in bucket %q: expected at least 8 bytes", len(k), "key")
		}

		rev = int64(binary.BigEndian.Uint64(k[:8]))
		return nil
	})

	if err != nil {
		return -1, err
	}

	return rev, nil
}
Loading

0 comments on commit 4440e3d

Please sign in to comment.