Skip to content

Commit

Permalink
Fix after restore failed (#28)
Browse files Browse the repository at this point in the history
* fix the cluster when restore failed
* check alive before fix
* fix role convert
* fix: use the restore backup suffix and remove the target data folder when fixing
* tidy logs and add verification
  • Loading branch information
pengweisong authored Feb 9, 2022
1 parent 17adaf5 commit 079e7c7
Show file tree
Hide file tree
Showing 25 changed files with 407 additions and 102 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ test:
$(GO) test -v $(PKG) -short

fmt:
$(GO) mod tidy && find . -path vendor -prune -o -type f -iname '*.go' -exec go fmt {} \;
$(GO) mod tidy && find . -path vendor -prune -o -type f -iname '*.go' -exec goimports -w {} \;

10 changes: 5 additions & 5 deletions cmd/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func NewBackupCmd() *cobra.Command {
func newFullBackupCmd() *cobra.Command {
fullBackupCmd := &cobra.Command{
Use: "full",
Short: "full backup Nebula Graph Database",
Short: "Full backup Nebula Graph Database",
RunE: func(cmd *cobra.Command, args []string) error {
err := log.SetLog(cmd.Flags())
if err != nil {
Expand All @@ -46,10 +46,10 @@ func newFullBackupCmd() *cobra.Command {
return err
}

fmt.Println("start to backup cluster...")
fmt.Println("Start to backup cluster...")
backupName, err := b.Backup()
if err != nil {
fmt.Println("backup failed, will try to clean the remaining garbage...")
fmt.Println("Backup failed, will try to clean the remaining garbage...")

if backupName != "" {
cleanCfg := &config.CleanupConfig{
Expand All @@ -66,12 +66,12 @@ func newFullBackupCmd() *cobra.Command {
if err != nil {
return fmt.Errorf("cleanup %s failed when backup failed: %w", backupName, err)
}
fmt.Printf("cleanup backup %s successfully after backup failed", backupName)
fmt.Printf("Cleanup backup %s successfully after backup failed.", backupName)
}
return err
}

fmt.Println("backup succeed.")
fmt.Println("Backup succeed.")
return nil
},
}
Expand Down
14 changes: 12 additions & 2 deletions cmd/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
func NewRestoreCmd() *cobra.Command {
restoreCmd := &cobra.Command{
Use: "restore",
Short: "restore Nebula Graph Database, notice that it will restart the cluster",
Short: "Restore Nebula Graph Database, notice that it will restart the cluster",
SilenceUsage: true,
}
config.AddCommonFlags(restoreCmd.PersistentFlags())
Expand Down Expand Up @@ -46,9 +46,19 @@ func newFullRestoreCmd() *cobra.Command {

err = r.Restore()
if err != nil {
f, ferr := restore.NewFixFrom(r)
if ferr != nil {
return err
}

ferr = f.Fix()
if ferr != nil {
fmt.Println("Fix failed when restore failed", ferr)
}

return err
}
fmt.Println("restore succeed")
fmt.Println("Restore succeed.")
return nil
},
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/show.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
func NewShowCmd() *cobra.Command {
showCmd := &cobra.Command{
Use: "show",
Short: "show backup info list in external storage",
Short: "Show backup info list in external storage",
SilenceUsage: true,
RunE: func(cmd *cobra.Command, args []string) error {
err := log.SetLog(cmd.Flags())
Expand Down
2 changes: 1 addition & 1 deletion cmd/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
func NewVersionCmd() *cobra.Command {
versionCmd := &cobra.Command{
Use: "version",
Short: "print the version of nebula br tool",
Short: "Print the version of nebula br tool",
RunE: func(cmd *cobra.Command, args []string) error {
fmt.Printf(`%s,V-%d.%d.%d
GitSha: %s
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ require (
github.com/spf13/cobra v1.1.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.7.0
github.com/vesoft-inc/nebula-agent v0.1.0
github.com/vesoft-inc/nebula-go/v2 v2.5.2-0.20211228055601-b5b11a36e453
github.com/vesoft-inc/nebula-agent v0.1.1-0.20220208095610-fe80654911f8
github.com/vesoft-inc/nebula-go/v3 v3.0.0-20220119024722-ab348afbb79d
golang.org/x/sys v0.0.0-20211124211545-fe61309f8881 // indirect
)
17 changes: 11 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3Ee
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down Expand Up @@ -142,11 +143,13 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
Expand Down Expand Up @@ -189,6 +192,8 @@ github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40T
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
Expand Down Expand Up @@ -218,10 +223,10 @@ github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5Cc
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/vesoft-inc/nebula-agent v0.1.0 h1:ROcnM5BUguBErwMaaZM4tmiCl2T92xE1fxAiB9cAc6g=
github.com/vesoft-inc/nebula-agent v0.1.0/go.mod h1:79lL9wmxYYKgMHP+9Q+MLc3q+G9HFU/sCnBhy85G0B0=
github.com/vesoft-inc/nebula-go/v2 v2.5.2-0.20211228055601-b5b11a36e453 h1:1rwe3LQVuTRUJBf4Gonc47+T3dCD29EzkrRaTzkUNdw=
github.com/vesoft-inc/nebula-go/v2 v2.5.2-0.20211228055601-b5b11a36e453/go.mod h1:YRIuog6zyRKz0SagwwTcqHXCPjJ4GfQelIl+/FgSC+Y=
github.com/vesoft-inc/nebula-agent v0.1.1-0.20220208095610-fe80654911f8 h1:UY7ygJyfzpYWIsdLpLJTf1yjh3FhW8xRYBNQg1OL77o=
github.com/vesoft-inc/nebula-agent v0.1.1-0.20220208095610-fe80654911f8/go.mod h1:uA6GJsvhNdTZkLQp1grX74fUm6TWi7EWA2AEWNeHzNU=
github.com/vesoft-inc/nebula-go/v3 v3.0.0-20220119024722-ab348afbb79d h1:spO7OAtYI1wiqBiJ9417pKhqx0IkqFAFdFQFPm4JIrs=
github.com/vesoft-inc/nebula-go/v3 v3.0.0-20220119024722-ab348afbb79d/go.mod h1:+sXv05jYQBARdTbTcIEsWVXCnF/6ttOlDK35xQ6m54s=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
Expand Down
37 changes: 19 additions & 18 deletions pkg/backup/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ import (

"github.com/google/uuid"
log "github.com/sirupsen/logrus"

pb "github.com/vesoft-inc/nebula-agent/pkg/proto"
"github.com/vesoft-inc/nebula-agent/pkg/storage"
"github.com/vesoft-inc/nebula-go/v2/nebula"
"github.com/vesoft-inc/nebula-go/v2/nebula/meta"
"github.com/vesoft-inc/nebula-go/v3/nebula"
"github.com/vesoft-inc/nebula-go/v3/nebula/meta"

"github.com/vesoft-inc/nebula-br/pkg/clients"
"github.com/vesoft-inc/nebula-br/pkg/config"
Expand Down Expand Up @@ -123,7 +124,7 @@ func (b *Backup) uploadStorage(hostDirs map[string]map[string][]string, targetUr
if err != nil {
return fmt.Errorf("upload %s to %s failed:%w", source, target, err)
}
logger.WithField("src", source).WithField("target", target).Info("Upload storage checkpoint successfully")
logger.WithField("src", source).WithField("target", target).Info("Upload storage checkpoint successfully.")
}
}
}
Expand All @@ -146,7 +147,7 @@ func (b *Backup) generateMetaFile(meta *meta.BackupMeta) (string, error) {

// Backup backs up data in given external storage, and return the backup name
func (b *Backup) Backup() (string, error) {
// step2: call the meta service, create backup files in each local
// call the meta service, create backup files in each local
backupRes, err := b.meta.CreateBackup(b.cfg.Spaces)
if err != nil {
if backupRes != nil && backupRes.GetMeta() != nil && backupRes.GetMeta().GetBackupName() != nil {
Expand All @@ -157,9 +158,9 @@ func (b *Backup) Backup() (string, error) {
backupInfo := backupRes.GetMeta()
backupName := string(backupInfo.GetBackupName())
logger := log.WithField("name", backupName)
logger.WithField("backup info", utils.StringifyBackup(backupInfo)).Info("Create backup in nebula machine's local")
logger.WithField("backup info", utils.StringifyBackup(backupInfo)).Info("Create backup in nebula machine's local.")

// step3: ensure root dir
// ensure root dir
rootUri, err := utils.UriJoin(b.cfg.Backend.Uri(), string(backupInfo.BackupName))
if err != nil {
return backupName, err
Expand All @@ -168,9 +169,9 @@ func (b *Backup) Backup() (string, error) {
if err != nil {
return backupName, fmt.Errorf("ensure dir %s failed: %w", rootUri, err)
}
logger.WithField("root", rootUri).Info("Ensure backup root dir")
logger.WithField("root", rootUri).Info("Ensure backup root dir.")

// step4: upload meta files
// upload meta files
metaDir, err := utils.UriJoin(rootUri, "meta")
if err != nil {
return backupName, err
Expand All @@ -182,9 +183,9 @@ func (b *Backup) Backup() (string, error) {
if err = b.uploadMeta(b.meta.LeaderAddr(), metaDir, localMetaDir); err != nil {
return backupName, err
}
logger.WithField("meta", metaDir).Info("Upload meta successfully")
logger.WithField("meta", metaDir).Info("Upload meta successfully.")

// step5: upload storage files
// upload storage files
storageDir, _ := utils.UriJoin(rootUri, "data")
hostDirs := make(map[string]map[string][]string)
// group checkpoint dirs by host and space id
Expand All @@ -203,25 +204,25 @@ func (b *Backup) Backup() (string, error) {
}
err = b.uploadStorage(hostDirs, storageDir)
if err != nil {
return backupName, fmt.Errorf("upload stoarge failed %w", err)
return backupName, fmt.Errorf("upload storage failed %w", err)
}
logger.WithField("data", storageDir).Info("Upload data backup successfully")
logger.WithField("data", storageDir).Info("Upload data backup successfully.")

// step6: generate backup meta files and upload
// generate backup meta files and upload
if err := utils.EnsureDir(utils.LocalTmpDir); err != nil {
return backupName, err
}
defer func() {
if err := utils.RemoveDir(utils.LocalTmpDir); err != nil {
log.WithError(err).Errorf("Remove tmp dir %s failed", utils.LocalTmpDir)
log.WithError(err).Errorf("Remove tmp dir %s failed.", utils.LocalTmpDir)
}
}()

tmpMetaPath, err := b.generateMetaFile(backupInfo)
if err != nil {
return backupName, fmt.Errorf("write meta to tmp path failed: %w", err)
}
logger.WithField("tmp path", tmpMetaPath).Info("Write meta data to local tmp file successfully")
logger.WithField("tmp path", tmpMetaPath).Info("Write meta data to local tmp file successfully.")
backupMetaPath, err := utils.UriJoin(rootUri, filepath.Base(tmpMetaPath))
if err != nil {
return backupName, err
Expand All @@ -230,15 +231,15 @@ func (b *Backup) Backup() (string, error) {
if err != nil {
return backupName, fmt.Errorf("upload local tmp file to remote storage %s failed: %w", backupMetaPath, err)
}
logger.WithField("remote path", backupMetaPath).Info("Upload tmp backup meta file to remote")
logger.WithField("remote path", backupMetaPath).Info("Upload tmp backup meta file to remote.")

// step7: drop backup files in cluster machine local and local tmp files
// drop backup files in cluster machine local and local tmp files
err = b.meta.DropBackup(backupInfo.GetBackupName())
if err != nil {
return backupName, fmt.Errorf("drop backup %s in cluster local failed: %w",
string(backupInfo.BackupName[:]), err)
}
logger.Info("Drop backup in cluster and local tmp folder successfully")
logger.Info("Drop backup in cluster and local tmp folder successfully.")

return backupName, nil
}
13 changes: 7 additions & 6 deletions pkg/cleanup/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strings"

log "github.com/sirupsen/logrus"

pb "github.com/vesoft-inc/nebula-agent/pkg/proto"
"github.com/vesoft-inc/nebula-agent/pkg/storage"

Expand Down Expand Up @@ -60,7 +61,7 @@ func (c *Cleanup) cleanNebula() error {
if err != nil {
return fmt.Errorf("drop backup failed: %w", err)
}
log.Debugf("Drop backup %s successfully", c.cfg.BackupName)
log.Debugf("Drop backup %s successfully.", c.cfg.BackupName)

return nil
}
Expand All @@ -75,7 +76,7 @@ func (c *Cleanup) cleanExternal() error {
if err != nil {
return fmt.Errorf("remove %s in external storage failed: %w", backupUri, err)
}
log.Debugf("Remove %s successfullly", backupUri)
log.Debugf("Remove %s successfully.", backupUri)

// Local backend's data lay in different cluster machines,
// which should be handled separately
Expand All @@ -97,7 +98,7 @@ func (c *Cleanup) cleanExternal() error {
if err != nil {
return fmt.Errorf("remove %s in host: %s failed: %w", backupPath, addr.Host, err)
}
log.Debugf("Remove local data %s in %s successfullly", backupPath, addr.Host)
log.Debugf("Remove local data %s in %s successfully.", backupPath, addr.Host)
}
}

Expand All @@ -107,18 +108,18 @@ func (c *Cleanup) cleanExternal() error {
func (c *Cleanup) Clean() error {
logger := log.WithField("backup name", c.cfg.BackupName)

logger.Info("Start to cleanup data in nebula cluster")
logger.Info("Start to cleanup data in nebula cluster.")
err := c.cleanNebula()
if err != nil {
return fmt.Errorf("clean nebula local data failed: %w", err)
}

logger.Info("Start cleanup data in external storage")
logger.Info("Start cleanup data in external storage.")
err = c.cleanExternal()
if err != nil {
return fmt.Errorf("clean external storage data failed: %w", err)
}

logger.Info("Clean up backup data successfully")
logger.Info("Clean up backup data successfully.")
return nil
}
3 changes: 1 addition & 2 deletions pkg/clients/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@ import (
"fmt"

agent "github.com/vesoft-inc/nebula-agent/pkg/client"
"github.com/vesoft-inc/nebula-go/v2/nebula"

"github.com/vesoft-inc/nebula-br/pkg/utils"
"github.com/vesoft-inc/nebula-go/v3/nebula"
)

type NebulaAgent struct {
Expand Down
10 changes: 5 additions & 5 deletions pkg/clients/meta.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ import (
"time"

log "github.com/sirupsen/logrus"
"github.com/vesoft-inc/nebula-go/v2/nebula"
"github.com/vesoft-inc/nebula-go/v2/nebula/meta"

"github.com/vesoft-inc/nebula-br/pkg/utils"
"github.com/vesoft-inc/nebula-go/v3/nebula"
"github.com/vesoft-inc/nebula-go/v3/nebula/meta"
)

type NebulaMeta struct {
Expand Down Expand Up @@ -205,14 +205,14 @@ func (m *NebulaMeta) RestoreMeta(metaAddr *nebula.HostAddr, hostMap []*meta.Host
client, err := connect(metaAddr)
if err != nil {
log.WithError(err).WithField("addr", utils.StringifyAddr(metaAddr)).
Errorf("connect to metad failed, try times %d", try)
Errorf("Connect to metad failed, try times %d.", try)
time.Sleep(time.Second * 2)
continue
}

resp, err := client.RestoreMeta(req)
if err != nil {
log.WithError(err).WithField("req", req).Error("Restore meta failed")
log.WithError(err).WithField("req", req).Error("Restore meta failed.")
return err
}

Expand All @@ -227,7 +227,7 @@ func (m *NebulaMeta) RestoreMeta(metaAddr *nebula.HostAddr, hostMap []*meta.Host
}

func (m *NebulaMeta) getMetaDirInfo(addr *nebula.HostAddr) (*nebula.DirInfo, error) {
log.WithField("addr", utils.StringifyAddr(addr)).Debug("Try to get dir info from meta service")
log.WithField("addr", utils.StringifyAddr(addr)).Debug("Try to get dir info from meta service.")
c, err := connect(addr)
if err != nil {
return nil, err
Expand Down
Loading

0 comments on commit 079e7c7

Please sign in to comment.