Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving GC Followup #6905

Merged
merged 8 commits into from
Jul 29, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 59 additions & 30 deletions blockstore/badger/blockstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,29 +258,30 @@ func (b *Blockstore) movingGC() error {
b.moveCond.Broadcast()
b.moveMx.Unlock()

var path string
var newPath string

defer func() {
b.lockMove()

db2 := b.dbNext
dbNext := b.dbNext
b.dbNext = nil

var state bsMoveState
if db2 != nil {
if dbNext != nil {
state = moveStateCleanup
} else {
state = moveStateNone
}

b.unlockMove(state)

if db2 != nil {
err := db2.Close()
if dbNext != nil {
// the move failed and we have a left-over db; delete it.
err := dbNext.Close()
if err != nil {
log.Warnf("error closing badger db: %s", err)
}
b.deleteDB(path)
b.deleteDB(newPath)

b.lockMove()
b.unlockMove(moveStateNone)
Expand All @@ -296,68 +297,96 @@ func (b *Blockstore) movingGC() error {
}

if basePath == linkPath {
path = basePath
newPath = basePath
} else {
// we do this dance to create a name adjacent to the current one, while avoiding clown
// shoes with multiple moves (i.e. we can't just take the basename of the linkPath, as it
// could have been created in a previous move and have the timestamp suffix, which would then
// perpetuate itself).
name := filepath.Base(basePath)
dir := filepath.Dir(linkPath)
path = filepath.Join(dir, name)
newPath = filepath.Join(dir, name)
}
path = fmt.Sprintf("%s.%d", path, time.Now().UnixNano())
newPath = fmt.Sprintf("%s.%d", newPath, time.Now().UnixNano())

log.Infof("moving blockstore from %s to %s", b.opts.Dir, path)
log.Infof("moving blockstore from %s to %s", b.opts.Dir, newPath)

opts := b.opts
opts.Dir = path
opts.ValueDir = path
opts.Dir = newPath
opts.ValueDir = newPath

db2, err := badger.Open(opts.Options)
dbNew, err := badger.Open(opts.Options)
if err != nil {
return fmt.Errorf("failed to open badger blockstore in %s: %w", path, err)
return fmt.Errorf("failed to open badger blockstore in %s: %w", newPath, err)
}

b.lockMove()
b.dbNext = db2
b.dbNext = dbNew
b.unlockMove(moveStateMoving)

log.Info("copying blockstore")
err = b.doCopy(b.db, b.dbNext)
if err != nil {
return fmt.Errorf("error moving badger blockstore to %s: %w", path, err)
return fmt.Errorf("error moving badger blockstore to %s: %w", newPath, err)
}

b.lockMove()
db1 := b.db
dbOld := b.db
b.db = b.dbNext
b.dbNext = nil
b.unlockMove(moveStateCleanup)

err = db1.Close()
err = dbOld.Close()
if err != nil {
log.Warnf("error closing old badger db: %s", err)
}

dbpath := b.opts.Dir
oldpath := fmt.Sprintf("%s.old.%d", dbpath, time.Now().Unix())
// this is the canonical db path; this is where our db lives.
dbPath := b.opts.Dir

if err = os.Rename(dbpath, oldpath); err != nil {
// we first move the existing db out of the way, and only delete it after we have symlinked the
// new db to the canonical path
backupPath := fmt.Sprintf("%s.old.%d", dbPath, time.Now().Unix())
if err = os.Rename(dbPath, backupPath); err != nil {
// this is not catastrophic in the sense that we have not lost any data.
// but it is pretty bad, as the db path points to the old db, while we are now using the new
// db; we can't continue and leave a ticking bomb for the next restart.
// so a panic is appropriate and user can fix.
panic(fmt.Errorf("error renaming old badger db dir from %s to %s: %w; USER ACTION REQUIRED", dbpath, oldpath, err)) //nolint
panic(fmt.Errorf("error renaming old badger db dir from %s to %s: %w; USER ACTION REQUIRED", dbPath, backupPath, err)) //nolint
}

if err = os.Symlink(path, dbpath); err != nil {
if err = b.symlink(newPath, dbPath); err != nil {
// same here; the db path is pointing to the void. panic and let the user fix.
panic(fmt.Errorf("error symlinking new badger db dir from %s to %s: %w; USER ACTION REQUIRED", path, dbpath, err)) //nolint
panic(fmt.Errorf("error symlinking new badger db dir from %s to %s: %w; USER ACTION REQUIRED", newPath, dbPath, err)) //nolint
}

b.deleteDB(oldpath)
// the delete follows symlinks
b.deleteDB(backupPath)

log.Info("moving blockstore done")
return nil
}

// symlink creates linkTo as a symbolic link that points at path.
// If the parent directories of path and linkTo resolve (after following symlinks) to
// the same directory, the link target is written as the base name of path, i.e. the
// link is relative; otherwise path is used as the link target unchanged.
// It returns an error if either parent directory cannot be resolved, or whatever
// error os.Symlink reports.
func (b *Blockstore) symlink(path, linkTo string) error {
vyzo marked this conversation as resolved.
Show resolved Hide resolved
// resolve the directory that contains the link target
resolvedPathDir, err := filepath.EvalSymlinks(filepath.Dir(path))
vyzo marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return fmt.Errorf("error resolving links in %s: %w", path, err)
}

// resolve the directory in which the link itself will be created
resolvedLinkDir, err := filepath.EvalSymlinks(filepath.Dir(linkTo))
if err != nil {
return fmt.Errorf("error resolving links in %s: %w", linkTo, err)
}

// same directory: link by base name only, so the link target is relative
if resolvedPathDir == resolvedLinkDir {
path = filepath.Base(path)
}

// os.Symlink(oldname, newname): linkTo is the link being created, path is its target
return os.Symlink(path, linkTo)
}

// doCopy copies a badger blockstore to another, with an optional filter; if the filter
// is not nil, then only cids that satisfy the filter will be copied.
func (b *Blockstore) doCopy(from, to *badger.DB) error {
Expand Down Expand Up @@ -390,19 +419,19 @@ func (b *Blockstore) doCopy(from, to *badger.DB) error {

func (b *Blockstore) deleteDB(path string) {
// follow symbolic links, otherwise the data will be left behind
lpath, err := filepath.EvalSymlinks(path)
linkPath, err := filepath.EvalSymlinks(path)
if err != nil {
log.Warnf("error resolving symlinks in %s", path)
return
}

log.Infof("removing data directory %s", lpath)
if err := os.RemoveAll(lpath); err != nil {
log.Warnf("error deleting db at %s: %s", lpath, err)
log.Infof("removing data directory %s", linkPath)
if err := os.RemoveAll(linkPath); err != nil {
log.Warnf("error deleting db at %s: %s", linkPath, err)
return
}

if path != lpath {
if path != linkPath {
log.Infof("removing link %s", path)
if err := os.Remove(path); err != nil {
log.Warnf("error removing symbolic link %s", err)
Expand Down
15 changes: 15 additions & 0 deletions blockstore/badger/blockstore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,21 @@ func testMove(t *testing.T, optsF func(string) Options) {

checkBlocks()
checkPath()

// reopen the db to make sure our relative link works:
err = db.Close()
if err != nil {
t.Fatal(err)
}

db, err = Open(optsF(dbPath))
if err != nil {
t.Fatal(err)
}

// db.Close() is already deferred

checkBlocks()
}

func TestMoveNoPrefix(t *testing.T) {
Expand Down