Skip to content

Commit

Permalink
fix(x/swingset): guard snapshot restore for concurrency
Browse files Browse the repository at this point in the history
  • Loading branch information
mhofman committed Aug 7, 2023
1 parent 637660a commit fd678a5
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 7 deletions.
54 changes: 47 additions & 7 deletions golang/cosmos/x/swingset/keeper/snapshotter.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ func sanitizeArtifactName(name string) string {
}

type activeSnapshot struct {
// Whether the operation in progress is a restore
isRestore bool
// The block height of the snapshot in progress
height int64
// The logger for this snapshot
Expand Down Expand Up @@ -111,19 +113,33 @@ func NewSwingsetSnapshotter(
}
}

// checkNotActive returns an error if there is an active snapshot.
func (snapshotter *SwingsetSnapshotter) checkNotActive() error {
active := snapshotter.activeSnapshot
if active != nil {
select {
case <-active.done:
snapshotter.activeSnapshot = nil
default:
if active.isRestore {
return fmt.Errorf("snapshot restore already in progress for height %d", active.height)
} else {
return fmt.Errorf("snapshot already in progress for height %d", active.height)
}
}
}
return nil
}

// InitiateSnapshot synchronously initiates a snapshot for the given height.
// If a snapshot is already in progress, or if no snapshot manager is configured,
// this will fail.
// The snapshot operation is performed in a goroutine, and synchronized with the
// main thread through the `WaitUntilSnapshotStarted` method.
func (snapshotter *SwingsetSnapshotter) InitiateSnapshot(height int64) error {
if snapshotter.activeSnapshot != nil {
select {
case <-snapshotter.activeSnapshot.done:
snapshotter.activeSnapshot = nil
default:
return fmt.Errorf("snapshot already in progress for height %d", snapshotter.activeSnapshot.height)
}
err := snapshotter.checkNotActive()
if err != nil {
return err
}

if !snapshotter.isConfigured() {
Expand Down Expand Up @@ -366,6 +382,30 @@ func (snapshotter *SwingsetSnapshotter) RestoreExtension(height uint64, format u
return snapshots.ErrUnknownFormat
}

err := snapshotter.checkNotActive()
if err != nil {
return err
}

// We technically don't need to create an active snapshot here since both
// `InitiateSnapshot` and `RestoreExtension` should only be called from the
// main thread, but it doesn't cost much to add in case things go wrong.
active := &activeSnapshot{
isRestore: true,
height: int64(height),
logger: snapshotter.logger,
// goroutine synchronization is unnecessary since anything checking should
// be called from the same thread.
// Effectively `WaitUntilSnapshotStarted` would block infinitely and
// and `InitiateSnapshot` will error when calling `checkNotActive`.
startedResult: nil,
done: nil,
}
snapshotter.activeSnapshot = active
defer func() {
snapshotter.activeSnapshot = nil
}()

ctx := snapshotter.newRestoreContext(int64(height))

exportDir, err := os.MkdirTemp("", fmt.Sprintf("agd-state-sync-restore-%d-*", height))
Expand Down
10 changes: 10 additions & 0 deletions golang/cosmos/x/swingset/keeper/snapshotter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package keeper

import (
"errors"
"io"
"testing"

"github.com/Agoric/agoric-sdk/golang/cosmos/vm"
Expand Down Expand Up @@ -40,6 +41,15 @@ func TestSnapshotInProgress(t *testing.T) {
t.Error("wanted error for snapshot in progress")
}

err = swingsetSnapshotter.RestoreExtension(
456, SnapshotFormat,
func() ([]byte, error) {
return nil, io.EOF
})
if err == nil {
t.Error("wanted error for snapshot in progress")
}

close(ch)
<-swingsetSnapshotter.activeSnapshot.done
err = swingsetSnapshotter.InitiateSnapshot(456)
Expand Down

0 comments on commit fd678a5

Please sign in to comment.