From 554a1102a08f466dad5d8291044150236896ec7a Mon Sep 17 00:00:00 2001 From: Mathieu Hofman Date: Fri, 14 Jul 2023 22:26:54 +0000 Subject: [PATCH] fix(x/swingset): guard snapshot restore for concurrency --- .../cosmos/x/swingset/keeper/snapshotter.go | 54 ++++++++++++++++--- .../x/swingset/keeper/snapshotter_test.go | 10 ++++ 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/golang/cosmos/x/swingset/keeper/snapshotter.go b/golang/cosmos/x/swingset/keeper/snapshotter.go index da402b89b80..d0d4b738c64 100644 --- a/golang/cosmos/x/swingset/keeper/snapshotter.go +++ b/golang/cosmos/x/swingset/keeper/snapshotter.go @@ -41,6 +41,8 @@ func sanitizeArtifactName(name string) string { } type activeSnapshot struct { + // Whether the operation in progress is a restore + isRestore bool // The block height of the snapshot in progress height int64 // The logger for this snapshot @@ -111,19 +113,33 @@ func NewSwingsetSnapshotter( } } +// checkNotActive returns an error if there is an active snapshot. +func (snapshotter *SwingsetSnapshotter) checkNotActive() error { + active := snapshotter.activeSnapshot + if active != nil { + select { + case <-active.done: + snapshotter.activeSnapshot = nil + default: + if active.isRestore { + return fmt.Errorf("snapshot restore already in progress for height %d", active.height) + } else { + return fmt.Errorf("snapshot already in progress for height %d", active.height) + } + } + } + return nil +} + // InitiateSnapshot synchronously initiates a snapshot for the given height. // If a snapshot is already in progress, or if no snapshot manager is configured, // this will fail. // The snapshot operation is performed in a goroutine, and synchronized with the // main thread through the `WaitUntilSnapshotStarted` method. func (snapshotter *SwingsetSnapshotter) InitiateSnapshot(height int64) error { - if snapshotter.activeSnapshot != nil { - select { - case <-snapshotter.activeSnapshot.done: - snapshotter.activeSnapshot = nil - default: - return fmt.Errorf("snapshot already in progress for height %d", snapshotter.activeSnapshot.height) - } + err := snapshotter.checkNotActive() + if err != nil { + return err } if !snapshotter.isConfigured() { @@ -366,6 +382,30 @@ func (snapshotter *SwingsetSnapshotter) RestoreExtension(height uint64, format u return snapshots.ErrUnknownFormat } + err := snapshotter.checkNotActive() + if err != nil { + return err + } + + // We technically don't need to create an active snapshot here since both + // `InitiateSnapshot` and `RestoreExtension` should only be called from the + // main thread, but it doesn't cost much to add in case things go wrong. + active := &activeSnapshot{ + isRestore: true, + height: int64(height), + logger: snapshotter.logger, + // goroutine synchronization is unnecessary since anything checking should + // be called from the same thread. + // Effectively `WaitUntilSnapshotStarted` would block infinitely and + // and `InitiateSnapshot` will error when calling `checkNotActive`. + startedResult: nil, + done: nil, + } + snapshotter.activeSnapshot = active + defer func() { + snapshotter.activeSnapshot = nil + }() + ctx := snapshotter.newRestoreContext(int64(height)) exportDir, err := os.MkdirTemp("", fmt.Sprintf("agd-state-sync-restore-%d-*", height)) diff --git a/golang/cosmos/x/swingset/keeper/snapshotter_test.go b/golang/cosmos/x/swingset/keeper/snapshotter_test.go index 0c1474879e3..196dad37eea 100644 --- a/golang/cosmos/x/swingset/keeper/snapshotter_test.go +++ b/golang/cosmos/x/swingset/keeper/snapshotter_test.go @@ -2,6 +2,7 @@ package keeper import ( "errors" + "io" "testing" "github.com/Agoric/agoric-sdk/golang/cosmos/vm" @@ -40,6 +41,15 @@ func TestSnapshotInProgress(t *testing.T) { t.Error("wanted error for snapshot in progress") } + err = swingsetSnapshotter.RestoreExtension( + 456, SnapshotFormat, + func() ([]byte, error) { + return nil, io.EOF + }) + if err == nil { + t.Error("wanted error for snapshot in progress") + } + close(ch) <-swingsetSnapshotter.activeSnapshot.done err = swingsetSnapshotter.InitiateSnapshot(456)