Skip to content

Commit

Permalink
nodedb: prevent deadlock by releasing DeleteVersionsFrom mutex on error
Browse files Browse the repository at this point in the history
Noticed during an audit that there can be a deadlock resulting from
an unreleased mutex if there happens to be a newer version with one or
more concurrent readers.

Fixes #842
  • Loading branch information
odeke-em committed Oct 13, 2023
1 parent 3574b99 commit 13a7f47
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
1 change: 1 addition & 0 deletions nodedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ func (ndb *nodeDB) DeleteVersionsFrom(fromVersion int64) error {
ndb.mtx.Lock()
for v, r := range ndb.versionReaders {
if v >= fromVersion && r != 0 {
ndb.mtx.Unlock() // Unlock before exiting
return fmt.Errorf("unable to delete version %v with %v active readers", v, r)
}
}
Expand Down
45 changes: 45 additions & 0 deletions nodedb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"strconv"
"testing"
"time"

log "cosmossdk.io/log"
db "github.com/cosmos/cosmos-db"
Expand Down Expand Up @@ -398,3 +399,47 @@ func makeAndPopulateMutableTree(tb testing.TB) *MutableTree {
require.Nil(tb, err, "Expected .SaveVersion to succeed")
return tree
}

func TestDeleteVersionsFromNoDeadlock(t *testing.T) {
const expectedVersion = fastStorageVersionValue

db := db.NewMemDB()

ndb := newNodeDB(db, 0, DefaultOptions(), log.NewNopLogger())
require.Equal(t, defaultStorageVersionValue, ndb.getStorageVersion())

err := ndb.setFastStorageVersionToBatch()
require.NoError(t, err)

latestVersion, err := ndb.getLatestVersion()
require.NoError(t, err)
require.Equal(t, expectedVersion+fastStorageVersionDelimiter+strconv.Itoa(int(latestVersion)), ndb.getStorageVersion())
require.NoError(t, ndb.batch.Write())

// Reported in https://github.com/cosmos/iavl/issues/842
// there was a deadlock that triggered on an invalid version being
// checked for deletion.
// Now add in data to trigger the error path.
ndb.versionReaders[latestVersion+1] = 2

errCh := make(chan error)
targetVersion := latestVersion - 1

go func() {
defer close(errCh)
errCh <- ndb.DeleteVersionsFrom(targetVersion)
}()

select {
case err = <-errCh:
// Happy path, the mutex was unlocked fast enough.

case <-time.After(2 * time.Second):
t.Error("code did not return even after 2 seconds")
}

require.True(t, ndb.mtx.TryLock(), "tryLock failed mutex was still locked")
ndb.mtx.Unlock() // Since TryLock passed, the lock is now solely being held by us.
require.Error(t, err, "")
require.Contains(t, err.Error(), fmt.Sprintf("unable to delete version %v with 2 active readers", targetVersion+2))
}

0 comments on commit 13a7f47

Please sign in to comment.