diff --git a/.changelog/2691.internal.2.md b/.changelog/2691.internal.2.md new file mode 100644 index 00000000000..11f801c05ef --- /dev/null +++ b/.changelog/2691.internal.2.md @@ -0,0 +1 @@ +go/storage/mkvs: Add overlay tree to support rolling back state diff --git a/go/storage/mkvs/urkel/insert.go b/go/storage/mkvs/urkel/insert.go index b332f779752..1870c1893ed 100644 --- a/go/storage/mkvs/urkel/insert.go +++ b/go/storage/mkvs/urkel/insert.go @@ -27,16 +27,18 @@ func (t *tree) Insert(ctx context.Context, key []byte, value []byte) error { } // Update the pending write log. - entry := t.pendingWriteLog[node.ToMapKey(key)] - if entry == nil { - t.pendingWriteLog[node.ToMapKey(key)] = &pendingEntry{ - key: key, - value: value, - existed: result.existed, - insertedLeaf: result.insertedLeaf, + if !t.withoutWriteLog { + entry := t.pendingWriteLog[node.ToMapKey(key)] + if entry == nil { + t.pendingWriteLog[node.ToMapKey(key)] = &pendingEntry{ + key: key, + value: value, + existed: result.existed, + insertedLeaf: result.insertedLeaf, + } + } else { + entry.value = value } - } else { - entry.value = value } t.cache.setPendingRoot(result.newRoot) diff --git a/go/storage/mkvs/urkel/overlay.go b/go/storage/mkvs/urkel/overlay.go new file mode 100644 index 00000000000..73ca5d9286c --- /dev/null +++ b/go/storage/mkvs/urkel/overlay.go @@ -0,0 +1,230 @@ +package urkel + +import ( + "context" + "fmt" + + "github.com/oasislabs/oasis-core/go/storage/mkvs/urkel/node" + "github.com/oasislabs/oasis-core/go/storage/mkvs/urkel/syncer" +) + +var _ OverlayTree = (*treeOverlay)(nil) + +type treeOverlay struct { + inner Tree + overlay Tree + + dirty map[string]bool +} + +// NewOverlay creates a new key-value tree overlay that holds all updates in memory and only commits +// them if requested. This can be used to create snapshots that can be discarded. +// +// While updates (inserts, removes) are stored in the overlay, reads are not cached in the overlay +// as the inner tree has its own cache and double caching makes less sense. +// +// The overlay is not safe for concurrent use. +func NewOverlay(inner Tree) OverlayTree { + return &treeOverlay{ + inner: inner, + overlay: New(nil, nil, WithoutWriteLog()), + dirty: make(map[string]bool), + } +} + +// Implements KeyValueTree. +func (o *treeOverlay) Insert(ctx context.Context, key []byte, value []byte) error { + err := o.overlay.Insert(ctx, key, value) + if err != nil { + return err + } + + o.dirty[string(key)] = true + return nil +} + +// Implements KeyValueTree. +func (o *treeOverlay) Get(ctx context.Context, key []byte) ([]byte, error) { + // For dirty values, check the overlay. + if o.dirty[string(key)] { + return o.overlay.Get(ctx, key) + } + + // Otherwise fetch from inner tree. + return o.inner.Get(ctx, key) +} + +// Implements KeyValueTree. +func (o *treeOverlay) RemoveExisting(ctx context.Context, key []byte) ([]byte, error) { + // For dirty values, remove from the overlay. + if o.dirty[string(key)] { + return o.overlay.RemoveExisting(ctx, key) + } + + value, err := o.inner.Get(ctx, key) + if err != nil { + return nil, err + } + + // Do not treat a value as dirty if it was not dirty before and did not exist in the inner tree. + if value != nil { + o.dirty[string(key)] = true + } + return value, nil +} + +// Implements KeyValueTree. +func (o *treeOverlay) Remove(ctx context.Context, key []byte) error { + // Since we don't care about the previous value, we can just record an update. + o.dirty[string(key)] = true + return o.overlay.Remove(ctx, key) +} + +// Implements KeyValueTree. +func (o *treeOverlay) NewIterator(ctx context.Context, options ...IteratorOption) Iterator { + return &treeOverlayIterator{ + tree: o, + inner: o.inner.NewIterator(ctx, options...), + overlay: o.overlay.NewIterator(ctx), + } +} + +// Implements OverlayTree. +func (o *treeOverlay) Commit(ctx context.Context) error { + it := o.overlay.NewIterator(ctx) + defer it.Close() + + // Insert all items present in the overlay. + for it.Rewind(); it.Valid(); it.Next() { + if err := o.inner.Insert(ctx, it.Key(), it.Value()); err != nil { + return err + } + delete(o.dirty, string(it.Key())) + } + if it.Err() != nil { + return it.Err() + } + + // Any remaining dirty items must have been removed. + for key := range o.dirty { + if err := o.inner.Remove(ctx, []byte(key)); err != nil { + return err + } + } + + return nil +} + +// Implements ClosableTree. +func (o *treeOverlay) Close() { + if o.inner == nil { + return + } + + o.overlay.Close() + + o.inner = nil + o.overlay = nil + o.dirty = nil +} + +type treeOverlayIterator struct { + tree *treeOverlay + + inner Iterator + overlay Iterator + + key node.Key + value []byte +} + +func (it *treeOverlayIterator) Valid() bool { + // If either iterator is valid, the merged iterator is valid. + return it.inner.Valid() || it.overlay.Valid() +} + +func (it *treeOverlayIterator) Err() error { + // If either iterator has an error, the merged iterator has an error. + if err := it.inner.Err(); err != nil { + return err + } + if err := it.overlay.Err(); err != nil { + return err + } + return nil +} + +func (it *treeOverlayIterator) Rewind() { + it.inner.Rewind() + it.overlay.Rewind() + + it.updateIteratorPosition() +} + +func (it *treeOverlayIterator) Seek(key node.Key) { + it.inner.Seek(key) + it.overlay.Seek(key) + + it.updateIteratorPosition() +} + +func (it *treeOverlayIterator) Next() { + if !it.overlay.Valid() || it.inner.Key().Compare(it.overlay.Key()) <= 0 { + // Key of inner iterator is smaller or equal than the key of the overlay iterator. + it.inner.Next() + } else { + // Key of inner iterator is greater than the key of the overlay iterator. + it.overlay.Next() + } + + it.updateIteratorPosition() +} + +func (it *treeOverlayIterator) updateIteratorPosition() { + // Skip over any dirty entries from the inner iterator. + for it.inner.Valid() && it.tree.dirty[string(it.inner.Key())] { + it.inner.Next() + } + + iKey := it.inner.Key() + oKey := it.overlay.Key() + + if it.inner.Valid() && (!it.overlay.Valid() || iKey.Compare(oKey) < 0) { + // Key of inner iterator is smaller than the key of the overlay iterator. + it.key = iKey + it.value = it.inner.Value() + } else if it.overlay.Valid() { + // Key of overlay iterator is smaller than or equal to the key of the inner iterator. + it.key = oKey + it.value = it.overlay.Value() + } else { + // Both iterators are invalid. + it.key = nil + it.value = nil + } +} + +func (it *treeOverlayIterator) Key() node.Key { + return it.key +} + +func (it *treeOverlayIterator) Value() []byte { + return it.value +} + +func (it *treeOverlayIterator) GetProof() (*syncer.Proof, error) { + panic(fmt.Errorf("tree overlay: proofs are not supported")) +} + +func (it *treeOverlayIterator) GetProofBuilder() *syncer.ProofBuilder { + panic(fmt.Errorf("tree overlay: proofs are not supported")) +} + +func (it *treeOverlayIterator) Close() { + it.inner.Close() + it.overlay.Close() + + it.key = nil + it.value = nil + it.tree = nil +} diff --git a/go/storage/mkvs/urkel/overlay_test.go b/go/storage/mkvs/urkel/overlay_test.go new file mode 100644 index 00000000000..958697422ac --- /dev/null +++ b/go/storage/mkvs/urkel/overlay_test.go @@ -0,0 +1,156 @@ +package urkel + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/oasislabs/oasis-core/go/storage/mkvs/urkel/node" + "github.com/oasislabs/oasis-core/go/storage/mkvs/urkel/writelog" +) + +func TestOverlay(t *testing.T) { + require := require.New(t) + + ctx := context.Background() + tree := New(nil, nil) + defer tree.Close() + + // Insert some items. + items := writelog.WriteLog{ + writelog.LogEntry{Key: []byte("key"), Value: []byte("first")}, + writelog.LogEntry{Key: []byte("key 1"), Value: []byte("one")}, + writelog.LogEntry{Key: []byte("key 2"), Value: []byte("two")}, + writelog.LogEntry{Key: []byte("key 5"), Value: []byte("five")}, + writelog.LogEntry{Key: []byte("key 8"), Value: []byte("eight")}, + writelog.LogEntry{Key: []byte("key 9"), Value: []byte("nine")}, + } + + tests := []testCase{ + {seek: node.Key("k"), pos: 0}, + {seek: node.Key("key 1"), pos: 1}, + {seek: node.Key("key 3"), pos: 3}, + {seek: node.Key("key 4"), pos: 3}, + {seek: node.Key("key 5"), pos: 3}, + {seek: node.Key("key 6"), pos: 4}, + {seek: node.Key("key 7"), pos: 4}, + {seek: node.Key("key 8"), pos: 4}, + {seek: node.Key("key 9"), pos: 5}, + {seek: node.Key("key A"), pos: -1}, + } + + err := tree.ApplyWriteLog(ctx, writelog.NewStaticIterator(items)) + require.NoError(err, "ApplyWriteLog") + + // Create an overlay. + overlay := NewOverlay(tree) + + // Test that all keys can be fetched from an empty overlay. + t.Run("EmptyOverlay/Get", func(t *testing.T) { + for _, item := range items { + var value []byte + value, err = overlay.Get(ctx, item.Key) + require.NoError(err, "Get") + require.Equal(item.Value, value, "value from overlay should be correct") + } + }) + + // Test that merged iterator works correctly on an empty overlay (it should behave exactly the + // same as for the inner tree). + t.Run("EmptyOverlay/Iterator", func(t *testing.T) { + it := overlay.NewIterator(ctx) + defer it.Close() + + testIterator(t, items, it, tests) + }) + + // Add some updates to the overlay. + err = overlay.Remove(ctx, []byte("key 2")) + require.NoError(err, "Remove") + err = overlay.Insert(ctx, []byte("key 7"), []byte("seven")) + require.NoError(err, "Insert") + err = overlay.Remove(ctx, []byte("key 5")) + require.NoError(err, "Remove") + err = overlay.Insert(ctx, []byte("key 5"), []byte("fivey")) + require.NoError(err, "Insert") + + // Make sure updates did not propagate to the inner tree. + t.Run("Updates/NoPropagation", func(t *testing.T) { + var value []byte + value, err = tree.Get(ctx, []byte("key 2")) + require.NoError(err, "Get") + require.Equal([]byte("two"), value, "value in inner tree should be unchanged") + value, err = tree.Get(ctx, []byte("key 7")) + require.NoError(err, "Get") + require.Nil(value, "value should not exist in inner tree") + }) + + // State of overlay after updates. + items = writelog.WriteLog{ + writelog.LogEntry{Key: []byte("key"), Value: []byte("first")}, + writelog.LogEntry{Key: []byte("key 1"), Value: []byte("one")}, + writelog.LogEntry{Key: []byte("key 5"), Value: []byte("fivey")}, + writelog.LogEntry{Key: []byte("key 7"), Value: []byte("seven")}, + writelog.LogEntry{Key: []byte("key 8"), Value: []byte("eight")}, + writelog.LogEntry{Key: []byte("key 9"), Value: []byte("nine")}, + } + + tests = []testCase{ + {seek: node.Key("k"), pos: 0}, + {seek: node.Key("key 1"), pos: 1}, + {seek: node.Key("key 3"), pos: 2}, + {seek: node.Key("key 4"), pos: 2}, + {seek: node.Key("key 5"), pos: 2}, + {seek: node.Key("key 6"), pos: 3}, + {seek: node.Key("key 7"), pos: 3}, + {seek: node.Key("key 8"), pos: 4}, + {seek: node.Key("key 9"), pos: 5}, + {seek: node.Key("key A"), pos: -1}, + } + + // Test that all keys can be fetched from an updated overlay. + t.Run("Updates/Get", func(t *testing.T) { + for _, item := range items { + var value []byte + value, err = overlay.Get(ctx, item.Key) + require.NoError(err, "Get") + require.Equal(item.Value, value, "value from overlay should be correct") + } + }) + + // Make sure that merged overlay iterator works. + t.Run("Updates/Iterator", func(t *testing.T) { + it := overlay.NewIterator(ctx) + defer it.Close() + + testIterator(t, items, it, tests) + }) + + // Commit the overlay. + err = overlay.Commit(ctx) + require.NoError(err, "Commit") + + // Test that all keys can be fetched from an updated tree. + t.Run("Committed/Get", func(t *testing.T) { + for _, item := range items { + var value []byte + value, err = tree.Get(ctx, item.Key) + require.NoError(err, "Get") + require.Equal(item.Value, value, "value from updated tree should be correct") + } + }) + + // Make sure that the updated tree is correct. + t.Run("Committed/Iterator", func(t *testing.T) { + it := tree.NewIterator(ctx) + defer it.Close() + + testIterator(t, items, it, tests) + }) + + // Make sure that closing the overlay does not close the inner tree. + overlay.Close() + _, err = tree.Get(ctx, []byte("key")) + require.NoError(err, "Get") +} diff --git a/go/storage/mkvs/urkel/remove.go b/go/storage/mkvs/urkel/remove.go index 7fde091cfb3..b1022b99130 100644 --- a/go/storage/mkvs/urkel/remove.go +++ b/go/storage/mkvs/urkel/remove.go @@ -25,11 +25,13 @@ func (t *tree) RemoveExisting(ctx context.Context, key []byte) ([]byte, error) { } // Update the pending write log. - entry := t.pendingWriteLog[node.ToMapKey(key)] - if entry == nil { - t.pendingWriteLog[node.ToMapKey(key)] = &pendingEntry{key, nil, changed, nil} - } else { - entry.value = nil + if !t.withoutWriteLog { + entry := t.pendingWriteLog[node.ToMapKey(key)] + if entry == nil { + t.pendingWriteLog[node.ToMapKey(key)] = &pendingEntry{key, nil, changed, nil} + } else { + entry.value = nil + } } t.cache.setPendingRoot(newRoot) diff --git a/go/storage/mkvs/urkel/tree.go b/go/storage/mkvs/urkel/tree.go index b757b047eae..634fe8daee2 100644 --- a/go/storage/mkvs/urkel/tree.go +++ b/go/storage/mkvs/urkel/tree.go @@ -21,20 +21,14 @@ var ( ErrKnownRootMismatch = errors.New("urkel: known root mismatch") ) -// Tree is a MKVS tree interface. -type Tree interface { - syncer.ReadSyncer - +// KeyValueTree is the key-value store tree interface. +type KeyValueTree interface { // Insert inserts a key/value pair into the tree. Insert(ctx context.Context, key []byte, value []byte) error // Get looks up an existing key. Get(ctx context.Context, key []byte) ([]byte, error) - // PrefetchPrefixes populates the in-memory tree with nodes for keys - // starting with given prefixes. - PrefetchPrefixes(ctx context.Context, prefixes [][]byte, limit uint16) error - // RemoveExisting removes a key from the tree and returns the previous value. RemoveExisting(ctx context.Context, key []byte) ([]byte, error) @@ -43,12 +37,10 @@ type Tree interface { // NewIterator returns a new iterator over the tree. NewIterator(ctx context.Context, options ...IteratorOption) Iterator +} - // ApplyWriteLog applies the operations from a write log to the current tree. - // - // The caller is responsible for calling Commit. - ApplyWriteLog(ctx context.Context, wl writelog.Iterator) error - +// ClosableTree is a tree interface that can be closed. +type ClosableTree interface { // Close releases resources associated with this tree. After calling this // method the tree MUST NOT be used anymore and all methods will return // the ErrClosed error. @@ -56,6 +48,31 @@ type Tree interface { // Any pending write operations are discarded. If you need to persist them // you need to call Commit before calling this method. Close() +} + +// OverlayTree is an overlay tree. +type OverlayTree interface { + KeyValueTree + ClosableTree + + // Commit commits any modifications to the underlying tree. + Commit(ctx context.Context) error +} + +// Tree is a general MKVS tree interface. +type Tree interface { + KeyValueTree + ClosableTree + syncer.ReadSyncer + + // PrefetchPrefixes populates the in-memory tree with nodes for keys + // starting with given prefixes. + PrefetchPrefixes(ctx context.Context, prefixes [][]byte, limit uint16) error + + // ApplyWriteLog applies the operations from a write log to the current tree. + // + // The caller is responsible for calling Commit. + ApplyWriteLog(ctx context.Context, wl writelog.Iterator) error // Size calculates the size of the tree in bytes. Size() uint64 diff --git a/go/storage/mkvs/urkel/urkel.go b/go/storage/mkvs/urkel/urkel.go index 2791ef089e4..02385b4b9c9 100644 --- a/go/storage/mkvs/urkel/urkel.go +++ b/go/storage/mkvs/urkel/urkel.go @@ -17,6 +17,7 @@ type tree struct { // NOTE: This can be a map as updates are commutative. pendingWriteLog map[string]*pendingEntry + withoutWriteLog bool // pendingRemovedNodes are the nodes that have been removed from the // in-memory tree and should be marked for garbage collection if this // tree is committed to the node database. @@ -58,6 +59,13 @@ func PersistEverythingFromSyncer(doit bool) Option { } } +// WithoutWriteLog disables building a write log when performing operations. +func WithoutWriteLog() Option { + return func(t *tree) { + t.withoutWriteLog = true + } +} + // New creates a new empty Urkel tree backed by the given node database. func New(rs syncer.ReadSyncer, ndb db.NodeDB, options ...Option) Tree { if rs == nil { @@ -70,6 +78,7 @@ func New(rs syncer.ReadSyncer, ndb db.NodeDB, options ...Option) Tree { t := &tree{ cache: newCache(ndb, rs), pendingWriteLog: make(map[string]*pendingEntry), + withoutWriteLog: false, } for _, v := range options {