Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce archivedb key lengths by 1 byte #2113

Merged
merged 10 commits into from
Sep 28, 2023
2 changes: 1 addition & 1 deletion x/archivedb/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ type batch struct {
func (c *batch) Write() error {
batch := c.db.db.NewBatch()
for _, op := range c.Ops {
key, _ := newDBKey(op.Key, c.height)
key, _ := newDBKeyFromUser(op.Key, c.height)
var value []byte
if !op.Delete {
value = newDBValue(op.Value)
Expand Down
2 changes: 1 addition & 1 deletion x/archivedb/db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func TestDBKeySpace(t *testing.T) {

var (
key1 = []byte("key1")
key2, _ = newDBKey([]byte("key1"), 2)
key2, _ = newDBKeyFromUser([]byte("key1"), 2)
key3 = []byte("key3")
value1 = []byte("value1@1")
value2 = []byte("value2@2")
Expand Down
71 changes: 40 additions & 31 deletions x/archivedb/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,76 +11,85 @@ import (
)

var (
ErrEmptyKey = errors.New("empty key")
ErrParsingKeyLength = errors.New("failed reading key length")
ErrIncorrectKeyLength = errors.New("incorrect key length")

heightKey = []byte{1}
heightKey = newDBKeyFromMetadata([]byte{})
)

// newDBKey converts a user formatted key and a height into a database formatted
// key.
//
// A database key contains additional information alongside the given user key.
//
// The requirements of a database key are:
//
// 1. A given user key must have a unique database key prefix. This guarantees
// that user keys can not overlap on disk.
// 2. Inside of a database key prefix, the database keys must be sorted by
// decreasing height.
// 3. User keys must never overlap with any metadata keys.

// newDBKeyFromUser converts a user key and height into a database formatted
// key.
//
// To meet these requirements, a database key prefix is defined by concatinating
// the zero byte, the length of the user key, and the user key. The suffix of
// the database key is the negation of the big endian encoded height. This
// suffix guarantees the keys are sorted correctly.
// To meet the requirements of a database key, the prefix is defined by
// concatenating the length of the user key and the user key. The suffix of the
// database key is the negation of the big endian encoded height. This suffix
// guarantees the keys are sorted correctly.
//
// Example (Assuming heights are 1 byte):
// | User given | Stored as |
// |--------------|-------------|
// | foo:10 | 3:foo:245 |
// | foo:20 | 3:foo:235 |
// | User key | Stored as |
// |------------|-------------|
// | foo:10 | 3:foo:245 |
// | foo:20 | 3:foo:235 |
//
// Returns:
// - The database key
// - The database key prefix, which is independent of the height
func newDBKey(key []byte, height uint64) ([]byte, []byte) {
func newDBKeyFromUser(key []byte, height uint64) ([]byte, []byte) {
keyLen := len(key)
dbKeyMaxSize := 1 + binary.MaxVarintLen64 + keyLen + wrappers.LongLen
dbKeyMaxSize := binary.MaxVarintLen64 + keyLen + wrappers.LongLen
dbKey := make([]byte, dbKeyMaxSize)
offset := 1
offset += binary.PutUvarint(dbKey[offset:], uint64(keyLen))
offset := binary.PutUvarint(dbKey, uint64(keyLen))
offset += copy(dbKey[offset:], key)
prefixOffset := offset
binary.BigEndian.PutUint64(dbKey[offset:], ^height)
offset += wrappers.LongLen
return dbKey[:offset], dbKey[:prefixOffset]
}

// parseDBKey takes a database formatted key and returns the user formatted key
// parseDBKeyFromUser takes a database formatted key and returns the user key
// along with its height.
//
// Note: An error should only be returned from this function if the database has
// been corrupted.
func parseDBKey(dbKey []byte) ([]byte, uint64, error) {
dbKeyLen := uint64(len(dbKey))
if dbKeyLen == 0 {
return nil, 0, ErrEmptyKey
}

keyLen, offset := binary.Uvarint(dbKey[1:])
func parseDBKeyFromUser(dbKey []byte) ([]byte, uint64, error) {
keyLen, offset := binary.Uvarint(dbKey)
if offset <= 0 {
return nil, 0, ErrParsingKeyLength
}

keyIndex := 1 + uint64(offset)
heightIndex := keyIndex + keyLen
if dbKeyLen != heightIndex+wrappers.LongLen {
heightIndex := uint64(offset) + keyLen
if uint64(len(dbKey)) != heightIndex+wrappers.LongLen {
return nil, 0, ErrIncorrectKeyLength
}

key := dbKey[keyIndex:heightIndex]
key := dbKey[offset:heightIndex]
height := ^binary.BigEndian.Uint64(dbKey[heightIndex:])
return key, height, nil
}

// newDBKeyFromMetadata converts a metadata key into a database formatted key.
//
// To meet the requirements of a database key, the key is defined by
// concatenating the length of the metadata key + 1 and the metadata key. The
// length is encoded as an unsigned varint.
//
// Example:
// | Metadata key | Stored as |
// |----------------|-------------|
// | foo | 4:foo |
// | fo | 3:fo |
//
// Returns the database key, trimmed to the bytes actually written.
func newDBKeyFromMetadata(key []byte) []byte {
	keyLen := len(key)
	// Reserve room for the largest possible varint; the slice is trimmed to
	// the written length before it is returned.
	dbKeyMaxSize := binary.MaxVarintLen64 + keyLen
	dbKey := make([]byte, dbKeyMaxSize)
	// Note the +1 here making the prefix different than the user keys:
	// newDBKeyFromUser encodes len(key), while metadata keys encode len(key)+1.
	offset := binary.PutUvarint(dbKey, uint64(keyLen)+1)
	offset += copy(dbKey[offset:], key)
	return dbKey[:offset]
}
30 changes: 20 additions & 10 deletions x/archivedb/key_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ import (
)

func TestNaturalDescSortingForSameKey(t *testing.T) {
key0, _ := newDBKey(make([]byte, 0), 0)
key1, _ := newDBKey(make([]byte, 0), 1)
key2, _ := newDBKey(make([]byte, 0), 2)
key3, _ := newDBKey(make([]byte, 0), 3)
key0, _ := newDBKeyFromUser(make([]byte, 0), 0)
key1, _ := newDBKeyFromUser(make([]byte, 0), 1)
key2, _ := newDBKeyFromUser(make([]byte, 0), 2)
key3, _ := newDBKeyFromUser(make([]byte, 0), 3)

entry := [][]byte{key0, key1, key2, key3}
expected := [][]byte{key3, key2, key1, key0}
Expand All @@ -29,10 +29,10 @@ func TestNaturalDescSortingForSameKey(t *testing.T) {
}

func TestSortingDifferentPrefix(t *testing.T) {
key0, _ := newDBKey([]byte{0}, 0)
key1, _ := newDBKey([]byte{0}, 1)
key2, _ := newDBKey([]byte{1}, 0)
key3, _ := newDBKey([]byte{1}, 1)
key0, _ := newDBKeyFromUser([]byte{0}, 0)
key1, _ := newDBKeyFromUser([]byte{0}, 1)
key2, _ := newDBKeyFromUser([]byte{1}, 0)
key3, _ := newDBKeyFromUser([]byte{1}, 1)

entry := [][]byte{key0, key1, key2, key3}
expected := [][]byte{key1, key0, key3, key2}
Expand All @@ -49,10 +49,20 @@ func TestParseDBKey(t *testing.T) {

key := []byte{0, 1, 2, 3, 4, 5}
height := uint64(102310)
dbKey, _ := newDBKey(key, height)
dbKey, _ := newDBKeyFromUser(key, height)

parsedKey, parsedHeight, err := parseDBKey(dbKey)
parsedKey, parsedHeight, err := parseDBKeyFromUser(dbKey)
require.NoError(err)
require.Equal(key, parsedKey)
require.Equal(height, parsedHeight)
}

func FuzzMetadataKeyInvariant(f *testing.F) {
f.Fuzz(func(t *testing.T, userKey []byte, metadataKey []byte) {
// The prefix is independent of the height, so its value doesn't matter
// for this test.
_, dbKeyPrefix := newDBKeyFromUser(userKey, 0)
dbKey := newDBKeyFromMetadata(metadataKey)
require.False(t, bytes.HasPrefix(dbKey, dbKeyPrefix))
})
}
2 changes: 1 addition & 1 deletion x/archivedb/prefix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func TestDBEfficientLookups(t *testing.T) {

var (
key = []byte("key")
maliciousKey, _ = newDBKey(key, 2)
maliciousKey, _ = newDBKeyFromUser(key, 2)
)

db := New(&limitIterationDB{Database: memdb.New()})
Expand Down
4 changes: 2 additions & 2 deletions x/archivedb/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func (r *Reader) Get(key []byte) ([]byte, error) {
// modified at, and a boolean to indicate if the last modification was an
// insertion. If the key has never been modified, ErrNotFound will be returned.
func (r *Reader) GetEntry(key []byte) ([]byte, uint64, bool, error) {
it := r.db.db.NewIteratorWithStartAndPrefix(newDBKey(key, r.height))
it := r.db.db.NewIteratorWithStartAndPrefix(newDBKeyFromUser(key, r.height))
defer it.Release()

next := it.Next()
Expand All @@ -48,7 +48,7 @@ func (r *Reader) GetEntry(key []byte) ([]byte, uint64, bool, error) {
return nil, 0, false, database.ErrNotFound
}

_, height, err := parseDBKey(it.Key())
_, height, err := parseDBKeyFromUser(it.Key())
if err != nil {
return nil, 0, false, err
}
Expand Down
Loading