Skip to content

Commit

Permalink
feat(orm)!: ordered variable length encoding for uint32 and uint64 ty…
Browse files Browse the repository at this point in the history
…pes (#11090)

## Description

`uint64` values are used in the ORM as auto-incrementing primary keys. Always using 8 bytes for these values is a bit of a waste of space. Unfortunately, varint encoding does not support ordered prefix iteration.

This PR introduces a compact, well-ordered variable length encoding for `uint32` and `uint64` types. `fixed32` and `fixed64` integers are still encoded as 4 and 8 byte fixed-length big-endian arrays. With this, users have a choice of encoding based on what type of data they are storing. An auto-incrementing primary key should prefer the variable length `uint64` whereas a fixed precision decimal might want to use `fixed64`.

See the golden test updates to see how this reduces key lengths.

This encoding works by using the first two bits to encode the buffer length (4 possible lengths). I'm not sure if my choice of 2, 4, 6 and 9 bytes is the right choice of 4 lengths for `uint64` - there are many alternate choices. I could have also chosen 3 bits and allowed for 8 possible lengths, but why waste an extra bit? Input on the right design parameters would be appreciated.



---

### Author Checklist

*All items are required. Please add a note to the item if the item is not applicable and
please add links to any relevant follow up issues.*

I have...

- [ ] included the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title
- [ ] added `!` to the type prefix if API or client breaking change
- [ ] targeted the correct branch (see [PR Targeting](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#pr-targeting))
- [ ] provided a link to the relevant issue or specification
- [ ] followed the guidelines for [building modules](https://github.com/cosmos/cosmos-sdk/blob/master/docs/building-modules)
- [ ] included the necessary unit and integration [tests](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#testing)
- [ ] added a changelog entry to `CHANGELOG.md`
- [ ] included comments for [documenting Go code](https://blog.golang.org/godoc)
- [ ] updated the relevant documentation or specification
- [ ] reviewed "Files changed" and left comments if necessary
- [ ] confirmed all CI checks have passed

### Reviewers Checklist

*All items are required. Please add a note if the item is not applicable and please add
your handle next to the items reviewed if you only reviewed selected items.*

I have...

- [ ] confirmed the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title
- [ ] confirmed `!` in the type prefix if API or client breaking change
- [ ] confirmed all author checklist items have been addressed 
- [ ] reviewed state machine logic
- [ ] reviewed API design and naming
- [ ] reviewed documentation is accurate
- [ ] reviewed tests and test coverage
- [ ] manually tested (if applicable)
  • Loading branch information
aaronc authored Feb 7, 2022
1 parent a0a1197 commit 1944a08
Show file tree
Hide file tree
Showing 7 changed files with 780 additions and 377 deletions.
12 changes: 8 additions & 4 deletions orm/encoding/ormfield/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,14 @@ func GetCodec(field protoreflect.FieldDescriptor, nonTerminal bool) (Codec, erro
} else {
return StringCodec{}, nil
}
case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
return Uint32Codec{}, nil
case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
return Uint64Codec{}, nil
case protoreflect.Uint32Kind:
return CompactUint32Codec{}, nil
case protoreflect.Fixed32Kind:
return FixedUint32Codec{}, nil
case protoreflect.Uint64Kind:
return CompactUint64Codec{}, nil
case protoreflect.Fixed64Kind:
return FixedUint64Codec{}, nil
case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
return Int32Codec{}, nil
case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
Expand Down
94 changes: 94 additions & 0 deletions orm/encoding/ormfield/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,97 @@ func TestNTBytesTooLong(t *testing.T) {
_, err = cdc.ComputeBufferSize(bz)
assert.ErrorContains(t, err, ormerrors.BytesFieldTooLong.Error())
}

// TestCompactUInt32 checks that the compact uint32 encoding round-trips,
// uses the expected number of bytes at each size boundary, and that encoded
// buffers compare byte-wise in the same order as the integers they encode.
func TestCompactUInt32(t *testing.T) {
	// Boundary values in strictly increasing order, each paired with the
	// number of bytes its encoding is expected to occupy.
	boundaries := []struct {
		value   uint32
		wantLen int
	}{
		{64, 2},
		{16383, 2},
		{16384, 3},
		{4194303, 3},
		{4194304, 4},
		{1073741823, 4},
		{1073741824, 5},
	}

	var prev []byte
	for _, tc := range boundaries {
		encoded := ormfield.EncodeCompactUint32(tc.value)
		assert.Equal(t, tc.wantLen, len(encoded))
		decoded, err := ormfield.DecodeCompactUint32(bytes.NewReader(encoded))
		assert.NilError(t, err)
		assert.Equal(t, tc.value, decoded)
		// Every encoding must sort strictly after the previous, smaller value.
		assert.Assert(t, bytes.Compare(prev, encoded) < 0)
		prev = encoded
	}

	// randomized tests
	rapid.Check(t, func(t *rapid.T) {
		a := rapid.Uint32().Draw(t, "x").(uint32)
		b := rapid.Uint32().Draw(t, "y").(uint32)

		encA := ormfield.EncodeCompactUint32(a)
		encB := ormfield.EncodeCompactUint32(b)

		// The sign of the byte-wise comparison must match the sign of the
		// numeric comparison.
		wantCmp := 0
		switch {
		case a < b:
			wantCmp = -1
		case a > b:
			wantCmp = 1
		}
		assert.Equal(t, wantCmp, bytes.Compare(encA, encB))

		gotA, err := ormfield.DecodeCompactUint32(bytes.NewReader(encA))
		assert.NilError(t, err)
		assert.Equal(t, a, gotA)
		gotB, err := ormfield.DecodeCompactUint32(bytes.NewReader(encB))
		assert.NilError(t, err)
		assert.Equal(t, b, gotB)
	})
}

// TestCompactUInt64 checks that the compact uint64 encoding round-trips,
// uses the expected number of bytes at each size boundary, and that encoded
// buffers compare byte-wise in the same order as the integers they encode.
func TestCompactUInt64(t *testing.T) {
	// Boundary values in strictly increasing order, each paired with the
	// number of bytes its encoding is expected to occupy.
	boundaries := []struct {
		value   uint64
		wantLen int
	}{
		{64, 2},
		{16383, 2},
		{16384, 4},
		{4194303, 4},
		{4194304, 4},
		{1073741823, 4},
		{1073741824, 6},
		{70368744177663, 6},
		{70368744177664, 9},
	}

	var prev []byte
	for _, tc := range boundaries {
		encoded := ormfield.EncodeCompactUint64(tc.value)
		assert.Equal(t, tc.wantLen, len(encoded))
		decoded, err := ormfield.DecodeCompactUint64(bytes.NewReader(encoded))
		assert.NilError(t, err)
		assert.Equal(t, tc.value, decoded)
		// Every encoding must sort strictly after the previous, smaller value.
		assert.Assert(t, bytes.Compare(prev, encoded) < 0)
		prev = encoded
	}

	// randomized tests
	rapid.Check(t, func(t *rapid.T) {
		a := rapid.Uint64().Draw(t, "x").(uint64)
		b := rapid.Uint64().Draw(t, "y").(uint64)

		encA := ormfield.EncodeCompactUint64(a)
		encB := ormfield.EncodeCompactUint64(b)

		// The sign of the byte-wise comparison must match the sign of the
		// numeric comparison.
		wantCmp := 0
		switch {
		case a < b:
			wantCmp = -1
		case a > b:
			wantCmp = 1
		}
		assert.Equal(t, wantCmp, bytes.Compare(encA, encB))

		gotA, err := ormfield.DecodeCompactUint64(bytes.NewReader(encA))
		assert.NilError(t, err)
		assert.Equal(t, a, gotA)
		gotB, err := ormfield.DecodeCompactUint64(bytes.NewReader(encB))
		assert.NilError(t, err)
		assert.Equal(t, b, gotB)
	})
}
159 changes: 151 additions & 8 deletions orm/encoding/ormfield/uint32.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,179 @@ package ormfield

import (
"encoding/binary"
"fmt"
"io"

"google.golang.org/protobuf/reflect/protoreflect"
)

// Uint32Codec encodes uint32 values as 4-byte big-endian integers.
type Uint32Codec struct{}
// FixedUint32Codec encodes uint32 values as 4-byte big-endian integers.
// Every value occupies the same width (see FixedBufferSize) and the codec
// reports itself as ordered (see IsOrdered).
type FixedUint32Codec struct{}

// FixedBufferSize returns 4, the number of bytes used for every encoded value.
func (u Uint32Codec) FixedBufferSize() int {
func (u FixedUint32Codec) FixedBufferSize() int {
return 4
}

// ComputeBufferSize ignores its argument and returns FixedBufferSize with a
// nil error, since the encoded width does not depend on the value.
func (u Uint32Codec) ComputeBufferSize(protoreflect.Value) (int, error) {
func (u FixedUint32Codec) ComputeBufferSize(protoreflect.Value) (int, error) {
return u.FixedBufferSize(), nil
}

// IsOrdered always returns true.
// NOTE(review): presumably this signals that encoded keys sort like the
// underlying values — confirm against the Codec interface documentation.
func (u Uint32Codec) IsOrdered() bool {
func (u FixedUint32Codec) IsOrdered() bool {
return true
}

// Compare orders v1 and v2 by delegating to compareUint.
// NOTE(review): the exact return convention is defined by compareUint, which
// is not visible here — presumably negative/zero/positive; confirm.
func (u Uint32Codec) Compare(v1, v2 protoreflect.Value) int {
func (u FixedUint32Codec) Compare(v1, v2 protoreflect.Value) int {
return compareUint(v1, v2)
}

// Decode reads a big-endian uint32 from r with binary.Read and wraps it in a
// protoreflect.Value. The read error, if any, is returned alongside the
// wrapped value; on error the value wraps whatever x holds (zero unless
// binary.Read filled it).
func (u Uint32Codec) Decode(r Reader) (protoreflect.Value, error) {
func (u FixedUint32Codec) Decode(r Reader) (protoreflect.Value, error) {
var x uint32
err := binary.Read(r, binary.BigEndian, &x)
return protoreflect.ValueOfUint32(x), err
}

// Encode converts value.Uint() to uint32 and writes it to w as a big-endian
// integer using binary.Write, returning any write error.
func (u Uint32Codec) Encode(value protoreflect.Value, w io.Writer) error {
func (u FixedUint32Codec) Encode(value protoreflect.Value, w io.Writer) error {
return binary.Write(w, binary.BigEndian, uint32(value.Uint()))
}

// CompactUint32Codec encodes uint32 values using EncodeCompactUint32.
// Encoded values occupy between 2 and 5 bytes; FixedBufferSize reports the
// 5-byte maximum.
type CompactUint32Codec struct{}

// Decode reads one compact-encoded uint32 from r via DecodeCompactUint32 and
// wraps it in a protoreflect.Value. The decode error, if any, is returned
// alongside the wrapped result.
func (c CompactUint32Codec) Decode(r Reader) (protoreflect.Value, error) {
	decoded, err := DecodeCompactUint32(r)
	wrapped := protoreflect.ValueOfUint32(decoded)
	return wrapped, err
}

// Encode converts value.Uint() to uint32, encodes it with
// EncodeCompactUint32, and writes the resulting bytes to w in a single Write
// call. Only the write error is reported; the byte count is discarded.
func (c CompactUint32Codec) Encode(value protoreflect.Value, w io.Writer) error {
	encoded := EncodeCompactUint32(uint32(value.Uint()))
	if _, err := w.Write(encoded); err != nil {
		return err
	}
	return nil
}

// Compare orders v1 and v2 by delegating to compareUint.
// NOTE(review): the exact return convention is defined by compareUint, which
// is not visible here — presumably negative/zero/positive; confirm.
func (c CompactUint32Codec) Compare(v1, v2 protoreflect.Value) int {
return compareUint(v1, v2)
}

// IsOrdered always returns true.
// NOTE(review): presumably this signals that encoded keys sort like the
// underlying values — confirm against the Codec interface documentation.
func (c CompactUint32Codec) IsOrdered() bool {
return true
}

// FixedBufferSize returns 5, the largest number of bytes EncodeCompactUint32
// produces. Actual encodings may be shorter (2, 3 or 4 bytes).
func (c CompactUint32Codec) FixedBufferSize() int {
return 5
}

// ComputeBufferSize ignores its argument and returns FixedBufferSize with a
// nil error, i.e. the 5-byte upper bound rather than the exact encoded length
// of the value.
func (c CompactUint32Codec) ComputeBufferSize(protoreflect.Value) (int, error) {
return c.FixedBufferSize(), nil
}

// EncodeCompactUint32 encodes x in 2, 3, 4 or 5 bytes. Unlike regular
// varints, the byte-wise order of the encodings matches the numeric order of
// the values, so the encoding is suitable for ordered prefix scans.
//
// The top two bits of the first byte hold the output length minus 2
// (00 = 2 bytes, 01 = 3, 10 = 4, 11 = 5); the remaining bits carry the value
// in big-endian order:
//   - values less than 2^14 fit in 2 bytes,
//   - values less than 2^22 fit in 3 bytes,
//   - values less than 2^30 fit in 4 bytes,
//   - anything larger takes 5 bytes, where the final byte holds only the two
//     lowest bits of x.
func EncodeCompactUint32(x uint32) []byte {
	if x < 1<<14 {
		// Length tag 00 is implicit: the top two bits of x>>8 are zero.
		return []byte{byte(x >> 8), byte(x)}
	}
	if x < 1<<22 {
		return []byte{0x40 | byte(x>>16), byte(x >> 8), byte(x)}
	}
	if x < 1<<30 {
		return []byte{0x80 | byte(x>>24), byte(x >> 16), byte(x >> 8), byte(x)}
	}
	// Only 6 value bits fit beside the tag in the first byte, so the 32-bit
	// value is split 6+8+8+8+2 across the five bytes.
	return []byte{
		0xC0 | byte(x>>26),
		byte(x >> 18),
		byte(x >> 10),
		byte(x >> 2),
		byte(x) & 0x3,
	}
}

// DecodeCompactUint32 decodes a uint32 encoded with EncodeCompactUint32.
//
// The top two bits of the first byte select the total encoded length
// (00 = 2 bytes, 01 = 3, 10 = 4, 11 = 5); the remaining bits of the first
// byte and the following bytes carry the value in big-endian order. In the
// 5-byte form the final byte contributes only the two lowest bits.
//
// Bytes are read with io.ReadFull, so readers that return fewer bytes than
// requested per call, or that return the final bytes together with an error,
// are handled correctly. io.EOF is returned when the reader is exhausted
// before the first byte or immediately after it; io.ErrUnexpectedEOF is
// returned when the input ends partway through the remaining bytes. Any
// other reader error is passed through unchanged.
func DecodeCompactUint32(reader io.Reader) (uint32, error) {
	var buf [5]byte

	// The first byte carries the 2-bit length tag plus the top value bits.
	if _, err := io.ReadFull(reader, buf[:1]); err != nil {
		return 0, err
	}

	switch buf[0] >> 6 {
	case 0:
		if _, err := io.ReadFull(reader, buf[1:2]); err != nil {
			return 0, err
		}

		// The tag bits are zero here, so no masking is needed.
		x := uint32(buf[0]) << 8
		x |= uint32(buf[1])
		return x, nil
	case 1:
		if _, err := io.ReadFull(reader, buf[1:3]); err != nil {
			return 0, err
		}

		x := (uint32(buf[0]) & 0x3F) << 16
		x |= uint32(buf[1]) << 8
		x |= uint32(buf[2])
		return x, nil
	case 2:
		if _, err := io.ReadFull(reader, buf[1:4]); err != nil {
			return 0, err
		}

		x := (uint32(buf[0]) & 0x3F) << 24
		x |= uint32(buf[1]) << 16
		x |= uint32(buf[2]) << 8
		x |= uint32(buf[3])
		return x, nil
	case 3:
		if _, err := io.ReadFull(reader, buf[1:5]); err != nil {
			return 0, err
		}

		// 6+8+8+8+2 bit split, mirroring the 5-byte encoder branch.
		x := (uint32(buf[0]) & 0x3F) << 26
		x |= uint32(buf[1]) << 18
		x |= uint32(buf[2]) << 10
		x |= uint32(buf[3]) << 2
		x |= uint32(buf[4])
		return x, nil
	default:
		// Unreachable: a byte shifted right by 6 is always in 0..3. Kept so
		// the function ends in a terminating statement.
		return 0, fmt.Errorf("unexpected case")
	}
}
Loading

0 comments on commit 1944a08

Please sign in to comment.