From 0e21c95bd339e5c9419ff38fb878d8c31163902f Mon Sep 17 00:00:00 2001 From: Alfredo Beaumont Date: Wed, 6 Apr 2022 12:46:02 +0200 Subject: [PATCH] fix: read compressed integers properly. Compressed integers were being read as unsigned VLQ, which is not correct. The encoding isn't exactly LEB128 either; it's very close to unsigned LEB128, but the end result may be signed. The way it works is as follows: - For 64-bit integers, unsigned LEB128 encoding is used, with a difference: the encoded number is limited to 9 bytes, and in the 9th byte all 8 bits are encoded as is (so the 64 bits comprise 8 bytes * 7 bits + 1 byte * 8 bits). For negative integers, the MSB will be set to 1. - For 16- and 32-bit integers, the same encoding is used, except that only the least significant 16 and 32 bits, respectively, can be non-zero. The resulting numbers are also signed and can be negative. --- reader/compressed.go | 51 +++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/reader/compressed.go b/reader/compressed.go index 52041cc..96919e8 100644 --- a/reader/compressed.go +++ b/reader/compressed.go @@ -1,7 +1,6 @@ package reader import ( - "encoding/binary" "fmt" "io" ) @@ -15,50 +14,48 @@ func newCompressed(r io.ByteReader) VarReader { } func (c compressed) VarShort() (int16, error) { - n, err := binary.ReadUvarint(c) + n, err := c.ulong() if err != nil { return 0, err } if (n >> 48) > 0 { - // TODO - return 0, fmt.Errorf("overflow: %d bigger than 32 bits", n) + return 0, fmt.Errorf("overflow: %d bigger than 16 bits", n) } return int16(n), nil - /* - FIXME: Is it unsigned LEB128? - x := int16(n >> 1) - if n&1 == 1 { - x = ^x - } - return x, nil - */ } func (c compressed) VarInt() (int32, error) { - n, err := binary.ReadUvarint(c) + n, err := c.ulong() if err != nil { return 0, err } if (n >> 32) > 0 { - // TODO return 0, fmt.Errorf("overflow: %d bigger than 32 bits", n) } return int32(n), nil - /* - FIXME: Is it unsigned LEB128? 
- x := int32(n >> 1) - if n&1 == 1 { - x = ^x - } - return x, nil - */ } func (c compressed) VarLong() (int64, error) { - /* - FIXME: Is it unsigned LEB128? - return binary.ReadVarint(c) - */ - n, err := binary.ReadUvarint(c) + n, err := c.ulong() return int64(n), err } + +func (c compressed) ulong() (n uint64, err error) { + s := 0 + for i := 0; i < 9; i++ { + b, err := c.ReadByte() + if err != nil { + return 0, err + } + if b&0x80 == 0 { + n |= uint64(b) << s + return n, nil + } + if i < 8 { + b &= 0x7f + } + n |= uint64(b) << s + s += 7 + } + return n, nil +}