Skip to content

Commit

Permalink
reader: avoid memory escape and unsafe byte to string
Browse files Browse the repository at this point in the history
  • Loading branch information
zdyj3170101136 committed Aug 1, 2023
1 parent 1ead227 commit 39df228
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 79 deletions.
16 changes: 7 additions & 9 deletions parser/chunk.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package parser

import (
"bytes"
"fmt"
"io"

Expand Down Expand Up @@ -52,7 +51,7 @@ func (c *Chunk) Parse(r io.Reader, options *ChunkParseOptions) (err error) {
if _, err := io.ReadFull(r, buf); err != nil {
return fmt.Errorf("unable to read chunk header: %w", err)
}
if err := c.Header.Parse(reader.NewReader(bytes.NewReader(buf), false)); err != nil {
if err := c.Header.Parse(reader.NewReader(buf, false, true)); err != nil {
return fmt.Errorf("unable to parse chunk header: %w", err)
}
c.Header.ChunkSize -= headerSize + 8
Expand All @@ -65,13 +64,12 @@ func (c *Chunk) Parse(r io.Reader, options *ChunkParseOptions) (err error) {
return fmt.Errorf("unable to read chunk contents: %w", err)
}

br := bytes.NewReader(buf)
rd := reader.NewReader(br, useCompression)
rd := reader.NewReader(buf, useCompression, true)
pointer := int64(0)
events := make(map[int64]int32)

// Parse metadata
br.Seek(c.Header.MetadataOffset, io.SeekStart)
rd.SeekStart(c.Header.MetadataOffset)
metadataSize, err := rd.VarInt()
if err != nil {
return fmt.Errorf("unable to parse chunk metadata size: %w", err)
Expand All @@ -84,7 +82,7 @@ func (c *Chunk) Parse(r io.Reader, options *ChunkParseOptions) (err error) {
classes := buildClasses(metadata)

// Parse checkpoint event(s)
br.Seek(c.Header.ConstantPoolOffset, io.SeekStart)
rd.SeekStart(c.Header.ConstantPoolOffset)
checkpointsSize := int32(0)
cpools := make(PoolMap)
delta := int64(0)
Expand All @@ -104,7 +102,7 @@ func (c *Chunk) Parse(r io.Reader, options *ChunkParseOptions) (err error) {
break
}
delta += cp.Delta
br.Seek(c.Header.ConstantPoolOffset+delta, io.SeekStart)
rd.SeekStart(c.Header.ConstantPoolOffset + delta)
}

if options.CPoolProcessor != nil {
Expand All @@ -121,12 +119,12 @@ func (c *Chunk) Parse(r io.Reader, options *ChunkParseOptions) (err error) {
}

// Parse the rest of events
br.Seek(pointer, io.SeekStart)
rd.SeekStart(pointer)
for pointer != c.Header.ChunkSize {
if size, ok := events[pointer]; ok {
pointer += int64(size)
} else {
if _, err := br.Seek(pointer, io.SeekStart); err != nil {
if _, err := rd.SeekStart(pointer); err != nil {
return fmt.Errorf("unable to seek to position %d: %w", pointer, err)
}
size, err := rd.VarInt()
Expand Down
80 changes: 80 additions & 0 deletions reader/coder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package reader

import (
"encoding/binary"
"io"
"math"
)

// Compile-time assertion that *decoder satisfies io.ByteReader.
var _ io.ByteReader = (*decoder)(nil)

// coder holds the state needed to walk an in-memory buffer: the byte order
// applied to multi-byte values, the backing bytes, and the current position.
type coder struct {
	order  binary.ByteOrder
	buf    []byte
	offset int
}

// decoder provides the fixed-size decoding that binary.Read offers, but reads
// straight out of buf so that decoded values do not escape to the heap.
//
// Only ReadByte validates the remaining length itself. Every other accessor
// assumes the caller has already confirmed, via check, that enough bytes are
// available, and panics with an out-of-range error otherwise.
type decoder coder

// check reports whether dataLen bytes can be read starting at the current
// offset. A negative dataLen or a negative offset is never readable, so both
// yield false instead of letting the caller slice out of range.
func (d *decoder) check(dataLen int) bool {
	if dataLen < 0 || d.offset < 0 {
		return false
	}
	// Compare against the remaining length rather than computing
	// offset+dataLen, which could overflow for very large dataLen.
	return dataLen <= len(d.buf)-d.offset
}

// ReadByte implements io.ByteReader. It returns io.EOF, without advancing,
// when no byte is left at the current offset.
func (d *decoder) ReadByte() (byte, error) {
	if !d.check(1) {
		return 0, io.EOF
	}
	return d.uint8(), nil
}

// bool consumes one byte and reports whether it is non-zero.
func (d *decoder) bool() bool { return d.uint8() != 0 }

// uint8 consumes one byte and returns it unchanged.
func (d *decoder) uint8() uint8 {
	x := d.buf[d.offset]
	d.offset++
	return x
}

// uint16 consumes two bytes and decodes them using d.order.
func (d *decoder) uint16() uint16 {
	x := d.order.Uint16(d.buf[d.offset : d.offset+2])
	d.offset += 2
	return x
}

// uint32 consumes four bytes and decodes them using d.order.
func (d *decoder) uint32() uint32 {
	x := d.order.Uint32(d.buf[d.offset : d.offset+4])
	d.offset += 4
	return x
}

// uint64 consumes eight bytes and decodes them using d.order.
func (d *decoder) uint64() uint64 {
	x := d.order.Uint64(d.buf[d.offset : d.offset+8])
	d.offset += 8
	return x
}

// float32 consumes four bytes and reinterprets them as an IEEE 754
// single-precision value.
func (d *decoder) float32() float32 { return math.Float32frombits(d.uint32()) }

// float64 consumes eight bytes and reinterprets them as an IEEE 754
// double-precision value.
func (d *decoder) float64() float64 { return math.Float64frombits(d.uint64()) }

// int8 consumes one byte and returns it as a signed value.
func (d *decoder) int8() int8 { return int8(d.uint8()) }

// int16 consumes two bytes and returns them as a signed value.
func (d *decoder) int16() int16 { return int16(d.uint16()) }

// int32 consumes four bytes and returns them as a signed value.
func (d *decoder) int32() int32 { return int32(d.uint32()) }

// int64 consumes eight bytes and returns them as a signed value.
func (d *decoder) int64() int64 { return int64(d.uint64()) }

// byte consumes one byte; it is equivalent to uint8.
func (d *decoder) byte() byte { return d.uint8() }
8 changes: 4 additions & 4 deletions reader/compressed.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ package reader

import (
"fmt"
"io"
)

type compressed struct {
io.ByteReader
*decoder
}

func newCompressed(r io.ByteReader) VarReader {
return compressed{ByteReader: r}
func newCompressed(d *decoder) VarReader {
return compressed{decoder: d}
}

func (c compressed) VarShort() (int16, error) {
Expand Down Expand Up @@ -40,6 +39,7 @@ func (c compressed) VarLong() (int64, error) {
return int64(n), err
}

// ulong is not equivalent to binary.ReadUvarint(c.decoder).
func (c compressed) ulong() (n uint64, err error) {
s := 0
for i := 0; i < 9; i++ {
Expand Down
24 changes: 0 additions & 24 deletions reader/int.go

This file was deleted.

108 changes: 72 additions & 36 deletions reader/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package reader

import (
"encoding/binary"
"errors"
"fmt"
"io"
"unsafe"
)

type VarReader interface {
Expand All @@ -22,78 +24,102 @@ type Reader interface {
Float() (float32, error)
Double() (float64, error)
String() (string, error)
SeekStart(offset int64) (int64, error)

VarReader

// TODO: Support arrays
}

type InputReader interface {
io.Reader
io.ByteReader
}

type reader struct {
InputReader
varR VarReader
*decoder
unsafeByteToString bool
}

func NewReader(r InputReader, compressed bool) Reader {
func NewReader(b []byte, compressed bool, unsafeByteToString bool) Reader {
d := &decoder{
order: binary.BigEndian,
buf: b,
offset: 0,
}
var varR VarReader
if compressed {
varR = newCompressed(r)
varR = newCompressed(d)
} else {
varR = newUncompressed(r)
varR = newUncompressed(d)
}
return reader{
InputReader: r,
varR: varR,
varR: varR,
decoder: d,
unsafeByteToString: unsafeByteToString,
}
}

func (r reader) Boolean() (bool, error) {
var n int8
err := binary.Read(r, binary.BigEndian, &n)
if n == 0 {
return false, err
if !r.check(1) {
return false, io.EOF
}
return r.bool(), nil
}

// SeekStart implements Seek(offset, io.SeekStart).
func (r reader) SeekStart(offset int64) (int64, error) {
abs := offset
r.offset = int(abs)
if abs < 0 {
return 0, errors.New("bytes.Reader.Seek: negative position")
}
return true, err
return abs, nil
}

func (r reader) Byte() (int8, error) {
var n int8
err := binary.Read(r, binary.BigEndian, &n)
return n, err
if !r.check(1) {
return 0, io.EOF
}
return r.int8(), nil
}

func (r reader) Short() (int16, error) {
return Short(r)
if !r.check(2) {
return 0, io.EOF
}
return r.int16(), nil
}

func (r reader) Char() (uint16, error) {
var n uint16
err := binary.Read(r, binary.BigEndian, &n)
return n, err
if !r.check(2) {
return 0, io.EOF
}
return r.uint16(), nil
}

func (r reader) Int() (int32, error) {
return Int(r)
if !r.check(4) {
return 0, io.EOF
}
return r.int32(), nil
}

func (r reader) Long() (int64, error) {
return Long(r)
if !r.check(8) {
return 0, io.EOF
}
return r.int64(), nil
}

func (r reader) Float() (float32, error) {
var n float32
err := binary.Read(r, binary.BigEndian, &n)
return n, err
if !r.check(4) {
return 0, io.EOF
}
return r.float32(), nil
}

func (r reader) Double() (float64, error) {
var n float64
err := binary.Read(r, binary.BigEndian, &n)
return n, err
if !r.check(8) {
return 0, io.EOF
}
return r.float64(), nil
}

// TODO: Should we differentiate between null and empty?
Expand Down Expand Up @@ -130,10 +156,20 @@ func (r reader) VarLong() (int64, error) {
func (r reader) utf8() (string, error) {
n, err := r.varR.VarInt()
if err != nil {
return "", nil
return "", err
}
// TODO: make sure n is reasonable
b := make([]byte, n)
_, err = io.ReadFull(r, b)
return string(b), err
if !r.check(int(n)) {
return "", io.EOF
}
b := r.decoder.buf[r.decoder.offset : r.decoder.offset+int(n)]
r.decoder.offset += int(n)
if r.unsafeByteToString {
return BytesToString(b), err
}
return string(b), nil
}

// BytesToString converts byte slice to string without a memory allocation.
func BytesToString(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}
Loading

0 comments on commit 39df228

Please sign in to comment.