-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add zero-copy serialization APIs. #357
Changes from all commits
05a257e
ed67582
0ab5d35
c30e838
2d174df
42fe380
5696a01
7812651
8da0d7f
e524c53
4040ae7
86c8a73
13629dd
38155e3
827bfe3
49d43a1
417820c
f77c460
994572e
0b8795f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,9 @@ package tiledb | |
import "C" | ||
import ( | ||
"bytes" | ||
"errors" | ||
"fmt" | ||
"io" | ||
"math" | ||
"runtime" | ||
"unsafe" | ||
|
@@ -94,6 +96,8 @@ func (b *Buffer) Type() (Datatype, error) { | |
} | ||
|
||
// Serialize returns a copy of the bytes in the buffer. | ||
// | ||
// Deprecated: Use WriteTo or ReadAt instead for increased performance. | ||
func (b *Buffer) Serialize(serializationType SerializationType) ([]byte, error) { | ||
bs, err := b.dataCopy() | ||
if err != nil { | ||
|
@@ -103,14 +107,94 @@ func (b *Buffer) Serialize(serializationType SerializationType) ([]byte, error) | |
case TILEDB_CAPNP: | ||
// The entire byte array contains Cap'nP data. Don't bother it. | ||
case TILEDB_JSON: | ||
// The data is a null-terminated string. Strip off the terminator. | ||
// The data might be a null-terminated string. Strip off the terminator. | ||
bs = bytes.TrimSuffix(bs, []byte{0}) | ||
default: | ||
return nil, fmt.Errorf("unsupported serialization type: %v", serializationType) | ||
} | ||
return bs, nil | ||
} | ||
|
||
// ReadAt writes the contents of a Buffer at a given offset to a slice. | ||
func (b *Buffer) ReadAt(p []byte, off int64) (int, error) { | ||
if off < 0 { | ||
return 0, errors.New("offset cannot be negative") | ||
} | ||
|
||
var cbuffer unsafe.Pointer | ||
var csize C.uint64_t | ||
|
||
ret := C.tiledb_buffer_get_data(b.context.tiledbContext, b.tiledbBuffer, &cbuffer, &csize) | ||
if ret != C.TILEDB_OK { | ||
return 0, fmt.Errorf("error getting tiledb buffer data: %w", b.context.LastError()) | ||
} | ||
|
||
if uintptr(off) >= uintptr(csize) || cbuffer == nil { | ||
// Match ReaderAt behavior of os.File and fail with io.EOF if the offset is greater or equal to the size. | ||
return 0, io.EOF | ||
} | ||
|
||
availableBytes := uint64(csize) - uint64(off) | ||
var sizeToRead int | ||
if availableBytes > math.MaxInt { | ||
sizeToRead = math.MaxInt | ||
} else { | ||
sizeToRead = int(availableBytes) | ||
} | ||
|
||
readSize := copy(p, unsafe.Slice((*byte)(unsafe.Pointer(uintptr(cbuffer)+uintptr(off))), sizeToRead)) | ||
|
||
var err error | ||
if int64(readSize)+off == int64(csize) { | ||
err = io.EOF | ||
} | ||
|
||
return readSize, err | ||
} | ||
|
||
// WriteTo writes the contents of a Buffer to an io.Writer. | ||
func (b *Buffer) WriteTo(w io.Writer) (int64, error) { | ||
var cbuffer unsafe.Pointer | ||
var csize C.uint64_t | ||
|
||
ret := C.tiledb_buffer_get_data(b.context.tiledbContext, b.tiledbBuffer, &cbuffer, &csize) | ||
if ret != C.TILEDB_OK { | ||
return 0, fmt.Errorf("error getting tiledb buffer data: %w", b.context.LastError()) | ||
} | ||
|
||
if cbuffer == nil || csize == 0 { | ||
return 0, nil | ||
} | ||
|
||
remaining := int64(csize) | ||
|
||
// Because io.Writer supports writing up to 2GB of data at a time, we have to use a loop | ||
// for the bigger buffers. | ||
for remaining > 0 { | ||
// TODO: Use min on Go 1.21+ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's probably no harm in updating TileDB-Go's Go version. Let's address this in a separate PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will bump when I do SC-58351 which needs 1.21+. |
||
var writeSize int | ||
if remaining > math.MaxInt { | ||
writeSize = math.MaxInt | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the intention to write at most 2 GiB? If so, let's use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used |
||
} else { | ||
writeSize = int(remaining) | ||
} | ||
|
||
// Construct a slice from the buffer's data without copying it. | ||
n, err := w.Write(unsafe.Slice((*byte)(unsafe.Pointer(uintptr(cbuffer)+uintptr(csize)-uintptr(remaining))), writeSize)) | ||
remaining -= int64(n) | ||
|
||
if err != nil { | ||
return int64(csize) - remaining, fmt.Errorf("error writing buffer to writer: %w", err) | ||
} | ||
} | ||
|
||
return int64(csize), nil | ||
} | ||
|
||
// Static assert that Buffer implements io.WriterTo. | ||
var _ io.WriterTo = (*Buffer)(nil) | ||
var _ io.ReaderAt = (*Buffer)(nil) | ||
|
||
// SetBuffer sets the buffer to point at the given Go slice. The memory is now | ||
// Go-managed. | ||
func (b *Buffer) SetBuffer(buffer []byte) error { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ package tiledb | |
import "C" | ||
import ( | ||
"fmt" | ||
"io" | ||
) | ||
|
||
// BufferList A list of TileDB BufferList objects | ||
|
@@ -44,6 +45,36 @@ func (b *BufferList) Context() *Context { | |
return b.context | ||
} | ||
|
||
// WriteTo writes the contents of a BufferList to an io.Writer. | ||
func (b *BufferList) WriteTo(w io.Writer) (int64, error) { | ||
nbuffs, err := b.NumBuffers() | ||
if err != nil { | ||
return 0, err | ||
} | ||
|
||
written := int64(0) | ||
|
||
for i := uint(0); i < uint(nbuffs); i++ { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While unlikely, technically this cast could overflow, no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
buff, err := b.GetBuffer(i) | ||
if err != nil { | ||
return 0, err | ||
} | ||
n, err := buff.WriteTo(w) | ||
written += n | ||
|
||
buff.Free() | ||
|
||
if err != nil { | ||
return written, err | ||
} | ||
} | ||
|
||
return written, nil | ||
} | ||
|
||
// Static assert that BufferList implements io.WriterTo. | ||
var _ io.WriterTo = (*BufferList)(nil) | ||
|
||
// NumBuffers returns number of buffers in the list. | ||
func (b *BufferList) NumBuffers() (uint64, error) { | ||
var numBuffers C.uint64_t | ||
|
@@ -82,6 +113,8 @@ func (b *BufferList) TotalSize() (uint64, error) { | |
} | ||
|
||
// Flatten copies and concatenates all buffers in the list into a new buffer. | ||
// | ||
// Deprecated: Use WriteTo instead for increased performance. | ||
func (b *BufferList) Flatten() (*Buffer, error) { | ||
buffer := Buffer{context: b.context} | ||
freeOnGC(&buffer) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we capping `sizeToRead` because of `copy()` returning an integer? If so, we may want to use `io.Copy()` instead because it returns an `int64`. In fact, I don't know if the built-in `copy` function even uses the `io.WriterTo` interface.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure how `io.Copy()` is relevant here, as we have neither a `Reader` nor a `Writer` available. See also my other comment about the choice of `int` and the type of slice lengths.