Skip to content

Commit

Permalink
Address @terrelln's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
sean-purcell committed Apr 14, 2017
1 parent 2785b28 commit e0cacc9
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 79 deletions.
2 changes: 1 addition & 1 deletion contrib/seekable_format/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
LDFLAGS += -L../../../lib/ -lzstd
CPPFLAGS += -I../ -I../../../lib -I../../../lib/common

CFLAGS = -O3
CFLAGS ?= -O3
CFLAGS += -g

SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c
Expand Down
9 changes: 4 additions & 5 deletions contrib/seekable_format/zstd_seekable.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ typedef struct ZSTD_seekable_DStream_s ZSTD_seekable_DStream;
*
* Data streamed to the seekable compressor will automatically be split into
* frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()),
* or if none is provided, will be cut off whenver ZSTD_endFrame() is called
* or when the default maximum frame size is reached (approximately 4GB).
* or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is
* called or when the default maximum frame size (2GB) is reached.
*
* Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object
* for a new compression operation.
Expand All @@ -59,7 +59,6 @@ typedef struct ZSTD_seekable_DStream_s ZSTD_seekable_DStream;
* ZSTD_isError().
* Note 1 : it's just a hint, to help latency a little, any other
* value will work fine.
* Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
*
* At any time, call ZSTD_seekable_endFrame() to end the current frame and
* start a new one.
Expand Down Expand Up @@ -98,8 +97,8 @@ ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outB
* small, a size hint for how much data to provide.
* An error code may also be returned, checkable with ZSTD_isError()
*
* Use ZSTD_initDStream to prepare for a new decompression operation using the
* seektable loaded with ZSTD_seekable_loadSeekTable().
* Use ZSTD_seekable_initDStream to prepare for a new decompression operation
* using the seektable loaded with ZSTD_seekable_loadSeekTable().
* Data in the range [rangeStart, rangeEnd) will be decompressed.
*
* Call ZSTD_seekable_decompressStream() repetitively to consume input stream.
Expand Down
133 changes: 79 additions & 54 deletions contrib/seekable_format/zstdseek_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,22 @@
#define XXH_NAMESPACE ZSTD_
#include "xxhash.h"

#include "zstd_internal.h" /* includes zstd.h */
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include "mem.h"
#include "zstd_seekable.h"

#define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; }

#undef ERROR
#define ERROR(name) ((size_t)-ZSTD_error_##name)

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

typedef struct {
U32 cSize;
U32 dSize;
Expand All @@ -42,6 +55,8 @@ struct ZSTD_seekable_CStream_s {
int checksumFlag;

int writingSeekTable;
U32 seekTablePos;
U32 seekTableIndex;
};

ZSTD_seekable_CStream* ZSTD_seekable_createCStream()
Expand All @@ -57,8 +72,8 @@ ZSTD_seekable_CStream* ZSTD_seekable_createCStream()

/* allocate some initial space */
{ size_t const FRAMELOG_STARTING_CAPACITY = 16;
zcs->framelog.entries =
malloc(sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY);
zcs->framelog.entries = (framelogEntry_t*)malloc(
sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY);
if (zcs->framelog.entries == NULL) goto failed2;
zcs->framelog.capacity = FRAMELOG_STARTING_CAPACITY;
}
Expand Down Expand Up @@ -105,6 +120,8 @@ size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs,
XXH64_reset(&zcs->xxhState, 0);
}

zcs->seekTablePos = 0;
zcs->seekTableIndex = 0;
zcs->writingSeekTable = 0;

return ZSTD_initCStream(zcs->cstream, compressionLevel);
Expand All @@ -115,17 +132,6 @@ static size_t ZSTD_seekable_logFrame(ZSTD_seekable_CStream* zcs)
if (zcs->framelog.size == ZSTD_SEEKABLE_MAXFRAMES)
return ERROR(frameIndex_tooLarge);

zcs->framelog.entries[zcs->framelog.size] = (framelogEntry_t)
{
.cSize = zcs->frameCSize,
.dSize = zcs->frameDSize,
};
if (zcs->checksumFlag)
zcs->framelog.entries[zcs->framelog.size].checksum =
/* take lower 32 bits of digest */
XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU;

zcs->framelog.size++;
/* grow the buffer if required */
if (zcs->framelog.size == zcs->framelog.capacity) {
/* exponential size increase for constant amortized runtime */
Expand All @@ -139,6 +145,15 @@ static size_t ZSTD_seekable_logFrame(ZSTD_seekable_CStream* zcs)
zcs->framelog.capacity = newCapacity;
}

zcs->framelog.entries[zcs->framelog.size] = (framelogEntry_t){
zcs->frameCSize, zcs->frameDSize,
};
if (zcs->checksumFlag)
zcs->framelog.entries[zcs->framelog.size].checksum =
XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU; /* take lower 32 bits of digest */

zcs->framelog.size++;

return 0;
}

Expand Down Expand Up @@ -208,7 +223,7 @@ size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer*
return (size_t)(zcs->maxFrameSize - zcs->frameDSize);
}

static size_t ZSTD_seekable_seekTableSize(ZSTD_seekable_CStream* zcs)
static inline size_t ZSTD_seekable_seekTableSize(ZSTD_seekable_CStream const* zcs)
{
size_t const sizePerFrame = 8 + (zcs->checksumFlag?4:0);
size_t const seekTableLen = ZSTD_skippableHeaderSize +
Expand All @@ -218,14 +233,29 @@ static size_t ZSTD_seekable_seekTableSize(ZSTD_seekable_CStream* zcs)
return seekTableLen;
}

static size_t ZSTD_seekable_writeSeekTable(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
static inline size_t ZSTD_stwrite32(ZSTD_seekable_CStream* zcs,
ZSTD_outBuffer* output, U32 const value,
U32 const offset)
{
BYTE* op = (BYTE*) output->dst;
BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */
if (zcs->seekTablePos < offset + 4) {
BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */
size_t const lenWrite =
MIN(output->size - output->pos, offset + 4 - zcs->seekTablePos);
MEM_writeLE32(tmp, value);
memcpy((BYTE*)output->dst + output->pos,
tmp + (zcs->seekTablePos - offset), lenWrite);
output->pos += lenWrite;
zcs->seekTablePos += lenWrite;

if (lenWrite < 4) return ZSTD_seekable_seekTableSize(zcs) - zcs->seekTablePos;
}
return 0;
}

/* repurpose
* zcs->frameDSize: the current index in the table and
* zcs->frameCSize: the amount of the table written so far
static size_t ZSTD_seekable_writeSeekTable(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
{
/* seekTableIndex: the current index in the table and
* seekTableSize: the amount of the table written so far
*
* This function is written this way so that if it has to return early
* because of a small buffer, it can keep going where it left off.
Expand All @@ -234,53 +264,48 @@ static size_t ZSTD_seekable_writeSeekTable(ZSTD_seekable_CStream* zcs, ZSTD_outB
size_t const sizePerFrame = 8 + (zcs->checksumFlag?4:0);
size_t const seekTableLen = ZSTD_seekable_seekTableSize(zcs);

#define st_write32(x, o) \
do { \
if (zcs->frameCSize < (o) + 4) { \
size_t const lenWrite = MIN(output->size - output->pos, \
(o) + 4 - zcs->frameCSize); \
MEM_writeLE32(tmp, (x)); \
memcpy(op + output->pos, tmp + (zcs->frameCSize - (o)), lenWrite); \
zcs->frameCSize += lenWrite; \
output->pos += lenWrite; \
if (lenWrite < 4) return seekTableLen - zcs->frameCSize; \
} \
} while (0)

st_write32(ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0);
st_write32(seekTableLen - ZSTD_skippableHeaderSize, 4);

while (zcs->frameDSize < zcs->framelog.size) {
st_write32(zcs->framelog.entries[zcs->frameDSize].cSize,
ZSTD_skippableHeaderSize + sizePerFrame * zcs->frameDSize);
st_write32(zcs->framelog.entries[zcs->frameDSize].dSize,
ZSTD_skippableHeaderSize + sizePerFrame * zcs->frameDSize + 4);
CHECK_Z(ZSTD_stwrite32(zcs, output, ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0));
CHECK_Z(ZSTD_stwrite32(zcs, output, seekTableLen - ZSTD_skippableHeaderSize,
4));

while (zcs->seekTableIndex < zcs->framelog.size) {
CHECK_Z(ZSTD_stwrite32(
zcs, output, zcs->framelog.entries[zcs->seekTableIndex].cSize,
ZSTD_skippableHeaderSize + sizePerFrame * zcs->seekTableIndex));

CHECK_Z(ZSTD_stwrite32(
zcs, output, zcs->framelog.entries[zcs->seekTableIndex].dSize,
ZSTD_skippableHeaderSize + sizePerFrame * zcs->seekTableIndex + 4));

if (zcs->checksumFlag) {
st_write32(zcs->framelog.entries[zcs->frameDSize].checksum,
ZSTD_skippableHeaderSize + sizePerFrame * zcs->frameDSize + 8);
CHECK_Z(ZSTD_stwrite32(
zcs, output,
zcs->framelog.entries[zcs->seekTableIndex].checksum,
ZSTD_skippableHeaderSize + sizePerFrame * zcs->seekTableIndex +
8));
}

zcs->frameDSize++;
zcs->seekTableIndex++;
}

st_write32(zcs->framelog.size, seekTableLen - ZSTD_seekTableFooterSize);
CHECK_Z(ZSTD_stwrite32(zcs, output, zcs->framelog.size,
seekTableLen - ZSTD_seekTableFooterSize));

if (output->size - output->pos < 1) return seekTableLen - zcs->frameCSize;
if (zcs->frameCSize < seekTableLen - 4) {
if (output->size - output->pos < 1) return seekTableLen - zcs->seekTablePos;
if (zcs->seekTablePos < seekTableLen - 4) {
BYTE sfd = 0;
sfd |= (zcs->checksumFlag) << 7;

op[output->pos] = sfd;
((BYTE*)output->dst)[output->pos] = sfd;
output->pos++;
zcs->frameCSize++;
zcs->seekTablePos++;
}

st_write32(ZSTD_SEEKABLE_MAGICNUMBER, seekTableLen - 4);
CHECK_Z(ZSTD_stwrite32(zcs, output, ZSTD_SEEKABLE_MAGICNUMBER,
seekTableLen - 4));

if (zcs->frameCSize != seekTableLen) return ERROR(GENERIC);
if (zcs->seekTablePos != seekTableLen) return ERROR(GENERIC);
return 0;

#undef st_write32
}

size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
Expand Down
45 changes: 26 additions & 19 deletions contrib/seekable_format/zstdseek_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,20 @@
#define XXH_NAMESPACE ZSTD_
#include "xxhash.h"

#include "zstd_internal.h" /* includes zstd.h */
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include "mem.h" /* includes zstd.h */
#include "zstd_seekable.h"

#undef ERROR
#define ERROR(name) ((size_t)-ZSTD_error_##name)

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

typedef struct {
U64 cOffset;
U64 dOffset;
Expand Down Expand Up @@ -107,11 +118,8 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
{
const BYTE* ip = (const BYTE*)src + srcSize;

U32 numFrames;
int checksumFlag;

U32 sizePerEntry;

/* footer is fixed size */
if (srcSize < ZSTD_seekTableFooterSize)
return ZSTD_seekTableFooterSize;
Expand All @@ -129,10 +137,9 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
}
}

numFrames = MEM_readLE32(ip-9);
sizePerEntry = 8 + (checksumFlag?4:0);

{ U32 const tableSize = sizePerEntry * numFrames;
{ U32 const numFrames = MEM_readLE32(ip-9);
U32 const sizePerEntry = 8 + (checksumFlag?4:0);
U32 const tableSize = sizePerEntry * numFrames;
U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_skippableHeaderSize;

const BYTE* base = ip - frameSize;
Expand All @@ -148,7 +155,8 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,

{ /* Allocate an extra entry at the end so that we can do size
* computations on the last element without special case */
seekEntry_t* entries = malloc(sizeof(seekEntry_t) * (numFrames + 1));
seekEntry_t* entries =
(seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
const BYTE* tableBase = base + ZSTD_skippableHeaderSize;

U32 idx;
Expand All @@ -167,8 +175,10 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
entries[idx].cOffset = cOffset;
entries[idx].dOffset = dOffset;

cOffset += MEM_readLE32(tableBase + pos); pos += 4;
dOffset += MEM_readLE32(tableBase + pos); pos += 4;
cOffset += MEM_readLE32(tableBase + pos);
pos += 4;
dOffset += MEM_readLE32(tableBase + pos);
pos += 4;
if (checksumFlag) {
entries[idx].checksum = MEM_readLE32(tableBase + pos);
pos += 4;
Expand All @@ -188,18 +198,17 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
size_t ZSTD_seekable_initDStream(ZSTD_seekable_DStream* zds, U64 rangeStart, U64 rangeEnd)
{
/* restrict range to the end of the file, of non-negative size */
rangeStart = MIN(rangeStart, zds->seekTable.entries[zds->seekTable.tableLen].dOffset);
rangeEnd = MIN(rangeEnd, zds->seekTable.entries[zds->seekTable.tableLen].dOffset);
rangeEnd = MAX(rangeEnd, rangeStart);
rangeStart = MIN(rangeStart, rangeEnd);

zds->targetStart = rangeStart;
zds->targetEnd = rangeEnd;
zds->stage = zsds_seek;

/* force a seek first */
zds->curFrame = (U32) -1;
zds->compressedOffset = (U64) -1;
zds->decompressedOffset = (U64) -1;
zds->curFrame = (U32)-1;
zds->compressedOffset = (U64)-1;
zds->decompressedOffset = (U64)-1;

if (zds->seekTable.checksumFlag) {
XXH64_reset(&zds->xxhState, 0);
Expand Down Expand Up @@ -247,9 +256,7 @@ size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer
size_t const prevInputPos = input->pos;

ZSTD_outBuffer outTmp = {
.dst = outBase,
.size = (size_t)MIN((U64)outLen, toDecompress),
.pos = 0};
outBase, (size_t)MIN((U64)outLen, toDecompress), 0};

size_t const ret =
ZSTD_decompressStream(zds->dstream, &outTmp, input);
Expand Down

0 comments on commit e0cacc9

Please sign in to comment.