Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fuzz on maxBlockSize #3418

Merged
merged 8 commits into from
Jan 19, 2023
46 changes: 38 additions & 8 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,15 @@ static int ZSTD_resolveExternalSequenceValidation(int mode) {
#endif
}

/* Resolves maxBlockSize to the default if no value is present. */
static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) {
daniellerozenblit marked this conversation as resolved.
Show resolved Hide resolved
if (maxBlockSize == 0) {
return ZSTD_BLOCKSIZE_MAX;
} else {
return maxBlockSize;
}
}

/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
Expand All @@ -312,6 +321,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);
assert(!ZSTD_checkCParams(cParams));
return cctxParams;
}
Expand Down Expand Up @@ -377,6 +387,7 @@ ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
}
Expand Down Expand Up @@ -604,6 +615,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.upperBound = 1;
return bounds;

case ZSTD_c_maxBlockSize:
bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
return bounds;

default:
bounds.error = ERROR(parameter_unsupported);
return bounds;
Expand Down Expand Up @@ -670,6 +686,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
case ZSTD_c_enableMatchFinderFallback:
case ZSTD_c_maxBlockSize:
default:
return 0;
}
Expand Down Expand Up @@ -727,6 +744,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
case ZSTD_c_enableMatchFinderFallback:
case ZSTD_c_maxBlockSize:
break;

default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
Expand Down Expand Up @@ -964,6 +982,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
CCtxParams->enableMatchFinderFallback = value;
return CCtxParams->enableMatchFinderFallback;

case ZSTD_c_maxBlockSize:
BOUNDCHECK(ZSTD_c_maxBlockSize, value);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if you set the value to 0?

I think we can leave the bounds as-is, but also allow setting to zero to use the default maxBlockSize. And we should also call that out in the docs.

CCtxParams->maxBlockSize = value;
return CCtxParams->maxBlockSize;

default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
Expand Down Expand Up @@ -1102,6 +1125,9 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_enableMatchFinderFallback:
*value = CCtxParams->enableMatchFinderFallback;
break;
case ZSTD_c_maxBlockSize:
*value = (int)CCtxParams->maxBlockSize;
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
Expand Down Expand Up @@ -1546,10 +1572,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
const size_t buffInSize,
const size_t buffOutSize,
const U64 pledgedSrcSize,
int useExternalMatchFinder)
int useExternalMatchFinder,
size_t maxBlockSize)
{
size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
size_t const blockSize = MIN(maxBlockSize, windowSize);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think maxBlockSize could be equal to zero here, you have to resolve it.

size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder);
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
Expand Down Expand Up @@ -1601,7 +1628,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
* be needed. However, we still allocate two 0-sized buffers, which can
* take space under ASAN. */
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder);
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder, params->maxBlockSize);
}

size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
Expand Down Expand Up @@ -1651,7 +1678,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
{ ZSTD_compressionParameters const cParams =
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think params->maxBlockSize could be equal to zero here, you have to resolve it.

size_t const blockSize = MIN(params->maxBlockSize, (size_t)1 << cParams.windowLog);
size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
? ((size_t)1 << cParams.windowLog) + blockSize
: 0;
Expand All @@ -1662,7 +1689,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)

return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder);
ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder, params->maxBlockSize);
}
}

Expand Down Expand Up @@ -1944,7 +1971,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
}

{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
size_t const blockSize = MIN(params->maxBlockSize, windowSize);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add an assert that params->maxBlockSize != 0 up by the group of asserts on lines 1964-1966?

size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useExternalMatchFinder);
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1
Expand All @@ -1962,7 +1989,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
size_t const neededSpace =
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder);
buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder, params->maxBlockSize);
int resizeWorkspace;

FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
Expand Down Expand Up @@ -2340,6 +2367,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
params.ldmParams = srcCCtx->appliedParams.ldmParams;
params.fParams = fParams;
params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize;
ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
/* loadedDictSize */ 0,
ZSTDcrp_leaveDirty, zbuff);
Expand Down Expand Up @@ -4474,7 +4502,7 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
{
ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
assert(!ZSTD_checkCParams(cParams));
return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
}

size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
Expand Down Expand Up @@ -5912,6 +5940,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);

#ifdef ZSTD_MULTITHREAD
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
Expand Down Expand Up @@ -6103,6 +6132,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
/* Reset to the original values. */
cctx->requestedParams.inBufferMode = originalInBufferMode;
cctx->requestedParams.outBufferMode = originalOutBufferMode;

FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
if (result != 0) { /* compression not completed, due to lack of output space */
assert(oPos == dstCapacity);
Expand Down
3 changes: 3 additions & 0 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,9 @@ struct ZSTD_CCtx_params_s {
* Users can't set this externally.
* It is set internally in ZSTD_registerExternalMatchFinder(). */
int useExternalMatchFinder;

/* Adjust the max block size*/
size_t maxBlockSize;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
Expand Down
12 changes: 11 additions & 1 deletion lib/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ ZSTDLIB_API const char* ZSTD_versionString(void);

#define ZSTD_BLOCKSIZELOG_MAX 17
#define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX)
#define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move this down below into the static linking only section

Putting this constant here means that it will always be present, and we don't like to do that until it has baked in the unstable section.



/***************************************
Expand Down Expand Up @@ -479,6 +480,7 @@ typedef enum {
* ZSTD_c_useRowMatchFinder
* ZSTD_c_prefetchCDictTables
* ZSTD_c_enableMatchFinderFallback
* ZSTD_c_maxBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
Expand All @@ -499,7 +501,8 @@ typedef enum {
ZSTD_c_experimentalParam14=1011,
ZSTD_c_experimentalParam15=1012,
ZSTD_c_experimentalParam16=1013,
ZSTD_c_experimentalParam17=1014
ZSTD_c_experimentalParam17=1014,
ZSTD_c_experimentalParam18=1015
} ZSTD_cParameter;

typedef struct {
Expand Down Expand Up @@ -2067,6 +2070,13 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
* documentation (below) before setting this parameter. */
#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17

/* ZSTD_c_maxBlockSize
*
* Default is ZSTD_BLOCKSIZE_MAX.
*
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a little more detail here. Say what this parameter controls, what the min/max are (and why), and explain what setting to 0 means.

*/
#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18

/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
Expand Down
1 change: 1 addition & 0 deletions tests/fuzz/zstd_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
}
Expand Down
43 changes: 43 additions & 0 deletions tests/zstreamtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -1925,6 +1925,49 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
}
DISPLAYLEVEL(3, "OK \n");


/* Test maxBlockSize cctx param functionality */
DISPLAYLEVEL(3, "test%3i : Testing maxBlockSize PR#3418: ", testNb++);
{
ZSTD_CCtx* cctx = ZSTD_createCCtx();
size_t const srcSize = 2 << 10;
void* const src = CNBuffer;
size_t const dstSize = ZSTD_compressBound(srcSize);
void* const dst1 = compressedBuffer;
void* const dst2 = (BYTE*)compressedBuffer + dstSize;
size_t size1, size2;
void* const checkBuf = malloc(srcSize);

memset(src, 'x', srcSize);

/* Quick test to make sure maxBlockSize bounds are enforced */
assert(ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN - 1)));
assert(ZSTD_isError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX + 1)));

/* maxBlockSize = 1KB */
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 1 << 10));
size1 = ZSTD_compress2(cctx, dst1, dstSize, src, srcSize);

if (ZSTD_isError(size1)) goto _output_error;
CHECK_Z(ZSTD_decompress(checkBuf, srcSize, dst1, size1));
CHECK(memcmp(src, checkBuf, srcSize) != 0, "Corruption!");

/* maxBlockSize = 3KB */
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 3 << 10));
size2 = ZSTD_compress2(cctx, dst2, dstSize, src, srcSize);

if (ZSTD_isError(size2)) goto _output_error;
CHECK_Z(ZSTD_decompress(checkBuf, srcSize, dst2, size2));
CHECK(memcmp(src, checkBuf, srcSize) != 0, "Corruption!");

/* Compressed output should not be equal */
assert(memcmp(dst1, dst2, dstSize) != 0);
assert(size1 - size2 == 4); /* We add another RLE block with header + character */
ZSTD_freeCCtx(cctx);
free(checkBuf);
}
DISPLAYLEVEL(3, "OK \n");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Great !

One more test I would like to see is what happens when maxBlockSize > windowSize.
In such case, we want to be sure that the code will not produce blocks larger than windowSize.
I think your code is fine and will respect this condition.
The purpose of the test is to ensure it remains this way, a future modification could break this condition.


_end:
FUZ_freeDictionary(dictionary);
ZSTD_freeCStream(zc);
Expand Down