-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* First building commit with sample matchfinder * Set up ZSTD_externalMatchCtx struct * move seqBuffer to ZSTD_Sequence* * support non-contiguous dictionary * clean up parens * add clearExternalMatchfinder, handle allocation errors * Add useExternalMatchfinder cParam * validate useExternalMatchfinder cParam * Disable LDM + external matchfinder * Check for static CCtx * Validate mState and mStateDestructor * Improve LDM check to cover both branches * Error API with optional fallback * handle RLE properly for external matchfinder * nit * Move to a CDict-like model for resource ownership * Add hidden useExternalMatchfinder bool to CCtx_params_s * Eliminate malloc, move to cwksp allocation * Handle CCtx reset properly * Ensure seqStore has enough space for external sequences * fix capitalization * Add DEBUGLOG statements * Add compressionLevel param to matchfinder API * fix c99 issues and add a param combination error code * nits * Test external matchfinder API * C90 compat for simpleExternalMatchFinder * Fix some @nocommits and an ASAN bug * nit * nit * nits * forward declare copySequencesToSeqStore functions in zstd_compress_internal.h * nit * nit * nits * Update copyright headers * Fix CMake zstreamtest build * Fix copyright headers (again) * typo * Add externalMatchfinder demo program to make contrib * Reduce memory consumption for small blockSize * ZSTD_postProcessExternalMatchFinderResult nits * test sum(matchlen) + sum(litlen) == srcSize in debug builds * refExternalMatchFinder -> registerExternalMatchFinder * C90 nit * zstreamtest nits * contrib nits * contrib nits * allow block splitter + external matchfinder, refactor * add windowSize param * add contrib/externalMatchfinder/README.md * docs * go back to old RLE heuristic because of the first block issue * fix initializer element is not a constant expression * ref contrib from zstd.h * extremely pedantic compiler warning fix, meson fix, typo fix * Additional docs on API limitations * minor nits * Refactor 
maxNbSeq calculation into a helper function * Fix copyright
- Loading branch information
Showing
18 changed files
with
929 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# build artifacts | ||
externalMatchfinder |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# ################################################################ | ||
# Copyright (c) Yann Collet, Meta Platforms, Inc. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under both the BSD-style license (found in the | ||
# LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
# in the COPYING file in the root directory of this source tree). | ||
# ################################################################ | ||
|
||
# Locations of the zstd sources, relative to this contrib directory.
PROGDIR = ../../programs
LIBDIR = ../../lib

# Static library the demo links against.
LIBZSTD = $(LIBDIR)/libzstd.a

# matchfinder.c includes zstd_compress_internal.h, hence the extra include paths.
CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/compress -I$(LIBDIR)/common

CFLAGS ?= -O3
CFLAGS += -std=gnu99
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
            -Wstrict-aliasing=1 -Wswitch-enum \
            -Wstrict-prototypes -Wundef -Wpointer-arith \
            -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
            -Wredundant-decls
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)

# These targets do not produce a file of the same name; declaring them phony
# keeps them working even if a file called "all" or "clean" exists.
.PHONY: default all clean

default: externalMatchfinder

all: externalMatchfinder

externalMatchfinder: matchfinder.c main.c $(LIBZSTD)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

# Always delegate to the library's own Makefile, which decides what is stale.
.PHONY: $(LIBZSTD)
$(LIBZSTD):
	$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"

clean:
	$(RM) *.o
	$(MAKE) -C $(LIBDIR) clean > /dev/null
	$(RM) externalMatchfinder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
externalMatchfinder | ||
===================== | ||
|
||
`externalMatchfinder` is a test tool for the external matchfinder API. | ||
It demonstrates how to use the API to perform a simple round-trip test. | ||
|
||
A sample matchfinder is provided in matchfinder.c, but the user can swap | ||
this out with a different one if desired. The sample matchfinder implements | ||
LZ compression with a 1024-entry (4 KB) hashtable. Dictionary compression is not currently supported. | ||
|
||
Command line : | ||
``` | ||
externalMatchfinder filename | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/* | ||
* Copyright (c) Yann Collet, Meta Platforms, Inc. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under both the BSD-style license (found in the | ||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
* in the COPYING file in the root directory of this source tree). | ||
* You may select, at your option, one of the above-listed licenses. | ||
*/ | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <assert.h> | ||
|
||
#define ZSTD_STATIC_LINKING_ONLY | ||
#include "zstd.h" | ||
#include "zstd_errors.h" | ||
#include "matchfinder.h" // simpleExternalMatchFinder | ||
|
||
/*
 * CHECK(res) :
 * Evaluates `res` exactly once. If the value is a zstd error code
 * (per ZSTD_isError()), prints its name and makes the *enclosing function*
 * return 1. Only usable inside functions returning int.
 */
#define CHECK(res)                                                   \
do {                                                                 \
    size_t const checkedResult_ = (res);                             \
    if (ZSTD_isError(checkedResult_)) {                              \
        printf("ERROR: %s\n", ZSTD_getErrorName(checkedResult_));    \
        return 1;                                                    \
    }                                                                \
} while (0)
|
||
int main(int argc, char *argv[]) { | ||
if (argc != 2) { | ||
printf("Usage: exampleMatchfinder <file>\n"); | ||
return 1; | ||
} | ||
|
||
ZSTD_CCtx* const zc = ZSTD_createCCtx(); | ||
|
||
int simpleExternalMatchState = 0xdeadbeef; | ||
|
||
// Here is the crucial bit of code! | ||
ZSTD_registerExternalMatchFinder( | ||
zc, | ||
&simpleExternalMatchState, | ||
simpleExternalMatchFinder | ||
); | ||
|
||
{ | ||
size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1); | ||
CHECK(res); | ||
} | ||
|
||
FILE *f = fopen(argv[1], "rb"); | ||
assert(f); | ||
{ | ||
int const ret = fseek(f, 0, SEEK_END); | ||
assert(ret == 0); | ||
} | ||
size_t const srcSize = ftell(f); | ||
{ | ||
int const ret = fseek(f, 0, SEEK_SET); | ||
assert(ret == 0); | ||
} | ||
|
||
char* const src = malloc(srcSize + 1); | ||
assert(src); | ||
{ | ||
size_t const ret = fread(src, srcSize, 1, f); | ||
assert(ret == 1); | ||
int const ret2 = fclose(f); | ||
assert(ret2 == 0); | ||
} | ||
|
||
size_t const dstSize = ZSTD_compressBound(srcSize); | ||
char* const dst = malloc(dstSize); | ||
assert(dst); | ||
|
||
size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize); | ||
CHECK(cSize); | ||
|
||
char* const val = malloc(srcSize); | ||
assert(val); | ||
|
||
{ | ||
size_t const res = ZSTD_decompress(val, srcSize, dst, cSize); | ||
CHECK(res); | ||
} | ||
|
||
if (memcmp(src, val, srcSize) == 0) { | ||
printf("Compression and decompression were successful!\n"); | ||
printf("Original size: %lu\n", srcSize); | ||
printf("Compressed size: %lu\n", cSize); | ||
} else { | ||
printf("ERROR: input and validation buffers don't match!\n"); | ||
for (size_t i = 0; i < srcSize; i++) { | ||
if (src[i] != val[i]) { | ||
printf("First bad index: %zu\n", i); | ||
break; | ||
} | ||
} | ||
return 1; | ||
} | ||
|
||
ZSTD_freeCCtx(zc); | ||
free(src); | ||
free(dst); | ||
free(val); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
* Copyright (c) Yann Collet, Meta Platforms, Inc. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under both the BSD-style license (found in the | ||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
* in the COPYING file in the root directory of this source tree). | ||
* You may select, at your option, one of the above-listed licenses. | ||
*/ | ||
|
||
#include "zstd_compress_internal.h" | ||
#include "matchfinder.h" | ||
|
||
#define HSIZE 1024 | ||
static U32 const HLOG = 10; | ||
static U32 const MLS = 4; | ||
static U32 const BADIDX = 0xffffffff; | ||
|
||
size_t simpleExternalMatchFinder( | ||
void* externalMatchState, | ||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, | ||
const void* src, size_t srcSize, | ||
const void* dict, size_t dictSize, | ||
int compressionLevel, | ||
size_t windowSize | ||
) { | ||
const BYTE* const istart = (const BYTE*)src; | ||
const BYTE* const iend = istart + srcSize; | ||
const BYTE* ip = istart; | ||
const BYTE* anchor = istart; | ||
size_t seqCount = 0; | ||
U32 hashTable[HSIZE]; | ||
|
||
(void)externalMatchState; | ||
(void)dict; | ||
(void)dictSize; | ||
(void)outSeqsCapacity; | ||
(void)compressionLevel; | ||
|
||
{ int i; | ||
for (i=0; i < HSIZE; i++) { | ||
hashTable[i] = BADIDX; | ||
} } | ||
|
||
while (ip + MLS < iend) { | ||
size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS); | ||
U32 const matchIndex = hashTable[hash]; | ||
hashTable[hash] = (U32)(ip - istart); | ||
|
||
if (matchIndex != BADIDX) { | ||
const BYTE* const match = istart + matchIndex; | ||
U32 const matchLen = (U32)ZSTD_count(ip, match, iend); | ||
if (matchLen >= ZSTD_MINMATCH_MIN) { | ||
U32 const litLen = (U32)(ip - anchor); | ||
U32 const offset = (U32)(ip - match); | ||
ZSTD_Sequence const seq = { | ||
offset, litLen, matchLen, 0 | ||
}; | ||
|
||
/* Note: it's crucial to stay within the window size! */ | ||
if (offset <= windowSize) { | ||
outSeqs[seqCount++] = seq; | ||
ip += matchLen; | ||
anchor = ip; | ||
continue; | ||
} | ||
} | ||
} | ||
|
||
ip++; | ||
} | ||
|
||
{ ZSTD_Sequence const finalSeq = { | ||
0, (U32)(iend - anchor), 0, 0 | ||
}; | ||
outSeqs[seqCount++] = finalSeq; | ||
} | ||
|
||
return seqCount; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/* | ||
* Copyright (c) Yann Collet, Meta Platforms, Inc. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under both the BSD-style license (found in the | ||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
* in the COPYING file in the root directory of this source tree). | ||
* You may select, at your option, one of the above-listed licenses. | ||
*/ | ||
|
||
#ifndef MATCHFINDER_H | ||
#define MATCHFINDER_H | ||
|
||
#define ZSTD_STATIC_LINKING_ONLY | ||
#include "zstd.h" | ||
|
||
size_t simpleExternalMatchFinder( | ||
void* externalMatchState, | ||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, | ||
const void* src, size_t srcSize, | ||
const void* dict, size_t dictSize, | ||
int compressionLevel, | ||
size_t windowSize | ||
); | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.