-
Notifications
You must be signed in to change notification settings - Fork 2.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve optimal parser performance on small data #2771
Changes from all commits
eab6922
f0fc8cb
27a8bbe
23a9368
08ceda3
b096a5c
42a3ed7
ef78611
7fce9a4
4f0b1b9
b7f46eb
5449ede
640c5b1
f58e63b
b6b2855
fd94b9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,6 @@ | |
|
||
|
||
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ | ||
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ | ||
#define ZSTD_MAX_PRICE (1<<30) | ||
|
||
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ | ||
|
@@ -24,11 +23,11 @@ | |
* Price functions for optimal parser | ||
***************************************/ | ||
|
||
#if 0 /* approximation at bit level */ | ||
#if 0 /* approximation at bit level (for tests) */ | ||
# define BITCOST_ACCURACY 0 | ||
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) | ||
# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) | ||
#elif 0 /* fractional bit accuracy */ | ||
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) | ||
#elif 0 /* fractional bit accuracy (for tests) */ | ||
# define BITCOST_ACCURACY 8 | ||
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) | ||
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) | ||
|
@@ -79,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) | |
} | ||
|
||
|
||
/* ZSTD_downscaleStat() : | ||
* reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) | ||
* return the resulting sum of elements */ | ||
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) | ||
/* sum_u32() :
 * add up the first `nbElts` entries of `table`.
 * @return : the total, accumulated in a U32 (no overflow check is performed) */
static U32 sum_u32(const unsigned table[], size_t nbElts) | ||
{ | ||
size_t n; | ||
U32 total = 0; | ||
for (n=0; n<nbElts; n++) { | ||
total += table[n]; | ||
} | ||
return total; | ||
} | ||
|
||
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift) | ||
{ | ||
U32 s, sum=0; | ||
DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); | ||
assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); | ||
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); | ||
assert(shift < 30); | ||
for (s=0; s<lastEltIndex+1; s++) { | ||
table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus)); | ||
table[s] = 1 + (table[s] >> shift); | ||
sum += table[s]; | ||
} | ||
return sum; | ||
} | ||
|
||
/* ZSTD_scaleStats() : | ||
* reduce all elements in table if sum is too large, | ||
* i.e. when (sum >> logTarget) > 1; otherwise the table is left untouched. | ||
* @table : counters to rescale, indices 0..lastEltIndex inclusive | ||
* @logTarget : log2 of the sum threshold; must be < 30 (asserted) | ||
* @return : the resulting sum of elements */ | ||
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) | ||
{ | ||
U32 const prevsum = sum_u32(table, lastEltIndex+1); | ||
U32 const factor = prevsum >> logTarget; | ||
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); | ||
assert(logTarget < 30); | ||
/* sum is below (2 << logTarget) : keep statistics as they are */
if (factor <= 1) return prevsum; | ||
/* shift amount is derived from factor through ZSTD_highbit32(), defined outside this view
 * (presumably the position of the highest set bit -- TODO confirm) */
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); | ||
} | ||
|
||
/* ZSTD_rescaleFreqs() : | ||
* if first block (detected by optPtr->litLengthSum == 0) : init statistics | ||
* take hints from dictionary if there is one | ||
* or init from zero, using src for literals stats, or flat 1 for match symbols | ||
* and init from zero if there is none, | ||
* using src for literals stats, and baseline stats for sequence symbols | ||
* otherwise downscale existing stats, to be used as seed for next block. | ||
*/ | ||
static void | ||
|
@@ -174,36 +194,44 @@ ZSTD_rescaleFreqs(optState_t* const optPtr, | |
if (compressedLiterals) { | ||
unsigned lit = MaxLit; | ||
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ | ||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); | ||
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); | ||
} | ||
|
||
{ unsigned ll; | ||
for (ll=0; ll<=MaxLL; ll++) | ||
optPtr->litLengthFreq[ll] = 1; | ||
{ unsigned const baseLLfreqs[MaxLL+1] = { | ||
4, 2, 1, 1, 1, 1, 1, 1, | ||
1, 1, 1, 1, 1, 1, 1, 1, | ||
1, 1, 1, 1, 1, 1, 1, 1, | ||
1, 1, 1, 1, 1, 1, 1, 1, | ||
1, 1, 1, 1 | ||
}; | ||
ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1); | ||
} | ||
optPtr->litLengthSum = MaxLL+1; | ||
|
||
{ unsigned ml; | ||
for (ml=0; ml<=MaxML; ml++) | ||
optPtr->matchLengthFreq[ml] = 1; | ||
} | ||
optPtr->matchLengthSum = MaxML+1; | ||
|
||
{ unsigned of; | ||
for (of=0; of<=MaxOff; of++) | ||
optPtr->offCodeFreq[of] = 1; | ||
{ unsigned const baseOFCfreqs[MaxOff+1] = { | ||
6, 2, 1, 1, 2, 3, 4, 4, | ||
4, 3, 2, 1, 1, 1, 1, 1, | ||
1, 1, 1, 1, 1, 1, 1, 1, | ||
1, 1, 1, 1, 1, 1, 1, 1 | ||
}; | ||
ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); | ||
} | ||
optPtr->offCodeSum = MaxOff+1; | ||
|
||
|
||
} | ||
|
||
} else { /* new block : re-use previous statistics, scaled down */ | ||
|
||
if (compressedLiterals) | ||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); | ||
optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); | ||
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); | ||
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); | ||
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); | ||
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11); | ||
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11); | ||
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11); | ||
Comment on lines
+231
to
+234
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should the […] I would expect smaller blocks to want to have a smaller history. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The update policy could be refined even further. But note that the topic of "block size" has many sub-cases, that probably deserve separate policies. For example, is the "small block" also the "only block"? But if the "small block" is the N-th block in a long stream, and is also expected to be followed by other blocks of variable size, the situation is different. I would guess that a "stream level" […] This can certainly be analyzed even more. As can be guessed though, this is a fairly complex topic, which would require a dedicated (and time-consuming) study. |
||
} | ||
|
||
ZSTD_setBasePrices(optPtr, optLevel); | ||
|
@@ -901,11 +929,11 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_ | |
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); | ||
} | ||
|
||
|
||
/*-******************************* | ||
* Optimal parser | ||
*********************************/ | ||
|
||
|
||
static U32 ZSTD_totalLen(ZSTD_optimal_t sol) | ||
{ | ||
return sol.litlen + sol.mlen; | ||
|
@@ -987,7 +1015,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, | |
* in every price. We include the literal length to avoid negative | ||
* prices when we subtract the previous literal length. | ||
*/ | ||
opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); | ||
opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); | ||
|
||
/* large match -> immediate encoding */ | ||
{ U32 const maxML = matches[nbMatches-1].len; | ||
|
@@ -1007,7 +1035,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, | |
} } | ||
|
||
/* set prices for first matches starting position == 0 */ | ||
{ U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); | ||
assert(opt[0].price >= 0); | ||
{ U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); | ||
U32 pos; | ||
U32 matchNb; | ||
for (pos = 1; pos < minMatch; pos++) { | ||
|
@@ -1024,7 +1053,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, | |
opt[pos].mlen = pos; | ||
opt[pos].off = offset; | ||
opt[pos].litlen = litlen; | ||
opt[pos].price = sequencePrice; | ||
opt[pos].price = (int)sequencePrice; | ||
} } | ||
last_pos = pos-1; | ||
} | ||
|
@@ -1039,9 +1068,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, | |
/* Fix current position with one literal if cheaper */ | ||
{ U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; | ||
int const price = opt[cur-1].price | ||
+ ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) | ||
+ ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) | ||
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); | ||
+ (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) | ||
+ (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) | ||
- (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); | ||
assert(price < 1000000000); /* overflow check */ | ||
if (price <= opt[cur].price) { | ||
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", | ||
|
@@ -1084,9 +1113,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, | |
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ | ||
} | ||
|
||
assert(opt[cur].price >= 0); | ||
{ U32 const ll0 = (opt[cur].mlen != 0); | ||
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; | ||
U32 const previousPrice = opt[cur].price; | ||
U32 const previousPrice = (U32)opt[cur].price; | ||
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); | ||
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); | ||
U32 matchNb; | ||
|
@@ -1126,7 +1156,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, | |
|
||
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ | ||
U32 const pos = cur + mlen; | ||
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); | ||
int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); | ||
|
||
if ((pos > last_pos) || (price < opt[pos].price)) { | ||
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", | ||
|
@@ -1222,28 +1252,7 @@ size_t ZSTD_compressBlock_btopt( | |
} | ||
|
||
|
||
/* ZSTD_upscaleStat() : used in 2-pass strategy | ||
 * multiply every element of table (indices 0..lastEltIndex inclusive)
 * by 2^(ZSTD_FREQ_DIV+bonus), then subtract 1 from it.
 * @return : the sum of the updated elements */
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) | ||
{ | ||
U32 s, sum=0; | ||
/* the shift amount must not be negative */
assert(ZSTD_FREQ_DIV+bonus >= 0); | ||
for (s=0; s<lastEltIndex+1; s++) { | ||
table[s] <<= ZSTD_FREQ_DIV+bonus; | ||
table[s]--; | ||
sum += table[s]; | ||
} | ||
return sum; | ||
} | ||
|
||
/* ZSTD_upscaleStats() : used in 2-pass strategy | ||
 * apply ZSTD_upscaleStat() (bonus==0) to each frequency table of optPtr
 * and store the returned sums in the matching *Sum fields.
 * Literal statistics are only updated when ZSTD_compressedLiterals(optPtr) is true. */
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) | ||
{ | ||
if (ZSTD_compressedLiterals(optPtr)) | ||
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); | ||
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); | ||
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); | ||
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); | ||
} | ||
|
||
/* ZSTD_initStats_ultra(): | ||
* make a first compression pass, just to seed stats with more accurate starting values. | ||
|
@@ -1274,8 +1283,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, | |
ms->window.lowLimit = ms->window.dictLimit; | ||
ms->nextToUpdate = ms->window.dictLimit; | ||
|
||
/* re-inforce weight of collected statistics */ | ||
ZSTD_upscaleStats(&ms->opt); | ||
} | ||
|
||
size_t ZSTD_compressBlock_btultra( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the change that makes the majority of the difference