Skip to content

Commit

Permalink
deflate: Improve entropy compression (#338)
Browse files Browse the repository at this point in the history
Improve entropy coding and make various cleanups.

Before:
```
file	out	level	insize	outsize	millis	mb/s
dickens	flatekp	-2	1019244600	589553800	3460	280.86
dickens	flatekp	1	1019244600	460143946	7778	124.96
dickens	flatekp	2	1019244600	447624657	7993	121.60
dickens	flatekp	3	1019244600	442275633	10105	96.19
dickens	flatekp	4	1019244600	411669371	10310	94.28
dickens	flatekp	5	1019244600	406856462	11506	84.48
dickens	flatekp	6	1019244600	403864317	11894	81.72
dickens	flatekp	7	1019244600	391734230	27703	35.09
dickens	flatekp	8	1019244600	386774915	38643	25.15
dickens	flatekp	9	1019244600	385598868	48084	20.21
```

After:
```
file	out	level	insize	outsize	millis	mb/s
dickens	flatekp	-2	1019244600	582799774	3812	254.97
dickens	flatekp	1	1019244600	458664090	7490	129.76
dickens	flatekp	2	1019244600	445420813	7872	123.47
dickens	flatekp	3	1019244600	439874073	9659	100.63
dickens	flatekp	4	1019244600	407860161	9766	99.52
dickens	flatekp	5	1019244600	404161695	11432	85.02
dickens	flatekp	6	1019244600	400997375	11605	83.75
dickens	flatekp	7	1019244600	391734230	26570	36.58
dickens	flatekp	8	1019244600	386774915	37690	25.79
dickens	flatekp	9	1019244600	385598868	47283	20.56
```
  • Loading branch information
klauspost authored Mar 19, 2021
1 parent 5e8a147 commit 4fd183f
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 60 deletions.
6 changes: 3 additions & 3 deletions flate/deflate.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,15 +645,15 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
d.w.logNewTablePenalty = 4
d.window = make([]byte, maxStoreBlockSize)
d.w.logNewTablePenalty = 8
d.window = make([]byte, 32<<10)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff
case level == DefaultCompression:
level = 5
fallthrough
case level >= 1 && level <= 6:
d.w.logNewTablePenalty = 6
d.w.logNewTablePenalty = 8
d.fast = newFastEnc(level)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
Expand Down
24 changes: 7 additions & 17 deletions flate/fast_encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package flate

import (
"encoding/binary"
"fmt"
"math/bits"
)
Expand Down Expand Up @@ -65,26 +66,15 @@ func load32(b []byte, i int) uint32 {
}

func load64(b []byte, i int) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
return binary.LittleEndian.Uint64(b[i:])
}

func load3232(b []byte, i int32) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
return binary.LittleEndian.Uint32(b[i:])
}

func load6432(b []byte, i int32) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
return binary.LittleEndian.Uint64(b[i:])
}

func hash(u uint32) uint32 {
Expand Down Expand Up @@ -225,9 +215,9 @@ func (e *fastGen) Reset() {
func matchLen(a, b []byte) int {
b = b[:len(a)]
var checked int
if len(a) > 4 {
if len(a) >= 4 {
// Try 4 bytes first
if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
if diff := binary.LittleEndian.Uint32(a) ^ binary.LittleEndian.Uint32(b); diff != 0 {
return bits.TrailingZeros32(diff) >> 3
}
// Switch to 8 byte matching.
Expand All @@ -236,7 +226,7 @@ func matchLen(a, b []byte) int {
b = b[4:]
for len(a) >= 8 {
b = b[:len(a)]
if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
return checked + (bits.TrailingZeros64(diff) >> 3)
}
checked += 8
Expand Down
39 changes: 19 additions & 20 deletions flate/huffman_bit_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package flate

import (
"encoding/binary"
"io"
)

Expand Down Expand Up @@ -206,7 +207,7 @@ func (w *huffmanBitWriter) write(b []byte) {
}

func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
w.bits |= uint64(b) << (w.nbits & reg16SizeMask64)
w.bits |= uint64(b) << w.nbits
w.nbits += nb
if w.nbits >= 48 {
w.writeOutBits()
Expand Down Expand Up @@ -420,13 +421,11 @@ func (w *huffmanBitWriter) writeOutBits() {
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
w.bytes[n] = byte(bits)
w.bytes[n+1] = byte(bits >> 8)
w.bytes[n+2] = byte(bits >> 16)
w.bytes[n+3] = byte(bits >> 24)
w.bytes[n+4] = byte(bits >> 32)
w.bytes[n+5] = byte(bits >> 40)

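// Store all 8 bytes of bits in one write even though only 6 are kept (n advances by 6 below);
// the 2 surplus bytes land past n. A single 64-bit store is faster than six byte stores.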
binary.LittleEndian.PutUint64(w.bytes[n:], bits)
n += 6

if n >= bufferFlushSize {
if w.err != nil {
n = 0
Expand All @@ -435,6 +434,7 @@ func (w *huffmanBitWriter) writeOutBits() {
w.write(w.bytes[:n])
n = 0
}

w.nbytes = n
}

Expand Down Expand Up @@ -759,7 +759,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := lengths[lengthCode&31]
w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64)
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
Expand All @@ -779,7 +779,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := offs[offsetCode&31]
w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64)
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
Expand Down Expand Up @@ -830,8 +830,8 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync)
estBits += w.lastHeader + 15
estBits := histogramSize(input, w.literalFreq[:], !eof && !sync)
estBits += w.lastHeader + len(input)/32
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
Expand All @@ -845,9 +845,9 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
return
}

reuseSize := 0
if w.lastHeader > 0 {
reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256])
estBits += estExtra
reuseSize = w.literalEncoding.bitLength(w.literalFreq[:256])

if estBits < reuseSize {
// We owe an EOB
Expand All @@ -859,6 +859,10 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
const numLiterals = endBlockMarker + 1
const numOffsets = 1
if w.lastHeader == 0 {
if !eof && !sync {
// Generate a slightly suboptimal tree that can be used for all.
fillHist(w.literalFreq[:numLiterals])
}
w.literalFreq[endBlockMarker] = 1
w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)

Expand All @@ -878,19 +882,14 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
w.bits |= uint64(c.code) << ((w.nbits) & reg16SizeMask64)
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
w.bytes[n] = byte(bits)
w.bytes[n+1] = byte(bits >> 8)
w.bytes[n+2] = byte(bits >> 16)
w.bytes[n+3] = byte(bits >> 24)
w.bytes[n+4] = byte(bits >> 32)
w.bytes[n+5] = byte(bits >> 40)
binary.LittleEndian.PutUint64(w.bytes[n:], bits)
n += 6
if n >= bufferFlushSize {
if w.err != nil {
Expand Down
53 changes: 33 additions & 20 deletions flate/huffman_code.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,16 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int {
return total
}

// bitLengthRaw returns the number of bits needed to encode the bytes of b
// using the code lengths currently stored in h.codes.
//
// Each element of b is a symbol (an index into h.codes), not a frequency, so
// every byte contributes its code length to the total, including bytes whose
// value is 0. h.codes must be long enough to be indexed by every byte in b.
func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
	var total int
	for _, sym := range b {
		total += int(h.codes[sym].len)
	}
	return total
}

// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
Expand Down Expand Up @@ -327,37 +337,40 @@ func atLeastOne(v float32) float32 {
return v
}

// fillHist rewrites b in place so that no entry is zero: every slot that
// holds 0 is set to 1, and all other slots are left untouched.
func fillHist(b []uint16) {
	for idx := range b {
		if b[idx] != 0 {
			continue
		}
		b[idx] = 1
	}
}

// histogramSize accumulates a histogram of b in h.
// An estimated size in bits is returned.
// Unassigned values are assigned '1' in the histogram.
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
func histogramSize(b []byte, h []uint16, fill bool) (bits int) {
h = h[:256]
for _, t := range b {
h[t]++
}
invTotal := 1.0 / float32(len(b))
shannon := float32(0.0)
var extra float32
total := len(b)
if fill {
oneBits := atLeastOne(-mFastLog2(invTotal))
for i, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
} else {
h[i] = 1
extra += oneBits
for _, v := range h {
if v == 0 {
total++
}
}
} else {
for _, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}

invTotal := 1.0 / float32(total)
shannon := float32(0.0)
for _, v := range h {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}

return int(shannon + 0.99), int(extra + 0.99)
return int(shannon + 0.99)
}
Binary file modified flate/testdata/huffman-rand-limit.golden
Binary file not shown.
Binary file modified flate/testdata/huffman-text-shift.golden
Binary file not shown.

0 comments on commit 4fd183f

Please sign in to comment.