diff --git a/README.md b/README.md index 98312930c7..9bb3def74e 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,10 @@ The packages contains the same as the standard library, so you can use the godoc Currently there is only minor speedup on decompression (mostly CRC32 calculation). +Memory usage is typically 1MB for a Writer. stdlib is in the same range. +If you expect to have a lot of concurrently allocated Writers consider using +the stateless compression described below. + # Stateless compression This package offers stateless compression as a special option for gzip/deflate. diff --git a/flate/fast_encoder.go b/flate/fast_encoder.go index 678f081052..347ac2c902 100644 --- a/flate/fast_encoder.go +++ b/flate/fast_encoder.go @@ -45,7 +45,7 @@ const ( bTableBits = 17 // Bits used in the big tables bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history. + allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history. bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. ) diff --git a/flate/huffman_code.go b/flate/huffman_code.go index 50fb2718f3..67b2b38728 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -21,9 +21,13 @@ type hcode struct { } type huffmanEncoder struct { - codes []hcode - freqcache []literalNode - bitCount [17]int32 + codes []hcode + bitCount [17]int32 + + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. + freqcache [literalCount + 1]literalNode } type literalNode struct { @@ -306,12 +310,6 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // freq An array of frequencies, in which frequency[i] gives the frequency of literal i. // maxBits The maximum number of bits to use for any literal. 
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { - if h.freqcache == nil { - // Allocate a reusable buffer with the longest possible frequency table. - // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. - // The largest of these is literalCount, so we allocate for that case. - h.freqcache = make([]literalNode, literalCount+1) - } list := h.freqcache[:len(freq)+1] // Number of non-zero literals count := 0 diff --git a/flate/reader_test.go b/flate/reader_test.go index 55439646d7..f851c3227e 100644 --- a/flate/reader_test.go +++ b/flate/reader_test.go @@ -6,6 +6,8 @@ package flate import ( "bytes" + "compress/flate" + "fmt" "io" "io/ioutil" "runtime" @@ -13,6 +15,52 @@ import ( "testing" ) +func TestMemUsage(t *testing.T) { + testMem := func(t *testing.T, fn func()) { + var before, after runtime.MemStats + runtime.GC() + runtime.ReadMemStats(&before) + fn() + runtime.GC() + runtime.ReadMemStats(&after) + t.Logf("%s: Memory Used: %dKB, %d allocs", t.Name(), (after.HeapInuse-before.HeapInuse)/1024, after.HeapObjects-before.HeapObjects) + } + data := make([]byte, 100000) + t.Run(fmt.Sprint("stateless"), func(t *testing.T) { + testMem(t, func() { + StatelessDeflate(ioutil.Discard, data, false, nil) + }) + }) + for level := HuffmanOnly; level <= BestCompression; level++ { + t.Run(fmt.Sprint("level-", level), func(t *testing.T) { + var zr *Writer + var err error + testMem(t, func() { + zr, err = NewWriter(ioutil.Discard, level) + if err != nil { + t.Fatal(err) + } + zr.Write(data) + }) + zr.Close() + }) + } + for level := HuffmanOnly; level <= BestCompression; level++ { + t.Run(fmt.Sprint("stdlib-", level), func(t *testing.T) { + var zr *flate.Writer + var err error + testMem(t, func() { + zr, err = flate.NewWriter(ioutil.Discard, level) + if err != nil { + t.Fatal(err) + } + zr.Write(data) + }) + zr.Close() + }) + } +} + func TestNlitOutOfRange(t *testing.T) { // Trying to decode this bogus flate data, which has a Huffman table // 
with nlit=288, should not panic.