From b3140cec1de9c14baa176b9cef794b1c9bb38dbc Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Mon, 19 Dec 2022 14:20:48 +0100 Subject: [PATCH] flate: Improve speed in big stateless blocks. (#718) * flate: Improve speed in big stateless blocks. Don't re-alloc and copy dict for every block when compressing more than 32KB. ``` benchmark old ns/op new ns/op delta BenchmarkEncodeDigitsSL1e4-32 52954 52850 -0.20% BenchmarkEncodeDigitsSL1e5-32 781061 745420 -4.56% BenchmarkEncodeDigitsSL1e6-32 8143640 7715674 -5.26% BenchmarkEncodeTwainSL1e4-32 68150 68415 +0.39% BenchmarkEncodeTwainSL1e5-32 715140 687326 -3.89% BenchmarkEncodeTwainSL1e6-32 7718175 7339694 -4.90% benchmark old MB/s new MB/s speedup BenchmarkEncodeDigitsSL1e4-32 188.84 189.21 1.00x BenchmarkEncodeDigitsSL1e5-32 128.03 134.15 1.05x BenchmarkEncodeDigitsSL1e6-32 122.80 129.61 1.06x BenchmarkEncodeTwainSL1e4-32 146.74 146.17 1.00x BenchmarkEncodeTwainSL1e5-32 139.83 145.49 1.04x BenchmarkEncodeTwainSL1e6-32 129.56 136.25 1.05x benchmark old allocs new allocs delta BenchmarkEncodeDigitsSL1e4-32 0 0 +0.00% BenchmarkEncodeDigitsSL1e5-32 3 0 -100.00% BenchmarkEncodeDigitsSL1e6-32 41 0 -100.00% BenchmarkEncodeTwainSL1e4-32 0 0 +0.00% BenchmarkEncodeTwainSL1e5-32 3 0 -100.00% BenchmarkEncodeTwainSL1e6-32 41 0 -100.00% benchmark old bytes new bytes delta BenchmarkEncodeDigitsSL1e4-32 0 0 +0.00% BenchmarkEncodeDigitsSL1e5-32 92929 9 -99.99% BenchmarkEncodeDigitsSL1e6-32 1298964 97 -99.99% BenchmarkEncodeTwainSL1e4-32 0 0 +0.00% BenchmarkEncodeTwainSL1e5-32 92928 8 -99.99% BenchmarkEncodeTwainSL1e6-32 1298871 92 -99.99% ``` * Pin garble to v0.7.2 --- .github/workflows/go.yml | 2 +- .github/workflows/release.yml | 2 +- .goreleaser.yml | 2 +- flate/stateless.go | 19 ++++++++++++++++--- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 79177d6ac4..6e0e9154c2 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -85,7 +85,7 @@ jobs: run: go build github.com/klauspost/compress/s2/cmd/s2c && go build github.com/klauspost/compress/s2/cmd/s2d&&./s2c -verify s2c &&./s2d s2c.s2&&rm ./s2c&&rm s2d&&rm s2c.s2 - name: install garble - run: go install mvdan.cc/garble@v0.7.0 + run: go install mvdan.cc/garble@v0.7.2 - name: goreleaser deprecation run: curl -sfL https://git.io/goreleaser | VERSION=v1.9.2 sh -s -- check diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fb61d35b74..49b0ace4e0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -21,7 +21,7 @@ jobs: go-version: 1.19.x - name: install garble - run: go install mvdan.cc/garble@v0.7.1 + run: go install mvdan.cc/garble@v0.7.2 - name: Run GoReleaser uses: goreleaser/goreleaser-action@v2 diff --git a/.goreleaser.yml b/.goreleaser.yml index 0af08e65e6..a2bf06e94f 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -3,7 +3,7 @@ before: hooks: - ./gen.sh - - go install mvdan.cc/garble@latest + - go install mvdan.cc/garble@v0.7.2 builds: - diff --git a/flate/stateless.go b/flate/stateless.go index 93a1d15031..f3d4139ef3 100644 --- a/flate/stateless.go +++ b/flate/stateless.go @@ -86,11 +86,19 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { dict = dict[len(dict)-maxStatelessDict:] } + // For subsequent loops, keep shallow dict reference to avoid alloc+copy. + var inDict []byte + for len(in) > 0 { todo := in - if len(todo) > maxStatelessBlock-len(dict) { + if len(inDict) > 0 { + if len(todo) > maxStatelessBlock-maxStatelessDict { + todo = todo[:maxStatelessBlock-maxStatelessDict] + } + } else if len(todo) > maxStatelessBlock-len(dict) { todo = todo[:maxStatelessBlock-len(dict)] } + inOrg := in in = in[len(todo):] uncompressed := todo if len(dict) > 0 { @@ -102,7 +110,11 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { todo = combined } // Compress - statelessEnc(&dst, todo, int16(len(dict))) + if len(inDict) == 0 { + statelessEnc(&dst, todo, int16(len(dict))) + } else { + statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict) + } isEof := eof && len(in) == 0 if dst.n == 0 { @@ -119,7 +131,8 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { } if len(in) > 0 { // Retain a dict if we have more - dict = todo[len(todo)-maxStatelessDict:] + inDict = inOrg[len(uncompressed)-maxStatelessDict:] + dict = nil dst.Reset() } if bw.err != nil {