From e5c6ce29387d0f408fbf3f7563a5ad634456b982 Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Wed, 30 Nov 2022 20:39:57 +0100 Subject: [PATCH] huff0: Check for zeros earlier in Scratch.countSimple (#704) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v == 0 implies v <= m, so we can skip a CMOV in that case. Benchmarks show a tiny gain on most inputs and a large gain on the few with very small alphabets: ``` name old speed new speed delta Compress1XReuseNone/digits-8 432MB/s ± 1% 434MB/s ± 1% ~ (p=0.023 n=14+35) Compress1XReuseNone/gettysburg-8 241MB/s ± 1% 242MB/s ± 1% +0.58% (p=0.000 n=14+34) Compress1XReuseNone/twain-8 352MB/s ± 1% 354MB/s ± 1% +0.62% (p=0.000 n=14+29) Compress1XReuseNone/low-ent.10k-8 460MB/s ± 2% 464MB/s ± 1% +0.92% (p=0.000 n=14+30) Compress1XReuseNone/superlow-ent-10k-8 304MB/s ± 1% 305MB/s ± 1% ~ (p=0.040 n=15+33) Compress1XReuseNone/crash2-8 10.6MB/s ± 1% 10.9MB/s ± 1% +3.28% (p=0.000 n=15+33) Compress1XReuseNone/endzerobits-8 13.1MB/s ± 4% 15.6MB/s ± 4% +19.15% (p=0.000 n=15+34) Compress1XReuseNone/endnonzero-8 7.26MB/s ± 2% 7.36MB/s ± 1% +1.44% (p=0.000 n=15+32) Compress1XReuseNone/case1-8 13.3MB/s ± 1% 13.4MB/s ± 1% +0.71% (p=0.000 n=15+35) Compress1XReuseNone/case2-8 11.1MB/s ± 1% 11.2MB/s ± 1% +0.57% (p=0.005 n=14+34) Compress1XReuseNone/case3-8 11.6MB/s ± 1% 11.8MB/s ± 2% +1.22% (p=0.000 n=15+34) Compress1XReuseNone/pngdata.001-8 298MB/s ± 1% 300MB/s ± 1% ~ (p=0.019 n=15+35) Compress1XReuseNone/normcount2-8 32.3MB/s ± 1% 32.7MB/s ± 1% +1.14% (p=0.000 n=15+34) Compress1XReuseAllow/digits-8 433MB/s ± 1% 436MB/s ± 1% +0.68% (p=0.000 n=15+33) Compress1XReuseAllow/gettysburg-8 269MB/s ± 1% 272MB/s ± 1% +1.26% (p=0.000 n=15+32) Compress1XReuseAllow/twain-8 361MB/s ± 1% 363MB/s ± 1% +0.47% (p=0.002 n=15+34) Compress1XReuseAllow/low-ent.10k-8 465MB/s ± 1% 466MB/s ± 1% ~ (p=0.155 n=15+34) Compress1XReuseAllow/superlow-ent-10k-8 303MB/s ± 1% 304MB/s ± 1% ~ (p=0.074 n=13+35) Compress1XReuseAllow/crash2-8 14.0MB/s ± 1% 14.7MB/s ± 1% +5.34% (p=0.000 n=15+35) Compress1XReuseAllow/endzerobits-8 13.9MB/s ± 4% 16.9MB/s ± 1% +21.23% (p=0.000 n=14+32) Compress1XReuseAllow/endnonzero-8 9.92MB/s ± 3% 10.97MB/s ± 2% +10.61% (p=0.000 n=15+31) Compress1XReuseAllow/case1-8 15.8MB/s ± 1% 16.0MB/s ± 2% +1.42% (p=0.000 n=15+33) Compress1XReuseAllow/case2-8 13.3MB/s ± 1% 13.5MB/s ± 1% +1.52% (p=0.000 n=14+32) Compress1XReuseAllow/case3-8 14.1MB/s ± 1% 14.2MB/s ± 2% +1.21% (p=0.001 n=15+34) Compress1XReuseAllow/pngdata.001-8 301MB/s ± 1% 301MB/s ± 1% ~ (p=0.948 n=15+34) Compress1XReuseAllow/normcount2-8 40.6MB/s ± 1% 41.4MB/s ± 1% +2.03% (p=0.000 n=15+31) Compress1XReusePrefer/digits-8 435MB/s ± 1% 437MB/s ± 2% ~ (p=0.046 n=15+34) Compress1XReusePrefer/gettysburg-8 420MB/s ± 2% 425MB/s ± 2% +1.25% (p=0.000 n=15+34) Compress1XReusePrefer/twain-8 363MB/s ± 1% 364MB/s ± 1% +0.43% (p=0.003 n=15+34) Compress1XReusePrefer/low-ent.10k-8 468MB/s ± 1% 469MB/s ± 1% ~ (p=0.029 n=15+34) Compress1XReusePrefer/superlow-ent-10k-8 308MB/s ± 1% 310MB/s ± 1% +0.66% (p=0.000 n=15+34) Compress1XReusePrefer/crash2-8 50.7MB/s ± 2% 61.5MB/s ± 2% +21.25% (p=0.000 n=14+33) Compress1XReusePrefer/endzerobits-8 19.0MB/s ± 1% 24.4MB/s ± 2% +28.68% (p=0.000 n=13+30) Compress1XReusePrefer/endnonzero-8 25.7MB/s ± 1% 33.1MB/s ± 1% +28.60% (p=0.000 n=14+32) Compress1XReusePrefer/case1-8 129MB/s ± 6% 149MB/s ± 6% +15.55% (p=0.000 n=15+35) Compress1XReusePrefer/case2-8 124MB/s ± 1% 143MB/s ± 1% +14.84% (p=0.000 n=14+30) Compress1XReusePrefer/case3-8 132MB/s ± 1% 154MB/s ± 1% +16.91% (p=0.000 n=15+34) Compress1XReusePrefer/pngdata.001-8 312MB/s ± 1% 312MB/s ± 1% ~ (p=0.936 n=14+28) Compress1XReusePrefer/normcount2-8 192MB/s ± 1% 207MB/s ± 1% +7.98% (p=0.000 n=14+34) Compress1XSizes/digits-100-8 59.2MB/s ± 1% 60.7MB/s ± 1% +2.56% (p=0.000 n=15+34) Compress1XSizes/digits-200-8 104MB/s ± 1% 107MB/s ± 1% +2.69% (p=0.000 n=15+32) Compress1XSizes/digits-500-8 195MB/s ± 1% 199MB/s ± 1% +2.04% (p=0.000 n=14+35) Compress1XSizes/digits-1000-8 278MB/s ± 1% 283MB/s ± 1% +1.55% (p=0.000 n=14+34) Compress1XSizes/digits-5000-8 416MB/s ± 1% 420MB/s ± 1% +0.82% (p=0.000 n=15+31) Compress1XSizes/digits-10000-8 440MB/s ± 1% 442MB/s ± 1% +0.50% (p=0.004 n=15+34) Compress1XSizes/digits-50000-8 434MB/s ± 1% 436MB/s ± 1% +0.44% (p=0.008 n=15+34) ``` --- huff0/compress.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/huff0/compress.go b/huff0/compress.go index 4d14542fac..d9223a91ef 100644 --- a/huff0/compress.go +++ b/huff0/compress.go @@ -365,29 +365,29 @@ func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { m := uint32(0) if len(s.prevTable) > 0 { for i, v := range s.count[:] { + if v == 0 { + continue + } if v > m { m = v } - if v > 0 { - s.symbolLen = uint16(i) + 1 - if i >= len(s.prevTable) { - reuse = false - } else { - if s.prevTable[i].nBits == 0 { - reuse = false - } - } + s.symbolLen = uint16(i) + 1 + if i >= len(s.prevTable) { + reuse = false + } else if s.prevTable[i].nBits == 0 { + reuse = false } } return int(m), reuse } for i, v := range s.count[:] { + if v == 0 { + continue + } if v > m { m = v } - if v > 0 { - s.symbolLen = uint16(i) + 1 - } + s.symbolLen = uint16(i) + 1 } return int(m), false }