Skip to content

Commit

Permalink
x/crypto/internal/poly1305: improve sum_ppc64le.s
Browse files Browse the repository at this point in the history
This change makes a few minor improvements to sum_ppc64le.s
that yield up to a 10% performance improvement on
some of the benchmarks in this directory.

- Combine ADDZE followed by ADD into a single ADDE
- Add PCALIGN to the loop
- Eliminate a few unnecessary register moves

goos: linux
goarch: ppc64le
pkg: golang.org/x/crypto/internal/poly1305
cpu: POWER10
                 │ poly.orig.out │              poly.out              │
                 │    sec/op     │   sec/op     vs base               │
64                   40.34n ± 0%   38.13n ± 0%   -5.47% (p=0.002 n=6)
1K                   482.2n ± 0%   444.6n ± 0%   -7.81% (p=0.002 n=6)
2M                   978.4µ ± 0%   879.3µ ± 0%  -10.12% (p=0.002 n=6)
64Unaligned          40.35n ± 0%   38.16n ± 0%   -5.42% (p=0.002 n=6)
1KUnaligned          482.0n ± 0%   444.2n ± 0%   -7.84% (p=0.002 n=6)
2MUnaligned          978.4µ ± 0%   879.4µ ± 0%  -10.12% (p=0.002 n=6)
Write64              32.69n ± 0%   30.71n ± 0%   -6.04% (p=0.002 n=6)
Write1K              472.4n ± 0%   436.5n ± 0%   -7.60% (p=0.002 n=6)
Write2M              978.3µ ± 0%   879.4µ ± 0%  -10.11% (p=0.002 n=6)
Write64Unaligned     32.67n ± 0%   30.71n ± 0%   -6.00% (p=0.002 n=6)
Write1KUnaligned     472.6n ± 0%   436.4n ± 0%   -7.66% (p=0.002 n=6)
Write2MUnaligned     978.5µ ± 0%   879.6µ ± 0%  -10.10% (p=0.002 n=6)
geomean              2.569µ        2.367µ        -7.87%

Change-Id: I63314e7252ef10fb2d157f623c4bc2e31a63ae32
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/558775
Reviewed-by: David Chase <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Paul Murphy <[email protected]>
Run-TryBot: Lynn Boger <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Than McIntosh <[email protected]>
  • Loading branch information
laboger committed Feb 15, 2024
1 parent 1c981e6 commit 1a86580
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions internal/poly1305/sum_ppc64le.s
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@

#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
MULLD r0, h0, t0; \
MULLD r0, h1, t4; \
MULHDU r0, h0, t1; \
MULLD r0, h1, t4; \
MULHDU r0, h1, t5; \
ADDC t4, t1, t1; \
MULLD r0, h2, t2; \
ADDZE t5; \
MULHDU r1, h0, t4; \
MULLD r1, h0, h0; \
ADD t5, t2, t2; \
ADDE t5, t2, t2; \
ADDC h0, t1, t1; \
MULLD h2, r1, t3; \
ADDZE t4, h0; \
Expand All @@ -37,13 +36,11 @@
ADDE t5, t3, t3; \
ADDC h0, t2, t2; \
MOVD $-4, t4; \
MOVD t0, h0; \
MOVD t1, h1; \
ADDZE t3; \
ANDCC $3, t2, h2; \
AND t2, t4, t0; \
RLDICL $0, t2, $62, h2; \
AND t2, t4, h0; \
ADDC t0, h0, h0; \
ADDE t3, h1, h1; \
ADDE t3, t1, h1; \
SLD $62, t3, t4; \
SRD $2, t2; \
ADDZE h2; \
Expand Down Expand Up @@ -75,6 +72,7 @@ TEXT ·update(SB), $0-32
loop:
POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)

PCALIGN $16
multiply:
POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
ADD $-16, R5
Expand Down

0 comments on commit 1a86580

Please sign in to comment.