Skip to content

Commit

Permalink
crypto/aes: On ppc64le, use better instructions when available
Browse files Browse the repository at this point in the history
Several operations emulate instructions available on power9. Use
the GOPPC64_power9 macro provided by the compiler to select the
native instructions if the minimum cpu requirements are met.

Likewise rework the LXSDX_BE to simplify usage when overriding
it. It is only used in one place.

All three configurations are tested via CI.

On POWER9:

pkg:crypto/cipher goos:linux goarch:ppc64le
AESCBCEncrypt1K   949MB/s ± 0%   957MB/s ± 0%  +0.83%
AESCBCDecrypt1K  1.82GB/s ± 0%  1.99GB/s ± 0%  +8.93%
pkg:crypto/aes goos:linux goarch:ppc64le
Encrypt          1.01GB/s ± 0%  1.05GB/s ± 0%  +4.36%
Decrypt           987MB/s ± 0%  1024MB/s ± 0%  +3.77%

Change-Id: I56d0eb845647dd3c43bcad71eb281b499e1d1789
Reviewed-on: https://go-review.googlesource.com/c/go/+/449116
Reviewed-by: Lynn Boger <[email protected]>
Auto-Submit: Paul Murphy <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Joedian Reid <[email protected]>
Reviewed-by: Bryan Mills <[email protected]>
Run-TryBot: Paul Murphy <[email protected]>
  • Loading branch information
pmur authored and felixge committed Nov 21, 2022
1 parent 0d85b7c commit d087ee0
Showing 1 changed file with 23 additions and 19 deletions.
42 changes: 23 additions & 19 deletions src/crypto/aes/asm_ppc64x.s
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

// For P9 instruction emulation
#define ESPERM V21 // Endian swapping permute into BE
#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXV
#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXVB16X

// For {en,de}cryptBlockAsm
#define BLK_INP R3
Expand All @@ -69,8 +69,15 @@ DATA ·rcon+0x40(SB)/8, $0x0000000000000000
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $80

// Emulate unaligned BE vector load/stores on LE targets
#ifdef GOARCH_ppc64le
# ifdef GOPPC64_power9
#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB)
#define XXBRD_ON_LE(VA,VT) XXBRD VA, VT
# else
// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
// doublewords and byte-swapping each doubleword to emulate BE load/stores.
#define NEEDS_ESPERM
#define P8_LXVB16X(RA,RB,VT) \
LXVD2X (RA+RB), VT \
VPERM VT, VT, ESPERM, VT
Expand All @@ -79,19 +86,15 @@ GLOBL ·rcon(SB), RODATA, $80
VPERM VS, VS, ESPERM, TMP2 \
STXVD2X TMP2, (RA+RB)

#define LXSDX_BE(RA,RB,VT) \
LXSDX (RA+RB), VT \
VPERM VT, VT, ESPERM, VT
#else
#define P8_LXVB16X(RA,RB,VT) \
LXVD2X (RA+RB), VT
#define XXBRD_ON_LE(VA,VT) \
VPERM VA, VA, ESPERM, VT

#define P8_STXVB16X(VS,RA,RB) \
STXVD2X VS, (RA+RB)

#define LXSDX_BE(RA,RB,VT) \
LXSDX (RA+RB), VT
#endif
# endif // defined(GOPPC64_power9)
#else
#define P8_LXVB16X(RA,RB,VT) LXVD2X (RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB)
#define XXBRD_ON_LE(VA, VT)
#endif // defined(GOARCH_ppc64le)

// func setEncryptKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
Expand All @@ -101,7 +104,7 @@ TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
MOVD enc+16(FP), OUTENC
MOVD dec+24(FP), OUTDEC

#ifdef GOARCH_ppc64le
#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), PTR // PTR points to rcon addr
LVX (PTR), ESPERM
ADD $0x10, PTR
Expand Down Expand Up @@ -191,7 +194,8 @@ loop128:
RET

l192:
LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order.
LXSDX (INP+R0), IN1 // Load next 8 bytes into upper half of VSR.
XXBRD_ON_LE(IN1, IN1) // and convert to BE ordering on LE hosts.
MOVD $4, CNT // li 7,4
STXVD2X IN0, (R0+OUTENC)
STXVD2X IN0, (R0+OUTDEC)
Expand Down Expand Up @@ -309,7 +313,7 @@ TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
MOVD xk+8(FP), R5 // Key pointer
MOVD dst+16(FP), R3 // Dest pointer
MOVD src+24(FP), R4 // Src pointer
#ifdef GOARCH_ppc64le
#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R7
LVX (R7), ESPERM // Permute value for P8_ macros.
#endif
Expand Down Expand Up @@ -404,7 +408,7 @@ TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
MOVD xk+8(FP), R5 // Key pointer
MOVD dst+16(FP), R3 // Dest pointer
MOVD src+24(FP), R4 // Src pointer
#ifdef GOARCH_ppc64le
#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R7
LVX (R7), ESPERM // Permute value for P8_ macros.
#endif
Expand Down Expand Up @@ -622,7 +626,7 @@ TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
MOVD enc+40(FP), ENC
MOVD nr+48(FP), ROUNDS

#ifdef GOARCH_ppc64le
#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R11
LVX (R11), ESPERM // Permute value for P8_ macros.
#endif
Expand Down

0 comments on commit d087ee0

Please sign in to comment.