-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
internal/poly1305: Port sum_amd64.s to Avo
This implementation utilizes the same registers found in the reference
implementation, aiming to produce a minimal semantic diff between the
Avo-generated output and the original hand-written assembly.

To verify the Avo implementation, the reference and Avo-generated
assembly files are fed to `go tool asm`, capturing the debug output into
corresponding temp files. The debug output contains supplementary
metadata (line numbers, instruction offsets, and source file references)
that must be removed in order to obtain a semantic diff of the two
files. This is accomplished via a small utility script written in awk.

Commands used to verify Avo output:

    GOROOT=$(go env GOROOT)
    ASM_PATH="internal/poly1305/sum_amd64.s"
    REFERENCE="b2d3a6a4b4d36521cd7f653879cf6981e7c5c340"

    go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \
      <(git cat-file -p "$REFERENCE:$ASM_PATH") \
      > /tmp/reference.s

    go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \
      "$ASM_PATH" \
      > /tmp/avo.s

    normalize(){
      awk '{
        $1=$2=$3="";
        print substr($0,4)
      }'
    }

    diff <(normalize < /tmp/reference.s) <(normalize < /tmp/avo.s)

Change-Id: I80212c95d1b05335d7f6b73a3030b6f812f6105b
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/600035
Reviewed-by: Roland Shoemaker <[email protected]>
Reviewed-by: Filippo Valsorda <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
- Loading branch information
1 parent
7eace71
commit bcb0f91
Showing
4 changed files
with
212 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
module internal/poly1305/_asm | ||
|
||
go 1.23 | ||
|
||
require ( | ||
github.com/mmcloughlin/avo v0.6.0 | ||
golang.org/x/crypto v0.26.0 | ||
) | ||
|
||
require ( | ||
golang.org/x/mod v0.20.0 // indirect | ||
golang.org/x/sync v0.8.0 // indirect | ||
golang.org/x/sys v0.24.0 // indirect | ||
golang.org/x/tools v0.24.0 // indirect | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY= | ||
github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8= | ||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= | ||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= | ||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= | ||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= | ||
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= | ||
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= | ||
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= | ||
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= | ||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= | ||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package main | ||
|
||
import ( | ||
. "github.com/mmcloughlin/avo/build" | ||
. "github.com/mmcloughlin/avo/operand" | ||
. "github.com/mmcloughlin/avo/reg" | ||
_ "golang.org/x/crypto/sha3" | ||
) | ||
|
||
//go:generate go run . -out ../sum_amd64.s -pkg poly1305 | ||
|
||
// main drives the avo generator: it declares the target package and the | ||
// build constraint for the output file, records the instructions of the | ||
// update routine, and finally calls Generate to emit the assembly. The | ||
// output path and package name come from the flags in the go:generate | ||
// directive (-out ../sum_amd64.s -pkg poly1305). | ||
func main() { | ||
Package("golang.org/x/crypto/internal/poly1305") | ||
ConstraintExpr("gc,!purego") | ||
update() | ||
Generate() | ||
} | ||
|
||
// update emits the body of the assembly routine "update". It absorbs the | ||
// message into the accumulator h0..h2 stored at offsets 0, 8 and 16 of the | ||
// state, using the key limbs r0 and r1 read from offsets 24 and 32. | ||
// | ||
// Fixed physical registers are used throughout (rather than avo's virtual | ||
// registers) so the generated code matches the hand-written reference: | ||
// DI = state pointer, SI = message cursor, R15 = remaining length, | ||
// R8/R9/R10 = h0/h1/h2, R11/R12 = r0/r1, BX/CX/R13/R14 = scratch. | ||
func update() { | ||
Implement("update") | ||
|
||
// Load the arguments: state pointer, message base pointer, message length. | ||
Load(Param("state"), RDI) | ||
MOVQ(NewParamAddr("msg_base", 8), RSI) | ||
MOVQ(NewParamAddr("msg_len", 16), R15) | ||
|
||
MOVQ(Mem{Base: DI}.Offset(0), R8) // h0 | ||
MOVQ(Mem{Base: DI}.Offset(8), R9) // h1 | ||
MOVQ(Mem{Base: DI}.Offset(16), R10) // h2 | ||
MOVQ(Mem{Base: DI}.Offset(24), R11) // r0 | ||
MOVQ(Mem{Base: DI}.Offset(32), R12) // r1 | ||
|
||
// Fewer than 16 bytes in total: skip straight to the partial-block path. | ||
CMPQ(R15, Imm(16)) | ||
JB(LabelRef("bytes_between_0_and_15")) | ||
|
||
// Full-block loop: add the next 16 message bytes (plus the 2^128 bit) to h. | ||
Label("loop") | ||
POLY1305_ADD(RSI, R8, R9, R10) | ||
|
||
// Shared by the full-block loop and the partial-block path below: multiply | ||
// h by r, reduce, and account for 16 consumed bytes. | ||
Label("multiply") | ||
POLY1305_MUL(R8, R9, R10, R11, R12, RBX, RCX, R13, R14) | ||
SUBQ(Imm(16), R15) | ||
CMPQ(R15, Imm(16)) | ||
JAE(LabelRef("loop")) | ||
|
||
// Tail handling. Nothing left means we are done; otherwise build the final | ||
// block in CX:BX, starting from a single 1 that ends up just above the last | ||
// message byte. SI is moved to one past the end so bytes are read backwards. | ||
Label("bytes_between_0_and_15") | ||
TESTQ(R15, R15) | ||
JZ(LabelRef("done")) | ||
MOVQ(U32(1), RBX) | ||
XORQ(RCX, RCX) | ||
XORQ(R13, R13) | ||
ADDQ(R15, RSI) | ||
|
||
// Per remaining byte: shift the 128-bit value CX:BX left by 8 (the | ||
// three-operand SHLQ carries BX's top bits into CX), then merge in the byte | ||
// at SI-1. R13 was zeroed above and MOVB only writes its low byte, so the | ||
// XORQ inserts exactly that byte. DECQ R15 sets the flags tested by JNZ. | ||
Label("flush_buffer") | ||
SHLQ(Imm(8), RBX, RCX) | ||
SHLQ(Imm(8), RBX) | ||
MOVB(Mem{Base: SI}.Offset(-1), R13B) | ||
XORQ(R13, RBX) | ||
DECQ(RSI) | ||
DECQ(R15) | ||
JNZ(LabelRef("flush_buffer")) | ||
|
||
// Add the padded partial block to h. R15 is set to 16 so that the SUBQ | ||
// after the multiply brings it to zero and control falls through to done. | ||
ADDQ(RBX, R8) | ||
ADCQ(RCX, R9) | ||
ADCQ(Imm(0), R10) | ||
MOVQ(U32(16), R15) | ||
JMP(LabelRef("multiply")) | ||
|
||
// Write the updated accumulator back to the state. | ||
Label("done") | ||
MOVQ(R8, Mem{Base: DI}.Offset(0)) | ||
MOVQ(R9, Mem{Base: DI}.Offset(8)) | ||
MOVQ(R10, Mem{Base: DI}.Offset(16)) | ||
RET() | ||
} | ||
|
||
// POLY1305_ADD emits the instructions that add one full 16-byte block to the | ||
// accumulator: the two 64-bit words at msg are added to h0 and h1 with carry, | ||
// and 1 plus the carry is added to h2 (the 2^128 bit that accompanies every | ||
// full block). msg is then advanced by 16 bytes using LEAQ. | ||
// | ||
// It mirrors the POLY1305_ADD macro of the hand-written assembly, hence the | ||
// non-Go-style name. | ||
func POLY1305_ADD(msg, h0, h1, h2 GPPhysical) { | ||
ADDQ(Mem{Base: msg}.Offset(0), h0) | ||
ADCQ(Mem{Base: msg}.Offset(8), h1) | ||
ADCQ(Imm(1), h2) | ||
LEAQ(Mem{Base: msg}.Offset(16), msg) | ||
} | ||
|
||
// POLY1305_MUL emits the instructions that multiply the accumulator | ||
// h = h0 + h1*2^64 + h2*2^128 by r = r0 + r1*2^64 and fold the result back | ||
// into h0..h2. t0..t3 are scratch registers that receive the four 64-bit | ||
// limbs of the product; AX and DX are clobbered by MULQ. | ||
// | ||
// It mirrors the POLY1305_MUL macro of the hand-written assembly, hence the | ||
// non-Go-style name. The instruction order is significant: it is what keeps | ||
// the generated output identical to the reference. | ||
func POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3 GPPhysical) { | ||
// r0 * h: t0 and t1 take r0*h0; r0*h1 is added into t1 with its high half | ||
// (plus carry) flowing into t2 together with r0*h2. | ||
MOVQ(r0, RAX) | ||
MULQ(h0) | ||
MOVQ(RAX, t0) | ||
MOVQ(RDX, t1) | ||
MOVQ(r0, RAX) | ||
MULQ(h1) | ||
ADDQ(RAX, t1) | ||
ADCQ(Imm(0), RDX) | ||
MOVQ(r0, t2) | ||
IMULQ(h2, t2) | ||
ADDQ(RDX, t2) | ||
|
||
// r1 * h, shifted up by one limb. h0 is no longer needed as an input after | ||
// the MULQ below, so it is reused to hold the high half of r1*h0 until it | ||
// can be added into t2. | ||
MOVQ(r1, RAX) | ||
MULQ(h0) | ||
ADDQ(RAX, t1) | ||
ADCQ(Imm(0), RDX) | ||
MOVQ(RDX, h0) | ||
MOVQ(r1, t3) | ||
IMULQ(h2, t3) | ||
MOVQ(r1, RAX) | ||
MULQ(h1) | ||
ADDQ(RAX, t2) | ||
ADCQ(RDX, t3) | ||
ADDQ(h0, t2) | ||
ADCQ(Imm(0), t3) | ||
|
||
// Reduction. Keep the low 130 bits in h (h2 = t2 & 3). The bits above | ||
// 2^130, call them c, are added back twice: first as t3:(t2 &^ 3), which is | ||
// c*4, and then as (t3:t2) >> 2, which is c. That adds c*5 in total, | ||
// matching 2^130 = 5 modulo 2^130 - 5. | ||
MOVQ(t0, h0) | ||
MOVQ(t1, h1) | ||
MOVQ(t2, h2) | ||
ANDQ(Imm(3), h2) | ||
MOVQ(t2, t0) | ||
ANDQ(I32(-4), t0) | ||
ADDQ(t0, h0) | ||
ADCQ(t3, h1) | ||
ADCQ(Imm(0), h2) | ||
SHRQ(Imm(2), t3, t2) | ||
SHRQ(Imm(2), t3) | ||
ADDQ(t2, h0) | ||
ADCQ(t3, h1) | ||
ADCQ(Imm(0), h2) | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,108 +1,93 @@ | ||
// Copyright 2012 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
// Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT. | ||
|
||
//go:build gc && !purego | ||
|
||
#include "textflag.h" | ||
|
||
#define POLY1305_ADD(msg, h0, h1, h2) \ | ||
ADDQ 0(msg), h0; \ | ||
ADCQ 8(msg), h1; \ | ||
ADCQ $1, h2; \ | ||
LEAQ 16(msg), msg | ||
|
||
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \ | ||
MOVQ r0, AX; \ | ||
MULQ h0; \ | ||
MOVQ AX, t0; \ | ||
MOVQ DX, t1; \ | ||
MOVQ r0, AX; \ | ||
MULQ h1; \ | ||
ADDQ AX, t1; \ | ||
ADCQ $0, DX; \ | ||
MOVQ r0, t2; \ | ||
IMULQ h2, t2; \ | ||
ADDQ DX, t2; \ | ||
\ | ||
MOVQ r1, AX; \ | ||
MULQ h0; \ | ||
ADDQ AX, t1; \ | ||
ADCQ $0, DX; \ | ||
MOVQ DX, h0; \ | ||
MOVQ r1, t3; \ | ||
IMULQ h2, t3; \ | ||
MOVQ r1, AX; \ | ||
MULQ h1; \ | ||
ADDQ AX, t2; \ | ||
ADCQ DX, t3; \ | ||
ADDQ h0, t2; \ | ||
ADCQ $0, t3; \ | ||
\ | ||
MOVQ t0, h0; \ | ||
MOVQ t1, h1; \ | ||
MOVQ t2, h2; \ | ||
ANDQ $3, h2; \ | ||
MOVQ t2, t0; \ | ||
ANDQ $0xFFFFFFFFFFFFFFFC, t0; \ | ||
ADDQ t0, h0; \ | ||
ADCQ t3, h1; \ | ||
ADCQ $0, h2; \ | ||
SHRQ $2, t3, t2; \ | ||
SHRQ $2, t3; \ | ||
ADDQ t2, h0; \ | ||
ADCQ t3, h1; \ | ||
ADCQ $0, h2 | ||
|
||
// func update(state *[7]uint64, msg []byte) | ||
// func update(state *macState, msg []byte) | ||
TEXT ·update(SB), $0-32 | ||
MOVQ state+0(FP), DI | ||
MOVQ msg_base+8(FP), SI | ||
MOVQ msg_len+16(FP), R15 | ||
|
||
MOVQ 0(DI), R8 // h0 | ||
MOVQ 8(DI), R9 // h1 | ||
MOVQ 16(DI), R10 // h2 | ||
MOVQ 24(DI), R11 // r0 | ||
MOVQ 32(DI), R12 // r1 | ||
|
||
CMPQ R15, $16 | ||
MOVQ (DI), R8 | ||
MOVQ 8(DI), R9 | ||
MOVQ 16(DI), R10 | ||
MOVQ 24(DI), R11 | ||
MOVQ 32(DI), R12 | ||
CMPQ R15, $0x10 | ||
JB bytes_between_0_and_15 | ||
|
||
loop: | ||
POLY1305_ADD(SI, R8, R9, R10) | ||
ADDQ (SI), R8 | ||
ADCQ 8(SI), R9 | ||
ADCQ $0x01, R10 | ||
LEAQ 16(SI), SI | ||
|
||
multiply: | ||
POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14) | ||
SUBQ $16, R15 | ||
CMPQ R15, $16 | ||
JAE loop | ||
MOVQ R11, AX | ||
MULQ R8 | ||
MOVQ AX, BX | ||
MOVQ DX, CX | ||
MOVQ R11, AX | ||
MULQ R9 | ||
ADDQ AX, CX | ||
ADCQ $0x00, DX | ||
MOVQ R11, R13 | ||
IMULQ R10, R13 | ||
ADDQ DX, R13 | ||
MOVQ R12, AX | ||
MULQ R8 | ||
ADDQ AX, CX | ||
ADCQ $0x00, DX | ||
MOVQ DX, R8 | ||
MOVQ R12, R14 | ||
IMULQ R10, R14 | ||
MOVQ R12, AX | ||
MULQ R9 | ||
ADDQ AX, R13 | ||
ADCQ DX, R14 | ||
ADDQ R8, R13 | ||
ADCQ $0x00, R14 | ||
MOVQ BX, R8 | ||
MOVQ CX, R9 | ||
MOVQ R13, R10 | ||
ANDQ $0x03, R10 | ||
MOVQ R13, BX | ||
ANDQ $-4, BX | ||
ADDQ BX, R8 | ||
ADCQ R14, R9 | ||
ADCQ $0x00, R10 | ||
SHRQ $0x02, R14, R13 | ||
SHRQ $0x02, R14 | ||
ADDQ R13, R8 | ||
ADCQ R14, R9 | ||
ADCQ $0x00, R10 | ||
SUBQ $0x10, R15 | ||
CMPQ R15, $0x10 | ||
JAE loop | ||
|
||
bytes_between_0_and_15: | ||
TESTQ R15, R15 | ||
JZ done | ||
MOVQ $1, BX | ||
MOVQ $0x00000001, BX | ||
XORQ CX, CX | ||
XORQ R13, R13 | ||
ADDQ R15, SI | ||
|
||
flush_buffer: | ||
SHLQ $8, BX, CX | ||
SHLQ $8, BX | ||
SHLQ $0x08, BX, CX | ||
SHLQ $0x08, BX | ||
MOVB -1(SI), R13 | ||
XORQ R13, BX | ||
DECQ SI | ||
DECQ R15 | ||
JNZ flush_buffer | ||
|
||
ADDQ BX, R8 | ||
ADCQ CX, R9 | ||
ADCQ $0, R10 | ||
MOVQ $16, R15 | ||
ADCQ $0x00, R10 | ||
MOVQ $0x00000010, R15 | ||
JMP multiply | ||
|
||
done: | ||
MOVQ R8, 0(DI) | ||
MOVQ R8, (DI) | ||
MOVQ R9, 8(DI) | ||
MOVQ R10, 16(DI) | ||
RET |