From b929d749f3d09995aa04848cc6fb24f0aec87da4 Mon Sep 17 00:00:00 2001
From: Nevine Ebeid <66388554+nebeid@users.noreply.github.com>
Date: Fri, 2 Aug 2024 14:06:56 -0400
Subject: [PATCH] Make aes_hw_ctr32_encrypt_blocks handle len=0 correctly
 (#1690)

When the input length is 0 blocks, `aes_hw_ctr32_encrypt_blocks`
encrypts (and writes to the output) 2 blocks on AArch64 and 1 block on
x86_64 and x86. (Wherever the function is called in the surrounding
code, it is guarded by checks that `len != 0`.)

This change fixes the behaviour without taxing performance. Performance
was tested on Graviton3 and on Mac x86_64.
---
 crypto/fipsmodule/aes/aes_test.cc             | 11 ++++++++
 crypto/fipsmodule/aes/asm/aesni-x86.pl        |  1 +
 crypto/fipsmodule/aes/asm/aesni-x86_64.pl     |  1 +
 crypto/fipsmodule/aes/asm/aesv8-armx.pl       | 17 ++++++++----
 .../crypto/fipsmodule/aesv8-armx.S            |  9 +++----
 .../ios-arm/crypto/fipsmodule/aesv8-armx.S    |  8 +++---
 .../crypto/fipsmodule/aesv8-armx.S            |  9 +++----
 .../linux-arm/crypto/fipsmodule/aesv8-armx.S  |  8 +++---
 .../linux-x86/crypto/fipsmodule/aesni-x86.S   | 27 ++++++++++---------
 .../crypto/fipsmodule/aesni-x86_64.S          |  1 +
 .../mac-x86/crypto/fipsmodule/aesni-x86.S     | 27 ++++++++++---------
 .../crypto/fipsmodule/aesni-x86_64.S          |  1 +
 .../crypto/fipsmodule/aesv8-armx.S            |  9 +++----
 .../win-x86/crypto/fipsmodule/aesni-x86.asm   | 27 ++++++++++---------
 .../crypto/fipsmodule/aesni-x86_64.asm        |  1 +
 15 files changed, 90 insertions(+), 67 deletions(-)

diff --git a/crypto/fipsmodule/aes/aes_test.cc b/crypto/fipsmodule/aes/aes_test.cc
index 35bb40a312..a90ffb7b4e 100644
--- a/crypto/fipsmodule/aes/aes_test.cc
+++ b/crypto/fipsmodule/aes/aes_test.cc
@@ -479,6 +479,17 @@ TEST(AESTest, ABI) {
       SCOPED_TRACE(blocks);
       CHECK_ABI(aes_hw_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
                 block, AES_ENCRYPT);
+      if (blocks == 0) {
+        // Without this initialization, valgrind complains
+        // about using an uninitialized value.
+        for (size_t i = 0; i < 64; i++) {
+          buf[i] = i;
+        }
+        std::string buf_before = testing::PrintToString(Bytes(buf,64));
+        CHECK_ABI(aes_hw_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
+        EXPECT_EQ(buf_before, testing::PrintToString(Bytes(buf,64)));
+      }
+
       CHECK_ABI(aes_hw_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
 #if defined(HWAES_ECB)
       CHECK_ABI(aes_hw_ecb_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86.pl b/crypto/fipsmodule/aes/asm/aesni-x86.pl
index 1d5f6561fa..091cc26aef 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86.pl
@@ -897,6 +897,7 @@ sub aesni_generate6
 	&mov	(&DWP(80,"esp"),$key_);
 
 	&cmp	($len,1);
+	&jb	(&label("ctr32_ret"));
 	&je	(&label("ctr32_one_shortcut"));
 
 	&movdqu	($inout5,&QWP(0,$rounds_));	# load ivec
diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index f64dacdec4..9e8804d591 100644
--- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -1214,6 +1214,7 @@ sub aesni_generate8 {
 	movb	\$1,BORINGSSL_function_hit(%rip)
 #endif
 	cmp	\$1,$len
+	jb	.Lctr32_epilogue	# if $len < 1, go to done
 	jne	.Lctr32_bulk
 
 	# handle single block without allocating stack frame,
diff --git a/crypto/fipsmodule/aes/asm/aesv8-armx.pl b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
index 4603028f80..a61f896fb8 100644
--- a/crypto/fipsmodule/aes/asm/aesv8-armx.pl
+++ b/crypto/fipsmodule/aes/asm/aesv8-armx.pl
@@ -775,7 +775,6 @@ ()
 	vld1.32	{$rndlast},[$key_]
 	add	$key_,$key,#32
 	mov	$cnt,$rounds
-	cclr	$step,lo
 
 	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
 	// affected by silicon errata #1742098 [0] and #1655431 [1],
@@ -895,11 +894,12 @@ ()
 
 	adds	$len,$len,#3
 	b.eq	.Lctr32_done
-	cmp	$len,#1
-	mov	$step,#16
-	cclr	$step,eq
 
 .Lctr32_tail:
+	cmp	$len,#1
+	mov	$step,#16
+	cclr	$step,eq
+	b.lt	.Lctr32_done		// if len = 0, go to done
 	aese	$dat0,q8
 	aesmc	$dat0,$dat0
 	aese	$dat1,q8
@@ -940,11 +940,18 @@ ()
 
 	aese	$dat0,q15
 	aese	$dat1,q15
-	cmp	$len,#1
 	veor	$in0,$in0,$dat0
 	veor	$in1,$in1,$dat1
 	vst1.8	{$in0},[$out],#16
+___
+$code.=<<___	if ($flavour =~ /64/);
+	cbz	$step,.Lctr32_done	// if step = 0 (len = 1), go to done
+___
+$code.=<<___	if ($flavour !~ /64/);
+	cmp	$step, #0
 	b.eq	.Lctr32_done
+___
+$code.=<<___;
 	vst1.8	{$in1},[$out]
 
 .Lctr32_done:
diff --git a/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-armx.S b/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-armx.S
index 9285f4dfa4..2c7a05363b 100644
--- a/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-armx.S
+++ b/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-armx.S
@@ -639,7 +639,6 @@ _aes_hw_ctr32_encrypt_blocks:
 	ld1	{v7.4s},[x7]
 	add	x7,x3,#32
 	mov	w6,w5
-	csel	x12,xzr,x12,lo
 
 	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
 	// affected by silicon errata #1742098 [0] and #1655431 [1],
@@ -759,11 +758,12 @@ Loop3x_ctr32:
 
 	adds	x2,x2,#3
 	b.eq	Lctr32_done
+
+Lctr32_tail:
 	cmp	x2,#1
 	mov	x12,#16
 	csel	x12,xzr,x12,eq
-
-Lctr32_tail:
+	b.lt	Lctr32_done	// if len = 0, go to done
 	aese	v0.16b,v16.16b
 	aesmc	v0.16b,v0.16b
 	aese	v1.16b,v16.16b
@@ -804,11 +804,10 @@ Lctr32_tail:
 
 	aese	v0.16b,v23.16b
 	aese	v1.16b,v23.16b
-	cmp	x2,#1
 	eor	v2.16b,v2.16b,v0.16b
 	eor	v3.16b,v3.16b,v1.16b
 	st1	{v2.16b},[x1],#16
-	b.eq	Lctr32_done
+	cbz	x12,Lctr32_done	// if step = 0 (len = 1), go to done
 	st1	{v3.16b},[x1]
 
 Lctr32_done:
diff --git a/generated-src/ios-arm/crypto/fipsmodule/aesv8-armx.S b/generated-src/ios-arm/crypto/fipsmodule/aesv8-armx.S
index 638062a1e9..c041918a05 100644
--- a/generated-src/ios-arm/crypto/fipsmodule/aesv8-armx.S
+++ b/generated-src/ios-arm/crypto/fipsmodule/aesv8-armx.S
@@ -621,7 +621,6 @@ _aes_hw_ctr32_encrypt_blocks:
 	vld1.32	{q7},[r7]
 	add	r7,r3,#32
 	mov	r6,r5
-	movlo	r12,#0
 
 @ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
 @ affected by silicon errata #1742098 [0] and #1655431 [1],
@@ -741,11 +740,12 @@ Loop3x_ctr32:
 
 	adds	r2,r2,#3
 	beq	Lctr32_done
+
+Lctr32_tail:
 	cmp	r2,#1
 	mov	r12,#16
 	moveq	r12,#0
-
-Lctr32_tail:
+	blt	Lctr32_done	@ if len = 0, go to done
 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
@@ -786,10 +786,10 @@ Lctr32_tail:
 
 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
 .byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
-	cmp	r2,#1
 	veor	q2,q2,q0
 	veor	q3,q3,q1
 	vst1.8	{q2},[r1]!
+	cmp	r12, #0
 	beq	Lctr32_done
 	vst1.8	{q3},[r1]
 
diff --git a/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S b/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S
index f7dc3384a6..3fa0455f66 100644
--- a/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S
+++ b/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S
@@ -639,7 +639,6 @@ aes_hw_ctr32_encrypt_blocks:
 	ld1	{v7.4s},[x7]
 	add	x7,x3,#32
 	mov	w6,w5
-	csel	x12,xzr,x12,lo
 
 	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
 	// affected by silicon errata #1742098 [0] and #1655431 [1],
@@ -759,11 +758,12 @@ aes_hw_ctr32_encrypt_blocks:
 
 	adds	x2,x2,#3
 	b.eq	.Lctr32_done
+
+.Lctr32_tail:
 	cmp	x2,#1
 	mov	x12,#16
 	csel	x12,xzr,x12,eq
-
-.Lctr32_tail:
+	b.lt	.Lctr32_done	// if len = 0, go to done
 	aese	v0.16b,v16.16b
 	aesmc	v0.16b,v0.16b
 	aese	v1.16b,v16.16b
@@ -804,11 +804,10 @@ aes_hw_ctr32_encrypt_blocks:
 
 	aese	v0.16b,v23.16b
 	aese	v1.16b,v23.16b
-	cmp	x2,#1
 	eor	v2.16b,v2.16b,v0.16b
 	eor	v3.16b,v3.16b,v1.16b
 	st1	{v2.16b},[x1],#16
-	b.eq	.Lctr32_done
+	cbz	x12,.Lctr32_done	// if step = 0 (len = 1), go to done
 	st1	{v3.16b},[x1]
 
 .Lctr32_done:
diff --git a/generated-src/linux-arm/crypto/fipsmodule/aesv8-armx.S b/generated-src/linux-arm/crypto/fipsmodule/aesv8-armx.S
index 420af9b6ea..93bb9bdd2d 100644
--- a/generated-src/linux-arm/crypto/fipsmodule/aesv8-armx.S
+++ b/generated-src/linux-arm/crypto/fipsmodule/aesv8-armx.S
@@ -609,7 +609,6 @@ aes_hw_ctr32_encrypt_blocks:
 	vld1.32	{q7},[r7]
 	add	r7,r3,#32
 	mov	r6,r5
-	movlo	r12,#0
 
 @ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
 @ affected by silicon errata #1742098 [0] and #1655431 [1],
@@ -729,11 +728,12 @@ aes_hw_ctr32_encrypt_blocks:
 
 	adds	r2,r2,#3
 	beq	.Lctr32_done
+
+.Lctr32_tail:
 	cmp	r2,#1
 	mov	r12,#16
 	moveq	r12,#0
-
-.Lctr32_tail:
+	blt	.Lctr32_done	@ if len = 0, go to done
 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
@@ -774,10 +774,10 @@ aes_hw_ctr32_encrypt_blocks:
 
 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
 .byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
-	cmp	r2,#1
 	veor	q2,q2,q0
 	veor	q3,q3,q1
 	vst1.8	{q2},[r1]!
+	cmp	r12, #0
 	beq	.Lctr32_done
 	vst1.8	{q3},[r1]
 
diff --git a/generated-src/linux-x86/crypto/fipsmodule/aesni-x86.S b/generated-src/linux-x86/crypto/fipsmodule/aesni-x86.S
index 54daf1808e..394e3cde9f 100644
--- a/generated-src/linux-x86/crypto/fipsmodule/aesni-x86.S
+++ b/generated-src/linux-x86/crypto/fipsmodule/aesni-x86.S
@@ -866,7 +866,8 @@ aes_hw_ctr32_encrypt_blocks:
 	andl	$-16,%esp
 	movl	%ebp,80(%esp)
 	cmpl	$1,%eax
-	je	.L039ctr32_one_shortcut
+	jb	.L039ctr32_ret
+	je	.L040ctr32_one_shortcut
 	movdqu	(%ebx),%xmm7
 	movl	$202182159,(%esp)
 	movl	$134810123,4(%esp)
@@ -904,7 +905,7 @@
 	pshufd	$192,%xmm0,%xmm2
 	pshufd	$128,%xmm0,%xmm3
 	cmpl	$6,%eax
-	jb	.L040ctr32_tail
+	jb	.L041ctr32_tail
 	pxor	%xmm6,%xmm7
 	shll	$4,%ecx
 	movl	$16,%ebx
@@ -913,9 +914,9 @@
 	subl	%ecx,%ebx
 	leal	32(%edx,%ecx,1),%edx
 	subl	$6,%eax
-	jmp	.L041ctr32_loop6
+	jmp	.L042ctr32_loop6
 .align	16
-.L041ctr32_loop6:
+.L042ctr32_loop6:
 	pshufd	$64,%xmm0,%xmm4
 	movdqa	32(%esp),%xmm0
 	pshufd	$192,%xmm1,%xmm5
@@ -969,14 +970,14 @@
 	leal	96(%edi),%edi
 	pshufd	$128,%xmm0,%xmm3
 	subl	$6,%eax
-	jnc	.L041ctr32_loop6
+	jnc	.L042ctr32_loop6
 	addl	$6,%eax
-	jz	.L042ctr32_ret
+	jz	.L039ctr32_ret
 	movdqu	(%ebp),%xmm7
 	movl	%ebp,%edx
 	pxor	32(%esp),%xmm7
 	movl	240(%ebp),%ecx
-.L040ctr32_tail:
+.L041ctr32_tail:
 	por	%xmm7,%xmm2
 	cmpl	$2,%eax
 	jb	.L043ctr32_one
@@ -1007,9 +1008,9 @@
 	movups	%xmm4,32(%edi)
 	movups	%xmm5,48(%edi)
 	movups	%xmm6,64(%edi)
-	jmp	.L042ctr32_ret
+	jmp	.L039ctr32_ret
 .align	16
-.L039ctr32_one_shortcut:
+.L040ctr32_one_shortcut:
 	movups	(%ebx),%xmm2
 	movl	240(%edx),%ecx
 .L043ctr32_one:
@@ -1027,7 +1028,7 @@
 	movups	(%esi),%xmm6
 	xorps	%xmm2,%xmm6
 	movups	%xmm6,(%edi)
-	jmp	.L042ctr32_ret
+	jmp	.L039ctr32_ret
 .align	16
 .L044ctr32_two:
 	call	_aesni_encrypt2
@@ -1037,7 +1038,7 @@
 	xorps	%xmm6,%xmm3
 	movups	%xmm2,(%edi)
 	movups	%xmm3,16(%edi)
-	jmp	.L042ctr32_ret
+	jmp	.L039ctr32_ret
 .align	16
 .L045ctr32_three:
 	call	_aesni_encrypt3
@@ -1050,7 +1051,7 @@
 	xorps	%xmm7,%xmm4
 	movups	%xmm3,16(%edi)
 	movups	%xmm4,32(%edi)
-	jmp	.L042ctr32_ret
+	jmp	.L039ctr32_ret
 .align	16
 .L046ctr32_four:
 	call	_aesni_encrypt4
@@ -1066,7 +1067,7 @@
 	xorps	%xmm0,%xmm5
 	movups	%xmm4,32(%edi)
 	movups	%xmm5,48(%edi)
-.L042ctr32_ret:
+.L039ctr32_ret:
 	pxor	%xmm0,%xmm0
 	pxor	%xmm1,%xmm1
 	pxor	%xmm2,%xmm2
diff --git a/generated-src/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S b/generated-src/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
index ff7185f8c5..b6ae7fc080 100644
--- a/generated-src/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
+++ b/generated-src/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
@@ -886,6 +886,7 @@ _CET_ENDBR
 	movb	$1,BORINGSSL_function_hit(%rip)
 #endif
 	cmpq	$1,%rdx
+	jb	.Lctr32_epilogue
 	jne	.Lctr32_bulk
 
 
diff --git a/generated-src/mac-x86/crypto/fipsmodule/aesni-x86.S b/generated-src/mac-x86/crypto/fipsmodule/aesni-x86.S
index 44676044c6..e2bc1f2b03 100644
--- a/generated-src/mac-x86/crypto/fipsmodule/aesni-x86.S
+++ b/generated-src/mac-x86/crypto/fipsmodule/aesni-x86.S
@@ -839,7 +839,8 @@ L038pic:
 	andl	$-16,%esp
 	movl	%ebp,80(%esp)
 	cmpl	$1,%eax
-	je	L039ctr32_one_shortcut
+	jb	L039ctr32_ret
+	je	L040ctr32_one_shortcut
 	movdqu	(%ebx),%xmm7
 	movl	$202182159,(%esp)
 	movl	$134810123,4(%esp)
@@ -877,7 +878,7 @@ L038pic:
 	pshufd	$192,%xmm0,%xmm2
 	pshufd	$128,%xmm0,%xmm3
 	cmpl	$6,%eax
-	jb	L040ctr32_tail
+	jb	L041ctr32_tail
 	pxor	%xmm6,%xmm7
 	shll	$4,%ecx
 	movl	$16,%ebx
@@ -886,9 +887,9 @@ L038pic:
 	subl	%ecx,%ebx
 	leal	32(%edx,%ecx,1),%edx
 	subl	$6,%eax
-	jmp	L041ctr32_loop6
+	jmp	L042ctr32_loop6
 .align	4,0x90
-L041ctr32_loop6:
+L042ctr32_loop6:
 	pshufd	$64,%xmm0,%xmm4
 	movdqa	32(%esp),%xmm0
 	pshufd	$192,%xmm1,%xmm5
@@ -942,14 +943,14 @@ L041ctr32_loop6:
 	leal	96(%edi),%edi
 	pshufd	$128,%xmm0,%xmm3
 	subl	$6,%eax
-	jnc	L041ctr32_loop6
+	jnc	L042ctr32_loop6
 	addl	$6,%eax
-	jz	L042ctr32_ret
+	jz	L039ctr32_ret
 	movdqu	(%ebp),%xmm7
 	movl	%ebp,%edx
 	pxor	32(%esp),%xmm7
 	movl	240(%ebp),%ecx
-L040ctr32_tail:
+L041ctr32_tail:
 	por	%xmm7,%xmm2
 	cmpl	$2,%eax
 	jb	L043ctr32_one
@@ -980,9 +981,9 @@ L040ctr32_tail:
 	movups	%xmm4,32(%edi)
 	movups	%xmm5,48(%edi)
 	movups	%xmm6,64(%edi)
-	jmp	L042ctr32_ret
+	jmp	L039ctr32_ret
 .align	4,0x90
-L039ctr32_one_shortcut:
+L040ctr32_one_shortcut:
 	movups	(%ebx),%xmm2
 	movl	240(%edx),%ecx
 L043ctr32_one:
@@ -1000,7 +1001,7 @@ L047enc1_loop_7:
 	movups	(%esi),%xmm6
 	xorps	%xmm2,%xmm6
 	movups	%xmm6,(%edi)
-	jmp	L042ctr32_ret
+	jmp	L039ctr32_ret
 .align	4,0x90
 L044ctr32_two:
 	call	__aesni_encrypt2
@@ -1010,7 +1011,7 @@ L044ctr32_two:
 	xorps	%xmm6,%xmm3
 	movups	%xmm2,(%edi)
 	movups	%xmm3,16(%edi)
-	jmp	L042ctr32_ret
+	jmp	L039ctr32_ret
 .align	4,0x90
 L045ctr32_three:
 	call	__aesni_encrypt3
@@ -1023,7 +1024,7 @@ L045ctr32_three:
 	xorps	%xmm7,%xmm4
 	movups	%xmm3,16(%edi)
 	movups	%xmm4,32(%edi)
-	jmp	L042ctr32_ret
+	jmp	L039ctr32_ret
 .align	4,0x90
 L046ctr32_four:
 	call	__aesni_encrypt4
@@ -1039,7 +1040,7 @@ L046ctr32_four:
 	xorps	%xmm0,%xmm5
 	movups	%xmm4,32(%edi)
 	movups	%xmm5,48(%edi)
-L042ctr32_ret:
+L039ctr32_ret:
 	pxor	%xmm0,%xmm0
 	pxor	%xmm1,%xmm1
 	pxor	%xmm2,%xmm2
diff --git a/generated-src/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S b/generated-src/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
index 41263854ba..418c0550a6 100644
--- a/generated-src/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
+++ b/generated-src/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
@@ -884,6 +884,7 @@ _CET_ENDBR
 	movb	$1,_BORINGSSL_function_hit(%rip)
 #endif
 	cmpq	$1,%rdx
+	jb	L$ctr32_epilogue
 	jne	L$ctr32_bulk
 
 
diff --git a/generated-src/win-aarch64/crypto/fipsmodule/aesv8-armx.S b/generated-src/win-aarch64/crypto/fipsmodule/aesv8-armx.S
index 8e15fdb90f..6ac70cac67 100644
--- a/generated-src/win-aarch64/crypto/fipsmodule/aesv8-armx.S
+++ b/generated-src/win-aarch64/crypto/fipsmodule/aesv8-armx.S
@@ -651,7 +651,6 @@ aes_hw_ctr32_encrypt_blocks:
 	ld1	{v7.4s},[x7]
 	add	x7,x3,#32
 	mov	w6,w5
-	csel	x12,xzr,x12,lo
 
 	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
 	// affected by silicon errata #1742098 [0] and #1655431 [1],
@@ -771,11 +770,12 @@ Loop3x_ctr32:
 
 	adds	x2,x2,#3
 	b.eq	Lctr32_done
+
+Lctr32_tail:
 	cmp	x2,#1
 	mov	x12,#16
 	csel	x12,xzr,x12,eq
-
-Lctr32_tail:
+	b.lt	Lctr32_done	// if len = 0, go to done
 	aese	v0.16b,v16.16b
 	aesmc	v0.16b,v0.16b
 	aese	v1.16b,v16.16b
@@ -816,11 +816,10 @@ Lctr32_tail:
 
 	aese	v0.16b,v23.16b
 	aese	v1.16b,v23.16b
-	cmp	x2,#1
 	eor	v2.16b,v2.16b,v0.16b
 	eor	v3.16b,v3.16b,v1.16b
 	st1	{v2.16b},[x1],#16
-	b.eq	Lctr32_done
+	cbz	x12,Lctr32_done	// if step = 0 (len = 1), go to done
 	st1	{v3.16b},[x1]
 
 Lctr32_done:
diff --git a/generated-src/win-x86/crypto/fipsmodule/aesni-x86.asm b/generated-src/win-x86/crypto/fipsmodule/aesni-x86.asm
index 173f54fb97..cff657bc92 100644
--- a/generated-src/win-x86/crypto/fipsmodule/aesni-x86.asm
+++ b/generated-src/win-x86/crypto/fipsmodule/aesni-x86.asm
@@ -833,7 +833,8 @@ L$038pic:
 	and	esp,-16
 	mov	DWORD [80+esp],ebp
 	cmp	eax,1
-	je	NEAR L$039ctr32_one_shortcut
+	jb	NEAR L$039ctr32_ret
+	je	NEAR L$040ctr32_one_shortcut
 	movdqu	xmm7,[ebx]
 	mov	DWORD [esp],202182159
 	mov	DWORD [4+esp],134810123
@@ -871,7 +872,7 @@ db 102,15,56,0,202
 	pshufd	xmm2,xmm0,192
 	pshufd	xmm3,xmm0,128
 	cmp	eax,6
-	jb	NEAR L$040ctr32_tail
+	jb	NEAR L$041ctr32_tail
 	pxor	xmm7,xmm6
 	shl	ecx,4
 	mov	ebx,16
@@ -880,9 +881,9 @@ db 102,15,56,0,202
 	sub	ebx,ecx
 	lea	edx,[32+ecx*1+edx]
 	sub	eax,6
-	jmp	NEAR L$041ctr32_loop6
+	jmp	NEAR L$042ctr32_loop6
 align	16
-L$041ctr32_loop6:
+L$042ctr32_loop6:
 	pshufd	xmm4,xmm0,64
 	movdqa	xmm0,[32+esp]
 	pshufd	xmm5,xmm1,192
@@ -936,14 +937,14 @@ db 102,15,56,0,202
 	lea	edi,[96+edi]
 	pshufd	xmm3,xmm0,128
 	sub	eax,6
-	jnc	NEAR L$041ctr32_loop6
+	jnc	NEAR L$042ctr32_loop6
 	add	eax,6
-	jz	NEAR L$042ctr32_ret
+	jz	NEAR L$039ctr32_ret
 	movdqu	xmm7,[ebp]
 	mov	edx,ebp
 	pxor	xmm7,[32+esp]
 	mov	ecx,DWORD [240+ebp]
-L$040ctr32_tail:
+L$041ctr32_tail:
 	por	xmm2,xmm7
 	cmp	eax,2
 	jb	NEAR L$043ctr32_one
@@ -974,9 +975,9 @@ L$040ctr32_tail:
 	movups	[32+edi],xmm4
 	movups	[48+edi],xmm5
 	movups	[64+edi],xmm6
-	jmp	NEAR L$042ctr32_ret
+	jmp	NEAR L$039ctr32_ret
 align	16
-L$039ctr32_one_shortcut:
+L$040ctr32_one_shortcut:
 	movups	xmm2,[ebx]
 	mov	ecx,DWORD [240+edx]
 L$043ctr32_one:
@@ -994,7 +995,7 @@ db 102,15,56,221,209
 	movups	xmm6,[esi]
 	xorps	xmm6,xmm2
 	movups	[edi],xmm6
-	jmp	NEAR L$042ctr32_ret
+	jmp	NEAR L$039ctr32_ret
 align	16
 L$044ctr32_two:
 	call	__aesni_encrypt2
@@ -1004,7 +1005,7 @@ L$044ctr32_two:
 	xorps	xmm3,xmm6
 	movups	[edi],xmm2
 	movups	[16+edi],xmm3
-	jmp	NEAR L$042ctr32_ret
+	jmp	NEAR L$039ctr32_ret
 align	16
 L$045ctr32_three:
 	call	__aesni_encrypt3
@@ -1017,7 +1018,7 @@ L$045ctr32_three:
 	xorps	xmm4,xmm7
 	movups	[16+edi],xmm3
 	movups	[32+edi],xmm4
-	jmp	NEAR L$042ctr32_ret
+	jmp	NEAR L$039ctr32_ret
 align	16
 L$046ctr32_four:
 	call	__aesni_encrypt4
@@ -1033,7 +1034,7 @@ L$046ctr32_four:
 	xorps	xmm5,xmm0
 	movups	[32+edi],xmm4
 	movups	[48+edi],xmm5
-L$042ctr32_ret:
+L$039ctr32_ret:
 	pxor	xmm0,xmm0
 	pxor	xmm1,xmm1
 	pxor	xmm2,xmm2
diff --git a/generated-src/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm b/generated-src/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
index bd2c081d80..0583f0ec8a 100644
--- a/generated-src/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
+++ b/generated-src/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
@@ -926,6 +926,7 @@ _CET_ENDBR
 	mov	BYTE[BORINGSSL_function_hit],1
 %endif
 	cmp	rdx,1
+	jb	NEAR $L$ctr32_epilogue
 	jne	NEAR $L$ctr32_bulk
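
For context on the `len != 0` guard the commit message mentions, below is a
minimal caller-side sketch in C++ (illustrative only, not part of the patch).
It assumes the usual internal prototype of aes_hw_ctr32_encrypt_blocks, where
len counts 16-byte blocks rather than bytes; the wrapper name
ctr32_encrypt_guarded is hypothetical. Before this fix, skipping the
blocks == 0 early return would still write one block (x86/x86_64) or two
blocks (AArch64) to |out|; with the fix, the guard is a plain optimization
rather than a correctness requirement.

  #include <stddef.h>
  #include <stdint.h>

  #include <openssl/aes.h>

  // Hardware CTR32 primitive; |len| is a count of 16-byte blocks.
  extern "C" void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
                                              size_t len, const AES_KEY *key,
                                              const uint8_t ivec[16]);

  // Hypothetical wrapper mirroring the caller-side guards described in the
  // commit message. The early return is what callers previously relied on to
  // avoid the spurious 1- or 2-block write when blocks == 0.
  static void ctr32_encrypt_guarded(const uint8_t *in, uint8_t *out,
                                    size_t blocks, const AES_KEY *key,
                                    const uint8_t ivec[16]) {
    if (blocks == 0) {
      return;
    }
    aes_hw_ctr32_encrypt_blocks(in, out, blocks, key, ivec);
  }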