Skip to content

Commit

Permalink
Merge branch 'main' into supportcallbacks
Browse files Browse the repository at this point in the history
  • Loading branch information
skmcgrail authored Jul 31, 2024
2 parents d1abde2 + 518df30 commit 18661be
Show file tree
Hide file tree
Showing 119 changed files with 5,092 additions and 1,971 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: Go Compatability
on:
push:
branches: [ '*' ]
pull_request:
branches: [ '*' ]
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: true
env:
DOCKER_BUILDKIT: 1
GOPROXY: https://proxy.golang.org,direct
jobs:
go-version-1_17_13:
if: github.repository_owner == 'aws'
env:
GOROOT: "/usr/local/go"
GO_ARCHIVE: "go1.17.13.linux-amd64.tar.gz"
runs-on: ubuntu-latest
steps:
- name: Install OS Dependencies
run: |
which go
sudo apt-get update
sudo apt-get -y --no-install-recommends install cmake gcc ninja-build make
sudo rm -rf /usr/local/go
sudo rm /usr/bin/go
wget -q "https://dl.google.com/go/${GO_ARCHIVE}"
sudo tar -C /usr/local -xf $GO_ARCHIVE
echo "${GOROOT}/bin" >> $GITHUB_PATH
- uses: actions/checkout@v3
- name: Run integration build
run: |
./tests/ci/run_fips_tests.sh
2 changes: 1 addition & 1 deletion BUILDING.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ If in doubt, use the most recent stable version of each build tool.
`PERL_EXECUTABLE`.
* To build without Perl (not recommended) see [this section.](#using-pre-generated-build-files)

* [Go](https://golang.org/dl/) 1.18 or later is required. If not found by
* [Go](https://golang.org/dl/) 1.17.13 or later is required. If not found by
CMake, the go executable may be configured explicitly by setting
`GO_EXECUTABLE`.
* To build without Go (not recommended) see [this section.](#using-pre-generated-build-files)
Expand Down
2 changes: 1 addition & 1 deletion cmake/go.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ elseif(NOT DISABLE_GO)
string(REGEX MATCH "([0-9]+\\.)*[0-9]+" go_version ${go_version_output})

# This should track /go.mod and /BUILDING.md
set(minimum_go_version "1.18")
set(minimum_go_version "1.17.13")
if(go_version VERSION_LESS minimum_go_version)
message(FATAL_ERROR "Go compiler version must be at least ${minimum_go_version}. Found version ${go_version}")
else()
Expand Down
2 changes: 2 additions & 0 deletions crypto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,8 @@ add_library(
lhash/lhash.c
mem.c
ml_kem/ml_kem_512_ipd.c
ml_kem/ml_kem_768_ipd.c
ml_kem/ml_kem_1024_ipd.c
ml_kem/ml_kem.c
obj/obj.c
obj/obj_xref.c
Expand Down
98 changes: 35 additions & 63 deletions crypto/chacha/asm/chacha-x86_64.pl
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@
$code.=<<___;
.text
.extern OPENSSL_ia32cap_P
.section .rodata
.align 64
.Lzero:
Expand Down Expand Up @@ -230,24 +228,12 @@ sub ROUND { # critical path is 24 cycles per round
########################################################################
# Generic code path that handles all lengths on pre-SSSE3 processors.
$code.=<<___;
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,\@function,5
.globl ChaCha20_ctr32_nohw
.type ChaCha20_ctr32_nohw,\@function,5
.align 64
ChaCha20_ctr32:
ChaCha20_ctr32_nohw:
.cfi_startproc
_CET_ENDBR
cmp \$0,$len
je .Lno_data
mov OPENSSL_ia32cap_P+4(%rip),%r10
___
$code.=<<___ if ($avx>2);
bt \$48,%r10 # check for AVX512F
jc .LChaCha20_avx512
___
$code.=<<___;
test \$`1<<(41-32)`,%r10d
jnz .LChaCha20_ssse3
push %rbx
.cfi_push rbx
push %rbp
Expand Down Expand Up @@ -419,7 +405,7 @@ sub ROUND { # critical path is 24 cycles per round
.Lno_data:
ret
.cfi_endproc
.size ChaCha20_ctr32,.-ChaCha20_ctr32
.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw
___

########################################################################
Expand Down Expand Up @@ -454,19 +440,16 @@ sub SSSE3ROUND { # critical path is 20 "SIMD ticks" per round
my $xframe = $win64 ? 32+8 : 8;

$code.=<<___;
.type ChaCha20_ssse3,\@function,5
.globl ChaCha20_ctr32_ssse3
.type ChaCha20_ctr32_ssse3,\@function,5
.align 32
ChaCha20_ssse3:
.LChaCha20_ssse3:
ChaCha20_ctr32_ssse3:
.cfi_startproc
_CET_ENDBR
mov %rsp,%r9 # frame pointer
.cfi_def_cfa_register r9
___
$code.=<<___;
cmp \$128,$len # we might throw away some data,
ja .LChaCha20_4x # but overall it won't be slower
.Ldo_sse3_after_all:
sub \$64+$xframe,%rsp
___
$code.=<<___ if ($win64);
Expand Down Expand Up @@ -576,7 +559,7 @@ sub SSSE3ROUND { # critical path is 20 "SIMD ticks" per round
.Lssse3_epilogue:
ret
.cfi_endproc
.size ChaCha20_ssse3,.-ChaCha20_ssse3
.size ChaCha20_ctr32_ssse3,.-ChaCha20_ctr32_ssse3
___
}

Expand Down Expand Up @@ -714,29 +697,17 @@ sub SSSE3_lane_ROUND {
my $xframe = $win64 ? 0xa8 : 8;

$code.=<<___;
.type ChaCha20_4x,\@function,5
.globl ChaCha20_ctr32_ssse3_4x
.type ChaCha20_ctr32_ssse3_4x,\@function,5
.align 32
ChaCha20_4x:
.LChaCha20_4x:
ChaCha20_ctr32_ssse3_4x:
.cfi_startproc
_CET_ENDBR
mov %rsp,%r9 # frame pointer
.cfi_def_cfa_register r9
mov %r10,%r11
___
$code.=<<___ if ($avx>1);
shr \$32,%r10 # OPENSSL_ia32cap_P+8
test \$`1<<5`,%r10 # test AVX2
jnz .LChaCha20_8x
___
$code.=<<___;
cmp \$192,$len
ja .Lproceed4x
and \$`1<<26|1<<22`,%r11 # isolate XSAVE+MOVBE
cmp \$`1<<22`,%r11 # check for MOVBE without XSAVE
je .Ldo_sse3_after_all # to detect Atom
.Lproceed4x:
sub \$0x140+$xframe,%rsp
___
################ stack layout
Expand Down Expand Up @@ -1164,7 +1135,7 @@ sub SSSE3_lane_ROUND {
.L4x_epilogue:
ret
.cfi_endproc
.size ChaCha20_4x,.-ChaCha20_4x
.size ChaCha20_ctr32_ssse3_4x,.-ChaCha20_ctr32_ssse3_4x
___
}

Expand Down Expand Up @@ -1293,11 +1264,12 @@ sub AVX2_lane_ROUND {
my $xframe = $win64 ? 0xa8 : 8;

$code.=<<___;
.type ChaCha20_8x,\@function,5
.globl ChaCha20_ctr32_avx2
.type ChaCha20_ctr32_avx2,\@function,5
.align 32
ChaCha20_8x:
.LChaCha20_8x:
ChaCha20_ctr32_avx2:
.cfi_startproc
_CET_ENDBR
mov %rsp,%r9 # frame register
.cfi_def_cfa_register r9
sub \$0x280+$xframe,%rsp
Expand Down Expand Up @@ -1809,7 +1781,7 @@ sub AVX2_lane_ROUND {
.L8x_epilogue:
ret
.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
.size ChaCha20_ctr32_avx2,.-ChaCha20_ctr32_avx2
___
}

Expand Down Expand Up @@ -2719,22 +2691,22 @@ sub AVX512_lane_ROUND {
.section .pdata
.align 4
.rva .LSEH_begin_ChaCha20_ctr32
.rva .LSEH_end_ChaCha20_ctr32
.rva .LSEH_info_ChaCha20_ctr32
.rva .LSEH_begin_ChaCha20_ctr32_nohw
.rva .LSEH_end_ChaCha20_ctr32_nohw
.rva .LSEH_info_ChaCha20_ctr32_nohw
.rva .LSEH_begin_ChaCha20_ssse3
.rva .LSEH_end_ChaCha20_ssse3
.rva .LSEH_info_ChaCha20_ssse3
.rva .LSEH_begin_ChaCha20_ctr32_ssse3
.rva .LSEH_end_ChaCha20_ctr32_ssse3
.rva .LSEH_info_ChaCha20_ctr32_ssse3
.rva .LSEH_begin_ChaCha20_4x
.rva .LSEH_end_ChaCha20_4x
.rva .LSEH_info_ChaCha20_4x
.rva .LSEH_begin_ChaCha20_ctr32_ssse3_4x
.rva .LSEH_end_ChaCha20_ctr32_ssse3_4x
.rva .LSEH_info_ChaCha20_ctr32_ssse3_4x
___
$code.=<<___ if ($avx>1);
.rva .LSEH_begin_ChaCha20_8x
.rva .LSEH_end_ChaCha20_8x
.rva .LSEH_info_ChaCha20_8x
.rva .LSEH_begin_ChaCha20_ctr32_avx2
.rva .LSEH_end_ChaCha20_ctr32_avx2
.rva .LSEH_info_ChaCha20_ctr32_avx2
___
$code.=<<___ if ($avx>2);
.rva .LSEH_begin_ChaCha20_avx512
Expand All @@ -2748,22 +2720,22 @@ sub AVX512_lane_ROUND {
$code.=<<___;
.section .xdata
.align 8
.LSEH_info_ChaCha20_ctr32:
.LSEH_info_ChaCha20_ctr32_nohw:
.byte 9,0,0,0
.rva se_handler
.LSEH_info_ChaCha20_ssse3:
.LSEH_info_ChaCha20_ctr32_ssse3:
.byte 9,0,0,0
.rva ssse3_handler
.rva .Lssse3_body,.Lssse3_epilogue
.LSEH_info_ChaCha20_4x:
.LSEH_info_ChaCha20_ctr32_ssse3_4x:
.byte 9,0,0,0
.rva full_handler
.rva .L4x_body,.L4x_epilogue
___
$code.=<<___ if ($avx>1);
.LSEH_info_ChaCha20_8x:
.LSEH_info_ChaCha20_ctr32_avx2:
.byte 9,0,0,0
.rva full_handler
.rva .L8x_body,.L8x_epilogue # HandlerData[]
Expand Down
18 changes: 18 additions & 0 deletions crypto/chacha/chacha.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,24 @@ static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
ChaCha20_ctr32_neon(out, in, in_len, key, counter);
return;
}
#endif
#if defined(CHACHA20_ASM_AVX2) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
if (ChaCha20_ctr32_avx2_capable(in_len)) {
ChaCha20_ctr32_avx2(out, in, in_len, key, counter);
return;
}
#endif
#if defined(CHACHA20_ASM_SSSE3_4X)
if (ChaCha20_ctr32_ssse3_4x_capable(in_len)) {
ChaCha20_ctr32_ssse3_4x(out, in, in_len, key, counter);
return;
}
#endif
#if defined(CHACHA20_ASM_SSSE3)
if (ChaCha20_ctr32_ssse3_capable(in_len)) {
ChaCha20_ctr32_ssse3(out, in, in_len, key, counter);
return;
}
#endif
if (in_len > 0) {
ChaCha20_ctr32_nohw(out, in, in_len, key, counter);
Expand Down
15 changes: 15 additions & 0 deletions crypto/chacha/chacha_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,21 @@ static void check_abi(uint8_t *out, const uint8_t *in, size_t in_len,
CHECK_ABI(ChaCha20_ctr32_neon, out, in, in_len, key, counter);
}
#endif
#if defined(CHACHA20_ASM_AVX2) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX)
if (ChaCha20_ctr32_avx2_capable(in_len)) {
CHECK_ABI(ChaCha20_ctr32_avx2, out, in, in_len, key, counter);
}
#endif
#if defined(CHACHA20_ASM_SSSE3_4X)
if (ChaCha20_ctr32_ssse3_4x_capable(in_len)) {
CHECK_ABI(ChaCha20_ctr32_ssse3_4x, out, in, in_len, key, counter);
}
#endif
#if defined(CHACHA20_ASM_SSSE3)
if (ChaCha20_ctr32_ssse3_capable(in_len)) {
CHECK_ABI(ChaCha20_ctr32_ssse3, out, in, in_len, key, counter);
}
#endif
#if defined(CHACHA20_ASM_NOHW)
if (in_len > 0) {
CHECK_ABI(ChaCha20_ctr32_nohw, out, in, in_len, key, counter);
Expand Down
28 changes: 26 additions & 2 deletions crypto/chacha/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ extern "C" {
void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
const uint8_t nonce[16]);

#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)

#define CHACHA20_ASM

Expand All @@ -46,6 +45,31 @@ OPENSSL_INLINE int ChaCha20_ctr32_neon_capable(size_t len) {
}
void ChaCha20_ctr32_neon(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
#define CHACHA20_ASM_NOHW

#define CHACHA20_ASM_AVX2
OPENSSL_INLINE int ChaCha20_ctr32_avx2_capable(size_t len) {
return (len > 128) && CRYPTO_is_AVX2_capable();
}
void ChaCha20_ctr32_avx2(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);

#define CHACHA20_ASM_SSSE3_4X
OPENSSL_INLINE int ChaCha20_ctr32_ssse3_4x_capable(size_t len) {
int capable = (len > 128) && CRYPTO_is_SSSE3_capable();
int faster = (len > 192) || !CRYPTO_cpu_perf_is_like_silvermont();
return capable && faster;
}
void ChaCha20_ctr32_ssse3_4x(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);

#define CHACHA20_ASM_SSSE3
OPENSSL_INLINE int ChaCha20_ctr32_ssse3_capable(size_t len) {
return (len > 128) && CRYPTO_is_SSSE3_capable();
}
void ChaCha20_ctr32_ssse3(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#endif

#if defined(CHACHA20_ASM)
Expand Down
2 changes: 2 additions & 0 deletions crypto/evp_extra/evp_extra_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2073,6 +2073,8 @@ static const struct KnownKEM kKEMs[] = {
{"Kyber768r3", NID_KYBER768_R3, 1184, 2400, 1088, 32, 64, 32, "kyber/kat/kyber768r3.txt"},
{"Kyber1024r3", NID_KYBER1024_R3, 1568, 3168, 1568, 32, 64, 32, "kyber/kat/kyber1024r3.txt"},
{"MLKEM512IPD", NID_MLKEM512IPD, 800, 1632, 768, 32, 64, 32, "ml_kem/kat/mlkem512ipd.txt"},
{"MLKEM768IPD", NID_MLKEM768IPD, 1184, 2400, 1088, 32, 64, 32, "ml_kem/kat/mlkem768ipd.txt"},
{"MLKEM1024IPD", NID_MLKEM1024IPD, 1568, 3168, 1568, 32, 64, 32, "ml_kem/kat/mlkem1024ipd.txt"},
};

class PerKEMTest : public testing::TestWithParam<KnownKEM> {};
Expand Down
6 changes: 4 additions & 2 deletions crypto/fipsmodule/cpucap/cpu_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,8 @@ void OPENSSL_cpuid_setup(void) {

// Clear the XSAVE bit on Knights Landing to mimic Silvermont. This enables
// some Silvermont-specific codepaths which perform better. See OpenSSL
// commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
// commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f and
// |CRYPTO_cpu_perf_is_like_silvermont|.
if ((eax & 0x0fff0ff0) == 0x00050670 /* Knights Landing */ ||
(eax & 0x0fff0ff0) == 0x00080650 /* Knights Mill (per SDE) */) {
ecx &= ~(1u << 26);
Expand All @@ -267,7 +268,8 @@ void OPENSSL_cpuid_setup(void) {
// Clear AVX2 and AVX512* bits.
//
// TODO(davidben): Should bits 17 and 26-28 also be cleared? Upstream
// doesn't clear those.
// doesn't clear those. See the comments in
// |CRYPTO_hardware_supports_XSAVE|.
extended_features[0] &=
~((1u << 5) | (1u << 16) | (1u << 21) | (1u << 30) | (1u << 31));
}
Expand Down
Loading

0 comments on commit 18661be

Please sign in to comment.