From 77b606f91ac3ed7759fdbb13158314d8e79792ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Mon, 7 Dec 2020 23:28:50 +0100 Subject: [PATCH 1/6] Provide portable __has_attribute() macro --- lib/support/attributes.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/support/attributes.h b/lib/support/attributes.h index b8913c42..5195c62d 100644 --- a/lib/support/attributes.h +++ b/lib/support/attributes.h @@ -4,22 +4,23 @@ #pragma once +// Provide __has_attribute macro if not defined. +#ifndef __has_attribute +#define __has_attribute(name) 0 +#endif + // [[always_inline]] #if _MSC_VER #define ALWAYS_INLINE __forceinline -#elif defined(__has_attribute) -#if __has_attribute(always_inline) +#elif __has_attribute(always_inline) #define ALWAYS_INLINE __attribute__((always_inline)) -#endif -#endif -#if !defined(ALWAYS_INLINE) +#else #define ALWAYS_INLINE #endif // [[no_sanitize()]] #if __clang__ -#define NO_SANITIZE(sanitizer) \ - __attribute__((no_sanitize(sanitizer))) +#define NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer))) #else #define NO_SANITIZE(sanitizer) #endif From 4cb8399b91ee54c18950aa1df21988701bece78c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Mon, 7 Dec 2020 23:04:55 +0100 Subject: [PATCH 2/6] keccak: Move Keccak-f[1600] implementation to a static inline function --- lib/keccak/keccakf1600.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/keccak/keccakf1600.c b/lib/keccak/keccakf1600.c index 63713526..68bb55bf 100644 --- a/lib/keccak/keccakf1600.c +++ b/lib/keccak/keccakf1600.c @@ -2,6 +2,7 @@ // Copyright 2018-2019 Pawel Bylica. // Licensed under the Apache License, Version 2.0. +#include "../support/attributes.h" #include /// Rotates the bits of x left by the count value specified by s. 
@@ -43,7 +44,7 @@ static const uint64_t round_constants[24] = { /// The implementation based on: /// - "simple" implementation by Ronny Van Keer, included in "Reference and optimized code in C", /// https://keccak.team/archives.html, CC0-1.0 / Public Domain. -void ethash_keccakf1600(uint64_t state[25]) +static inline ALWAYS_INLINE void keccakf1600_implementation(uint64_t state[25]) { uint64_t Aba, Abe, Abi, Abo, Abu; uint64_t Aga, Age, Agi, Ago, Agu; @@ -255,3 +256,8 @@ void ethash_keccakf1600(uint64_t state[25]) state[23] = Aso; state[24] = Asu; } + +void ethash_keccakf1600(uint64_t state[25]) +{ + keccakf1600_implementation(state); +} From 5419a2c87ac96afd88c8b78184718db3c247220c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Mon, 7 Dec 2020 23:52:35 +0100 Subject: [PATCH 3/6] keccak: Add optimized implementation for BMI+BMI2 --- include/ethash/keccak.h | 4 ++++ lib/keccak/keccakf1600.c | 7 +++++++ test/benchmarks/keccak_benchmarks.cpp | 18 ++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/include/ethash/keccak.h b/include/ethash/keccak.h index 873b289f..b3feadb9 100644 --- a/include/ethash/keccak.h +++ b/include/ethash/keccak.h @@ -29,6 +29,10 @@ extern "C" { */ void ethash_keccakf1600(uint64_t state[25]) NOEXCEPT; +/// Variant of ethash_keccakf1600() with additional optimization provided by BMI and BMI2 +/// instruction set extensions. May only be used on hardware supporting these extensions. +void ethash_keccakf1600_bmi(uint64_t state[25]) NOEXCEPT; + /** * The Keccak-f[800] function. 
* diff --git a/lib/keccak/keccakf1600.c b/lib/keccak/keccakf1600.c index 68bb55bf..a9d08609 100644 --- a/lib/keccak/keccakf1600.c +++ b/lib/keccak/keccakf1600.c @@ -261,3 +261,10 @@ void ethash_keccakf1600(uint64_t state[25]) { keccakf1600_implementation(state); } + +#if defined(__x86_64__) && __has_attribute(target) +__attribute__((target("bmi,bmi2"))) void ethash_keccakf1600_bmi(uint64_t state[25]) +{ + keccakf1600_implementation(state); +} +#endif diff --git a/test/benchmarks/keccak_benchmarks.cpp b/test/benchmarks/keccak_benchmarks.cpp index 776fcd17..447ab1a0 100644 --- a/test/benchmarks/keccak_benchmarks.cpp +++ b/test/benchmarks/keccak_benchmarks.cpp @@ -3,11 +3,9 @@ // Licensed under the Apache License, Version 2.0. #include "keccak_utils.hpp" - -#include -#include - +#include "support/attributes.h" #include +#include void fake_keccakf1600(uint64_t* state) noexcept @@ -17,17 +15,21 @@ void fake_keccakf1600(uint64_t* state) noexcept } +template static void keccakf1600(benchmark::State& state) { uint64_t keccak_state[25] = {}; for (auto _ : state) { - ethash_keccakf1600(keccak_state); + Fn(keccak_state); benchmark::DoNotOptimize(keccak_state); } } -BENCHMARK(keccakf1600); +BENCHMARK_TEMPLATE(keccakf1600, ethash_keccakf1600); +#if defined(__x86_64__) && __has_attribute(target) +BENCHMARK_TEMPLATE(keccakf1600, ethash_keccakf1600_bmi); +#endif static void keccakf800(benchmark::State& state) @@ -71,7 +73,7 @@ static void keccak512(benchmark::State& state) BENCHMARK(keccak512)->Arg(32)->Arg(64)->Arg(71)->Arg(72)->Arg(142)->Arg(143)->Arg(144); -template +template static void fake_keccak256(benchmark::State& state) { std::vector data(static_cast(state.range(0)), 0xaa); @@ -88,7 +90,7 @@ BENCHMARK_TEMPLATE(fake_keccak256, fake_keccak256_default)->Arg(128)->Arg(17 * 8 BENCHMARK_TEMPLATE(fake_keccak256, fake_keccak256_fastest)->Arg(128)->Arg(17 * 8)->Arg(4096)->Arg(16 * 1024); -template +template static void fake_keccak256_unaligned(benchmark::State& state) { const 
auto size = static_cast(state.range(0)); From 56a7d0bfc4c96e475dc50e6757699dacd800e188 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Tue, 8 Dec 2020 00:42:30 +0100 Subject: [PATCH 4/6] keccak: Change ethash_keccakf1600 to function pointer --- include/ethash/keccak.h | 7 ++++++- lib/keccak/keccakf1600.c | 4 +++- test/benchmarks/keccak_benchmarks.cpp | 7 ++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/ethash/keccak.h b/include/ethash/keccak.h index b3feadb9..47d4eb0c 100644 --- a/include/ethash/keccak.h +++ b/include/ethash/keccak.h @@ -19,6 +19,11 @@ extern "C" { #endif +typedef void (*ethash_keccakf1600_func)(uint64_t[25]); + +/// The pointer to the Keccak-f[1600] function implementation. +extern ethash_keccakf1600_func ethash_keccakf1600; + /** * The Keccak-f[1600] function. * @@ -27,7 +32,7 @@ extern "C" { * * @param state The state of 25 64-bit words on which the permutation is to be performed. */ -void ethash_keccakf1600(uint64_t state[25]) NOEXCEPT; +void ethash_keccakf1600_generic(uint64_t state[25]) NOEXCEPT; /// Variant of ethash_keccakf1600() with additional optimization provided by BMI and BMI2 /// instruction set extensions. May only be used on hardware supporting these extensions. 
diff --git a/lib/keccak/keccakf1600.c b/lib/keccak/keccakf1600.c index a9d08609..1ef41d66 100644 --- a/lib/keccak/keccakf1600.c +++ b/lib/keccak/keccakf1600.c @@ -257,11 +257,13 @@ static inline ALWAYS_INLINE void keccakf1600_implementation(uint64_t state[25]) state[24] = Asu; } -void ethash_keccakf1600(uint64_t state[25]) +void ethash_keccakf1600_generic(uint64_t state[25]) { keccakf1600_implementation(state); } +ethash_keccakf1600_func ethash_keccakf1600 = ethash_keccakf1600_generic; + #if defined(__x86_64__) && __has_attribute(target) __attribute__((target("bmi,bmi2"))) void ethash_keccakf1600_bmi(uint64_t state[25]) { diff --git a/test/benchmarks/keccak_benchmarks.cpp b/test/benchmarks/keccak_benchmarks.cpp index 447ab1a0..3ca66737 100644 --- a/test/benchmarks/keccak_benchmarks.cpp +++ b/test/benchmarks/keccak_benchmarks.cpp @@ -14,6 +14,10 @@ void fake_keccakf1600(uint64_t* state) noexcept (void)state; } +inline void best(uint64_t state[25]) noexcept +{ + ethash_keccakf1600(state); +} template static void keccakf1600(benchmark::State& state) @@ -26,10 +30,11 @@ static void keccakf1600(benchmark::State& state) benchmark::DoNotOptimize(keccak_state); } } -BENCHMARK_TEMPLATE(keccakf1600, ethash_keccakf1600); +BENCHMARK_TEMPLATE(keccakf1600, ethash_keccakf1600_generic); #if defined(__x86_64__) && __has_attribute(target) BENCHMARK_TEMPLATE(keccakf1600, ethash_keccakf1600_bmi); #endif +BENCHMARK_TEMPLATE(keccakf1600, best); static void keccakf800(benchmark::State& state) From 200bb06007e652f967eb8f2f6dd1b6d786ff0347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Tue, 8 Dec 2020 00:45:57 +0100 Subject: [PATCH 5/6] keccak: Select best implementation at startup --- lib/keccak/keccakf1600.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/keccak/keccakf1600.c b/lib/keccak/keccakf1600.c index 1ef41d66..f95d79c8 100644 --- a/lib/keccak/keccakf1600.c +++ b/lib/keccak/keccakf1600.c @@ -269,4 +269,10 @@ __attribute__((target("bmi,bmi2"))) 
void ethash_keccakf1600_bmi(uint64_t state[2 { keccakf1600_implementation(state); } + +__attribute__((constructor)) static void select_keccakf1600_implementation(void) +{ + if (__builtin_cpu_supports("bmi") && __builtin_cpu_supports("bmi2")) /* ethash_keccakf1600_bmi is built with target("bmi,bmi2"), so require both features before selecting it */ + ethash_keccakf1600 = ethash_keccakf1600_bmi; +} #endif From 1e0b5b22d7c5e189b3cb839903abef0bb2430032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Tue, 8 Dec 2020 10:04:56 +0100 Subject: [PATCH 6/6] keccak: Use size_t for round index --- lib/keccak/keccakf1600.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/keccak/keccakf1600.c b/lib/keccak/keccakf1600.c index f95d79c8..c9dcccba 100644 --- a/lib/keccak/keccakf1600.c +++ b/lib/keccak/keccakf1600.c @@ -88,7 +88,7 @@ static inline ALWAYS_INLINE void keccakf1600_implementation(uint64_t state[25]) Aso = state[23]; Asu = state[24]; - for (int round = 0; round < 24; round += 2) + for (size_t round = 0; round < 24; round += 2) { /* Round (round + 0): Axx -> Exx */