Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

keccak: Optimize Keccak with BMI extension #162

Merged
merged 6 commits into from
Dec 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion include/ethash/keccak.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
extern "C" {
#endif

/// The type of a pointer to a Keccak-f[1600] function implementation.
/// Such a function takes the state of 25 64-bit words and returns nothing.
typedef void (*ethash_keccakf1600_func)(uint64_t[25]);

/// The pointer to the Keccak-f[1600] function implementation.
extern ethash_keccakf1600_func ethash_keccakf1600;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe instead of exposing a global variable it would be simpler to have another wrapper function, like ethash_keccakf1600_best or something.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or just ethash_keccakf1600, which would call the ethash_keccakf1600_best function pointer inside.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered this, but that means a call + an indirect call instead of just an indirect call.
Anyway, I don't want to expose keccakf1600 any more. It is convenient for benchmarking only, so I will either expose these under some macro like KECCAK_EXPORT_INTERNALS or just use keccak256(nullptr, 0). I'm not sure why I exposed these in the first place.

But I don't want to do it here.


/**
* The Keccak-f[1600] function.
*
Expand All @@ -27,7 +32,11 @@ extern "C" {
*
* @param state The state of 25 64-bit words on which the permutation is to be performed.
*/
void ethash_keccakf1600(uint64_t state[25]) NOEXCEPT;
void ethash_keccakf1600_generic(uint64_t state[25]) NOEXCEPT;

/// Variant of ethash_keccakf1600() with additional optimization provided by BMI and BMI2
/// instruction set extensions. May only be used on hardware supporting these extensions.
void ethash_keccakf1600_bmi(uint64_t state[25]) NOEXCEPT;

/**
* The Keccak-f[800] function.
Expand Down
25 changes: 23 additions & 2 deletions lib/keccak/keccakf1600.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Copyright 2018-2019 Pawel Bylica.
// Licensed under the Apache License, Version 2.0.

#include "../support/attributes.h"
#include <ethash/keccak.h>

/// Rotates the bits of x left by the count value specified by s.
Expand Down Expand Up @@ -43,7 +44,7 @@ static const uint64_t round_constants[24] = {
/// The implementation based on:
/// - "simple" implementation by Ronny Van Keer, included in "Reference and optimized code in C",
/// https://keccak.team/archives.html, CC0-1.0 / Public Domain.
void ethash_keccakf1600(uint64_t state[25])
static inline ALWAYS_INLINE void keccakf1600_implementation(uint64_t state[25])
{
uint64_t Aba, Abe, Abi, Abo, Abu;
uint64_t Aga, Age, Agi, Ago, Agu;
Expand Down Expand Up @@ -87,7 +88,7 @@ void ethash_keccakf1600(uint64_t state[25])
Aso = state[23];
Asu = state[24];

for (int round = 0; round < 24; round += 2)
for (size_t round = 0; round < 24; round += 2)
{
/* Round (round + 0): Axx -> Exx */

Expand Down Expand Up @@ -255,3 +256,23 @@ void ethash_keccakf1600(uint64_t state[25])
state[23] = Aso;
state[24] = Asu;
}

/// The generic variant of Keccak-f[1600]: forwards to keccakf1600_implementation()
/// without any target-specific function attribute.
void ethash_keccakf1600_generic(uint64_t state[25])
gumb0 marked this conversation as resolved.
Show resolved Hide resolved
{
keccakf1600_implementation(state);
}

/// The Keccak-f[1600] implementation in use. Initialized to the generic variant;
/// select_keccakf1600_implementation() may replace it where that function is compiled in.
ethash_keccakf1600_func ethash_keccakf1600 = ethash_keccakf1600_generic;

#if defined(__x86_64__) && __has_attribute(target)
/// The BMI/BMI2 variant of Keccak-f[1600].
///
/// Forwards to the same keccakf1600_implementation() as the generic variant, but this function
/// carries the target("bmi,bmi2") attribute, so it must only be called on CPUs supporting both
/// extensions.
__attribute__((target("bmi,bmi2"))) void ethash_keccakf1600_bmi(uint64_t state[25])
{
keccakf1600_implementation(state);
}

/// Selects the Keccak-f[1600] implementation to be used through the ethash_keccakf1600 pointer.
///
/// Declared as a constructor, so it runs at load time. It leaves the pointer at its initial
/// value unless the CPU reports every extension ethash_keccakf1600_bmi() is compiled for.
__attribute__((constructor)) static void select_keccakf1600_implementation(void)
{
    // This function is itself a constructor, so do not rely on the runtime having already
    // populated its CPU feature data; initializing it explicitly is harmless if it has.
    __builtin_cpu_init();

    // ethash_keccakf1600_bmi() is built with target("bmi,bmi2"), so both extensions must be
    // present. Checking "bmi2" alone would pick it on a CPU or VM that reports BMI2 without BMI.
    if (__builtin_cpu_supports("bmi") && __builtin_cpu_supports("bmi2"))
        ethash_keccakf1600 = ethash_keccakf1600_bmi;
}
#endif
15 changes: 8 additions & 7 deletions lib/support/attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,23 @@

#pragma once

// Provide __has_attribute macro if not defined.
#ifndef __has_attribute
#define __has_attribute(name) 0
#endif

// [[always_inline]]
#if _MSC_VER
#define ALWAYS_INLINE __forceinline
#elif defined(__has_attribute)
#if __has_attribute(always_inline)
#elif __has_attribute(always_inline)
#define ALWAYS_INLINE __attribute__((always_inline))
#endif
#endif
#if !defined(ALWAYS_INLINE)
#else
#define ALWAYS_INLINE
#endif

// [[no_sanitize()]]
#if __clang__
#define NO_SANITIZE(sanitizer) \
__attribute__((no_sanitize(sanitizer)))
#define NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer)))
#else
#define NO_SANITIZE(sanitizer)
#endif
23 changes: 15 additions & 8 deletions test/benchmarks/keccak_benchmarks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
// Licensed under the Apache License, Version 2.0.

#include "keccak_utils.hpp"

#include <ethash/keccak.h>
#include <ethash/keccak.hpp>

#include "support/attributes.h"
#include <benchmark/benchmark.h>
#include <ethash/keccak.h>


void fake_keccakf1600(uint64_t* state) noexcept
Expand All @@ -16,18 +14,27 @@ void fake_keccakf1600(uint64_t* state) noexcept
(void)state;
}

/// Calls Keccak-f[1600] through the ethash_keccakf1600 function pointer, i.e. whichever
/// implementation the library currently has selected.
/// NOTE(review): presumably wrapped in a function so it can be passed as the template
/// argument of the keccakf1600 benchmark — confirm.
inline void best(uint64_t state[25]) noexcept
{
ethash_keccakf1600(state);
}

template <void Fn(uint64_t*)>
static void keccakf1600(benchmark::State& state)
{
uint64_t keccak_state[25] = {};

for (auto _ : state)
{
ethash_keccakf1600(keccak_state);
Fn(keccak_state);
benchmark::DoNotOptimize(keccak_state);
}
}
BENCHMARK(keccakf1600);
BENCHMARK_TEMPLATE(keccakf1600, ethash_keccakf1600_generic);
#if defined(__x86_64__) && __has_attribute(target)
BENCHMARK_TEMPLATE(keccakf1600, ethash_keccakf1600_bmi);
#endif
BENCHMARK_TEMPLATE(keccakf1600, best);


static void keccakf800(benchmark::State& state)
Expand Down Expand Up @@ -71,7 +78,7 @@ static void keccak512(benchmark::State& state)
BENCHMARK(keccak512)->Arg(32)->Arg(64)->Arg(71)->Arg(72)->Arg(142)->Arg(143)->Arg(144);


template<void keccak_fn(uint64_t*, const uint8_t*, size_t)>
template <void keccak_fn(uint64_t*, const uint8_t*, size_t)>
static void fake_keccak256(benchmark::State& state)
{
std::vector<uint8_t> data(static_cast<size_t>(state.range(0)), 0xaa);
Expand All @@ -88,7 +95,7 @@ BENCHMARK_TEMPLATE(fake_keccak256, fake_keccak256_default)->Arg(128)->Arg(17 * 8
BENCHMARK_TEMPLATE(fake_keccak256, fake_keccak256_fastest)->Arg(128)->Arg(17 * 8)->Arg(4096)->Arg(16 * 1024);


template<void keccak_fn(uint64_t*, const uint8_t*, size_t)>
template <void keccak_fn(uint64_t*, const uint8_t*, size_t)>
static void fake_keccak256_unaligned(benchmark::State& state)
{
const auto size = static_cast<size_t>(state.range(0));
Expand Down