Skip to content

Commit

Permalink
Merge branch 'master' of github.com:filecoin-project/neptune into hal…
Browse files Browse the repository at this point in the history
…o2-pick-master-changes
  • Loading branch information
storojs72 committed Jan 13, 2023
2 parents fb74454 + 4e29c4c commit 1c7a4c9
Show file tree
Hide file tree
Showing 22 changed files with 454 additions and 292 deletions.
6 changes: 3 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ jobs:
RUST_LOG: debug
# Build the kernel only for the single architecture that is used on CI. This should reduce
# the overall compile-time significantly.
NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
steps:
- set-env-path
- install-gpu-deps
Expand Down Expand Up @@ -122,7 +122,7 @@ jobs:
RUST_LOG: debug
# Build the kernel only for the single architecture that is used on CI. This should reduce
# the overall compile-time significantly.
NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
steps:
- set-env-path
- install-gpu-deps
Expand All @@ -136,7 +136,7 @@ jobs:
RUST_LOG: debug
# Build the kernel only for the single architecture that is used on CI. This should reduce
# the overall compile-time significantly.
NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
steps:
- set-env-path
- install-gpu-deps
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://book.async.rs/overview

## Unreleased

## 8.1.0
- Implement serde for PoseidonConstants (https://github.com/filecoin-project/neptune/pull/165)

## 8.0.0
- fix: update to newer ec-gpu version (https://github.com/filecoin-project/neptune/pull/164)
- chore: fix CI MacOS build (https://github.com/filecoin-project/neptune/pull/161)

## 7.2.0 - 2022-8-9
- Change write_rate_element to add_rate_element to agree with spec.

Expand Down
35 changes: 16 additions & 19 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "neptune"
description = "Poseidon hashing over BLS12-381 for Filecoin."
version = "7.2.0"
version = "8.1.0"
authors = ["porcuquine <[email protected]>"]
edition = "2018"
license = "MIT OR Apache-2.0"
Expand All @@ -10,39 +10,37 @@ resolver = "2"

[dependencies]
lazy_static = "1.4.0"
bellperson = { version = "0.22", default-features = false }
bellperson = { version = "0.24", default-features = false }
blake2s_simd = "0.5"
blstrs = { version = "0.5.0", optional = true }
blstrs = { version = "0.6.0", optional = true }
byteorder = "1"
ec-gpu = { version = "0.1.0", optional = true }
ec-gpu-gen = { version = "0.3.0", default-features = false, optional = true }
ec-gpu = { version = "0.2.0", optional = true }
ec-gpu-gen = { version = "0.5.0", optional = true }
ff = "0.12.0"
generic-array = "0.14.4"
itertools = { version = "0.8.0" }
log = "0.4.8"
halo2_proofs = "0.2.0"
# `halo2_proofs` does not expose the feature `pasta_curves/gpu`, thus we must also import
# `pasta_curves/gpu` when building with `neptune/pasta`.
pasta_curves = { version = "0.4.0", optional = true }
pasta_curves = { version = "0.5.1", features = ["serde"], package = "fil_pasta_curves", optional = true }
rust-gpu-tools = { version = "0.5.0", default-features = false, optional = true }
trait-set = "0.3.0"
serde = { version = "1.0", features = ["derive"] }

[dev-dependencies]
blstrs = { version = "0.5.0" }
blstrs = "0.6.1"
criterion = "0.3"
rand = "0.8.0"
sha2 = "0.9"
tempdir = "0.3"
rand_xorshift = "0.3.0"
serde_json = "1.0.53"
sha2 = "0.9"

[build-dependencies]
blstrs = "0.5.0"
ec-gpu = { version = "0.1.0", optional = true }
ec-gpu-gen = { version = "0.3.0", default-features = false, optional = true }
execute = "0.2.9"
hex = "0.4"
pasta_curves = "0.4.0"
sha2 = "0.9"
blstrs = "0.6.1"
ec-gpu = { version = "0.2.0", optional = true }
ec-gpu-gen = { version = "0.5.0", optional = true }
pasta_curves = { version = "0.5.1", package = "fil_pasta_curves" }

[[bench]]
name = "hash"
Expand All @@ -58,8 +56,8 @@ codegen-units = 1

[features]
default = [ "bellperson/default" ]
cuda = ["rust-gpu-tools/cuda", "blstrs/gpu", "ec-gpu-gen/cuda", "ec-gpu"]
opencl = ["rust-gpu-tools/opencl", "blstrs/gpu", "ec-gpu-gen/opencl", "ec-gpu"]
cuda = ["ec-gpu-gen/cuda", "ec-gpu", "pasta_curves/gpu"]
opencl = ["ec-gpu-gen/opencl", "ec-gpu", "pasta_curves/gpu"]
# The supported arities for Poseidon running on the GPU are specified at compile-time.
arity2 = []
arity4 = []
Expand All @@ -72,7 +70,6 @@ arity36 = []
strengthened = []
# Compiles the "even-partial" strength GPU kernel.
even-partial = []
wasm = [ "bellperson/wasm" ]
# The supported fields for Poseidon running on the GPU are specified at compile-time.
bls = ["blstrs/gpu"]
pasta = ["pasta_curves/gpu", "even-partial"]
Expand Down
12 changes: 4 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,14 @@ If you make changes to the spec in `neptune`, you must make those same changes t

## Environment variables

- `NEPTUNE_DEFAULT_GPU=<unique-id>` allows you to select the default GPU that tree-builder is going to run on given its unique ID.
- `EC_GPU_FRAMEWORK=<cuda | opencl>` allows you to select whether the CUDA or OpenCL implementation should be used. If not set, `cuda` will be used if available.

(The unique ID is the UUID or the hexadecimal Bus-ID that can be found through `nvidia-smi`, `rocm-smi`, `lspci` and etc.)
- `EC_GPU_CUDA_NVCC_ARGS`

- `NEPTUNE_GPU_FRAMEWORK=<cuda | opencl>` allows to select whether the CUDA or OpenCL implementation should be used. If not set, `cuda` will be used if available.

- `NEPTUNE_CUDA_NVCC_ARGS`

By default the CUDA kernel is compiled for several architectures, which may take a long time. `BELLMAN_CUDA_NVCC_ARGS` can be used to override those arguments. The input and output file will still be automatically set.
By default the CUDA kernel is compiled for several architectures, which may take a long time. `EC_GPU_CUDA_NVCC_ARGS` can be used to override those arguments. The input and output files will still be set automatically.

// Example for compiling the kernel for only the Turing architecture
NEPTUNE_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75"
EC_GPU_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75"

## Rust feature flags

Expand Down
83 changes: 6 additions & 77 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,84 +1,13 @@
/// The build script is needed to compile the CUDA kernel.

#[cfg(feature = "cuda")]
/// The build script is used to generate the CUDA kernel and OpenCL source at compile-time, if the
/// `cuda` and/or `opencl` feature is enabled.
#[cfg(any(feature = "cuda", feature = "opencl"))]
fn main() {
use std::path::PathBuf;
use std::process::Command;
use std::{env, fs};

use ec_gpu_gen::Limb32;
use sha2::{Digest, Sha256};

#[path = "src/proteus/sources.rs"]
mod sources;

let kernel_source = sources::generate_program::<Limb32>();
let out_dir = env::var("OUT_DIR").expect("OUT_DIR was not set.");

// Make it possible to override the default options. Though the source and output file is
// always set automatically.
let mut nvcc = match env::var("NEPTUNE_CUDA_NVCC_ARGS") {
Ok(args) => execute::command(format!("nvcc {}", args)),
Err(_) => {
let mut command = Command::new("nvcc");
command
.arg("--optimize=6")
// Compile with as many threads as CPUs are available.
.arg("--threads=0")
.arg("--fatbin")
.arg("--gpu-architecture=sm_86")
.arg("--generate-code=arch=compute_86,code=sm_86")
.arg("--generate-code=arch=compute_80,code=sm_80")
.arg("--generate-code=arch=compute_75,code=sm_75");
command
}
};

// Hash the source and the compile flags. Use that as the filename, so that the kernel is
// only rebuilt if any of them change.
let mut hasher = Sha256::new();
hasher.update(kernel_source.as_bytes());
hasher.update(&format!("{:?}", &nvcc));
let kernel_digest = hex::encode(hasher.finalize());

let source_path: PathBuf = [&out_dir, &format!("{}.cu", &kernel_digest)]
.iter()
.collect();
let fatbin_path: PathBuf = [&out_dir, &format!("{}.fatbin", &kernel_digest)]
.iter()
.collect();

fs::write(&source_path, &kernel_source).unwrap_or_else(|_| {
panic!(
"Cannot write kernel source at {}.",
source_path.to_str().unwrap()
)
});

// Only compile if the output doesn't exist yet.
if !fatbin_path.as_path().exists() {
let status = nvcc
.arg("--output-file")
.arg(&fatbin_path)
.arg(&source_path)
.status()
.expect("Cannot run nvcc.");

if !status.success() {
panic!(
"nvcc failed. See the kernel source at {}",
source_path.to_str().unwrap()
);
}
}

// The idea to put the path to the fatbin into a compile-time env variable is from
// https://github.com/LutzCle/fast-interconnects-demo/blob/b80ea8e04825167f486ab8ac1b5d67cf7dd51d2c/rust-demo/build.rs
println!(
"cargo:rustc-env=CUDA_FATBIN={}",
fatbin_path.to_str().unwrap()
);
let source_builder = sources::generate_program();
ec_gpu_gen::generate(&source_builder);
}

#[cfg(not(feature = "cuda"))]
#[cfg(not(any(feature = "cuda", feature = "opencl")))]
fn main() {}
14 changes: 7 additions & 7 deletions gbench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ license = "MIT OR Apache-2.0"

[dependencies]
lazy_static = "1.4.0"
bellperson = { version = "0.22.0", default-features = false }
bellperson = { version = "0.24.0", default-features = false }
blake2s_simd = "0.5"
byteorder = "1"
env_logger = "0.7.1"
ff = "0.12.0"
generic-array = "0.14.4"
log = "0.4.8"
neptune = { path = "../", default-features = false, features = ["arity8", "arity11", "bls", "pasta"] }
rust-gpu-tools = { version = "0.5.0", default-features = false, optional = true }
structopt = { version = "0.3", default-features = false }
blstrs = { version = "0.5.0", features = ["gpu"] }
pasta_curves = { version = "0.4.0", features = ["gpu"] }
ec-gpu = "0.1.0"
blstrs = { version = "0.6.1", features = ["gpu"] }
pasta_curves = { version = "0.5.1", features = ["gpu"], package = "fil_pasta_curves" }
ec-gpu = "0.2.0"
ec-gpu-gen = "0.5.0"

[features]
default = ["opencl"]
cuda = ["neptune/cuda", "bellperson/cuda", "rust-gpu-tools/cuda"]
opencl = ["neptune/opencl", "bellperson/opencl", "rust-gpu-tools/opencl"]
cuda = ["neptune/cuda", "bellperson/cuda", "ec-gpu-gen/cuda"]
opencl = ["neptune/opencl", "bellperson/opencl", "ec-gpu-gen/opencl"]
6 changes: 3 additions & 3 deletions gbench/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use blstrs::Scalar as Fr;
use ec_gpu::GpuField;
use ec_gpu::GpuName;
use ec_gpu_gen::rust_gpu_tools::{Device, UniqueId};
use ff::PrimeField;
use generic_array::sequence::GenericSequence;
use generic_array::typenum::{U11, U8};
Expand All @@ -8,14 +9,13 @@ use log::info;
use neptune::column_tree_builder::{ColumnTreeBuilder, ColumnTreeBuilderTrait};
use neptune::{batch_hasher::Batcher, BatchHasher};
use pasta_curves::{Fp, Fq as Fv};
use rust_gpu_tools::{Device, UniqueId};
use std::convert::TryFrom;
use std::str::FromStr;
use std::thread;
use std::time::Instant;
use structopt::StructOpt;

fn bench_column_building<F: PrimeField + GpuField>(
fn bench_column_building<F: PrimeField + GpuName>(
device: &Device,
log_prefix: &str,
max_column_batch_size: usize,
Expand Down
2 changes: 1 addition & 1 deletion rust-toolchain
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.56.1
1.57.0
19 changes: 8 additions & 11 deletions src/batch_hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,25 @@ use crate::error::{ClError, Error};
use crate::poseidon::SimplePoseidonBatchHasher;
#[cfg(any(feature = "cuda", feature = "opencl"))]
use crate::proteus::gpu::ClBatchHasher;
use crate::{Arity, BatchHasher, Strength, DEFAULT_STRENGTH};
#[cfg(any(feature = "cuda", feature = "opencl"))]
use ec_gpu::GpuField;
use crate::{Arity, BatchHasher, NeptuneField, Strength, DEFAULT_STRENGTH};
use ec_gpu_gen::rust_gpu_tools::Device;
use ff::PrimeField;
use generic_array::GenericArray;
use rust_gpu_tools::Device;

pub enum Batcher<F, A>
where
F: PrimeField,
F: NeptuneField,
A: Arity<F>,
{
Cpu(SimplePoseidonBatchHasher<F, A>),
#[cfg(any(feature = "cuda", feature = "opencl"))]
OpenCl(ClBatchHasher<F, A>),
}

impl<
#[cfg(not(any(feature = "cuda", feature = "opencl")))] F: PrimeField,
#[cfg(any(feature = "cuda", feature = "opencl"))] F: PrimeField + GpuField,
A: Arity<F>,
> Batcher<F, A>
impl<F, A> Batcher<F, A>
where
F: NeptuneField,
A: Arity<F>,
{
/// Create a new CPU batcher.
pub fn new_cpu(max_batch_size: usize) -> Self {
Expand Down Expand Up @@ -100,7 +97,7 @@ where

impl<F, A> BatchHasher<F, A> for Batcher<F, A>
where
F: PrimeField,
F: NeptuneField,
A: Arity<F>,
{
fn hash(&mut self, preimages: &[GenericArray<F, A>]) -> Result<Vec<F>, Error> {
Expand Down
9 changes: 5 additions & 4 deletions src/column_tree_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use crate::batch_hasher::Batcher;
use crate::error::Error;
use crate::poseidon::{Poseidon, PoseidonConstants};
use crate::tree_builder::{TreeBuilder, TreeBuilderTrait};
use crate::{Arity, BatchHasher, Strength};
use crate::{Arity, BatchHasher, Strength, NeptuneField};

use ff::{Field, PrimeField};
use generic_array::{ArrayLength, GenericArray};

Expand All @@ -23,7 +24,7 @@ where

pub struct ColumnTreeBuilder<F, ColumnArity, TreeArity>
where
F: PrimeField,
F: NeptuneField,
ColumnArity: Arity<F>,
TreeArity: Arity<F>,
{
Expand All @@ -39,7 +40,7 @@ where
impl<F, ColumnArity, TreeArity> ColumnTreeBuilderTrait<F, ColumnArity, TreeArity>
for ColumnTreeBuilder<F, ColumnArity, TreeArity>
where
F: PrimeField,
F: NeptuneField,
ColumnArity: Arity<F>,
TreeArity: Arity<F>,
{
Expand Down Expand Up @@ -104,7 +105,7 @@ fn as_generic_arrays<A: Arity<F>, F: PrimeField>(vec: &[F]) -> &[GenericArray<F,

impl<F, ColumnArity, TreeArity> ColumnTreeBuilder<F, ColumnArity, TreeArity>
where
F: PrimeField,
F: NeptuneField,
ColumnArity: Arity<F>,
TreeArity: Arity<F>,
{
Expand Down
11 changes: 9 additions & 2 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,19 @@ pub enum Error {
}

#[cfg(any(feature = "cuda", feature = "opencl"))]
impl From<rust_gpu_tools::GPUError> for Error {
fn from(e: rust_gpu_tools::GPUError) -> Self {
impl From<ec_gpu_gen::rust_gpu_tools::GPUError> for Error {
fn from(e: ec_gpu_gen::rust_gpu_tools::GPUError) -> Self {
Self::GpuError(format!("GPU tools error: {}", e))
}
}

/// Converts an `ec_gpu_gen::EcError` into this crate's [`Error`].
///
/// The source error is rendered through its `Display` implementation and stored, prefixed with
/// `"EC-GPU error: "`, inside the `GpuError` variant. Only compiled when the `cuda` or `opencl`
/// feature is enabled.
#[cfg(any(feature = "cuda", feature = "opencl"))]
impl From<ec_gpu_gen::EcError> for Error {
    fn from(err: ec_gpu_gen::EcError) -> Self {
        let message = format!("EC-GPU error: {}", err);
        Self::GpuError(message)
    }
}

impl error::Error for Error {}

impl fmt::Display for Error {
Expand Down
Loading

0 comments on commit 1c7a4c9

Please sign in to comment.