Merge branch 'master' of github.com:filecoin-project/neptune into halo2-pick-master-changes
argumentcomputer · Jan 13, 2023 · 1c7a4c9 · 1c7a4c9
2 parents fb74454 + 4e29c4c
commit 1c7a4c9
Show file tree

Hide file tree

Showing 22 changed files with 454 additions and 292 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -89,7 +89,7 @@ jobs:
       RUST_LOG: debug
       # Build the kernel only for the single architecture that is used on CI. This should reduce
       # the overall compile-time significantly.
-      NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
+      EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
     steps:
       - set-env-path
       - install-gpu-deps
@@ -122,7 +122,7 @@ jobs:
       RUST_LOG: debug
       # Build the kernel only for the single architecture that is used on CI. This should reduce
       # the overall compile-time significantly.
-      NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
+      EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
     steps:
       - set-env-path
       - install-gpu-deps
@@ -136,7 +136,7 @@ jobs:
       RUST_LOG: debug
       # Build the kernel only for the single architecture that is used on CI. This should reduce
       # the overall compile-time significantly.
-      NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
+      EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
     steps:
       - set-env-path
       - install-gpu-deps

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://book.async.rs/overview
 
 ## Unreleased
 
+## 8.1.0
+- Implement serde for PoseidonConstants (https://github.com/filecoin-project/neptune/pull/165)
+
+## 8.0.0
+- fix: update to newer ec-gpu version (https://github.com/filecoin-project/neptune/pull/164)
+- chore: fix CI MacOS build (https://github.com/filecoin-project/neptune/pull/161)
+
 ## 7.2.0 - 2022-8-9
 - Change write_rate_element to add_rate_element to agree with spec.
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "neptune"
 description = "Poseidon hashing over BLS12-381 for Filecoin."
-version = "7.2.0"
+version = "8.1.0"
 authors = ["porcuquine <[email protected]>"]
 edition = "2018"
 license = "MIT OR Apache-2.0"
@@ -10,39 +10,37 @@ resolver = "2"
 
 [dependencies]
 lazy_static = "1.4.0"
-bellperson = { version = "0.22", default-features = false }
+bellperson = { version = "0.24", default-features = false }
 blake2s_simd = "0.5"
-blstrs = { version = "0.5.0", optional = true }
+blstrs = { version = "0.6.0", optional = true }
 byteorder = "1"
-ec-gpu = { version = "0.1.0", optional = true }
-ec-gpu-gen = { version = "0.3.0", default-features = false, optional = true }
+ec-gpu = { version = "0.2.0", optional = true }
+ec-gpu-gen = { version = "0.5.0", optional = true }
 ff = "0.12.0"
 generic-array = "0.14.4"
 itertools = { version = "0.8.0" }
 log = "0.4.8"
 halo2_proofs = "0.2.0"
 # `halo2_proofs` does not expose the feature `pasta_curves/gpu`, thus we must also import
 # `pasta_curves/gpu` when building with `neptune/pasta`.
-pasta_curves = { version = "0.4.0", optional = true }
+pasta_curves = { version = "0.5.1", features = ["serde"], package = "fil_pasta_curves", optional = true }
 rust-gpu-tools = { version = "0.5.0", default-features = false, optional = true }
+trait-set = "0.3.0"
+serde = { version = "1.0", features = ["derive"] }
 
 [dev-dependencies]
-blstrs = { version = "0.5.0" }
+blstrs = "0.6.1"
 criterion = "0.3"
 rand = "0.8.0"
-sha2 = "0.9"
-tempdir = "0.3"
 rand_xorshift = "0.3.0"
 serde_json = "1.0.53"
+sha2 = "0.9"
 
 [build-dependencies]
-blstrs = "0.5.0"
-ec-gpu = { version = "0.1.0", optional = true }
-ec-gpu-gen = { version = "0.3.0", default-features = false, optional = true }
-execute = "0.2.9"
-hex = "0.4"
-pasta_curves = "0.4.0"
-sha2 = "0.9"
+blstrs = "0.6.1"
+ec-gpu = { version = "0.2.0", optional = true }
+ec-gpu-gen = { version = "0.5.0", optional = true }
+pasta_curves = { version = "0.5.1", package = "fil_pasta_curves" }
 
 [[bench]]
 name = "hash"
@@ -58,8 +56,8 @@ codegen-units = 1
 
 [features]
 default = [ "bellperson/default" ]
-cuda = ["rust-gpu-tools/cuda", "blstrs/gpu", "ec-gpu-gen/cuda", "ec-gpu"]
-opencl = ["rust-gpu-tools/opencl", "blstrs/gpu", "ec-gpu-gen/opencl", "ec-gpu"]
+cuda = ["ec-gpu-gen/cuda", "ec-gpu", "pasta_curves/gpu"]
+opencl = ["ec-gpu-gen/opencl", "ec-gpu", "pasta_curves/gpu"]
 # The supported arities for Poseidon running on the GPU are specified at compile-time.
 arity2 = []
 arity4 = []
@@ -72,7 +70,6 @@ arity36 = []
 strengthened = []
 # Compiles the "even-partial" strength GPU kernel.
 even-partial = []
-wasm = [ "bellperson/wasm" ]
 # The supported fields for Poseidon running on the GPU are specified at compile-time.
 bls = ["blstrs/gpu"]
 pasta = ["pasta_curves/gpu", "even-partial"]

diff --git a/README.md b/README.md
@@ -35,18 +35,14 @@ If you make changes to the spec in `neptune`, you must make those same changes t
 
 ## Environment variables
 
- - `NEPTUNE_DEFAULT_GPU=<unique-id>` allows you to select the default GPU that tree-builder is going to run on given its unique ID.
+ - `EC_GPU_FRAMEWORK=<cuda | opencl>` allows you to select whether the CUDA or OpenCL implementation should be used. If not set, `cuda` will be used if available.
 
-(The unique ID is the UUID or the hexadecimal Bus-ID that can be found through `nvidia-smi`, `rocm-smi`, `lspci` and etc.)
+ - `EC_GPU_CUDA_NVCC_ARGS`
 
- - `NEPTUNE_GPU_FRAMEWORK=<cuda | opencl>` allows to select whether the CUDA or OpenCL implementation should be used. If not set, `cuda` will be used if available.
-
- - `NEPTUNE_CUDA_NVCC_ARGS`
-
-By default the CUDA kernel is compiled for several architectures, which may take a long time. `BELLMAN_CUDA_NVCC_ARGS` can be used to override those arguments. The input and output file will still be automatically set.
+By default the CUDA kernel is compiled for several architectures, which may take a long time. `EC_GPU_CUDA_NVCC_ARGS` can be used to override those arguments. The input and output files will still be set automatically.
 
     // Example for compiling the kernel for only the Turing architecture
-    NEPTUNE_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75"
+    EC_GPU_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75"
 
 ## Rust feature flags
 

diff --git a/build.rs b/build.rs
@@ -1,84 +1,13 @@
-/// The build script is needed to compile the CUDA kernel.
-
-#[cfg(feature = "cuda")]
+/// The build script is used to generate the CUDA kernel and OpenCL source at compile-time, if the
+/// `cuda` and/or `opencl` feature is enabled.
+#[cfg(any(feature = "cuda", feature = "opencl"))]
 fn main() {
-    use std::path::PathBuf;
-    use std::process::Command;
-    use std::{env, fs};
-
-    use ec_gpu_gen::Limb32;
-    use sha2::{Digest, Sha256};
-
     #[path = "src/proteus/sources.rs"]
     mod sources;
 
-    let kernel_source = sources::generate_program::<Limb32>();
-    let out_dir = env::var("OUT_DIR").expect("OUT_DIR was not set.");
-
-    // Make it possible to override the default options. Though the source and output file is
-    // always set automatically.
-    let mut nvcc = match env::var("NEPTUNE_CUDA_NVCC_ARGS") {
-        Ok(args) => execute::command(format!("nvcc {}", args)),
-        Err(_) => {
-            let mut command = Command::new("nvcc");
-            command
-                .arg("--optimize=6")
-                // Compile with as many threads as CPUs are available.
-                .arg("--threads=0")
-                .arg("--fatbin")
-                .arg("--gpu-architecture=sm_86")
-                .arg("--generate-code=arch=compute_86,code=sm_86")
-                .arg("--generate-code=arch=compute_80,code=sm_80")
-                .arg("--generate-code=arch=compute_75,code=sm_75");
-            command
-        }
-    };
-
-    // Hash the source and and the compile flags. Use that as the filename, so that the kernel is
-    // only rebuilt if any of them change.
-    let mut hasher = Sha256::new();
-    hasher.update(kernel_source.as_bytes());
-    hasher.update(&format!("{:?}", &nvcc));
-    let kernel_digest = hex::encode(hasher.finalize());
-
-    let source_path: PathBuf = [&out_dir, &format!("{}.cu", &kernel_digest)]
-        .iter()
-        .collect();
-    let fatbin_path: PathBuf = [&out_dir, &format!("{}.fatbin", &kernel_digest)]
-        .iter()
-        .collect();
-
-    fs::write(&source_path, &kernel_source).unwrap_or_else(|_| {
-        panic!(
-            "Cannot write kernel source at {}.",
-            source_path.to_str().unwrap()
-        )
-    });
-
-    // Only compile if the output doesn't exist yet.
-    if !fatbin_path.as_path().exists() {
-        let status = nvcc
-            .arg("--output-file")
-            .arg(&fatbin_path)
-            .arg(&source_path)
-            .status()
-            .expect("Cannot run nvcc.");
-
-        if !status.success() {
-            panic!(
-                "nvcc failed. See the kernel source at {}",
-                source_path.to_str().unwrap()
-            );
-        }
-    }
-
-    // The idea to put the path to the farbin into a compile-time env variable is from
-    // https://github.com/LutzCle/fast-interconnects-demo/blob/b80ea8e04825167f486ab8ac1b5d67cf7dd51d2c/rust-demo/build.rs
-    println!(
-        "cargo:rustc-env=CUDA_FATBIN={}",
-        fatbin_path.to_str().unwrap()
-    );
+    let source_builder = sources::generate_program();
+    ec_gpu_gen::generate(&source_builder);
 }
 
-#[cfg(not(feature = "cuda"))]
+#[cfg(not(any(feature = "cuda", feature = "opencl")))]
 fn main() {}
diff --git a/gbench/Cargo.toml b/gbench/Cargo.toml
@@ -8,21 +8,21 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 lazy_static = "1.4.0"
-bellperson = { version = "0.22.0", default-features = false }
+bellperson = { version = "0.24.0", default-features = false }
 blake2s_simd = "0.5"
 byteorder = "1"
 env_logger = "0.7.1"
 ff = "0.12.0"
 generic-array = "0.14.4"
 log = "0.4.8"
 neptune = { path = "../", default-features = false, features = ["arity8", "arity11", "bls", "pasta"] }
-rust-gpu-tools = { version = "0.5.0", default-features = false, optional = true }
 structopt = { version = "0.3", default-features = false }
-blstrs = { version = "0.5.0", features = ["gpu"] }
-pasta_curves = { version = "0.4.0", features = ["gpu"] }
-ec-gpu = "0.1.0"
+blstrs = { version = "0.6.1", features = ["gpu"] }
+pasta_curves = { version = "0.5.1", features = ["gpu"], package = "fil_pasta_curves" }
+ec-gpu = "0.2.0"
+ec-gpu-gen = "0.5.0"
 
 [features]
 default = ["opencl"]
-cuda = ["neptune/cuda", "bellperson/cuda", "rust-gpu-tools/cuda"]
-opencl = ["neptune/opencl", "bellperson/opencl", "rust-gpu-tools/opencl"]
+cuda = ["neptune/cuda", "bellperson/cuda", "ec-gpu-gen/cuda"]
+opencl = ["neptune/opencl", "bellperson/opencl", "ec-gpu-gen/opencl"]
diff --git a/gbench/src/main.rs b/gbench/src/main.rs
@@ -1,5 +1,6 @@
 use blstrs::Scalar as Fr;
-use ec_gpu::GpuField;
+use ec_gpu::GpuName;
+use ec_gpu_gen::rust_gpu_tools::{Device, UniqueId};
 use ff::PrimeField;
 use generic_array::sequence::GenericSequence;
 use generic_array::typenum::{U11, U8};
@@ -8,14 +9,13 @@ use log::info;
 use neptune::column_tree_builder::{ColumnTreeBuilder, ColumnTreeBuilderTrait};
 use neptune::{batch_hasher::Batcher, BatchHasher};
 use pasta_curves::{Fp, Fq as Fv};
-use rust_gpu_tools::{Device, UniqueId};
 use std::convert::TryFrom;
 use std::str::FromStr;
 use std::thread;
 use std::time::Instant;
 use structopt::StructOpt;
 
-fn bench_column_building<F: PrimeField + GpuField>(
+fn bench_column_building<F: PrimeField + GpuName>(
     device: &Device,
     log_prefix: &str,
     max_column_batch_size: usize,

diff --git a/rust-toolchain b/rust-toolchain
@@ -1 +1 @@
-1.56.1
+1.57.0
diff --git a/src/batch_hasher.rs b/src/batch_hasher.rs
@@ -6,28 +6,25 @@ use crate::error::{ClError, Error};
 use crate::poseidon::SimplePoseidonBatchHasher;
 #[cfg(any(feature = "cuda", feature = "opencl"))]
 use crate::proteus::gpu::ClBatchHasher;
-use crate::{Arity, BatchHasher, Strength, DEFAULT_STRENGTH};
-#[cfg(any(feature = "cuda", feature = "opencl"))]
-use ec_gpu::GpuField;
+use crate::{Arity, BatchHasher, NeptuneField, Strength, DEFAULT_STRENGTH};
+use ec_gpu_gen::rust_gpu_tools::Device;
 use ff::PrimeField;
 use generic_array::GenericArray;
-use rust_gpu_tools::Device;
 
 pub enum Batcher<F, A>
 where
-    F: PrimeField,
+    F: NeptuneField,
     A: Arity<F>,
 {
     Cpu(SimplePoseidonBatchHasher<F, A>),
     #[cfg(any(feature = "cuda", feature = "opencl"))]
     OpenCl(ClBatchHasher<F, A>),
 }
 
-impl<
-        #[cfg(not(any(feature = "cuda", feature = "opencl")))] F: PrimeField,
-        #[cfg(any(feature = "cuda", feature = "opencl"))] F: PrimeField + GpuField,
-        A: Arity<F>,
-    > Batcher<F, A>
+impl<F, A> Batcher<F, A>
+where
+    F: NeptuneField,
+    A: Arity<F>,
 {
     /// Create a new CPU batcher.
     pub fn new_cpu(max_batch_size: usize) -> Self {
@@ -100,7 +97,7 @@ where
 
 impl<F, A> BatchHasher<F, A> for Batcher<F, A>
 where
-    F: PrimeField,
+    F: NeptuneField,
     A: Arity<F>,
 {
     fn hash(&mut self, preimages: &[GenericArray<F, A>]) -> Result<Vec<F>, Error> {

diff --git a/src/column_tree_builder.rs b/src/column_tree_builder.rs
@@ -2,7 +2,8 @@ use crate::batch_hasher::Batcher;
 use crate::error::Error;
 use crate::poseidon::{Poseidon, PoseidonConstants};
 use crate::tree_builder::{TreeBuilder, TreeBuilderTrait};
-use crate::{Arity, BatchHasher, Strength};
+use crate::{Arity, BatchHasher, Strength, NeptuneField};
+
 use ff::{Field, PrimeField};
 use generic_array::{ArrayLength, GenericArray};
 
@@ -23,7 +24,7 @@ where
 
 pub struct ColumnTreeBuilder<F, ColumnArity, TreeArity>
 where
-    F: PrimeField,
+    F: NeptuneField,
     ColumnArity: Arity<F>,
     TreeArity: Arity<F>,
 {
@@ -39,7 +40,7 @@ where
 impl<F, ColumnArity, TreeArity> ColumnTreeBuilderTrait<F, ColumnArity, TreeArity>
     for ColumnTreeBuilder<F, ColumnArity, TreeArity>
 where
-    F: PrimeField,
+    F: NeptuneField,
     ColumnArity: Arity<F>,
     TreeArity: Arity<F>,
 {
@@ -104,7 +105,7 @@ fn as_generic_arrays<A: Arity<F>, F: PrimeField>(vec: &[F]) -> &[GenericArray<F,
 
 impl<F, ColumnArity, TreeArity> ColumnTreeBuilder<F, ColumnArity, TreeArity>
 where
-    F: PrimeField,
+    F: NeptuneField,
     ColumnArity: Arity<F>,
     TreeArity: Arity<F>,
 {

diff --git a/src/error.rs b/src/error.rs
@@ -54,12 +54,19 @@ pub enum Error {
 }
 
 #[cfg(any(feature = "cuda", feature = "opencl"))]
-impl From<rust_gpu_tools::GPUError> for Error {
-    fn from(e: rust_gpu_tools::GPUError) -> Self {
+impl From<ec_gpu_gen::rust_gpu_tools::GPUError> for Error {
+    fn from(e: ec_gpu_gen::rust_gpu_tools::GPUError) -> Self {
         Self::GpuError(format!("GPU tools error: {}", e))
     }
 }
 
+#[cfg(any(feature = "cuda", feature = "opencl"))]
+impl From<ec_gpu_gen::EcError> for Error {
+    fn from(e: ec_gpu_gen::EcError) -> Self {
+        Self::GpuError(format!("EC-GPU error: {}", e))
+    }
+}
+
 impl error::Error for Error {}
 
 impl fmt::Display for Error {