diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a984dd9..f5200a1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -17,6 +17,25 @@ jobs: - uses: actions-rust-lang/setup-rust-toolchain@v1 - run: cargo build --verbose - run: cargo test --verbose --all + + build_old: + name: cargo build and test (packed_simd) + strategy: + matrix: + # Needs big runners to run the tests + # Only macos-13-xlarge is Apple Silicon, as per: + # https://docs.github.com/en/actions/using-github-hosted-runners/about-larger-runners/about-larger-runners#about-macos-larger-runners + os: [ubuntu-22.04-github-hosted-16core, macos-13-xlarge] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: nightly-2023-05-31 + + # Still compile with the old Rust nightly and packed_simd - until we have a good replacement in poseidon. + - run: RUSTFLAGS=-Awarnings cargo +nightly-2023-05-31 build --features include_packed_simd + - run: RUSTFLAGS=-Awarnings cargo +nightly-2023-05-31 test --features include_packed_simd formatting: name: cargo fmt diff --git a/.gitignore b/.gitignore index d54047a..2460227 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ /target /profiling-target/target /Cargo.lock + +.idea/ \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 3d658ff..e44d1ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ itertools = "0.10" blake2 = "0.10" sha2 = "0.10" num-modular = "0.5.1" -packed_simd = { version = "0.3.9" } +packed_simd = { version = "0.3.9", optional = true } pairing = { package = "pairing_ce", git = "https://github.com/matter-labs/pairing.git" } crypto-bigint = "0.5" convert_case = "*" @@ -52,4 +52,11 @@ lto = "fat" opt-level = 3 [features] +# If enabled, logs will use the `tracing` crate; if disabled, they will be printed to stdout. log_tracing = ["tracing"] +# packed_simd no longer works with the newest nightly, but we keep it as a feature: +# not all of the code has been migrated yet, and some people might want to use an +# older Rust nightly to gain some performance.
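+# Example: RUSTFLAGS=-Awarnings cargo +nightly-2023-05-31 build --features include_packed_simd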
+include_packed_simd = ["packed_simd"] +cr_paranoia_mode = [] +debug_track = [] \ No newline at end of file diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 6b48c00..a671fa6 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "nightly-2023-06-25" +channel = "nightly-2024-05-07" diff --git a/src/cs/implementations/setup.rs b/src/cs/implementations/setup.rs index 104cd8e..979f816 100644 --- a/src/cs/implementations/setup.rs +++ b/src/cs/implementations/setup.rs @@ -127,8 +127,11 @@ impl< let required_rows = num_used_rows + 1; assert!(required_rows <= self.max_trace_len); - dbg!(required_rows); - dbg!(self.lookups_tables_total_len()); + log!("required_rows = {:?}", required_rows); + log!( + "lookups_tables_total_len = {}", + self.lookups_tables_total_len() + ); let required_rows = std::cmp::max(required_rows, self.lookups_tables_total_len()); @@ -185,12 +188,27 @@ impl< .max() .unwrap_or(0); - dbg!(required_size); - dbg!(max_copiable_in_specialized_columns); - dbg!(max_witnesses_in_general_purpose_columns); - dbg!(max_witnesses_in_specialized_columns); - dbg!(max_constants_for_general_purpose_gates); - dbg!(max_in_column_for_specialized_gates); + log!("required_size = {}", required_size); + log!( + "max_copiable_in_specialized_columns = {}", + max_copiable_in_specialized_columns + ); + log!( + "max_witnesses_in_general_purpose_columns = {}", + max_witnesses_in_general_purpose_columns + ); + log!( + "max_witnesses_in_specialized_columns = {}", + max_witnesses_in_specialized_columns + ); + log!( + "max_constants_for_general_purpose_gates = {}", + max_constants_for_general_purpose_gates + ); + log!( + "max_in_column_for_specialized_gates = {}", + max_in_column_for_specialized_gates + ); assert!(max_constants_for_general_purpose_gates <= required_size); @@ -312,7 +330,7 @@ impl< dst.resize(precise_required_size, F::ZERO); } - dbg!(precise_required_size); + log!("precise_required_size = {}", precise_required_size); self.max_trace_len = precise_required_size; finalization_hints.final_trace_len = precise_required_size; @@ -729,7 +747,7 @@ impl< let extra_polys_for_selectors = number_of_constant_polys_for_general_purpose_gates - self.parameters.num_constant_columns; - dbg!(extra_polys_for_selectors); + log!("extra_polys_for_selectors = {}", extra_polys_for_selectors); let quotient_degree_from_constraits = if max_constraint_contribution_degree > 0 { max_constraint_contribution_degree - 1 @@ -1037,7 +1055,7 @@ impl< let (constant_columns, selectors_placement, min_degree) = self.create_constant_setup_polys(worker); - dbg!(min_degree); + log!("min_degree = {}", min_degree); let (_, total_num_constants_for_gates_over_general_purpose_columns) = selectors_placement.compute_stats(); diff --git a/src/cs/traits/cs.rs b/src/cs/traits/cs.rs index da3b51d..5d4d786 100644 --- a/src/cs/traits/cs.rs +++ b/src/cs/traits/cs.rs @@ -20,7 +20,7 @@ impl<'set, 'tgt: 'set, T: SmallField> DstBuffer<'set, 'tgt, T> { *offset += 1; } DstBuffer::MutSliceIndirect(dst, debug_track, offset) => { - if cfg!(debug_track) && *debug_track { + if cfg!(feature = "debug_track") && *debug_track { log!(" set out {} <- {}", *offset, value.as_raw_u64()) } diff --git a/src/dag/guide.rs b/src/dag/guide.rs index 0eba95b..47a6315 100644 --- a/src/dag/guide.rs +++ b/src/dag/guide.rs @@ -384,7 +384,7 @@ impl<'a, T: Copy + Debug, F: SmallField, Cfg: CSResolverConfig> GuideOrder<'a, T pos += span.buffer.len(); } - if cfg!(cr_paranoia_mode) && self.guide.tracing { + if cfg!(feature = 
"cr_paranoia_mode") && self.guide.tracing { log!( "Released span {}: {:?}", self.guide.spans[0].id.0, @@ -684,7 +684,7 @@ impl BufferGuide { } pub(crate) fn flush(&mut self) -> BufferGuideFinalization<'_, T, F, Cfg> { - if cfg!(cr_paranoia_mode) && self.tracing { + if cfg!(feature = "cr_paranoia_mode") && self.tracing { log!("CRG: flush."); } diff --git a/src/dag/resolver_box.rs b/src/dag/resolver_box.rs index 397d551..d0f7dd8 100644 --- a/src/dag/resolver_box.rs +++ b/src/dag/resolver_box.rs @@ -424,7 +424,7 @@ pub(crate) fn invocation_binder( // Safety: This is the actual type of the provided function. let bound = resolver.resolve_fn::(); - if (cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA) && false { + if (cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA) && false { log!( "Ivk: Ins [{}], Out [{}], Out-addr [{}], Thread [{}]", resolver @@ -448,7 +448,10 @@ pub(crate) fn invocation_binder( ) } - if (cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA) && debug_track && false { + if (cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA) + && debug_track + && false + { log!( "Ivk: provided inputs:\n - {:?}", ins.iter().map(|x| x.as_raw_u64()).collect_vec() @@ -457,7 +460,10 @@ pub(crate) fn invocation_binder( bound(ins, &mut DstBuffer::MutSliceIndirect(out, debug_track, 0)); - if (cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA) && debug_track && true { + if (cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA) + && debug_track + && true + { log!( "Ivk: calculated outputs:\n - {:?}", out.iter().map(|x| x.as_raw_u64()).collect_vec() diff --git a/src/dag/resolvers/mt/mod.rs b/src/dag/resolvers/mt/mod.rs index cad5822..8de30f5 100644 --- a/src/dag/resolvers/mt/mod.rs +++ b/src/dag/resolvers/mt/mod.rs @@ -169,7 +169,7 @@ impl, CFG: CSResolverConfig> let debug_track = vec![]; - if cfg!(cr_paranoia_mode) || PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || PARANOIA { log!("Contains tracked keys {:?} ", debug_track); } @@ -269,7 +269,7 @@ impl, CFG: CSResolverConfig> self.sorter.write_sequence(); - if cfg!(cr_paranoia_mode) || PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || PARANOIA { log!("CR {:?}", unsafe { self.common.awaiters_broker.stats.u_deref() }); @@ -1487,7 +1487,7 @@ mod test { storage.wait_till_resolved(); - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { log!("Test: total value result: \n - {}", unsafe { (*storage.common.values.get()) .variables @@ -1509,7 +1509,7 @@ mod test { let act = Place::from_variable(Variable::from_variable_index(ix as u64)) .to(|x| storage.get_value_unchecked(x)); - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { log!("Test: per item value: ix {}, value {}", ix, act); } @@ -1542,7 +1542,7 @@ mod test { storage.wait_till_resolved(); - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { log!("Test: total value result: \n - {}", unsafe { (*storage.common.values.get()) .variables @@ -1564,7 +1564,7 @@ mod test { let act = Place::from_variable(Variable::from_variable_index(ix as u64)) .to(|x| storage.get_value_unchecked(x)); - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { log!("Test: per item value: ix {}, value {}", ix, act); } diff --git a/src/dag/resolvers/mt/registrar.rs b/src/dag/resolvers/mt/registrar.rs index 3c43c6b..257703f 100644 --- a/src/dag/resolvers/mt/registrar.rs +++ b/src/dag/resolvers/mt/registrar.rs @@ -116,7 +116,7 @@ impl Registrar { } 
pub(crate) fn is_empty(&self) -> bool { - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { log!( "CRR: total remaining resolvers: {}", self.vars.values().map(|x| x.len()).sum::<usize>() ) diff --git a/src/dag/resolvers/mt/resolution_window.rs b/src/dag/resolvers/mt/resolution_window.rs index 9efe2cb..60d1d23 100644 --- a/src/dag/resolvers/mt/resolution_window.rs +++ b/src/dag/resolvers/mt/resolution_window.rs @@ -163,8 +163,12 @@ impl + 'static> comms, track_list: Vec::new(), - execution_list: if cfg!(cr_paranoia_mode) { 1 << 26 } else { 0 } - .to(|x| Vec::with_capacity(x).op(|v| v.resize(x, 0))), + execution_list: if cfg!(feature = "cr_paranoia_mode") { + 1 << 26 + } else { + 0 + } + .to(|x| Vec::with_capacity(x).op(|v| v.resize(x, 0))), phantom: PhantomData, }; @@ -207,7 +211,7 @@ impl + 'static> data[data_ix].push(order_ix.into(), task.order_info.value); - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { self.execution_list[order_ix] += 1; if self.execution_list[order_ix] > 1 { @@ -238,7 +242,7 @@ impl + 'static> } } - if (cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA) && true { + if (cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA) && true { log!("RW: Batch! {} tasks.", count); } @@ -264,7 +268,7 @@ impl + 'static> .for_each(|x| { x.state = ResolverState::Done; - if cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA { unsafe { let r = self.common.resolvers.u_deref().get(x.order_info.value); @@ -291,7 +295,7 @@ impl + 'static> } }); - if cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA { if self .exec_order_buffer .iter() @@ -343,7 +347,7 @@ impl + 'static> drop(awaiters); - if cfg!(cr_paranoia_mode) && count > 0 { + if cfg!(feature = "cr_paranoia_mode") && count > 0 { log!( "RW: Shifted by {}, new range is: {}..{}, buffer len: {}", count, @@ -412,7 +416,7 @@ impl + 'static> self.stats.total_consumption = extend_to as u64; - if crate::dag::resolvers::mt::PARANOIA || cfg!(cr_paranoia_mode) { + if crate::dag::resolvers::mt::PARANOIA || cfg!(feature = "cr_paranoia_mode") { log!( "RW: Extended range by {}, new range {}..{}", extend_to, @@ -474,7 +478,7 @@ impl + 'static> } } - if crate::dag::resolvers::mt::PARANOIA || cfg!(cr_paranoia_mode) { + if crate::dag::resolvers::mt::PARANOIA || cfg!(feature = "cr_paranoia_mode") { log!("[{:?}] RW: Exit conditions met.", std::time::Instant::now()) } @@ -484,7 +488,7 @@ impl + 'static> self.stats.total_time = start_instant.elapsed(); - if cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA { log!("CR {:#?}", self.stats); log!("CR {:#?}", unsafe { &*self.channel.stats.get() }); @@ -554,7 +558,7 @@ impl, const SIZE: usize> // here, as this is an unsynchronized access.
let resolver = this.common.resolvers.u_deref().get(*resolver_ix); - if cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA { std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { this.invoke(resolver, *order_ix); @@ -590,7 +594,7 @@ impl, const SIZE: usize> }); } - if cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA { log!( "{}\n{:#?}\n{:#?}", std::thread::current().name().unwrap_or_default(), @@ -629,7 +633,7 @@ impl, const SIZE: usize> .map(|x| { let (vs, md) = self.common.values.u_deref().get_item_ref(*x); - if cfg!(cr_paranoia_mode) || true { + if cfg!(feature = "cr_paranoia_mode") || true { if Cfg::ASSERT_TRACKED_VALUES { assert!(md.is_tracked()); } @@ -678,7 +682,7 @@ impl, const SIZE: usize> let mut track = false; - if cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA { if let Some(x) = self .debug_track .iter() @@ -831,7 +835,7 @@ impl LockStepChannel { fn execute(&self) { use std::sync::atomic::Ordering::*; - if (cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA) && false { + if (cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA) && false { log!("RW: batch sent {:#?}", unsafe { self.data.u_deref() }); } diff --git a/src/dag/resolvers/mt/sorters/sorter_live.rs b/src/dag/resolvers/mt/sorters/sorter_live.rs index c0c1298..6b1e423 100644 --- a/src/dag/resolvers/mt/sorters/sorter_live.rs +++ b/src/dag/resolvers/mt/sorters/sorter_live.rs @@ -191,7 +191,7 @@ impl } } - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { // This ugly block checks that the calculated parallelism is // correct. It's a bit slower than O(n^2). Also note that it // checks only the last 1050 items, so it's not a full check, @@ -297,7 +297,7 @@ impl ResolverS } fn set_value(&mut self, key: crate::cs::Place, value: F) { - if (cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA) + if (cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA) && self.debug_track.contains(&key) && false { @@ -378,7 +378,7 @@ impl ResolverS let mut hit = false; - if (cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA) && true { + if (cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA) && true { if let Some(x) = self.debug_track.iter().find(|x| inputs.contains(x)) { log!("CR: added resolution with tracked input {:?}", x); @@ -498,7 +498,7 @@ impl ResolverS outputs: &[Place], added_at: RegistrationNum, ) -> Vec { - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { if let Some(x) = self.debug_track.iter().find(|x| inputs.contains(x)) { log!("CR: internalized resolution with tracked input {:?}", x); } @@ -519,7 +519,7 @@ impl ResolverS let deps = inputs.iter().map(|x| &values.get_item_ref(*x).1); - if cfg!(cr_paranoia_mode) { + if cfg!(feature = "cr_paranoia_mode") { debug_assert!( deps.clone().all(|x| { x.is_tracked() }), "Attempting to internalize a resolution with an untracked input. All inputs must be tracked."
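For context on the recurring change in these hunks: `cfg!(cr_paranoia_mode)` tests a raw compiler cfg flag, which is only set by passing `RUSTFLAGS="--cfg cr_paranoia_mode"`, so under a plain `cargo build` it silently evaluates to false; `cfg!(feature = "cr_paranoia_mode")` tests the Cargo feature that this PR declares in `Cargo.toml`. A minimal sketch of the difference (the helper function is illustrative, not part of the PR):

    fn paranoia_enabled() -> bool {
        // Old form: only true when built with RUSTFLAGS="--cfg cr_paranoia_mode";
        // `cargo build --features cr_paranoia_mode` left it false.
        // cfg!(cr_paranoia_mode)

        // New form: true when built with `cargo build --features cr_paranoia_mode`.
        cfg!(feature = "cr_paranoia_mode")
    }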
@@ -610,14 +610,14 @@ impl ResolverS self.record.values_count = unsafe { self.common.values.u_deref().max_tracked + 1 } as usize; self.record.registrations_count = self.stats.registrations_added as usize; - if cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA { log!( "CR: Final order written. Order len {}", self.common.exec_order.lock().unwrap().items.len() ); } - if cfg!(cr_paranoia_mode) || crate::dag::resolvers::mt::PARANOIA { + if cfg!(feature = "cr_paranoia_mode") || crate::dag::resolvers::mt::PARANOIA { self.guide.stats.finalize(); log!("CR {:?}", self.guide.stats); diff --git a/src/field/goldilocks/arm_asm_impl.rs b/src/field/goldilocks/arm_asm_impl.rs index 03399c4..369b881 100644 --- a/src/field/goldilocks/arm_asm_impl.rs +++ b/src/field/goldilocks/arm_asm_impl.rs @@ -2,8 +2,10 @@ use crate::cs::implementations::utils::precompute_twiddles_for_fft; use crate::cs::traits::GoodAllocator; use crate::field::{Field, PrimeField}; use crate::worker::Worker; -use packed_simd::shuffle; +use std::intrinsics::simd::simd_shuffle; use std::ops::{Add, BitOr, Sub}; +use std::simd::cmp::{SimdPartialEq, SimdPartialOrd}; +use std::simd::{u64x4, u64x8}; use std::usize; use super::GoldilocksField; @@ -17,7 +19,7 @@ pub struct MixedGL(pub [GoldilocksField; 16]); // we also need holder for SIMD targets, because u64x4 has smaller alignment than u64x8 #[derive(Clone, Copy)] #[repr(C, align(64))] -struct U64x4Holder([packed_simd::u64x4; 4]); +struct U64x4Holder([u64x4; 4]); impl std::fmt::Debug for MixedGL { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -38,8 +40,8 @@ impl MixedGL { pub const T: u64 = (Self::ORDER - 1) >> Self::TWO_ADICITY; pub const BARRETT: u128 = 18446744078004518912; // 0x10000000100000000 pub const EPSILON: u64 = (1 << 32) - 1; - pub const EPSILON_VECTOR: packed_simd::u64x4 = packed_simd::u64x4::splat(Self::EPSILON); - pub const EPSILON_VECTOR_D: packed_simd::u64x8 = packed_simd::u64x8::splat(Self::EPSILON); + pub const EPSILON_VECTOR: u64x4 = u64x4::from_array([Self::EPSILON; 4]); + pub const EPSILON_VECTOR_D: u64x8 = u64x8::from_array([Self::EPSILON; 8]); #[inline(always)] pub fn new() -> Self { @@ -64,7 +66,7 @@ impl MixedGL { for i in 0..4 { let a = a_u64.0[i]; let a_reduced = a.add(Self::EPSILON_VECTOR); - let cmp = a_reduced.lt(Self::EPSILON_VECTOR); + let cmp = a_reduced.simd_lt(Self::EPSILON_VECTOR); let res = cmp.select(a_reduced, a); a_u64.0[i] = res; @@ -108,13 +110,13 @@ impl MixedGL { let b = b_u64.0[i]; //additional reduction over b let b_reduced = b.add(Self::EPSILON_VECTOR); - let cmp = b_reduced.lt(Self::EPSILON_VECTOR); + let cmp = b_reduced.simd_lt(Self::EPSILON_VECTOR); let b = cmp.select(b_reduced, b); //a+b let sum = a.add(b); let sum_reduced = sum.add(Self::EPSILON_VECTOR); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(a); + let cmp0 = sum_reduced.simd_lt(sum); + let cmp1 = sum.simd_lt(a); let reduce_flag = cmp0.bitor(cmp1); let res = reduce_flag.select(sum_reduced, sum); @@ -139,12 +141,12 @@ impl MixedGL { let b = b_u64.0[i]; //additional reduction over b let b_reduced = b.add(Self::EPSILON_VECTOR); - let cmp = b_reduced.lt(Self::EPSILON_VECTOR); + let cmp = b_reduced.simd_lt(Self::EPSILON_VECTOR); let b = cmp.select(b_reduced, b); //a-b let diff = a.sub(b); let diff_reduced = diff.sub(Self::EPSILON_VECTOR); - let cmp = a.lt(b); + let cmp = a.simd_lt(b); let res = cmp.select(diff_reduced, diff); a_u64.0[i] = res; @@ -159,27 +161,28 @@ 
impl MixedGL { pub unsafe fn butterfly_1x1_impl(&mut self) -> &mut Self { let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); - let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 2, 4, 6, 8, 10, 12, 14]); - let v: packed_simd::u64x8 = shuffle!(part1, part2, [1, 3, 5, 7, 9, 11, 13, 15]); + + let u: u64x8 = simd_shuffle(part1, part2, const { [0u32, 2, 4, 6, 8, 10, 12, 14] }); + let v: u64x8 = simd_shuffle(part1, part2, const { [1u32, 3, 5, 7, 9, 11, 13, 15] }); //additional reduction over v let v_reduced = v.add(Self::EPSILON_VECTOR_D); - let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); + let cmp = v_reduced.simd_lt(Self::EPSILON_VECTOR_D); let v = cmp.select(v_reduced, v); // u + v let sum = u.add(v); let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(u); + let cmp0 = sum_reduced.simd_lt(sum); + let cmp1 = sum.simd_lt(u); let reduce_flag = cmp0.bitor(cmp1); let res1 = reduce_flag.select(sum_reduced, sum); // u - v let diff = u.sub(v); let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); - let cmp = u.lt(v); + let cmp = u.simd_lt(v); let res2 = cmp.select(diff_reduced, diff); - let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 8, 1, 9, 2, 10, 3, 11]); - let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 12, 5, 13, 6, 14, 7, 15]); + let part1: u64x8 = simd_shuffle(res1, res2, const { [0u32, 8, 1, 9, 2, 10, 3, 11] }); + let part2: u64x8 = simd_shuffle(res1, res2, const { [4u32, 12, 5, 13, 6, 14, 7, 15] }); *self = MixedGL::from_u64x8_arrays([part1, part2]); @@ -188,27 +191,27 @@ impl MixedGL { pub unsafe fn butterfly_2x2_impl(&mut self) -> &mut Self { let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); - let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 1, 4, 5, 8, 9, 12, 13]); - let v: packed_simd::u64x8 = shuffle!(part1, part2, [2, 3, 6, 7, 10, 11, 14, 15]); + let u: u64x8 = simd_shuffle(part1, part2, const { [0u32, 1, 4, 5, 8, 9, 12, 13] }); + let v: u64x8 = simd_shuffle(part1, part2, const { [2u32, 3, 6, 7, 10, 11, 14, 15] }); //additional reduction over v let v_reduced = v.add(Self::EPSILON_VECTOR_D); - let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); + let cmp = v_reduced.simd_lt(Self::EPSILON_VECTOR_D); let v = cmp.select(v_reduced, v); // u + v let sum = u.add(v); let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(u); + let cmp0 = sum_reduced.simd_lt(sum); + let cmp1 = sum.simd_lt(u); let reduce_flag = cmp0.bitor(cmp1); let res1 = reduce_flag.select(sum_reduced, sum); // u - v let diff = u.sub(v); let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); - let cmp = u.lt(v); + let cmp = u.simd_lt(v); let res2 = cmp.select(diff_reduced, diff); - let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 1, 8, 9, 2, 3, 10, 11]); - let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 5, 12, 13, 6, 7, 14, 15]); + let part1: u64x8 = simd_shuffle(res1, res2, const { [0u32, 1, 8, 9, 2, 3, 10, 11] }); + let part2: u64x8 = simd_shuffle(res1, res2, const { [4u32, 5, 12, 13, 6, 7, 14, 15] }); *self = MixedGL::from_u64x8_arrays([part1, part2]); @@ -217,27 +220,27 @@ impl MixedGL { pub unsafe fn butterfly_4x4_impl(&mut self) -> &mut Self { let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); - let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 1, 2, 3, 8, 9, 10, 11]); - let v: packed_simd::u64x8 = shuffle!(part1, part2, [4, 5, 6, 7, 12, 13, 14, 15]); + let u: u64x8 = simd_shuffle(part1, part2, const { [0u32, 1, 2, 3, 8, 9, 10, 11] }); + let v: u64x8 = 
simd_shuffle(part1, part2, const { [4u32, 5, 6, 7, 12, 13, 14, 15] }); //additional reduction over v let v_reduced = v.add(Self::EPSILON_VECTOR_D); - let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); + let cmp = v_reduced.simd_lt(Self::EPSILON_VECTOR_D); let v = cmp.select(v_reduced, v); // u + v let sum = u.add(v); let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(u); + let cmp0 = sum_reduced.simd_lt(sum); + let cmp1 = sum.simd_lt(u); let reduce_flag = cmp0.bitor(cmp1); let res1 = reduce_flag.select(sum_reduced, sum); // u - v let diff = u.sub(v); let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); - let cmp = u.lt(v); + let cmp = u.simd_lt(v); let res2 = cmp.select(diff_reduced, diff); - let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 1, 2, 3, 8, 9, 10, 11]); - let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 5, 6, 7, 12, 13, 14, 15]); + let part1: u64x8 = simd_shuffle(res1, res2, const { [0u32, 1, 2, 3, 8, 9, 10, 11] }); + let part2: u64x8 = simd_shuffle(res1, res2, const { [4u32, 5, 6, 7, 12, 13, 14, 15] }); *self = MixedGL::from_u64x8_arrays([part1, part2]); @@ -256,27 +259,27 @@ impl MixedGL { let u = std::slice::from_raw_parts_mut(this as *mut u64, 8); let v = std::slice::from_raw_parts_mut(other as *mut u64, 8); - let a = packed_simd::u64x8::from_slice_aligned(u); - let b = packed_simd::u64x8::from_slice_aligned(v); + let a = u64x8::from_slice(u); + let b = u64x8::from_slice(v); //additional reduction over b let b_reduced = b.add(Self::EPSILON_VECTOR_D); - let cmp = b_reduced.lt(Self::EPSILON_VECTOR_D); + let cmp = b_reduced.simd_lt(Self::EPSILON_VECTOR_D); let b = cmp.select(b_reduced, b); // u + v let sum = a.add(b); let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); - let cmp0 = sum_reduced.lt(sum); - let cmp1 = sum.lt(a); + let cmp0 = sum_reduced.simd_lt(sum); + let cmp1 = sum.simd_lt(a); let reduce_flag = cmp0.bitor(cmp1); let res1 = reduce_flag.select(sum_reduced, sum); // u - v let diff = a.sub(b); let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); - let cmp = a.lt(b); + let cmp = a.simd_lt(b); let res2 = cmp.select(diff_reduced, diff); - res1.write_to_slice_aligned(u); - res2.write_to_slice_aligned(v); + res1.copy_to_slice(u); + res2.copy_to_slice(v); } /// # Safety @@ -323,7 +326,7 @@ impl MixedGL { } #[inline(always)] - pub(crate) fn as_u64x8_arrays(input: &Self) -> [packed_simd::u64x8; 2] { + pub(crate) fn as_u64x8_arrays(input: &Self) -> [u64x8; 2] { // this preserves an alignment unsafe { std::mem::transmute(*input) } } @@ -335,7 +338,7 @@ impl MixedGL { } #[inline(always)] - pub(crate) unsafe fn from_u64x8_arrays(input: [packed_simd::u64x8; 2]) -> Self { + pub(crate) unsafe fn from_u64x8_arrays(input: [u64x8; 2]) -> Self { // this preserves an alignment std::mem::transmute(input) } @@ -412,8 +415,8 @@ impl crate::field::traits::field_like::PrimeFieldLike for MixedGL { for i in 0..4 { let a = a_u64.0[i]; - let is_zero = a.eq(packed_simd::u64x4::splat(0)); - let neg = packed_simd::u64x4::splat(Self::ORDER).sub(a); + let is_zero = a.simd_eq(u64x4::splat(0)); + let neg = u64x4::splat(Self::ORDER).sub(a); let res = is_zero.select(a, neg); a_u64.0[i] = res; diff --git a/src/field/goldilocks/arm_asm_packed_impl.rs b/src/field/goldilocks/arm_asm_packed_impl.rs new file mode 100644 index 0000000..03399c4 --- /dev/null +++ b/src/field/goldilocks/arm_asm_packed_impl.rs @@ -0,0 +1,858 @@ +use crate::cs::implementations::utils::precompute_twiddles_for_fft; +use crate::cs::traits::GoodAllocator; +use 
crate::field::{Field, PrimeField}; +use crate::worker::Worker; +use packed_simd::shuffle; +use std::ops::{Add, BitOr, Sub}; +use std::usize; + +use super::GoldilocksField; + +// we need max of an alignment of u64x4 and u64x8 in this implementation, so 64 + +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +#[repr(C, align(64))] +pub struct MixedGL(pub [GoldilocksField; 16]); + +// we also need holder for SIMD targets, because u64x4 has smaller alignment than u64x8 +#[derive(Clone, Copy)] +#[repr(C, align(64))] +struct U64x4Holder([packed_simd::u64x4; 4]); + +impl std::fmt::Debug for MixedGL { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.0) + } +} + +impl std::fmt::Display for MixedGL { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.0) + } +} + +impl MixedGL { + pub const ORDER_BITS: usize = GoldilocksField::ORDER_BITS; + pub const ORDER: u64 = GoldilocksField::ORDER; + pub const TWO_ADICITY: usize = GoldilocksField::TWO_ADICITY; + pub const T: u64 = (Self::ORDER - 1) >> Self::TWO_ADICITY; + pub const BARRETT: u128 = 18446744078004518912; // 0x10000000100000000 + pub const EPSILON: u64 = (1 << 32) - 1; + pub const EPSILON_VECTOR: packed_simd::u64x4 = packed_simd::u64x4::splat(Self::EPSILON); + pub const EPSILON_VECTOR_D: packed_simd::u64x8 = packed_simd::u64x8::splat(Self::EPSILON); + + #[inline(always)] + pub fn new() -> Self { + Self([GoldilocksField::ZERO; 16]) + } + + #[inline(always)] + pub fn from_constant(value: GoldilocksField) -> Self { + Self([value; 16]) + } + + #[inline(always)] + pub fn from_array(value: [GoldilocksField; 16]) -> Self { + Self(value) + } + + #[inline(always)] + #[unroll::unroll_for_loops] + pub fn to_reduced(&mut self) -> &mut Self { + let mut a_u64 = Self::as_u64x4_arrays(self); + + for i in 0..4 { + let a = a_u64.0[i]; + let a_reduced = a.add(Self::EPSILON_VECTOR); + let cmp = a_reduced.lt(Self::EPSILON_VECTOR); + let res = cmp.select(a_reduced, a); + + a_u64.0[i] = res; + } + + unsafe { + *self = Self::from_u64x4_arrays(a_u64); + } + + self + } + + #[inline(always)] + #[unroll::unroll_for_loops] + pub fn mul_constant_assign(&'_ mut self, other: &GoldilocksField) -> &mut Self { + for i in 0..16 { + self.0[i].mul_assign(other); + } + + self + } + + #[inline(always)] + #[unroll::unroll_for_loops] + fn mul_assign_impl(&mut self, other: &Self) -> &mut Self { + for i in 0..16 { + self.0[i].mul_assign(&other.0[i]); + } + + self + } + + #[inline(always)] + #[unroll::unroll_for_loops] + fn add_assign_impl(&mut self, other: &Self) -> &mut Self { + let mut a_u64 = Self::as_u64x4_arrays(self); + let b_u64 = Self::as_u64x4_arrays(other); + + for i in 0..4 { + let a = a_u64.0[i]; + let b = b_u64.0[i]; + //additional reduction over b + let b_reduced = b.add(Self::EPSILON_VECTOR); + let cmp = b_reduced.lt(Self::EPSILON_VECTOR); + let b = cmp.select(b_reduced, b); + //a+b + let sum = a.add(b); + let sum_reduced = sum.add(Self::EPSILON_VECTOR); + let cmp0 = sum_reduced.lt(sum); + let cmp1 = sum.lt(a); + let reduce_flag = cmp0.bitor(cmp1); + let res = reduce_flag.select(sum_reduced, sum); + + a_u64.0[i] = res; + } + + unsafe { + *self = Self::from_u64x4_arrays(a_u64); + } + + self + } + + #[inline(always)] + #[unroll::unroll_for_loops] + fn sub_assign_impl(&'_ mut self, other: &Self) -> &mut Self { + let mut a_u64 = Self::as_u64x4_arrays(self); + let b_u64 = Self::as_u64x4_arrays(other); + + for i in 0..4 { + let a = a_u64.0[i]; + let b = b_u64.0[i]; + //additional reduction over b + 
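// The reduction trick: ORDER == 2^64 - EPSILON, so `b + EPSILON` wraps around 2^64
+        // exactly when b >= ORDER, and the wrapped value is b - ORDER, which is < EPSILON;
+        // the `lt` mask below therefore selects the canonical representative.
+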
let b_reduced = b.add(Self::EPSILON_VECTOR); + let cmp = b_reduced.lt(Self::EPSILON_VECTOR); + let b = cmp.select(b_reduced, b); + //a-b + let diff = a.sub(b); + let diff_reduced = diff.sub(Self::EPSILON_VECTOR); + let cmp = a.lt(b); + let res = cmp.select(diff_reduced, diff); + + a_u64.0[i] = res; + } + + unsafe { + *self = Self::from_u64x4_arrays(a_u64); + } + + self + } + + pub unsafe fn butterfly_1x1_impl(&mut self) -> &mut Self { + let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); + let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 2, 4, 6, 8, 10, 12, 14]); + let v: packed_simd::u64x8 = shuffle!(part1, part2, [1, 3, 5, 7, 9, 11, 13, 15]); + //additional reduction over v + let v_reduced = v.add(Self::EPSILON_VECTOR_D); + let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); + let v = cmp.select(v_reduced, v); + // u + v + let sum = u.add(v); + let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); + let cmp0 = sum_reduced.lt(sum); + let cmp1 = sum.lt(u); + let reduce_flag = cmp0.bitor(cmp1); + let res1 = reduce_flag.select(sum_reduced, sum); + // u - v + let diff = u.sub(v); + let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); + let cmp = u.lt(v); + let res2 = cmp.select(diff_reduced, diff); + + let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 8, 1, 9, 2, 10, 3, 11]); + let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 12, 5, 13, 6, 14, 7, 15]); + + *self = MixedGL::from_u64x8_arrays([part1, part2]); + + self + } + + pub unsafe fn butterfly_2x2_impl(&mut self) -> &mut Self { + let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); + let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 1, 4, 5, 8, 9, 12, 13]); + let v: packed_simd::u64x8 = shuffle!(part1, part2, [2, 3, 6, 7, 10, 11, 14, 15]); + //additional reduction over v + let v_reduced = v.add(Self::EPSILON_VECTOR_D); + let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); + let v = cmp.select(v_reduced, v); + // u + v + let sum = u.add(v); + let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); + let cmp0 = sum_reduced.lt(sum); + let cmp1 = sum.lt(u); + let reduce_flag = cmp0.bitor(cmp1); + let res1 = reduce_flag.select(sum_reduced, sum); + // u - v + let diff = u.sub(v); + let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); + let cmp = u.lt(v); + let res2 = cmp.select(diff_reduced, diff); + + let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 1, 8, 9, 2, 3, 10, 11]); + let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 5, 12, 13, 6, 7, 14, 15]); + + *self = MixedGL::from_u64x8_arrays([part1, part2]); + + self + } + + pub unsafe fn butterfly_4x4_impl(&mut self) -> &mut Self { + let [part1, part2] = MixedGL::as_u64x8_arrays(&*self); + let u: packed_simd::u64x8 = shuffle!(part1, part2, [0, 1, 2, 3, 8, 9, 10, 11]); + let v: packed_simd::u64x8 = shuffle!(part1, part2, [4, 5, 6, 7, 12, 13, 14, 15]); + //additional reduction over v + let v_reduced = v.add(Self::EPSILON_VECTOR_D); + let cmp = v_reduced.lt(Self::EPSILON_VECTOR_D); + let v = cmp.select(v_reduced, v); + // u + v + let sum = u.add(v); + let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); + let cmp0 = sum_reduced.lt(sum); + let cmp1 = sum.lt(u); + let reduce_flag = cmp0.bitor(cmp1); + let res1 = reduce_flag.select(sum_reduced, sum); + // u - v + let diff = u.sub(v); + let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); + let cmp = u.lt(v); + let res2 = cmp.select(diff_reduced, diff); + + let part1: packed_simd::u64x8 = shuffle!(res1, res2, [0, 1, 2, 3, 8, 9, 10, 11]); + let part2: packed_simd::u64x8 = shuffle!(res1, res2, [4, 5, 6, 7, 12, 13, 14, 15]); + 
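+        // (The two shuffles above re-interleave the eight sums in `res1` and the eight
+        // differences in `res2` back into the 4-element block layout of the input.)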
+ *self = MixedGL::from_u64x8_arrays([part1, part2]); + + self + } + + /// # Safety + /// + /// Pointers must be properly aligned for `MixedGL` type, should point to arrays of length 8, and should point + /// to memory that can be mutated. + /// No references to the same memory should exist when this function is called. + /// Pointers should be different. + pub unsafe fn butterfly_8x8_impl(this: *const u64, other: *const u64) { + debug_assert!(this.addr() % std::mem::align_of::() == 0); + debug_assert!(other.addr() % std::mem::align_of::() == 0); + + let u = std::slice::from_raw_parts_mut(this as *mut u64, 8); + let v = std::slice::from_raw_parts_mut(other as *mut u64, 8); + let a = packed_simd::u64x8::from_slice_aligned(u); + let b = packed_simd::u64x8::from_slice_aligned(v); + //additional reduction over b + let b_reduced = b.add(Self::EPSILON_VECTOR_D); + let cmp = b_reduced.lt(Self::EPSILON_VECTOR_D); + let b = cmp.select(b_reduced, b); + // u + v + let sum = a.add(b); + let sum_reduced = sum.add(Self::EPSILON_VECTOR_D); + let cmp0 = sum_reduced.lt(sum); + let cmp1 = sum.lt(a); + let reduce_flag = cmp0.bitor(cmp1); + let res1 = reduce_flag.select(sum_reduced, sum); + // u - v + let diff = a.sub(b); + let diff_reduced = diff.sub(Self::EPSILON_VECTOR_D); + let cmp = a.lt(b); + let res2 = cmp.select(diff_reduced, diff); + + res1.write_to_slice_aligned(u); + res2.write_to_slice_aligned(v); + } + + /// # Safety + /// + /// Pointers must be properly aligned for `MixedGL` type, should point to arrays of length 16, and should point + /// to memory that can be mutated. + /// No references to the same memory should exist when this function is called. + /// Pointers should be different. + pub unsafe fn butterfly_16x16_impl(mut this: *mut u64, mut other: *mut u64) { + debug_assert!(this.addr() % std::mem::align_of::() == 0); + debug_assert!(other.addr() % std::mem::align_of::() == 0); + + Self::butterfly_8x8_impl(this, other); + this = this.offset(8); + other = other.offset(8); + Self::butterfly_8x8_impl(this, other); + } + + // pub unsafe fn butterfly_16x16_impl( + // this: &mut Self, + // other: &mut Self, + // ) { + // let mut this_ptr = this.0.as_ptr() as *mut u64; + // let mut other_ptr = other.0.as_ptr() as *mut u64; + + // debug_assert!(this_ptr.addr() % std::mem::align_of::() == 0); + // debug_assert!(other_ptr.addr() % std::mem::align_of::() == 0); + + // Self::butterfly_8x8_impl(this_ptr, other_ptr); + // this_ptr = this_ptr.offset(8); + // other_ptr = other_ptr.offset(8); + // Self::butterfly_8x8_impl(this_ptr, other_ptr); + // } + + #[inline(always)] + pub fn from_field_array(input: [GoldilocksField; 16]) -> Self { + Self(input) + } + + #[inline(always)] + fn as_u64x4_arrays(input: &Self) -> U64x4Holder { + // this preserves an alignment + unsafe { std::mem::transmute(*input) } + } + + #[inline(always)] + pub(crate) fn as_u64x8_arrays(input: &Self) -> [packed_simd::u64x8; 2] { + // this preserves an alignment + unsafe { std::mem::transmute(*input) } + } + + #[inline(always)] + unsafe fn from_u64x4_arrays(input: U64x4Holder) -> Self { + // this preserves an alignment + std::mem::transmute(input) + } + + #[inline(always)] + pub(crate) unsafe fn from_u64x8_arrays(input: [packed_simd::u64x8; 2]) -> Self { + // this preserves an alignment + std::mem::transmute(input) + } + + #[inline(always)] + pub fn vec_add_assign(a: &mut [Self], b: &[Self]) { + use crate::field::traits::field_like::PrimeFieldLike; + for (a, b) in a.iter_mut().zip(b.iter()) { + a.add_assign(b, &mut ()); + } + } + + 
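// NOTE: arm_asm_impl.rs earlier in this diff is the std::simd port of this file;
+    // the API mapping used there is roughly (assuming a nightly with portable SIMD):
+    //   packed_simd::u64x4::splat(c) in consts -> std::simd::u64x4::from_array([c; 4])  (splat is not const-callable)
+    //   a.lt(b) / a.eq(b)                      -> a.simd_lt(b) / a.simd_eq(b)  (SimdPartialOrd / SimdPartialEq)
+    //   shuffle!(a, b, [i, ...])               -> std::intrinsics::simd::simd_shuffle(a, b, const { [i as u32, ...] })
+    //   u64x8::from_slice_aligned(s)           -> u64x8::from_slice(s)  (no alignment requirement)
+    //   v.write_to_slice_aligned(s)            -> v.copy_to_slice(s)
+    //   mask.select(t, f)                      -> mask.select(t, f)  (unchanged)
+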
#[inline(always)] + pub fn vec_mul_assign(a: &mut [Self], b: &[Self]) { + use crate::field::traits::field_like::PrimeFieldLike; + for (a, b) in a.iter_mut().zip(b.iter()) { + a.mul_assign(b, &mut ()); + } + } +} + +impl Default for MixedGL { + fn default() -> Self { + Self([GoldilocksField::ZERO; 16]) + } +} + +impl crate::field::traits::field_like::PrimeFieldLike for MixedGL { + type Base = GoldilocksField; + type Context = (); + + #[inline(always)] + fn zero(_ctx: &mut Self::Context) -> Self { + Self([GoldilocksField::ZERO; 16]) + } + #[inline(always)] + fn one(_ctx: &mut Self::Context) -> Self { + Self([GoldilocksField::ONE; 16]) + } + #[inline(always)] + fn minus_one(_ctx: &mut Self::Context) -> Self { + Self([GoldilocksField::MINUS_ONE; 16]) + } + + #[inline(always)] + fn add_assign(&mut self, other: &Self, _ctx: &mut Self::Context) -> &mut Self { + Self::add_assign_impl(self, other) + } + + #[inline(always)] + fn sub_assign(&'_ mut self, other: &Self, _ctx: &mut Self::Context) -> &mut Self { + Self::sub_assign_impl(self, other) + } + + #[inline(always)] + #[unroll::unroll_for_loops] + fn mul_assign(&'_ mut self, other: &Self, _ctx: &mut Self::Context) -> &mut Self { + Self::mul_assign_impl(self, other) + } + + #[inline(always)] + fn square(&'_ mut self, _ctx: &mut Self::Context) -> &'_ mut Self { + let t = *self; + self.mul_assign(&t, _ctx); + + self + } + + #[inline(always)] + #[unroll::unroll_for_loops] + fn negate(&'_ mut self, _ctx: &mut Self::Context) -> &'_ mut Self { + let mut a_u64 = Self::as_u64x4_arrays(self); + + for i in 0..4 { + let a = a_u64.0[i]; + + let is_zero = a.eq(packed_simd::u64x4::splat(0)); + let neg = packed_simd::u64x4::splat(Self::ORDER).sub(a); + let res = is_zero.select(a, neg); + + a_u64.0[i] = res; + } + + unsafe { + *self = Self::from_u64x4_arrays(a_u64); + } + + self + } + + #[inline(always)] + fn double(&'_ mut self, _ctx: &mut Self::Context) -> &'_ mut Self { + let t = *self; + self.add_assign(&t, _ctx); + + self + } + + #[inline(always)] + #[unroll::unroll_for_loops] + fn inverse(&self, _ctx: &mut Self::Context) -> Self { + let mut result = *self; + for i in 0..16 { + result.0[i] = PrimeField::inverse(&result.0[i]).expect("inverse must exist"); + } + + result + } + + #[inline(always)] + fn constant(value: Self::Base, _ctx: &mut Self::Context) -> Self { + Self([value; 16]) + } +} + +impl crate::field::traits::field_like::PrimeFieldLikeVectorized for MixedGL { + type Twiddles = Vec; + type InverseTwiddles = Vec; + #[inline(always)] + fn is_zero(&self) -> bool { + self.0 == [GoldilocksField::ZERO; 16] + } + + #[inline(always)] + fn equals(&self, other: &Self) -> bool { + self.eq(other) + } + + #[inline(always)] + fn mul_all_by_base(&'_ mut self, other: &Self::Base, _ctx: &mut Self::Context) -> &'_ mut Self { + Self::mul_constant_assign(self, other) + } + + #[inline(always)] + fn slice_from_base_slice(input: &[Self::Base]) -> &[Self] { + if input.len() < Self::SIZE_FACTOR { + panic!("too small input size to cast"); + } + debug_assert!(input.len() % Self::SIZE_FACTOR == 0); + debug_assert!(input.as_ptr().addr() % std::mem::align_of::() == 0); + let result_len = input.len() / 16; + unsafe { std::slice::from_raw_parts(input.as_ptr() as *mut Self, result_len) } + } + + #[inline(always)] + fn slice_into_base_slice(input: &[Self]) -> &[Self::Base] { + let result_len = input.len() * 16; + unsafe { std::slice::from_raw_parts(input.as_ptr() as *mut GoldilocksField, result_len) } + } + + #[inline(always)] + fn slice_into_base_slice_mut(input: &mut [Self]) -> 
&mut [Self::Base] { + let result_len = input.len() * 16; + unsafe { + std::slice::from_raw_parts_mut(input.as_ptr() as *mut GoldilocksField, result_len) + } + } + + #[inline(always)] + fn vec_from_base_vec(input: Vec) -> Vec { + if input.len() < Self::SIZE_FACTOR { + panic!("too small input size to cast"); + } + let (ptr, len, capacity, allocator) = input.into_raw_parts_with_alloc(); + debug_assert!(ptr.addr() % std::mem::align_of::() == 0); + debug_assert!(len % Self::SIZE_FACTOR == 0); + debug_assert!(capacity % Self::SIZE_FACTOR == 0); + + unsafe { + Vec::from_raw_parts_in( + ptr as _, + len / Self::SIZE_FACTOR, + capacity / Self::SIZE_FACTOR, + allocator, + ) + } + } + + #[inline(always)] + fn vec_into_base_vec(input: Vec) -> Vec { + let (ptr, len, capacity, allocator) = input.into_raw_parts_with_alloc(); + + unsafe { + Vec::from_raw_parts_in( + ptr as _, + len * Self::SIZE_FACTOR, + capacity * Self::SIZE_FACTOR, + allocator, + ) + } + } + + #[inline(always)] + fn fft_natural_to_bitreversed( + input: &mut [Self], + coset: Self::Base, + twiddles: &Self::Twiddles, + _ctx: &mut Self::Context, + ) { + // let input = crate::utils::cast_check_alignment_ref_mut_unpack::(input); + // crate::fft::fft_natural_to_bitreversed_cache_friendly(input, coset, twiddles); + + crate::fft::fft_natural_to_bitreversed_mixedgl(input, coset, twiddles); + } + + #[inline(always)] + fn ifft_natural_to_natural( + input: &mut [Self], + coset: Self::Base, + twiddles: &Self::InverseTwiddles, + _ctx: &mut Self::Context, + ) { + // let input = crate::utils::cast_check_alignment_ref_mut_unpack::(input); + // crate::fft::ifft_natural_to_natural_cache_friendly(input, coset, twiddles); + + crate::fft::ifft_natural_to_natural_mixedgl(input, coset, twiddles); + } + + #[inline(always)] + fn precompute_forward_twiddles_for_fft( + fft_size: usize, + worker: &Worker, + ctx: &mut Self::Context, + ) -> Self::Twiddles { + precompute_twiddles_for_fft::( + fft_size, worker, ctx, + ) + } + + #[inline(always)] + fn precompute_inverse_twiddles_for_fft( + fft_size: usize, + worker: &Worker, + ctx: &mut Self::Context, + ) -> Self::Twiddles { + precompute_twiddles_for_fft::( + fft_size, worker, ctx, + ) + } +} + +#[cfg(test)] +mod test { + + use crate::field::goldilocks::MixedGL; + use crate::field::rand_from_rng; + use crate::field::traits::field_like::PrimeFieldLike; + use crate::field::traits::field_like::PrimeFieldLikeVectorized; + use crate::field::{goldilocks::GoldilocksField, Field}; + use crate::utils::clone_respecting_allignment; + + #[test] + fn test_mixedgl_negate() { + let mut ctx = (); + const POLY_SIZE: usize = 1 << 20; + let mut rng = rand::thread_rng(); + + // Generate random Vec + let a: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); + + let mut ag = a.clone(); + + for aa in ag.iter_mut() { + Field::negate(aa); + } + + let mut av: Vec = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &a, + )); + + // Test over GLPS + for aa in av.iter_mut() { + aa.negate(&mut ctx); + } + + assert_eq!(MixedGL::vec_into_base_vec(av), ag); + } + + use rand::Rng; + + #[test] + fn test_mixedgl_add_assign() { + let mut ctx = (); + const POLY_SIZE: usize = 1 << 24; + let mut rng = rand::thread_rng(); + let _s = GoldilocksField(0x0000000001000000); + + // Generate random Vec + // let a: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); + // let b: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); + // let a: Vec = (0..POLY_SIZE).map(|_| GoldilocksField(0x0000000000000001)).collect(); + 
// let b: Vec = (0..POLY_SIZE).map(|_| GoldilocksField(0x0000000001000000)).collect(); + let b: Vec = (0..POLY_SIZE) + .map(|_| GoldilocksField(rng.gen_range(GoldilocksField::ORDER..u64::MAX))) + .collect(); + let a: Vec = (0..POLY_SIZE) + .map(|_| GoldilocksField(rng.gen_range(GoldilocksField::ORDER..u64::MAX))) + .collect(); + // let a: Vec = (0..POLY_SIZE).map(|_| GoldilocksField(0xfffffffff67f1442)).collect(); + // let b: Vec = (0..POLY_SIZE).map(|_| GoldilocksField(0xffffffff9c1d065d)).collect(); + + // dbg!(&a); + // dbg!(&b); + + let mut ag = a.clone(); + let bg = b.clone(); + + for (aa, bb) in ag.iter_mut().zip(bg.iter()) { + Field::add_assign(aa, bb); + } + + let mut av: Vec = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &a, + )); + let bv: Vec = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &b, + )); + + // Test over GLPS + for (aa, bb) in av.iter_mut().zip(bv.iter()) { + aa.add_assign(bb, &mut ctx); + } + + let avv = MixedGL::vec_into_base_vec(av); + // for i in 0..avv.len() { + // assert_eq!(avv[i], ag[i], "error {}", i); + // } + + // dbg!(&ag[0]); + // dbg!(&avv[0]); + + assert_eq!(avv, ag); + } + + #[test] + fn test_mixedgl_sub_assign() { + let mut ctx = (); + const POLY_SIZE: usize = 1 << 20; + let _rng = rand::thread_rng(); + + // Generate random Vec + // let a: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); + // let b: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); + let a: Vec = (0..POLY_SIZE) + .map(|_| GoldilocksField(0x0000000000000001)) + .collect(); + let b: Vec = (0..POLY_SIZE) + .map(|_| GoldilocksField(0x0000000001000000)) + .collect(); + + // Test over Goldilocks + let mut ag = a.clone(); + let bg = b.clone(); + + for (aa, bb) in ag.iter_mut().zip(bg.iter()) { + Field::sub_assign(aa, bb); + } + + let mut av: Vec = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &a, + )); + let bv: Vec = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &b, + )); + + // Test over GLPS + for (aa, bb) in av.iter_mut().zip(bv.iter()) { + aa.sub_assign(bb, &mut ctx); + } + + // dbg!(&ag); + // dbg!(&av); + + assert_eq!(ag, MixedGL::vec_into_base_vec(av)); + } + + #[test] + fn test_mixedgl_mul_assign() { + let mut ctx = (); + const POLY_SIZE: usize = 1 << 20; + let mut rng = rand::thread_rng(); + + // Generate random Vec + let a: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); + let b: Vec = (0..POLY_SIZE).map(|_| rand_from_rng(&mut rng)).collect(); + + // Test over Goldilocks + let mut ag = a.clone(); + let bg = b.clone(); + + for (aa, bb) in ag.iter_mut().zip(bg.iter()) { + Field::mul_assign(aa, bb); + } + + let mut av: Vec = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &a, + )); + let bv: Vec = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &b, + )); + + // Test over GLPS + for (aa, bb) in av.iter_mut().zip(bv.iter()) { + aa.mul_assign(bb, &mut ctx); + } + + // dbg!(&ag); + // dbg!(&av); + + assert_eq!(ag, MixedGL::vec_into_base_vec(av)); + } + + #[test] + fn test_mixedgl_butterfly16x16() { + // let mut ctx = (); + + // let am: [u64;32] = [0x0001000000000000, 0x0000000000000001, 0x0001000000000000, 0x0000000000000001, 0x0000000000000000, 0xffffffff00000000, 0x0000000000000001, 0x0000ffffffffffff, 0x0000000000000000, 0x0001000000000000, 0xffffffff00000000, 0xffffffff00000000, 0xffffffff00000000, 0xfffeffff00000001, 0xfffeffff00000002, 0xfffeffff00000002, + // 0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0001000000000001, 
0xfffeffff00000001, 0xffffffff00000000, 0x0001000000000000, 0xfffeffff00000002, 0x0000000000000000, 0xfffeffff00000001, 0xffffffff00000000, 0x0000000000000001, 0x0000ffffffffffff, 0x0000000000000000, 0x0000000000000001, 0x0001000000000000]; + + let am: [u64; 32] = [ + 0x0001000000000000, + 0x0000000000000001, + 0x0001000000000000, + 0x0000000000000001, + 0x0000000000000000, + 0xffffffff00000000, + 0x0000000000000001, + 0x0000ffffffffffff, + 0x0000000000000000, + 0x0001000000000000, + 0xffffffff00000000, + 0xffffffff00000000, + 0xffffffff00000000, + 0xfffeffff00000001, + 0xfffeffff00000002, + 0xfffeffff00000002, + 0x0000000000000000, + 0xffffffff01000001, + 0x0000000000000000, + 0x0000010000ffff00, + 0xfffffeff00000101, + 0xfffffffeff000001, + 0x000000ffffffff00, + 0xfffffeff01000101, + 0x0000000000000000, + 0xfffffeff00000101, + 0xfffffffeff000001, + 0xffffffff01000001, + 0x000000fffeffff00, + 0x0000000000000000, + 0xffffffff01000001, + 0x000000ffffffff00, + ]; + + let a: Vec = am.into_iter().map(GoldilocksField).collect(); + // let b: Vec = bm.into_iter().map(GoldilocksField).collect(); + let _s = GoldilocksField(0x0000000001000000); + + // Test over Goldilocks + let mut ag = a.clone(); + // let mut bg = b.clone(); + let distance_in_cache = 16; + + let mut j = 0; + while j < 16 { + let mut u = ag[j]; + let v = ag[j + distance_in_cache]; + // Field::mul_assign(&mut v, &s); + Field::sub_assign(&mut u, &v); + ag[j + distance_in_cache] = u; + Field::add_assign(&mut ag[j], &v); + + j += 1; + } + + let av: Vec = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &a, + )); + // let mut bv: Vec = MixedGL::vec_from_base_vec(clone_respecting_allignment::(&b)); + // let mut av = av[0]; + // let mut bv = bv[0]; + + // Test over MixedGL + // av[1].mul_constant_assign(&s); + unsafe { + MixedGL::butterfly_16x16_impl( + av[0].0.as_ptr() as *mut u64, + av[1].0.as_ptr() as *mut u64, + ); + } + // let mut u = av[0]; + // let mut v = av[1]; + // unsafe { MixedGL::butterfly_16x16_impl(&mut u, &mut v); } + // av[0] = u; + // av[1] = v; + + let ag = + MixedGL::vec_from_base_vec(clone_respecting_allignment::( + &ag, + )); + // let bg = MixedGL::vec_from_base_vec(clone_respecting_allignment::(&bg)); + + dbg!(&ag); + dbg!(&av); + + // dbg!(&bg); + // dbg!(&bv); + + assert_eq!(ag, av); + // assert_eq!(bg, bv); + } +} diff --git a/src/field/goldilocks/mod.rs b/src/field/goldilocks/mod.rs index 10daec1..82fa6be 100644 --- a/src/field/goldilocks/mod.rs +++ b/src/field/goldilocks/mod.rs @@ -12,10 +12,18 @@ mod extension; mod inversion; #[cfg(all( + not(feature = "include_packed_simd"), any(target_feature = "neon", target_feature = "avx2"), not(all(target_feature = "avx512f", target_feature = "avx512vl")) ))] pub mod arm_asm_impl; + +#[cfg(all( + feature = "include_packed_simd", + any(target_feature = "neon", target_feature = "avx2"), + not(all(target_feature = "avx512f", target_feature = "avx512vl")) +))] +pub mod arm_asm_packed_impl; #[cfg(not(any( all(target_feature = "avx512f", target_feature = "avx512vl"), target_feature = "neon", @@ -43,10 +51,19 @@ pub mod x86_64_asm_impl; pub mod avx512_impl; #[cfg(all( + not(feature = "include_packed_simd"), any(target_feature = "neon", target_feature = "avx2"), not(all(target_feature = "avx512f", target_feature = "avx512vl")) ))] pub use arm_asm_impl::*; + +#[cfg(all( + feature = "include_packed_simd", + any(target_feature = "neon", target_feature = "avx2"), + not(all(target_feature = "avx512f", target_feature = "avx512vl")) +))] +pub use arm_asm_packed_impl::*; + 
#[cfg(not(any( all(target_feature = "avx512f", target_feature = "avx512vl"), target_feature = "neon", diff --git a/src/gadgets/curves/.gitignore b/src/gadgets/curves/.gitignore new file mode 100644 index 0000000..953d618 --- /dev/null +++ b/src/gadgets/curves/.gitignore @@ -0,0 +1,3 @@ +sage/*.sage.py +bn256/*.sage.py +.ipynb_checkpoints \ No newline at end of file diff --git a/src/gadgets/curves/sw_projective/extended.rs b/src/gadgets/curves/sw_projective/extended.rs new file mode 100644 index 0000000..549691b --- /dev/null +++ b/src/gadgets/curves/sw_projective/extended.rs @@ -0,0 +1,583 @@ +// Short Weierstrass projective curve point implementation. +// Primarily based on the paper: https://eprint.iacr.org/2015/1060.pdf + +use self::curves::non_native_field::traits::CurveCompatibleNonNativeField; + +use super::*; + +use crate::gadgets::traits::selectable::Selectable; +use crate::{cs::traits::cs::ConstraintSystem, gadgets::boolean::Boolean}; +use pairing::ff::PrimeField; +use pairing::GenericCurveAffine; + +/// ExtendedSWProjectivePoint is the same structure as SWProjectivePoint, but with the additional +/// flexibility that GenericCurveAffine::Base is not necessarily the PrimeField. It is implemented +/// separately to avoid any potential conflicts with the existing SWProjectivePoint. +#[derive(Derivative)] +#[derivative(Clone, Debug)] +pub struct ExtendedSWProjectivePoint<F, T, C, NN> +where + F: SmallField, + T: PrimeField, + C: GenericCurveAffine, + NN: CurveCompatibleNonNativeField<F, T, C>, +{ + pub x: NN, + pub y: NN, + pub z: NN, + pub _marker: std::marker::PhantomData<(F, T, C)>, +} + +impl<F, T, C, NN> ExtendedSWProjectivePoint<F, T, C, NN> +where + F: SmallField, + T: PrimeField, + C: GenericCurveAffine, + NN: CurveCompatibleNonNativeField<F, T, C>, +{ + pub fn from_xy_unchecked<CS: ConstraintSystem<F>>(cs: &mut CS, x: NN, y: NN) -> Self { + let params = x.get_params(); + let z = NN::allocated_constant(cs, T::one(), params); + + Self { + x, + y, + z, + _marker: std::marker::PhantomData, + } + } + + /// Checks whether the point is normalized, i.e. whether the Z coordinate equals one.
+ pub fn is_normalized<CS: ConstraintSystem<F>>(&mut self, cs: &mut CS) -> Boolean<F> { + let mut one = NN::allocated_constant(cs, T::one(), self.x.get_params()); + self.z.equals(cs, &mut one) + } + + pub fn zero<CS: ConstraintSystem<F>>(cs: &mut CS, params: &std::sync::Arc<NN::Params>) -> Self { + let x = NN::allocated_constant(cs, T::zero(), params); + let y = NN::allocated_constant(cs, T::one(), params); + let z = NN::allocated_constant(cs, T::zero(), params); + + Self { + x, + y, + z, + _marker: std::marker::PhantomData, + } + } + + pub fn one<CS: ConstraintSystem<F>>(cs: &mut CS, params: &std::sync::Arc<NN::Params>) -> Self { + use pairing::ff::Field; + + let one = C::one(); + let (x, y) = one.into_xy_unchecked(); + let x = NN::from_curve_base(cs, &x, params); + let y = NN::from_curve_base(cs, &y, params); + let z = NN::from_curve_base(cs, &C::Base::one(), params); + + Self { + x, + y, + z, + _marker: std::marker::PhantomData, + } + } + + pub fn double<CS: ConstraintSystem<F>>(&mut self, cs: &mut CS) -> Self { + use pairing::ff::Field; + if C::a_coeff().is_zero() == false { + return self.generic_double(cs); + } + let params = self.x.get_params().clone(); + + let mut three = T::one(); + three.double(); + three.add_assign(&T::one()); + + let mut four = T::one(); + four.double(); + four.double(); + + let mut curve_b = NN::from_curve_base(cs, &C::b_coeff(), &params); + let mut curve_b3 = curve_b.double(cs); + let mut curve_b3 = curve_b3.add(cs, &mut curve_b); + + let mut three_nn = NN::allocated_constant(cs, three, &params); + let mut four_nn = NN::allocated_constant(cs, four, &params); + + let x = &mut self.x; + let y = &mut self.y; + let z = &mut self.z; + + // t0 = y * y + let mut t0 = y.square(cs); + // t2 = b3 * z * z + let mut b3_mul_z = z.mul(cs, &mut curve_b3); + let mut t2 = b3_mul_z.mul(cs, z); + // y3 = t0 + t2 + let mut y3: NN = t0.add(cs, &mut t2); + // t1 = y * z + let mut t1 = y.mul(cs, z); + // z3 = 8 * t0 * t1 + let mut t0_mul_4 = t0.mul(cs, &mut four_nn); + let mut t0_mul_8 = t0_mul_4.double(cs); + let z3 = t0_mul_8.mul(cs, &mut t1); + // t4 = 4 * t0 - 3 * y3 + let mut y3_mul_3 = y3.mul(cs, &mut three_nn); + let mut t4 = t0_mul_4.sub(cs, &mut y3_mul_3); + // y3 = t4 * y3 + let mut y3 = t4.mul(cs, &mut y3); + // y3 = 8 * t0 * t2 + y3 + let mut new_y3 = t0_mul_8.mul(cs, &mut t2); + let new_y3 = new_y3.add(cs, &mut y3); + let y3 = new_y3; + // t1 = x * y + let mut t1 = x.mul(cs, y); + // x3 = 2 * t4 * t1 + let mut t4_mul_2 = t4.double(cs); + let x3 = t4_mul_2.mul(cs, &mut t1); + + let new = Self { + x: x3, + y: y3, + z: z3, + _marker: std::marker::PhantomData, + }; + + new + } + + fn generic_double<CS: ConstraintSystem<F>>(&mut self, cs: &mut CS) -> Self { + use pairing::ff::Field; + let params = self.x.get_params().clone(); + + let curve_b = C::b_coeff(); + let mut curve_b3 = curve_b; + curve_b3.double(); + curve_b3.add_assign(&curve_b); + + let mut curve_a = NN::from_curve_base(cs, &C::a_coeff(), &params); + let mut curve_b3 = NN::from_curve_base(cs, &curve_b3, &params); + + let x = &mut self.x; + let y = &mut self.y; + let z = &mut self.z; + + // t0 = x * x + let mut t0 = x.square(cs); + // t1 = y * y + let mut t1 = y.square(cs); + // t2 = z * z + let mut t2 = z.square(cs); + + // t3 = x * y + let mut t3 = x.mul(cs, y); + // t3 = t3 + t3 + let mut t3 = t3.double(cs); + // z3 = x * z + let mut z3 = x.mul(cs, z); + + // z3 = z3 + z3 + let mut z3 = z3.double(cs); + // x3 = a * z3 + let mut x3 = curve_a.mul(cs, &mut z3); + // y3 = b3 * t2 + let mut y3 = curve_b3.mul(cs, &mut t2); + + // y3 = x3 + y3 + let mut y3 = x3.add(cs, &mut y3); + // x3 = t1 - y3 + let mut x3 = t1.sub(cs, &mut y3); + // y3 = t1 + y3 + let mut y3 = t1.add(cs, &mut 
y3); + + // y3 = x3 * y3 + let mut y3 = x3.mul(cs, &mut y3); + // x3 = t3 * x3 + let mut x3 = t3.mul(cs, &mut x3); + // z3 = b3 * z3 + let mut z3 = curve_b3.mul(cs, &mut z3); + + // t2 = a * t2 + let mut t2 = curve_a.mul(cs, &mut t2); + // t3 = t0 - t2 + let mut t3 = t0.sub(cs, &mut t2); + // t3 = a * t3 + let mut t3 = curve_a.mul(cs, &mut t3); + + // t3 = t3 + z3 + let mut t3 = t3.add(cs, &mut z3); + // z3 = t0 + t0 + let mut z3 = t0.double(cs); + // t0 = z3 + t0 + let mut t0 = z3.add(cs, &mut t0); + + // t0 = t0 + t2 + let mut t0 = t0.add(cs, &mut t2); + // t0 = t0 * t3 + let mut t0 = t0.mul(cs, &mut t3); + // y3 = y3 + t0 + let y3 = y3.add(cs, &mut t0); + + // t2 = y * z + let mut t2 = y.mul(cs, z); + // t2 = t2 + t2 + let mut t2 = t2.double(cs); + // t0 = t2 * t3 + let mut t0 = t2.mul(cs, &mut t3); + + // x3 = x3 - t0 + let x3 = x3.sub(cs, &mut t0); + // z3 = t2 * t1 + let mut z3 = t2.mul(cs, &mut t1); + // z3 = z3 + z3 + let mut z3 = z3.double(cs); + + // z3 = z3 + z3 + let z3 = z3.double(cs); + + let new = Self { + x: x3, + y: y3, + z: z3, + _marker: std::marker::PhantomData, + }; + + new + } + + pub fn negated<CS: ConstraintSystem<F>>(&mut self, cs: &mut CS) -> Self { + let y_negated = self.y.negated(cs); + + let new = Self { + x: self.x.clone(), + y: y_negated, + z: self.z.clone(), + _marker: std::marker::PhantomData, + }; + + new + } + + fn add_sub_mixed_impl<CS: ConstraintSystem<F>>( + &mut self, + cs: &mut CS, + other_xy: &mut (NN, NN), + is_subtraction: bool, + ) -> Self { + use pairing::ff::Field; + if C::a_coeff().is_zero() == false { + return self.generic_add_sub_mixed_impl(cs, other_xy, is_subtraction); + } + + let params = self.x.get_params().clone(); + + let mut three = T::one(); + three.double(); + three.add_assign(&T::one()); + + let curve_b = C::b_coeff(); + let mut curve_b3 = curve_b; + curve_b3.double(); + curve_b3.add_assign(&curve_b); + + let mut curve_b6 = curve_b3; + curve_b6.double(); + + let mut three_nn = NN::allocated_constant(cs, three, &params); + let mut curve_b3 = NN::from_curve_base(cs, &curve_b3, &params); + let mut curve_b6 = NN::from_curve_base(cs, &curve_b6, &params); + + let x1 = &mut self.x; + let y1 = &mut self.y; + let z1 = &mut self.z; + + let mut y2_local: NN = other_xy.1.clone(); + let x2 = &mut other_xy.0; + if is_subtraction { + y2_local = y2_local.negated(cs); + } + let y2 = &mut y2_local; + + // t4 = y2 * z1 + y1 + let mut t4 = y2.mul(cs, z1); + let mut t4 = t4.add(cs, y1); + + // y3 = x2 * z1 + x1 + let mut y3 = x2.mul(cs, z1); + let mut y3 = y3.add(cs, x1); + + // z3 = y1 * y2 + b3 * z1 + let mut z1_mul_b3 = z1.mul(cs, &mut curve_b3); + let mut z3 = y1.mul(cs, y2); + let mut z3 = z3.add(cs, &mut z1_mul_b3); + + // t0 = x1 * x2 + let mut t0 = x1.mul(cs, x2); + + // t3 = (x2 + y2) * (x1 + y1) - t0 - z3 + b3 * z1 + let mut a = x2.add(cs, y2); + let mut b = x1.add(cs, y1); + let mut t3 = a.mul(cs, &mut b); + let mut t3 = t3.sub(cs, &mut t0); + let mut t3 = t3.sub(cs, &mut z3); + let mut t3 = t3.add(cs, &mut z1_mul_b3); + + // x3 = t4 * b3 * y3 + let mut y3_mul_b3 = y3.mul(cs, &mut curve_b3); + let mut x3 = t4.mul(cs, &mut y3_mul_b3); + + // t1 = z3 - 2 * b3 * z1 + let mut z1_mul_2_b3 = z1.mul(cs, &mut curve_b6); + let mut t1 = z3.sub(cs, &mut z1_mul_2_b3); + + // x3 = t3 * t1 - x3 + let mut new_x3 = t3.mul(cs, &mut t1); + let new_x3 = new_x3.sub(cs, &mut x3); + let x3 = new_x3; + + // y3 = (b3 * y3) * (3 * t0) + let mut t0_mul_3 = t0.mul(cs, &mut three_nn); + let mut y3 = y3_mul_b3.mul(cs, &mut t0_mul_3); + + // y3 = t1 * z3 + y3 + let mut new_y3 = t1.mul(cs, &mut z3); + let new_y3 = new_y3.add(cs, 
&mut y3); + let y3 = new_y3; + + // t0 = (3 * t0) * t3 + let mut t0 = t0_mul_3.mul(cs, &mut t3); + + // z3 = z3 * t4 + t0 + let mut z3 = z3.mul(cs, &mut t4); + let z3 = z3.add(cs, &mut t0); + + let new = Self { + x: x3, + y: y3, + z: z3, + _marker: std::marker::PhantomData, + }; + + new + } + + fn generic_add_sub_mixed_impl>( + &mut self, + cs: &mut CS, + other_xy: &mut (NN, NN), + is_subtraction: bool, + ) -> Self { + use pairing::ff::Field; + let params = self.x.get_params().clone(); + + let curve_b = C::b_coeff(); + let mut curve_b3 = curve_b; + curve_b3.double(); + curve_b3.add_assign(&curve_b); + + let mut curve_a = NN::from_curve_base(cs, &C::a_coeff(), ¶ms); + let mut curve_b3 = NN::from_curve_base(cs, &curve_b3, ¶ms); + + let x1 = &mut self.x; + let y1 = &mut self.y; + let z1 = &mut self.z; + + let mut y2_local: NN = other_xy.1.clone(); + let x2 = &mut other_xy.0; + if is_subtraction { + y2_local = y2_local.negated(cs); + } + let y2 = &mut y2_local; + + // t0 = x1 * x2 + let mut t0 = x1.mul(cs, x2); + // t1 = x1 * y2 + let mut t1 = y1.mul(cs, y2); + // t3 = x2 + y2 + let mut t3 = x2.add(cs, y2); + + // t4 = x1 + y1 + let mut t4 = x1.add(cs, y1); + // t3 = t3 * t4 + let mut t3 = t3.mul(cs, &mut t4); + // t4 = t0 + t1 + let mut t4 = t0.add(cs, &mut t1); + + // t3 = t3 - t4 + let mut t3 = t3.sub(cs, &mut t4); + // t4 = x2 * z1 + let mut t4 = x2.mul(cs, z1); + // t4 = t4 + x1 + let mut t4 = t4.add(cs, x1); + + // t5 = y2 * z1 + let mut t5 = y2.mul(cs, z1); + // t5 = t5 + y1 + let mut t5 = t5.add(cs, y1); + // z3 = a * t4 + let mut z3 = curve_a.mul(cs, &mut t4); + + // x3 = b3 * z1 + let mut x3 = curve_b3.mul(cs, z1); + // z3 = x3 + z3 + let mut z3 = x3.add(cs, &mut z3); + // x3 = t1 - z3 + let mut x3 = t1.sub(cs, &mut z3); + + // z3 = t1 + z3 + let mut z3 = t1.add(cs, &mut z3); + // y3 = x3 * z3 + let mut y3 = x3.mul(cs, &mut z3); + // t1 = t0 + t0 + let mut t1 = t0.double(cs); + + // t1 = t1 + t0 + let mut t1 = t1.add(cs, &mut t0); + // t2 = a * z1 + let mut t2 = curve_a.mul(cs, z1); + // t4 = b3 * t4 + let mut t4 = curve_b3.mul(cs, &mut t4); + + // t1 = t1 + t2 + let mut t1 = t1.add(cs, &mut t2); + // t2 = t0 - t2 + let mut t2 = t0.sub(cs, &mut t2); + // t2 = a * t2 + let mut t2 = curve_a.mul(cs, &mut t2); + + // t4 = t4 + t2 + let mut t4 = t4.add(cs, &mut t2); + // t0 = t1 * t4 + let mut t0 = t1.mul(cs, &mut t4); + // y3 = y3 + t0 + let y3 = y3.add(cs, &mut t0); + + // t0 = t5 * t4 + let mut t0 = t5.mul(cs, &mut t4); + // x3 = t3 * x3 + let mut x3 = t3.mul(cs, &mut x3); + // x3 = x3 - t0 + let x3 = x3.sub(cs, &mut t0); + + // t0 = t3 * t1 + let mut t0 = t3.mul(cs, &mut t1); + // z3 = t5 * z3 + let mut z3 = t5.mul(cs, &mut z3); + // z3 = z3 + t0 + let z3 = z3.add(cs, &mut t0); + + let new = Self { + x: x3, + y: y3, + z: z3, + _marker: std::marker::PhantomData, + }; + + new + } + + pub fn add_mixed>( + &mut self, + cs: &mut CS, + other_xy: &mut (NN, NN), + ) -> Self { + self.add_sub_mixed_impl(cs, other_xy, false) + } + + pub fn sub_mixed>( + &mut self, + cs: &mut CS, + other_xy: &mut (NN, NN), + ) -> Self { + self.add_sub_mixed_impl(cs, other_xy, true) + } + + pub fn convert_to_affine_or_default>( + &mut self, + cs: &mut CS, + default: C, + ) -> ((NN, NN), Boolean) { + let params = self.x.get_params().clone(); + let is_point_at_infty = NN::is_zero(&mut self.z, cs); + + let one_nn = NN::allocated_constant(cs, T::one(), ¶ms); + let mut safe_z = NN::conditionally_select(cs, is_point_at_infty, &one_nn, &self.z); + let x_for_safe_z = self.x.div_unchecked(cs, &mut safe_z); + let 
y_for_safe_z = self.y.div_unchecked(cs, &mut safe_z); + + let (default_x, default_y) = default.into_xy_unchecked(); + + let default_x = NN::from_curve_base(cs, &default_x, ¶ms); + let default_y = NN::from_curve_base(cs, &default_y, ¶ms); + + let x = NN::conditionally_select(cs, is_point_at_infty, &default_x, &x_for_safe_z); + let y = NN::conditionally_select(cs, is_point_at_infty, &default_y, &y_for_safe_z); + + ((x, y), is_point_at_infty) + } + + pub fn convert_to_affine_jacobian>( + &mut self, + cs: &mut CS, + default: C, + ) -> ((NN, NN), Boolean) { + let params = self.x.get_params().clone(); + let is_point_at_infty = NN::is_zero(&mut self.z, cs); + + let one_nn = NN::allocated_constant(cs, T::one(), ¶ms); + let mut safe_z = NN::conditionally_select(cs, is_point_at_infty, &one_nn, &self.z); + let mut safe_z_squared = safe_z.square(cs); + safe_z_squared.normalize(cs); + let mut safe_z_cubed = safe_z.mul(cs, &mut safe_z_squared); + safe_z_cubed.normalize(cs); + let mut x_for_safe_z = self.x.div_unchecked(cs, &mut safe_z_squared); + x_for_safe_z.normalize(cs); + let mut y_for_safe_z = self.y.div_unchecked(cs, &mut safe_z_cubed); + y_for_safe_z.normalize(cs); + + let (default_x, default_y) = default.into_xy_unchecked(); + + let default_x = NN::from_curve_base(cs, &default_x, ¶ms); + let default_y = NN::from_curve_base(cs, &default_y, ¶ms); + + let x = NN::conditionally_select(cs, is_point_at_infty, &default_x, &x_for_safe_z); + let y = NN::conditionally_select(cs, is_point_at_infty, &default_y, &y_for_safe_z); + + ((x, y), is_point_at_infty) + } + + pub fn enforce_reduced>(&mut self, cs: &mut CS) { + self.x.enforce_reduced(cs); + self.y.enforce_reduced(cs); + self.z.enforce_reduced(cs); + } +} + +impl Selectable for ExtendedSWProjectivePoint +where + F: SmallField, + T: PrimeField, + C: GenericCurveAffine, + NN: CurveCompatibleNonNativeField, +{ + const SUPPORTS_PARALLEL_SELECT: bool = false; + + fn conditionally_select>( + cs: &mut CS, + flag: Boolean, + a: &Self, + b: &Self, + ) -> Self { + let x = NN::conditionally_select(cs, flag, &a.x, &b.x); + let y = NN::conditionally_select(cs, flag, &a.y, &b.y); + let z = NN::conditionally_select(cs, flag, &a.z, &b.z); + + Self { + x, + y, + z, + _marker: std::marker::PhantomData, + } + } +} diff --git a/src/gadgets/curves/sw_projective/mod.rs b/src/gadgets/curves/sw_projective/mod.rs index 50a4a1f..24cc7d7 100644 --- a/src/gadgets/curves/sw_projective/mod.rs +++ b/src/gadgets/curves/sw_projective/mod.rs @@ -1,3 +1,6 @@ +// Short weierstrass projective curve point implementation. 
+// Primarily based on the paper: https://eprint.iacr.org/2015/1060.pdf + use super::*; use crate::gadgets::traits::selectable::Selectable; @@ -7,7 +10,7 @@ use crate::{ }; use pairing::GenericCurveAffine; -// https://eprint.iacr.org/2015/1060.pdf +pub mod extended; #[derive(Derivative)] #[derivative(Clone, Debug)] @@ -55,6 +58,23 @@ where } } + pub fn one>(cs: &mut CS, params: &std::sync::Arc) -> Self { + use pairing::ff::Field; + + let one = C::one(); + let (x, y) = one.into_xy_unchecked(); + let x = NN::allocated_constant(cs, x, params); + let y = NN::allocated_constant(cs, y, params); + let z = NN::allocated_constant(cs, C::Base::one(), params); + + Self { + x, + y, + z, + _marker: std::marker::PhantomData, + } + } + pub fn double>(&mut self, cs: &mut CS) -> Self { use pairing::ff::Field; if C::a_coeff().is_zero() == false { @@ -490,6 +510,12 @@ where ((x, y), is_point_at_infty) } + + pub fn enforce_reduced>(&mut self, cs: &mut CS) { + self.x.enforce_reduced(cs); + self.y.enforce_reduced(cs); + self.z.enforce_reduced(cs); + } } impl> Selectable diff --git a/src/gadgets/curves/zeroable_affine/mod.rs b/src/gadgets/curves/zeroable_affine/mod.rs index 3d002f4..0b57d84 100644 --- a/src/gadgets/curves/zeroable_affine/mod.rs +++ b/src/gadgets/curves/zeroable_affine/mod.rs @@ -1,73 +1,278 @@ -use std::sync::Arc; - -use pairing::GenericCurveAffine; +use self::traits::selectable::Selectable; +use super::*; use crate::{ cs::traits::cs::ConstraintSystem, gadgets::{boolean::Boolean, non_native_field::traits::NonNativeField}, + pairing::{ + self, + ff::{Field, PrimeField}, + GenericCurveAffine, + }, }; +use std::{marker::PhantomData, sync::Arc}; -use super::*; - -pub struct ZeroableAffinePoint> +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct ZeroableAffinePoint where - C::Base: pairing::ff::PrimeField, + F: SmallField, + GC: GenericCurveAffine, + NF: NonNativeField, + GC::Base: pairing::ff::PrimeField, { - pub x: NN, - pub y: NN, - pub is_zero: Boolean, - pub _marker: std::marker::PhantomData, + x: NF, + y: NF, + pub is_infinity: Boolean, + pub _marker: PhantomData, } -// we only need add/sub/double/negate Mul is implemented by naive double-and-add, and we can have special -// mul that will multiply by an element of scalar field, where zeroness-exception can only happen once. - -// We also create decompress function for convenience - -impl> - ZeroableAffinePoint +impl ZeroableAffinePoint where - C::Base: pairing::ff::PrimeField, + F: SmallField, + GC: GenericCurveAffine, + NF: NonNativeField, + GC::Base: PrimeField, { - pub fn zero_point>(cs: &mut CS, params: &Arc) -> Self { - use pairing::ff::Field; - let zero_nn = NN::allocated_constant(cs, C::Base::zero(), params); - let boolean_true = Boolean::allocated_constant(cs, true); + /// Initializes a new non-infinite affine point with the specified coordinates + fn new(cs: &mut CS, x: NF, y: NF) -> Self + where + CS: ConstraintSystem, + { + Self { + x, + y, + is_infinity: Boolean::allocated_constant(cs, false), + _marker: PhantomData, + } + } + + /// Returns the x-coordinate of the point + fn x(&self) -> &NF { + &self.x + } + + /// Returns the y-coordinate of the point + fn y(&self) -> &NF { + &self.y + } + + /// Initializes the point at infinity. x and y are set to zero, and is_infinity is set to true. 
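// ---------------------------------------------------------------------------
// Aside: the projective `double` in sw_projective above (the a = 0 fast path)
// follows the complete-formula approach of https://eprint.iacr.org/2015/1060.pdf.
// Below is a minimal out-of-circuit sketch checking those doubling formulas
// against classical affine doubling, over the toy curve y^2 = x^3 + 4 mod
// 1_000_003 with base point (0, 2). The prime, the curve, and every name here
// are illustrative assumptions, not values from this crate.
fn complete_doubling_sketch() {
    const P: i128 = 1_000_003; // toy prime
    const B: i128 = 4; // curve coefficient b, so b3 = 3*b
    let md = |v: i128| v.rem_euclid(P);
    fn pow(mut b: i128, mut e: i128, p: i128) -> i128 {
        let mut r = 1;
        b = b.rem_euclid(p);
        while e > 0 {
            if e & 1 == 1 {
                r = r * b % p;
            }
            b = b * b % p;
            e >>= 1;
        }
        r
    }
    let inv = |v: i128| pow(v, P - 2, P); // Fermat inversion

    // Affine point (0, 2) lifted to projective (X : Y : Z) = (0 : 2 : 1).
    let (x, y, z) = (0i128, 2i128, 1i128);
    let b3 = 3 * B;

    // Complete doubling for y^2 = x^3 + b (the a = 0 case).
    let t0 = md(y * y);
    let mut z3 = md(t0 + t0);
    z3 = md(z3 + z3);
    z3 = md(z3 + z3); // 8*Y^2
    let t1 = md(y * z);
    let mut t2 = md(z * z);
    t2 = md(b3 * t2); // 3b*Z^2
    let mut x3 = md(t2 * z3);
    let mut y3 = md(t0 + t2);
    z3 = md(t1 * z3); // 8*Y^3 for Z = 1
    let t6 = md(t2 + t2);
    t2 = md(t6 + t2); // 9b for Z = 1
    let t0r = md(t0 - t2); // Y^2 - 9b
    y3 = md(t0r * y3);
    y3 = md(x3 + y3);
    let t7 = md(x * y);
    x3 = md(t0r * t7);
    x3 = md(x3 + x3); // 2*X*Y*(Y^2 - 9b)

    // Classical affine doubling: s = 3x^2 / (2y), x' = s^2 - 2x, y' = s(x - x') - y.
    let s = md(3 * x * x) * inv(md(2 * y)) % P;
    let ax = md(s * s - 2 * x);
    let ay = md(s * md(x - ax) - y);

    // De-homogenize the projective result and compare.
    assert_eq!(md(x3 * inv(z3)), ax);
    assert_eq!(md(y3 * inv(z3)), ay);
}
// Both asserts pass, matching the de-homogenized projective result with the
// affine one.
// ---------------------------------------------------------------------------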
+    fn zero_point<CS>(cs: &mut CS, params: &Arc<NF::Params>) -> Self
+    where
+        CS: ConstraintSystem<F>,
+    {
+        let zero_nf = NF::allocated_constant(cs, GC::Base::zero(), params);

         Self {
-            x: zero_nn.clone(),
-            y: zero_nn,
-            is_zero: boolean_true,
-            _marker: std::marker::PhantomData,
+            x: zero_nf.clone(),
+            y: zero_nf,
+            is_infinity: Boolean::allocated_constant(cs, true),
+            _marker: PhantomData,
+        }
+    }
+
+    /// Multiplies the affine point by a scalar using basic double-and-add:
+    /// one conditional addition and one doubling per scalar bit.
+    fn mul<CS>(&mut self, cs: &mut CS, scalar: &GC::Base) -> Self
+    where
+        CS: ConstraintSystem<F>,
+    {
+        let params = self.x.get_params().clone();
+        let mut result = Self::zero_point(cs, &params);
+        let mut temp = self.clone();
+        let zero = Self::zero_point(cs, &params);
+
+        // Convert the scalar to bits, LSB first: `into_repr().as_ref()` yields
+        // little-endian u64 limbs, so walk limbs and bits in ascending order.
+        let scalar_bits = scalar
+            .into_repr()
+            .as_ref()
+            .iter()
+            .flat_map(|limb| (0..64).map(move |i| (limb >> i) & 1 == 1))
+            .collect::<Vec<_>>();
+
+        for bit in scalar_bits {
+            let bit_is_one = Boolean::allocated_constant(cs, bit);
+            let mut point_to_add = Self::conditionally_select(cs, bit_is_one, &temp, &zero);
+
+            result = result.add_unequal_x(cs, &mut point_to_add);
+            temp.double(cs);
+        }
+
+        result
+    }
+
+    /// Doubles the point X (that is, finds 2X = X + X)
+    fn double<CS>(&mut self, cs: &mut CS) -> Self
+    where
+        CS: ConstraintSystem<F>,
+    {
+        // Validating that y1 is not zero
+        let is_zero = self.y.is_zero(cs);
+        let boolean_false = Boolean::allocated_constant(cs, false);
+        Boolean::enforce_equal(cs, &is_zero, &boolean_false);
+
+        // Algorithm for doubling a point (x1, y1):
+        // First, find slope = (3 * x1^2 + a) / (2 * y1)
+        // Then, find x3 = slope^2 - 2 * x1 and y3 = slope * (x1 - x3) - y1
+
+        // Getting parameter a
+        let params = self.x.get_params().clone();
+        let a = GC::a_coeff();
+        let mut a_nf = NF::allocated_constant(cs, a, &params);
+
+        // Calculating the numerator
+        let mut numerator = self.x.clone().square(cs);
+        // Multiplying by 3
+        let mut initial_numerator = numerator.clone();
+        numerator = numerator.double(cs);
+        numerator = numerator.add(cs, &mut initial_numerator);
+        // Adding a
+        numerator = numerator.add(cs, &mut a_nf);
+
+        // Calculating the denominator
+        let mut denominator = self.y.clone();
+        // Multiplying by 2
+        denominator = denominator.double(cs);
+
+        // Calculating the slope
+        let mut slope = numerator.div_unchecked(cs, &mut denominator);
+
+        // Finding x3
+        let mut x = slope.clone().square(cs);
+        x = x.sub(cs, &mut self.x);
+        x = x.sub(cs, &mut self.x);
+
+        // Finding y3
+        let mut y = self.x.sub(cs, &mut x);
+        y = slope.mul(cs, &mut y);
+        y = y.sub(cs, &mut self.y);
+
+        self.x = x;
+        self.y = y;
+        Self {
+            x: self.x.clone(),
+            y: self.y.clone(),
+            is_infinity: self.is_infinity,
+            _marker: PhantomData,
+        }
+    }
+
+    /// Negates the point by negating the y coordinate
+    fn negate<CS>(&mut self, cs: &mut CS) -> Self
+    where
+        CS: ConstraintSystem<F>,
+    {
+        self.y = self.y.negated(cs);
+
+        Self {
+            x: self.x.clone(),
+            y: self.y.clone(),
+            is_infinity: self.is_infinity,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<F, GC, NF> ZeroableAffinePoint<F, GC, NF>
+where
+    F: SmallField,
+    GC: GenericCurveAffine,
+    NF: NonNativeField<F, GC::Base>,
+    GC::Base: PrimeField,
+{
+    /// Returns a boolean that is true if the x coordinates of the two points are equal.
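// ---------------------------------------------------------------------------
// The `mul` above is plain LSB-first double-and-add. A tiny sketch of the same
// control flow over u64, with addition standing in for the group law; names
// here are illustrative only.
fn double_and_add_sketch(point: u64, scalar: u64) -> u64 {
    let mut result = 0u64; // group identity ("point at infinity")
    let mut temp = point; // invariant: temp = 2^i * point at bit i
    for i in 0..64 {
        if (scalar >> i) & 1 == 1 {
            result = result.wrapping_add(temp); // "conditional add"
        }
        temp = temp.wrapping_add(temp); // "double"
    }
    result // point * scalar (mod 2^64)
}
// For example, double_and_add_sketch(7, 13) == 91, since 13 = 1 + 4 + 8 and
// 7 + 4*7 + 8*7 = 91.
// ---------------------------------------------------------------------------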
+ pub fn same_x(&mut self, cs: &mut CS, other: &mut Self) -> Boolean + where + CS: ConstraintSystem, + { self.x.equals(cs, &mut other.x) } - pub fn same_y>(&mut self, cs: &mut CS, other: &mut Self) -> Boolean { + /// Returns a boolean that is true if the y coordinates of the two points are equal. + pub fn same_y(&mut self, cs: &mut CS, other: &mut Self) -> Boolean + where + CS: ConstraintSystem, + { self.y.equals(cs, &mut other.y) } - #[allow(unused_assignments)] - pub fn add_unequal>(&mut self, cs: &mut CS, other: &mut Self) -> Self { + /// Adds two affine points elementwise. + pub fn elementwise_add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.x = self.x.add(cs, &mut other.x); + self.y = self.y.add(cs, &mut other.y); + Self { + x: self.x.clone(), + y: self.y.clone(), + is_infinity: self.is_infinity, + _marker: PhantomData, + } + } + + /// Adds two points with unequal x coordinates. If the x coordinates are equal, the result is undefined + /// and therefore the panic is raised. + pub fn add_unequal_x(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + // Verify that the x coordinates are not equal let same_x = self.same_x(cs, other); let boolean_false = Boolean::allocated_constant(cs, false); Boolean::enforce_equal(cs, &same_x, &boolean_false); - let mut divisor = self.x.sub(cs, &mut other.x); - let mut numerator = self.y.sub(cs, &mut other.y); - let mut slope = numerator.div_unchecked(cs, &mut divisor); - let mut x2 = slope.clone(); - x2 = x2.mul(cs, &mut slope); - let mut tmp = self.x.add(cs, &mut other.x); - x2 = x2.sub(cs, &mut tmp); + // Algorithm for having two points (x1, y1) and (x2, y2) and adding them together: + // First, finding slope = (y2 - y1) / (x2 - x1) + // Then, finding x3 = slope^2 - x1 - x2 and y3 = slope * (x1 - x3) - y1 + let mut dx = self.x.sub(cs, &mut other.x); + let mut dy = self.y.sub(cs, &mut other.y); + // slope = dy / dx and we do not care whether dx is zero or not since we have already checked that + let mut slope = dy.div_unchecked(cs, &mut dx); + + // x3 = slope^2 - x1 - x2 + let mut x = slope.clone().square(cs); + x = x.sub(cs, &mut self.x); + x = x.sub(cs, &mut other.x); - let mut tmp = self.x.sub(cs, &mut x2); - let mut y2 = slope.mul(cs, &mut tmp); - y2 = y2.add(cs, &mut self.y); + // y3 = slope * (x1 - x3) - y1 + let mut y = self.x.sub(cs, &mut x); + y = slope.mul(cs, &mut y); + y = y.sub(cs, &mut self.y); + + self.x = x; + self.y = y; + Self { + x: self.x.clone(), + y: self.y.clone(), + is_infinity: self.is_infinity, + _marker: PhantomData, + } + } +} - todo!() +impl> Selectable + for ZeroableAffinePoint +where + C::Base: pairing::ff::PrimeField, +{ + const SUPPORTS_PARALLEL_SELECT: bool = false; + + fn conditionally_select>( + cs: &mut CS, + flag: Boolean, + a: &Self, + b: &Self, + ) -> Self { + let x = NN::conditionally_select(cs, flag, &a.x, &b.x); + let y = NN::conditionally_select(cs, flag, &a.y, &b.y); + let is_infinity = Boolean::conditionally_select(cs, flag, &a.is_infinity, &b.is_infinity); + + Self { + x, + y, + is_infinity, + _marker: std::marker::PhantomData, + } } } diff --git a/src/gadgets/mod.rs b/src/gadgets/mod.rs index 7c3b400..992853e 100644 --- a/src/gadgets/mod.rs +++ b/src/gadgets/mod.rs @@ -16,10 +16,14 @@ pub mod recursion; pub mod round_function; pub mod sha256; pub mod tables; +pub mod tower_extension; pub mod traits; +pub mod u1024; pub mod u16; pub mod u160; +pub mod u2048; pub mod u256; pub mod u32; +pub mod u4096; pub mod u512; pub mod 
u8; diff --git a/src/gadgets/non_native_field/implementations/impl_traits.rs b/src/gadgets/non_native_field/implementations/impl_traits.rs index 51ba091..2917f9a 100644 --- a/src/gadgets/non_native_field/implementations/impl_traits.rs +++ b/src/gadgets/non_native_field/implementations/impl_traits.rs @@ -42,6 +42,9 @@ where fn enforce_reduced>(&mut self, cs: &mut CS) { NonNativeFieldOverU16::::enforce_reduced(self, cs) } + fn enforce_equal>(cs: &mut CS, a: &Self, b: &Self) { + NonNativeFieldOverU16::::enforce_equal(cs, a, b) + } fn normalize>(&mut self, cs: &mut CS) { NonNativeFieldOverU16::::normalize(self, cs) } @@ -94,12 +97,12 @@ where NonNativeFieldOverU16::::div_unchecked(self, cs, other) } #[must_use] - fn allocate_inverse_or_zero>(&self, _cs: &mut CS) -> Self { - todo!() + fn allocate_inverse_or_zero>(&self, cs: &mut CS) -> Self { + NonNativeFieldOverU16::::allocate_inverse_or_zero(&self, cs) } #[must_use] - fn inverse_unchecked>(&mut self, _cs: &mut CS) -> Self { - todo!() + fn inverse_unchecked>(&mut self, cs: &mut CS) -> Self { + NonNativeFieldOverU16::::inverse_unchecked(self, cs) } #[must_use] fn is_zero>(&mut self, cs: &mut CS) -> Boolean { diff --git a/src/gadgets/non_native_field/implementations/implementation_u16.rs b/src/gadgets/non_native_field/implementations/implementation_u16.rs index f29509b..3edcb63 100644 --- a/src/gadgets/non_native_field/implementations/implementation_u16.rs +++ b/src/gadgets/non_native_field/implementations/implementation_u16.rs @@ -1,4 +1,7 @@ use crypto_bigint::CheckedMul; +use serde::de::Visitor; +use serde::{de, Deserialize, Deserializer, Serialize}; +use std::fmt; use crate::cs::gates::{ ConstantAllocatableCS, DotProductGate, FmaGateInBaseFieldWithoutConstant, UIntXAddGate, @@ -6,7 +9,7 @@ use crate::cs::gates::{ use crate::cs::traits::cs::DstBuffer; use crate::gadgets::boolean::Boolean; use crate::gadgets::num::Num; -use crate::gadgets::traits::allocatable::CSAllocatable; +use crate::gadgets::traits::allocatable::{CSAllocatable, CSPlaceholder}; use crate::gadgets::traits::castable::WitnessCastable; use crate::gadgets::traits::selectable::Selectable; use crate::gadgets::traits::witnessable::{CSWitnessable, WitnessHookable}; @@ -126,6 +129,25 @@ where self.tracker.max_moduluses = 1; } + pub fn enforce_equal>(cs: &mut CS, a: &Self, b: &Self) { + let mut a = a.clone(); + let mut b = b.clone(); + + a.normalize(cs); + b.normalize(cs); + + if ::DebugConfig::PERFORM_RUNTIME_ASSERTS { + assert_eq!( + a.non_zero_limbs, b.non_zero_limbs, + "enforce equal failed: non_zero_limbs divergence" + ); + } + + for (a_el, b_el) in a.limbs.iter().zip(b.limbs.iter()) { + Num::enforce_equal(cs, &Num::from_variable(*a_el), &Num::from_variable(*b_el)); + } + } + pub fn normalize>(&mut self, cs: &mut CS) where [(); N + 1]:, @@ -258,6 +280,7 @@ where } } + assert!(self.tracker.max_moduluses <= self.params.max_mods_to_fit); new } @@ -334,6 +357,7 @@ where } } + assert!(self.tracker.max_moduluses <= self.params.max_mods_to_fit); new } @@ -713,6 +737,7 @@ where // enforce that r is canonical new.enforce_reduced(cs); + assert!(self.tracker.max_moduluses <= self.params.max_mods_to_fit); new } @@ -764,7 +789,9 @@ where let new = Self { limbs, non_zero_limbs: used_words, - tracker: OverflowTracker { max_moduluses: 2 }, // NOTE: if self == 0, then limbs will be == modulus, so use 2 + tracker: OverflowTracker { + max_moduluses: std::cmp::max(2, self.tracker.max_moduluses), + }, // NOTE: if self == 0, then limbs will be == modulus, so use 2 form: RepresentationForm::Normalized, 
params: self.params.clone(), _marker: std::marker::PhantomData, @@ -858,6 +885,7 @@ where } } + assert!(self.tracker.max_moduluses <= self.params.max_mods_to_fit); new } @@ -1032,14 +1060,106 @@ impl CSAllocatable } } +impl CSPlaceholder + for NonNativeFieldOverU16 +{ + fn placeholder>(cs: &mut CS) -> Self { + let variable = Variable::placeholder(); + + Self { + limbs: [variable; N], + non_zero_limbs: 0, + tracker: OverflowTracker { max_moduluses: 0 }, + form: RepresentationForm::Normalized, + params: Arc::new(NonNativeFieldOverU16Params::placeholder(cs)), + _marker: std::marker::PhantomData, + } + } +} + +impl CircuitVarLengthEncodable + for NonNativeFieldOverU16 +{ + fn encoding_length(&self) -> usize { + N + } + + fn encode_to_buffer>(&self, _cs: &mut CS, dst: &mut Vec) { + dst.extend_from_slice(self.limbs.as_slice()) + } +} + // We need this to ensure no conflicting implementations without negative impls -#[derive(Derivative)] -#[derivative(Clone, Copy, Debug, Hash)] +#[derive(Derivative, Serialize, PartialEq)] +#[derivative(Clone, Copy, Debug, Hash, Eq)] pub struct FFProxyValue { value: T, } +// Implement custom Deserialize, because we cannot derive: +// PrimeField inherits only DeserializeOwned. +impl<'de, T, const N: usize> Deserialize<'de> for FFProxyValue +where + T: pairing::ff::PrimeField, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct FFProxyValueVisitor + where + T: pairing::ff::PrimeField, + { + marker: std::marker::PhantomData, + } + + impl<'de, T, const N: usize> Visitor<'de> for FFProxyValueVisitor + where + T: pairing::ff::PrimeField + serde::de::DeserializeOwned, + { + type Value = FFProxyValue; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a valid PrimeField value") + } + + fn visit_map(self, mut map: M) -> Result + where + M: de::MapAccess<'de>, + { + let mut value = None; + + while let Some(key) = map.next_key()? 
{ + match key { + "value" => { + if value.is_some() { + return Err(de::Error::duplicate_field("value")); + } + value = Some(map.next_value()?); + } + _ => { + return Err(de::Error::unknown_field(key, FIELDS)); + } + } + } + + let value = value.ok_or_else(|| de::Error::missing_field("value"))?; + Ok(FFProxyValue { value }) + } + } + + const FIELDS: &[&str] = &["value"]; + deserializer.deserialize_struct( + "FFProxyValue", + FIELDS, + FFProxyValueVisitor { + marker: std::marker::PhantomData, + }, + ) + } +} + impl FFProxyValue { pub const fn get(&self) -> T { self.value @@ -1080,6 +1200,7 @@ impl WitnessCastable< } use crate::gadgets::traits::castable::Convertor; +use crate::gadgets::traits::encodable::CircuitVarLengthEncodable; impl CSWitnessable for NonNativeFieldOverU16 diff --git a/src/gadgets/non_native_field/implementations/mod.rs b/src/gadgets/non_native_field/implementations/mod.rs index 9c38036..16a322f 100644 --- a/src/gadgets/non_native_field/implementations/mod.rs +++ b/src/gadgets/non_native_field/implementations/mod.rs @@ -6,6 +6,7 @@ use super::*; use crate::config::*; use crate::cs::gates::ConstantAllocatableCS; use crate::cs::traits::cs::ConstraintSystem; +use crate::gadgets::traits::allocatable::CSPlaceholder; use crate::gadgets::u16::UInt16; use crate::{cs::Variable, gadgets::u8::get_8_by_8_range_check_table}; use crypto_bigint::{CheckedMul, NonZero, Zero, U1024}; @@ -177,6 +178,24 @@ impl NonNativeFieldOverU16Params CSPlaceholder + for NonNativeFieldOverU16Params +{ + fn placeholder>(_cs: &mut CS) -> Self { + Self { + modulus: [0u16; N], + modulus_bits: 0, + modulus_limbs: 0, + modulus_u1024: NonZero::::new(U1024::ONE).expect("ONE is non-zero"), + max_product_before_reduction: U1024::ZERO, + max_mods_to_fit: 0, + max_mods_in_allocation: 0, + max_mods_before_multiplication: 0, + _marker: std::marker::PhantomData, + } + } +} + #[derive(Derivative)] #[derivative(Clone, Copy, Debug, PartialEq, Eq)] pub enum RepresentationForm { diff --git a/src/gadgets/non_native_field/traits/mod.rs b/src/gadgets/non_native_field/traits/mod.rs index b4cfc94..fb751d9 100644 --- a/src/gadgets/non_native_field/traits/mod.rs +++ b/src/gadgets/non_native_field/traits/mod.rs @@ -1,6 +1,9 @@ use super::*; + use crate::gadgets::boolean::Boolean; use crate::{cs::traits::cs::ConstraintSystem, gadgets::traits::witnessable::WitnessHookable}; + +use pairing::GenericCurveAffine; use std::sync::Arc; pub trait NonNativeField: @@ -26,6 +29,7 @@ pub trait NonNativeField: ) -> Self; fn enforce_reduced>(&mut self, cs: &mut CS); + fn enforce_equal>(cs: &mut CS, a: &Self, b: &Self); fn normalize>(&mut self, cs: &mut CS); fn add>(&mut self, cs: &mut CS, other: &mut Self) -> Self; @@ -79,3 +83,16 @@ pub trait NonNativeField: b: &Self, ) -> Self; } + +pub trait CurveCompatibleNonNativeField< + F: SmallField, + T: pairing::ff::PrimeField, + C: GenericCurveAffine, +>: NonNativeField +{ + fn from_curve_base>( + cs: &mut CS, + point: &C::Base, + params: &Arc, + ) -> Self; +} diff --git a/src/gadgets/tower_extension/algebraic_torus.rs b/src/gadgets/tower_extension/algebraic_torus.rs new file mode 100644 index 0000000..bf30501 --- /dev/null +++ b/src/gadgets/tower_extension/algebraic_torus.rs @@ -0,0 +1,480 @@ +use pairing::{ff::PrimeField, BitIterator}; +use std::sync::Arc; + +use super::{fq12::Fq12, fq2::Fq2, fq6::Fq6, params::TorusExtension12Params}; +use crate::gadgets::non_native_field::implementations::NonNativeFieldOverU16; +use crate::gadgets::tower_extension::params::{Extension2Params, Extension6Params}; +use 
crate::gadgets::traits::witnessable::WitnessHookable; +use crate::{ + cs::traits::cs::ConstraintSystem, + field::SmallField, + gadgets::{ + boolean::Boolean, + non_native_field::traits::NonNativeField, + traits::{hardexp_compatible::HardexpCompatible, selectable::Selectable}, + }, +}; + +/// [`TorusWrapper`] is an algebraic compression of the `Fq12` element via underlying encoding of `Fq6`. +/// In compressed form operations over Fq12 are less expensive. +/// +/// The implementation is based on the following paper: +/// https://eprint.iacr.org/2022/1162.pdf. +#[derive(Clone, Debug, Copy)] +pub struct TorusWrapper +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: TorusExtension12Params, +{ + pub encoding: Fq6, +} + +// TODO: Probably, this could be implemented generally for any two Fqk and Fq(k/2) elements. +impl TorusWrapper, P> +where + F: SmallField, + T: PrimeField, + P: TorusExtension12Params, + [(); N + 1]:, +{ + /// Creates a new instance of the [`TorusWrapper`] with the given encoding. + pub fn new(encoding: Fq6, P::Ex6>) -> Self { + Self { encoding } + } + + pub fn one( + cs: &mut CS, + params: &Arc< as NonNativeField>::Params>, + ) -> Self + where + CS: ConstraintSystem, + { + let encoding = Fq6::zero(cs, params); + Self::new(encoding) + } + + /// Returns the underlying parameters of the encoded `Fq6` element. + pub fn get_params( + &self, + ) -> &Arc< as NonNativeField>::Params> { + self.encoding.get_params() + } + + /// Normalizes the encoding of the `Fq6` element. + pub fn normalize(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.encoding.normalize(cs); + } + + /// Returns an instance if `flag` is `true`, otherwise returns a zero element. + pub fn mask(&mut self, cs: &mut CS, flag: Boolean) -> Self + where + CS: ConstraintSystem, + { + let zero = Fq6::zero(cs, self.get_params()); + let new_encoding = + , P::Ex6>>::conditionally_select( + cs, + flag, + &self.encoding, + &zero, + ); + + Self::new(new_encoding) + } + + /// Compresses the `Fq12` element `c0 + c1*w` to the Torus (`T2`) element. + /// + /// Uses the formula `m <- (1 + c0) / c1` to compress the `Fq12` element with the additional + /// check for the exceptional case when `c1` is zero. + /// + /// If `SAFE=false`, then the function will not check for the exceptional case when `c1` is zero. + pub fn compress( + cs: &mut CS, + f: &mut Fq12, P>, + ) -> Self + where + CS: ConstraintSystem, + { + let params = f.get_params(); + let mut c0 = f.c0.clone(); + let mut c1 = f.c1.clone(); + + let mut encoding = if SAFE { + // Preparing flags for exception cases + let is_exceptional = Fq6::is_zero(&mut c1, cs); + let mut c0_is_one = Fq6::one(cs, params); + let c0_is_one = c0_is_one.equals(cs, &mut c0); + let mut is_exceptional = Fq6::from_boolean(cs, is_exceptional, params); + let mut c0_is_one = Fq6::from_boolean(cs, c0_is_one, params); + + // m <- (1 + c0) / c1 if c1 is non-zero. However, to account for the case where + // c1 is zero, we set numerator to 1 + c0 - 2*c0_is_one and denominator to c1 + is_exceptional. 
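// In fact, for a unit-norm input (the torus setting, conj(f) * f = 1, i.e.
// c0^2 - gamma * c1^2 = 1), c1 = 0 forces c0 = 1 or c0 = -1; in both cases the
// masked numerator 1 + c0 - 2*c0_is_one evaluates to 0 while the denominator
// becomes 1, so every exceptional input lands on the fixed encoding m = 0.
// Conversely, c0 = 1 with c1 != 0 cannot occur for unit-norm inputs, so the
// c0_is_one correction never alters a well-formed value.
//
// A minimal out-of-circuit sketch of the compress/decompress round trip,
// assuming a toy quadratic extension Fp[w]/(w^2 - g) in place of
// Fq12 = Fq6[w]/(w^2 - gamma); the prime, g, and all names are illustrative.
fn torus_round_trip_sketch() {
    const P: i64 = 103;
    const G: i64 = 5; // w^2 = g, with g a quadratic non-residue mod 103
    let md = |v: i64| v.rem_euclid(P);
    fn pow(mut b: i64, mut e: i64, p: i64) -> i64 {
        let mut r = 1;
        b = b.rem_euclid(p);
        while e > 0 {
            if e & 1 == 1 {
                r = r * b % p;
            }
            b = b * b % p;
            e >>= 1;
        }
        r
    }
    let inv = |v: i64| pow(v, P - 2, P);

    // Arithmetic in Fp[w]/(w^2 - g): an element (c0, c1) stands for c0 + c1*w.
    let mul = |a: (i64, i64), b: (i64, i64)| {
        (md(a.0 * b.0 + G * a.1 * b.1), md(a.0 * b.1 + a.1 * b.0))
    };
    let conj = |a: (i64, i64)| (a.0, md(-a.1));
    let inv_ext = |a: (i64, i64)| {
        // 1/(c0 + c1*w) = (c0 - c1*w) / (c0^2 - g*c1^2)
        let n = inv(md(a.0 * a.0 - G * a.1 * a.1));
        (md(a.0 * n), md(-a.1 * n))
    };

    // A unit-norm element: f = a / conj(a) always satisfies c0^2 - g*c1^2 = 1.
    let a = (17, 42);
    let f = mul(a, inv_ext(conj(a)));
    assert_eq!(md(f.0 * f.0 - G * f.1 * f.1), 1);

    // Compress: m = (1 + c0) / c1 (the non-exceptional path, c1 != 0 here).
    let m = md((1 + f.0) * inv(f.1));

    // Decompress: (m + w) / (m - w), computed in the extension; round-trips.
    let back = mul((m, 1), inv_ext((m, md(-1))));
    assert_eq!(back, f);
}
// ---------------------------------------------------------------------------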
+ let mut numerator = Fq6::one(cs, params); + let mut numerator = numerator.add(cs, &mut c0); + let mut c0_is_one_doubled = c0_is_one.double(cs); + let mut numerator = numerator.sub(cs, &mut c0_is_one_doubled); + let mut denominator = f.c1.add(cs, &mut is_exceptional); + denominator.normalize(cs); + + let encoding = numerator.div(cs, &mut denominator); + encoding + } else { + // Verifying that c1 is non-zero + let boolean_false = Boolean::allocated_constant(cs, false); + let c1_is_zero = c1.is_zero(cs); + Boolean::enforce_equal(cs, &c1_is_zero, &boolean_false); + + // m <- (1 + c0) / c1 + let mut encoding = Fq6::one(cs, params); + let mut encoding = encoding.add(cs, &mut f.c0); + let encoding = encoding.div(cs, &mut f.c1); + + encoding + }; + + encoding.normalize(cs); + Self::new(encoding) + } + + /// Decompresses the Torus (`T2`) element `g` back to the `Fq12` element by using the formula + /// + /// `zeta^{-1} = (g + w)/(g - w)` + pub fn decompress(&self, cs: &mut CS) -> Fq12, P> + where + CS: ConstraintSystem, + { + let params = self.get_params(); + let mut one = Fq6::one(cs, params); + let negative_one = one.negated(cs); + + // Since `g` is a pure `Fq6` element, `g+w` is just an `Fq12` element with `c0 = g` and `c1 = 1`. + let mut numerator = Fq12::new(self.encoding.clone(), one); + // Since `g` is a pure `Fq6` element, `g-w` is just an `Fq12` element with `c0 = g` and `c1 = -1`. + let mut denominator = Fq12::new(self.encoding.clone(), negative_one); + + // zeta^{-1} = (g + w)/(g - w) + let decompressed = numerator.div(cs, &mut denominator); + + decompressed + } + + /// Computes the inverse of the Torus element using the formula g -> -g. + pub fn inverse(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let encoding = self.encoding.negated(cs); + Self::new(encoding) + } + + /// Computes the conjugate of the Torus element using the formula g -> -g. + /// Note that the conjugate of the Torus element is the same as its inverse. + pub fn conjugate(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.inverse(cs) + } + + /// Computes the Frobenius map of the Torus element with the given power using the formula + /// + /// frob_map(g, i) = g^(p^i) / \gamma^{(p^i-1)/2} + pub fn frobenius_map(&mut self, cs: &mut CS, power: usize) -> Self + where + CS: ConstraintSystem, + { + // We compute frobenius map unconstrained: + let witness_self = self.encoding_to_witness(cs); + let witness_frob = P::torus_frobenius_map(witness_self, power); + + // Now, we constraint the frobenius map with a cheaper version: + // Suppose r = f(g,i) / (f(w,i) * w^{-1}). Then, we require: + // f(g, i) = f(w, i) * (w^{-1}) * r + // Notice that `f(w,i)*w^{-1}` must yield an element + // from Fq6. Thus, we need one frobenius map + mul over Fq6, and + // one frobenius map + mul over Fq12. + let params = self.encoding.get_params(); + let mut encoding_new = Fq6::allocate_from_witness(cs, witness_frob, params); + + // rhs = f(w, i) * (w^{-1}) * r + // First, allocating the w^{-1} + let w_inverse = P::get_w_inverse_coeffs_c5(); + let mut w_inverse: Fq2<_, _, _, >::Ex2> = Fq2::constant(cs, w_inverse, params); + + let mut rhs: Fq12, P> = Fq12::one_imaginary(cs, params); + rhs = rhs.frobenius_map(cs, power); + rhs = rhs.mul_by_c5(cs, &mut w_inverse); + + // Asserting that c1 is zero since rhs must be a pure Fq6 element at this point. 
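// (For reference: w^(p^i) is always a constant multiple of w, so
// f(w, i) * w^{-1} lies in Fq6; multiplying by the constant c5*v^2*w uses
// w^2 * v^2 = v^3 = \xi to fold the result back into Fq6. The c1 == 0 check
// below enforces exactly this.)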
+ let boolean_true = Boolean::allocated_constant(cs, true); + let c1_is_zero = rhs.c1.is_zero(cs); + Boolean::enforce_equal(cs, &c1_is_zero, &boolean_true); + let mut rhs = rhs.c0.clone(); + + // Finishing rhs by multiplying by result + rhs = rhs.mul(cs, &mut encoding_new); + + // lhs = f(g, i) + let mut lhs = self.encoding.clone(); + lhs = lhs.frobenius_map(cs, power); + + // Asserting that lhs == rhs + Fq6::enforce_equal(cs, &lhs, &rhs); + + Self::new(encoding_new) + } + + /// Computes the product of two Torus elements using the formula + /// + /// `(g, g') -> (g * g' + \gamma) / (g + g')` + /// + /// The formula handles the exceptional case when `g + g'` is zero. + pub fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + // We compute multiplication unconstrained: + let witness_self = self.encoding_to_witness(cs); + let witness_other = other.encoding_to_witness(cs); + let witness_mul = P::torus_mul(witness_self, witness_other); + + // Now, we constraint the multiplication with a cheaper version: + // g'' = (g * g' + \gamma) / (g + g') is equivalent to + // g'' * (g + g') = (g * g' + \gamma) + // Here, g'' is the new encoding. + let params = self.encoding.get_params(); + let encoding_new = Fq6::allocate_from_witness(cs, witness_mul, params); + + // lhs = g'' * (g + g') + let mut sum = self.encoding.clone().add(cs, &mut other.encoding); + let lhs = encoding_new.clone().mul(cs, &mut sum); + + // rhs = {(g + g') == 0} ? zero : (g * g' + \gamma) + let mut gamma = Fq6::gamma(cs, params); + let mut rhs = self.encoding.clone().mul(cs, &mut other.encoding); + let rhs = rhs.add(cs, &mut gamma); + + let zero = Fq6::zero(cs, params); + let is_zero_sum = sum.is_zero(cs); + + let rhs = , P::Ex6>>::conditionally_select( + cs, + is_zero_sum, + &zero, + &rhs, + ); + + // Enforce equality + Fq6::enforce_equal(cs, &lhs, &rhs); + + Self::new(encoding_new) + } + + pub fn pow_naf_decomposition>( + &mut self, + cs: &mut CS, + decomposition: S, + ) -> Self + where + CS: ConstraintSystem, + { + // Intializing the result with 1 + let mut result = Self::one(cs, self.get_params()); + + // Preparing self and self inverse in advance + let mut self_cloned = self.clone(); + let mut self_inverse = self.conjugate(cs); + + for bit in decomposition.as_ref().iter() { + result = result.square(cs); + + // If bit is 1, multiply by initial torus + let bit_is_one = Boolean::allocated_constant(cs, *bit == 1); + let result_times_self = result.mul(cs, &mut self_cloned); + result = Self::conditionally_select(cs, bit_is_one, &result_times_self, &result); + + // If bit is -1, multiply by inverse initial torus + let bit_is_minus_one = Boolean::allocated_constant(cs, *bit == -1); + let result_times_self_inverse = result.mul(cs, &mut self_inverse); + result = Self::conditionally_select( + cs, + bit_is_minus_one, + &result_times_self_inverse, + &result, + ); + } + + result + } + + pub fn pow_u32>(&mut self, cs: &mut CS, exponent: S) -> Self + where + CS: ConstraintSystem, + { + let mut result = Self::one(cs, self.get_params()); + let mut found_one = false; + + for bit in BitIterator::new(exponent) { + let apply_squaring = Boolean::allocated_constant(cs, found_one); + let result_squared = result.square(cs); + result = Self::conditionally_select(cs, apply_squaring, &result_squared, &result); + if !found_one { + found_one = bit; + } + + let result_multiplied = result.mul(cs, self); + let apply_multiplication = Boolean::allocated_constant(cs, bit); + result = + Self::conditionally_select(cs, 
apply_multiplication, &result_multiplied, &result); + + result.normalize(cs); + } + + result + } + + pub fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + // We compute squaring unconstrained: + let witness = self.encoding_to_witness(cs); + let witness_squared = P::torus_square(witness); + + // Now, we constraint squaring with a cheaper version: + // g' = (1/2)(g + \gamma/g) is equivalent to + // (2g' - g)*g = gamma + let params = self.encoding.get_params(); + let encoding_new = Fq6::allocate_from_witness(cs, witness_squared, params); + + // lhs = (2g' - g)*g + let mut lhs = encoding_new.clone(); + lhs = lhs.double(cs); + lhs = lhs.sub(cs, &mut self.encoding.clone()); + let lhs = self.encoding.clone().mul(cs, &mut lhs); + + // rhs = g == 0 ? zero : gamma + let zero = Fq6::zero(cs, params); + let gamma = Fq6::gamma(cs, params); + let is_zero_g = self.encoding.is_zero(cs); + let rhs = , P::Ex6>>::conditionally_select( + cs, is_zero_g, &zero, &gamma, + ); + + // We can just enforce equality without subbing + Fq6::enforce_equal(cs, &lhs, &rhs); + Self::new(encoding_new) + } + + // TODO: Probably, this can be done less weirdly. + /// Converts the encoding of the `Fq6` element to the structured witness. + pub(super) fn encoding_to_witness( + &self, + cs: &mut CS, + ) -> >::Witness + where + CS: ConstraintSystem, + { + let (c0, c1, c2) = self.encoding.witness_hook(cs)().unwrap(); + + let (c0_c0, c0_c1) = c0; + let (c1_c0, c1_c1) = c1; + let (c2_c0, c2_c1) = c2; + + let (c0_c0, c0_c1) = (c0_c0.get(), c0_c1.get()); + let (c1_c0, c1_c1) = (c1_c0.get(), c1_c1.get()); + let (c2_c0, c2_c1) = (c2_c0.get(), c2_c1.get()); + + let c0 = >::Ex2::convert_to_structured_witness(c0_c0, c0_c1); + let c1 = >::Ex2::convert_to_structured_witness(c1_c0, c1_c1); + let c2 = >::Ex2::convert_to_structured_witness(c2_c0, c2_c1); + + P::Ex6::convert_to_structured_witness(c0, c1, c2) + } +} + +impl Selectable + for TorusWrapper, P> +where + F: SmallField, + T: PrimeField, + P: TorusExtension12Params, + [(); N + 1]:, +{ + fn conditionally_select(cs: &mut CS, flag: Boolean, a: &Self, b: &Self) -> Self + where + CS: ConstraintSystem, + { + let encoding = , P::Ex6>>::conditionally_select( + cs, + flag, + &a.encoding, + &b.encoding, + ); + + Self::new(encoding) + } +} + +impl HardexpCompatible + for TorusWrapper, P> +where + F: SmallField, + T: PrimeField, + P: TorusExtension12Params, + [(); N + 1]:, +{ + fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.mul(cs, other) + } + + fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.square(cs) + } + + fn conjugate(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.conjugate(cs) + } + + fn inverse(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.inverse(cs) + } + + fn frobenius_map(&mut self, cs: &mut CS, power: usize) -> Self + where + CS: ConstraintSystem, + { + self.frobenius_map(cs, power) + } + + fn pow_u32>(&mut self, cs: &mut CS, exponent: S) -> Self + where + CS: ConstraintSystem, + { + self.pow_u32(cs, exponent) + } + + fn normalize(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.normalize(cs); + } +} diff --git a/src/gadgets/tower_extension/fq12.rs b/src/gadgets/tower_extension/fq12.rs new file mode 100644 index 0000000..bc9f68a --- /dev/null +++ b/src/gadgets/tower_extension/fq12.rs @@ -0,0 +1,819 @@ +use std::sync::Arc; + +use pairing::{bn256::Fq as BN256Fq, ff::PrimeField, BitIterator}; + +use 
super::{ + fq2::Fq2, + fq6::Fq6, + params::{ + bn256::{BN256Extension12Params, BN256Extension6Params}, + Extension12Params, Extension6Params, + }, +}; + +use crate::gadgets::traits::allocatable::CSPlaceholder; +use crate::gadgets::traits::encodable::CircuitVarLengthEncodable; +use crate::{ + cs::traits::cs::ConstraintSystem, + field::SmallField, + gadgets::{ + boolean::Boolean, + non_native_field::traits::NonNativeField, + traits::{ + allocatable::CSAllocatable, selectable::Selectable, witnessable::WitnessHookable, + }, + }, +}; +use crate::{cs::Variable, gadgets::traits::hardexp_compatible::HardexpCompatible}; + +/// `Fq12` field extension implementation in the constraint system. It is implemented +/// as `Fq6[w]/(w^2-v)` where `w^6=9+u`. In other words, it is a set of +/// linear polynomials in a form `c0+c1*w`, where `c0` and `c1` are elements of `Fq6`. +/// See https://hackmd.io/@jpw/bn254#Field-extension-towers for reference. For +/// implementation reference, see https://eprint.iacr.org/2006/471.pdf. +#[derive(Clone, Debug, Copy)] +pub struct Fq12 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension12Params, +{ + pub c0: Fq6, + pub c1: Fq6, + _marker: std::marker::PhantomData<(F, T)>, +} + +impl Fq12 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension12Params, +{ + /// Creates a new `Fq12` element from two `Fq6` components. + pub fn new(c0: Fq6, c1: Fq6) -> Self { + Self { + c0, + c1, + _marker: std::marker::PhantomData::<(F, T)>, + } + } + + pub fn from_c0c3c4( + cs: &mut CS, + c0: Fq2>::Ex6 as Extension6Params>::Ex2>, + c3: Fq2>::Ex6 as Extension6Params>::Ex2>, + c4: Fq2>::Ex6 as Extension6Params>::Ex2>, + ) -> Self + where + CS: ConstraintSystem, + { + let zero = Fq2::zero(cs, c0.c0.get_params()); + let c0 = Fq6::new(c0.clone(), zero.clone(), zero.clone()); + let c1 = Fq6::new(c3.clone(), c4.clone(), zero); + + Self::new(c0, c1) + } + + pub fn pow_u32>(&mut self, cs: &mut CS, exponent: S) -> Self + where + CS: ConstraintSystem, + { + let mut result = Self::one(cs, self.c0.c0.get_params()); + let mut found_one = false; + + for i in BitIterator::new(exponent) { + let apply_squaring = Boolean::allocated_constant(cs, found_one); + let result_squared = result.square(cs); + result = Self::conditionally_select(cs, apply_squaring, &result_squared, &result); + if !found_one { + found_one = i; + } + + let result_multiplied = result.mul(cs, self); + let apply_multiplication = Boolean::allocated_constant(cs, i); + result = + Self::conditionally_select(cs, apply_multiplication, &result_multiplied, &result); + + // Normalize the result to stay in field + NonNativeField::normalize(&mut result, cs); + } + + result + } + + /// Creates a new zero `Fq12` in a form `0+0*w` + pub fn zero(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let zero = Fq6::zero(cs, params); + Self::new(zero.clone(), zero) + } + + /// Creates a unit `Fq12` in a form `1+0*w` + pub fn one(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let one = Fq6::one(cs, params); + let zero = Fq6::zero(cs, params); + Self::new(one, zero) + } + + /// Creates a unit `Fq12` in a form `0+1*w` + pub fn one_imaginary(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let one = Fq6::zero(cs, params); + let zero = Fq6::one(cs, params); + Self::new(one, zero) + } + + /// Returns true if the `Fq12` element is zero. 
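// ---------------------------------------------------------------------------
// Aside: `pow_u32` above is MSB-first square-and-multiply; because the
// exponent bits are circuit constants, branches become conditional selects,
// and `found_one` skips the useless squarings of 1 before the top set bit.
// A plain-integer analogue (all names illustrative), assuming
// base < modulus < 2^32 so the u64 products cannot overflow:
fn pow_sketch(base: u64, exponent: u64, modulus: u64) -> u64 {
    let mut result = 1u64;
    let mut found_one = false;
    for i in (0..64).rev() {
        let bit = (exponent >> i) & 1 == 1; // MSB first
        if found_one {
            result = result * result % modulus; // "select" the squared value
        }
        found_one |= bit;
        if bit {
            result = result * base % modulus; // "select" the multiplied value
        }
    }
    result
}
// For example, pow_sketch(3, 5, 1_000_000) == 243.
// ---------------------------------------------------------------------------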
+ pub fn is_zero(&mut self, cs: &mut CS) -> Boolean + where + CS: ConstraintSystem, + { + let is_c0_zero = self.c0.is_zero(cs); + let is_c1_zero = self.c1.is_zero(cs); + is_c0_zero.and(cs, is_c1_zero) + } + + /// Allocate `Fq12` tower extension element from the Witness represented in two components + /// from the `Fq6` tower extension. + pub fn constant(cs: &mut CS, wit: P::Witness, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1) = P::convert_from_structured_witness(wit); + let c0 = Fq6::constant(cs, c0, params); + let c1 = Fq6::constant(cs, c1, params); + + Self::new(c0, c1) + } + + /// Allocate `Fq12` tower extension element from the Witness represented in two components + /// from the `Fq6` tower extension. + pub fn allocate_from_witness(cs: &mut CS, wit: P::Witness, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1) = P::convert_from_structured_witness(wit); + let c0 = Fq6::allocate_from_witness(cs, c0, params); + let c1 = Fq6::allocate_from_witness(cs, c1, params); + + Self::new(c0, c1) + } + + /// Conjugates the `Fq12` element by negating the `c1` component. + pub fn conjugate(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c1 = self.c1.negated(cs); + Self::new(self.c0.clone(), c1) + } + + #[must_use] + pub fn add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.add(cs, &mut other.c0); + let c1 = self.c1.add(cs, &mut other.c1); + Self::new(c0, c1) + } + + #[must_use] + pub fn double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.double(cs); + let c1 = self.c1.double(cs); + Self::new(c0, c1) + } + + #[must_use] + pub fn negated(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.negated(cs); + let c1 = self.c1.negated(cs); + Self::new(c0, c1) + } + + #[must_use] + pub fn sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.sub(cs, &mut other.c0); + let c1 = self.c1.sub(cs, &mut other.c1); + Self::new(c0, c1) + } + + #[must_use] + pub fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let mut v0 = self.c0.mul(cs, &mut other.c0); + let mut v1 = self.c1.mul(cs, &mut other.c1); + let mut o = other.c0.add(cs, &mut other.c1); + + let mut c1 = self.c1.add(cs, &mut self.c0); + let mut c1 = c1.mul(cs, &mut o); + let mut c1 = c1.sub(cs, &mut v0); + let c1 = c1.sub(cs, &mut v1); + + let mut c0 = v1.mul_by_nonresidue(cs); + let c0 = c0.add(cs, &mut v0); + + Self::new(c0, c1) + } + + #[must_use] + pub fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let mut ab = self.c0.mul(cs, &mut self.c1); + let mut c0c1 = self.c0.add(cs, &mut self.c1); + + let mut c0 = self.c1.mul_by_nonresidue(cs); + let mut c0 = c0.add(cs, &mut self.c0); + let mut c0 = c0.mul(cs, &mut c0c1); + let mut c0 = c0.sub(cs, &mut ab); + + let c1 = ab.double(cs); + let mut ab_residue = ab.mul_by_nonresidue(cs); + let c0 = c0.sub(cs, &mut ab_residue); + + Self::new(c0, c1) + } + + pub fn mul_by_c0c1c4( + &mut self, + cs: &mut CS, + c0: &mut Fq2>::Ex6 as Extension6Params>::Ex2>, + c1: &mut Fq2>::Ex6 as Extension6Params>::Ex2>, + c4: &mut Fq2>::Ex6 as Extension6Params>::Ex2>, + ) -> Self + where + CS: ConstraintSystem, + { + let mut aa = self.c0.mul_by_c0c1(cs, c0, c1); + let mut bb = self.c1.mul_by_c1(cs, c4); + let mut o = c1.add(cs, c4); + + let mut new_c1 = self.c1.add(cs, &mut self.c0); + let mut new_c1 
= new_c1.mul_by_c0c1(cs, c0, &mut o); + let mut new_c1 = new_c1.sub(cs, &mut aa); + let new_c1 = new_c1.sub(cs, &mut bb); + + let mut new_c0 = bb.mul_by_nonresidue(cs); + let new_c0 = new_c0.add(cs, &mut aa); + + Self::new(new_c0, new_c1) + } + + /// Sparse multiplication by constants `c0` and `c3` and `c4` in the form `c0 + (c3 + c4*v)*w`. + /// See _Algorithm_ 21 from https://eprint.iacr.org/2010/354.pdf. + pub fn mul_by_c0c3c4( + &mut self, + cs: &mut CS, + c0: &mut Fq2>::Ex6 as Extension6Params>::Ex2>, + c3: &mut Fq2>::Ex6 as Extension6Params>::Ex2>, + c4: &mut Fq2>::Ex6 as Extension6Params>::Ex2>, + ) -> Self + where + CS: ConstraintSystem, + { + // Below, a0+a1*w is self b0+b1*w with b0=b00=c0 and b1=b10+b11*v=c3+c4*v + // is the element to multiply with + + // t0 <- a0*b0 + let mut t0 = self.c0.mul_by_c0(cs, c0); + // t1 <- a1*b1 + let mut t1 = self.c1.mul_by_c0c1(cs, c3, c4); + // c0 <- t0 + t1*gamma + let mut t1_gamma = t1.mul_by_nonresidue(cs); + let new_c0 = t0.add(cs, &mut t1_gamma); + // t2 <- (b0+b10)v + b11*v + 0*v^2 + let mut t2_c0 = c0.add(cs, c3); + let mut t2_c1 = c4.clone(); + // c1 <- (a0 + a1) * t2 + let mut new_c1 = self.c0.add(cs, &mut self.c1); + let mut new_c1 = new_c1.mul_by_c0c1(cs, &mut t2_c0, &mut t2_c1); + // c1 <- c1 - t0 - t1 + let mut new_c1 = new_c1.sub(cs, &mut t0); + let new_c1 = new_c1.sub(cs, &mut t1); + + Self::new(new_c0, new_c1) + } + + /// Multiplies the `Fq12` element by a constant `c5*v^2*w` represented as `Fq2`. + pub fn mul_by_c5( + &mut self, + cs: &mut CS, + c5: &mut Fq2>::Ex6 as Extension6Params>::Ex2>, + ) -> Self + where + CS: ConstraintSystem, + { + // Suppose our element is a0+a1*w. Then, + // (a0+a1*w)*c5*v^2*w = a1*c5*w^2*v^2 + a0*c5*v^2*w + // Notice that w^2*v^2 = v^3 = \xi and therefore our result + // is a1*c5*\xi + a0*c5*v^2*w + + // new_c0 <- a1*c5*\xi + let mut new_c0 = self.c1.mul_by_c0(cs, c5); + new_c0 = new_c0.mul_by_xi(cs); + + // new_c1 <- a0*c5*v^2*w + let new_c1 = self.c0.mul_by_c2(cs, c5); + + Self::new(new_c0, new_c1) + } + + /// Compute the Frobenius map - raise this element to power. 
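// Here `power` is the exponent i in the map x -> x^(p^i). Frobenius acts on
// c0 and c1 componentwise; additionally, in Fq12 = Fq6[w]/(w^2 - v) one has
// w^(p^i) = gamma_i * w for a precomputed constant gamma_i, which is why each
// Fq2 coefficient of c1 below is scaled by the same
// FROBENIUS_COEFFS_C1[power % 12] (matching the out-of-circuit Fq12
// implementation in the pairing crate).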
+ pub fn frobenius_map(&mut self, cs: &mut CS, power: usize) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.frobenius_map(cs, power); + let mut c1 = self.c1.frobenius_map(cs, power); + + let c1_c0_frobenius_constant = P::FROBENIUS_COEFFS_C1[power % 12]; + let c1_c1_frobenius_constant = P::FROBENIUS_COEFFS_C1[power % 12]; + let c1_c2_frobenius_constant = P::FROBENIUS_COEFFS_C1[power % 12]; + + let params = c1.c0.get_params(); + + let mut c1_c0_frobenius_coeff = Fq2::constant(cs, c1_c0_frobenius_constant, params); + let mut c1_c1_frobenius_coeff = Fq2::constant(cs, c1_c1_frobenius_constant, params); + let mut c1_c2_frobenius_coeff = Fq2::constant(cs, c1_c2_frobenius_constant, params); + + let c1_c0 = c1.c0.mul(cs, &mut c1_c0_frobenius_coeff); + let c1_c1 = c1.c1.mul(cs, &mut c1_c1_frobenius_coeff); + let c1_c2 = c1.c2.mul(cs, &mut c1_c2_frobenius_coeff); + + let c1 = Fq6::new(c1_c0, c1_c1, c1_c2); + + Self::new(c0, c1) + } + + pub fn inverse(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let mut c0s = self.c0.square(cs); + let mut c1s = self.c1.square(cs); + let mut c1s = c1s.mul_by_nonresidue(cs); + let mut c0s = c0s.sub(cs, &mut c1s); + + c0s.normalize(cs); + let mut t = c0s.inverse(cs); + let c0_new = t.mul(cs, &mut self.c0); + let mut c1_new = t.mul(cs, &mut self.c1); + let c1_new = c1_new.negated(cs); + + Self::new(c0_new, c1_new) + } + + pub fn div(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let mut t = other.inverse(cs); + self.mul(cs, &mut t) + } +} + +impl CSAllocatable for Fq12 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension12Params, +{ + type Witness = ( + as CSAllocatable>::Witness, + as CSAllocatable>::Witness, + ); + + #[inline(always)] + fn placeholder_witness() -> Self::Witness { + ( + as CSAllocatable>::placeholder_witness(), + as CSAllocatable>::placeholder_witness(), + ) + } + + #[inline(always)] + fn allocate_without_value(cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = as CSAllocatable>::allocate_without_value(cs); + let c1 = as CSAllocatable>::allocate_without_value(cs); + + Self::new(c0, c1) + } + + #[inline(always)] + fn allocate(cs: &mut CS, witness: Self::Witness) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1) = witness; + + let c0 = as CSAllocatable>::allocate(cs, c0); + let c1 = as CSAllocatable>::allocate(cs, c1); + + Self::new(c0, c1) + } + + #[inline(always)] + fn allocate_constant(cs: &mut CS, witness: Self::Witness) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1) = witness; + + let c0 = as CSAllocatable>::allocate_constant(cs, c0); + let c1 = as CSAllocatable>::allocate_constant(cs, c1); + + Self::new(c0, c1) + } +} + +impl WitnessHookable for Fq12 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension12Params, +{ + fn witness_hook(&self, cs: &CS) -> Box Option + 'static> + where + CS: ConstraintSystem, + { + let c0 = self.c0.witness_hook(cs); + let c1 = self.c1.witness_hook(cs); + + Box::new(move || { + let c0 = c0()?; + let c1 = c1()?; + + Some((c0, c1)) + }) + } +} + +impl CSPlaceholder for Fq12 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField + CSPlaceholder, + P: Extension12Params, +{ + fn placeholder>(cs: &mut CS) -> Self { + let placeholder = as CSPlaceholder>::placeholder(cs); + + Self::new(placeholder.clone(), placeholder.clone()) + } +} + +impl CircuitVarLengthEncodable for Fq12 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField + 
CircuitVarLengthEncodable, + P: Extension12Params, +{ + fn encoding_length(&self) -> usize { + self.c0.encoding_length() + self.c1.encoding_length() + } + + fn encode_to_buffer>(&self, cs: &mut CS, dst: &mut Vec) { + self.c0.encode_to_buffer(cs, dst); + self.c1.encode_to_buffer(cs, dst); + } +} + +impl NonNativeField for Fq12 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension12Params, +{ + type Params = NN::Params; + + fn get_params(&self) -> &Arc { + self.c0.get_params() + } + + fn allocated_constant(cs: &mut CS, value: T, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let zero = NN::allocated_constant(cs, T::zero(), params); + let c0 = NN::allocated_constant(cs, value, params); + let c0 = Fq2::new(c0, zero); + let c0 = Fq6::new(c0, Fq2::zero(cs, params), Fq2::zero(cs, params)); + let c1 = Fq6::zero(cs, params); + + Self::new(c0, c1) + } + + fn allocate_checked(cs: &mut CS, witness: T, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let zero = NN::allocate_checked(cs, T::zero(), params); + let c0 = NN::allocate_checked(cs, witness, params); + let c0 = Fq2::new(c0, zero); + let c0 = Fq6::new(c0, Fq2::zero(cs, params), Fq2::zero(cs, params)); + let c1 = Fq6::zero(cs, params); + + Self::new(c0, c1) + } + + fn allocate_checked_without_value(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let c0 = Fq6::allocate_checked_without_value(cs, params); + let c1 = Fq6::allocate_checked_without_value(cs, params); + + Self::new(c0, c1) + } + + fn is_zero(&mut self, cs: &mut CS) -> Boolean + where + CS: ConstraintSystem, + { + self.is_zero(cs) + } + + fn negated(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.negated(cs) + } + + fn equals(&mut self, cs: &mut CS, other: &mut Self) -> Boolean + where + CS: ConstraintSystem, + { + let is_c0_equal = self.c0.equals(cs, &mut other.c0); + let is_c1_equal = self.c1.equals(cs, &mut other.c1); + is_c0_equal.and(cs, is_c1_equal) + } + + fn add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.add(cs, other) + } + + fn lazy_add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.add(cs, other) + } + + fn add_many_lazy(cs: &mut CS, inputs: [&mut Self; M]) -> Self + where + CS: ConstraintSystem, + { + assert!(M != 0, "add_many_lazy: inputs must not be empty"); + + let params = inputs[0].get_params(); + let mut result = Self::zero(cs, params); + + for i in 0..M { + result = result.add(cs, inputs[i]); + } + + result + } + + fn sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.sub(cs, other) + } + + fn lazy_sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.sub(cs, other) + } + + fn double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.double(cs) + } + + fn lazy_double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.double(cs) + } + + fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.mul(cs, other) + } + + fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.square(cs) + } + + fn div_unchecked(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.div(cs, other) + } + + #[allow(unused_variables)] + fn conditionally_select>( + cs: &mut CS, + flag: Boolean, + a: &Self, + b: &Self, + ) -> Self { + let c0 = 
>::Ex6>>::conditionally_select( + cs, flag, &a.c0, &b.c0, + ); + let c1 = >::Ex6>>::conditionally_select( + cs, flag, &a.c1, &b.c1, + ); + + Self::new(c0, c1) + } + + #[allow(unused_variables)] + fn allocate_inverse_or_zero(&self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + // TODO: Make check for zero. + let mut self_cloned = self.clone(); + self_cloned.inverse(cs) + } + + fn inverse_unchecked(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.inverse(cs) + } + + #[allow(unused_variables)] + fn normalize(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.c0.normalize(cs); + self.c1.normalize(cs); + } + + fn mask(&self, cs: &mut CS, masking_bit: Boolean) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.mask(cs, masking_bit); + let c1 = self.c1.mask(cs, masking_bit); + + Self::new(c0, c1) + } + + fn mask_negated(&self, cs: &mut CS, masking_bit: Boolean) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.mask_negated(cs, masking_bit); + let c1 = self.c1.mask_negated(cs, masking_bit); + + Self::new(c0, c1) + } + + fn enforce_reduced(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.c0.enforce_reduced(cs); + self.c1.enforce_reduced(cs); + } + + fn enforce_equal(cs: &mut CS, a: &Self, b: &Self) + where + CS: ConstraintSystem, + { + Fq6::enforce_equal(cs, &a.c0, &b.c0); + Fq6::enforce_equal(cs, &a.c1, &b.c1); + } +} + +impl Selectable for Fq12 +where + F: SmallField, + NN: NonNativeField, +{ + fn conditionally_select(cs: &mut CS, flag: Boolean, a: &Self, b: &Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = + as Selectable>::conditionally_select( + cs, flag, &a.c0, &b.c0, + ); + let c1 = + as Selectable>::conditionally_select( + cs, flag, &a.c1, &b.c1, + ); + + Self::new(c0, c1) + } +} + +impl HardexpCompatible for Fq12 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension12Params, +{ + fn conjugate(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.conjugate(cs) + } + + fn pow_u32>(&mut self, cs: &mut CS, exponent: S) -> Self + where + CS: ConstraintSystem, + { + self.pow_u32(cs, exponent) + } + + fn frobenius_map(&mut self, cs: &mut CS, power: usize) -> Self + where + CS: ConstraintSystem, + { + self.frobenius_map(cs, power) + } + + fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.mul(cs, other) + } + + fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.square(cs) + } + + fn inverse(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.inverse(cs) + } + + fn normalize(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.c0.normalize(cs); + self.c1.normalize(cs); + } +} diff --git a/src/gadgets/tower_extension/fq2.rs b/src/gadgets/tower_extension/fq2.rs new file mode 100644 index 0000000..93b0e18 --- /dev/null +++ b/src/gadgets/tower_extension/fq2.rs @@ -0,0 +1,657 @@ +use std::sync::Arc; + +use pairing::{ + bn256::{Fq as BN256Fq, Fq2 as BN256Fq2, G2Affine}, + ff::PrimeField, +}; + +use super::params::{bn256::BN256Extension2Params, Extension2Params}; + +use crate::cs::Variable; +use crate::gadgets::traits::allocatable::CSPlaceholder; +use crate::gadgets::traits::encodable::CircuitVarLengthEncodable; +use crate::{ + cs::traits::cs::ConstraintSystem, + field::SmallField, + gadgets::{ + boolean::Boolean, + non_native_field::traits::{CurveCompatibleNonNativeField, NonNativeField}, + traits::{ + allocatable::CSAllocatable, 
selectable::Selectable, witnessable::WitnessHookable, + }, + }, +}; + +/// BN256Fq2Params represents a pair of elements in the extension field `Fq2=Fq[u]/(u^2-beta)` +/// where `beta^2=-1`. The implementation is primarily based on the following paper: +/// https://eprint.iacr.org/2006/471.pdf. +#[derive(Clone, Debug, Copy)] +pub struct Fq2 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension2Params, +{ + pub c0: NN, + pub c1: NN, + wit: Option, + _marker: std::marker::PhantomData<(F, T, P)>, +} + +impl Fq2 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension2Params, +{ + /// Creates a new `Fq2` element from two `Fq` components. + pub fn new(c0: NN, c1: NN) -> Self { + Self { + c0, + c1, + wit: Option::None, // to get placeholder_witness we need CS + _marker: std::marker::PhantomData::<(F, T, P)>, + } + } + + /// Creates a new `Fq2` in a form `0+0*u` + pub fn zero(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let zero = NN::allocated_constant(cs, T::zero(), params); + + Self::new(zero.clone(), zero) + } + + /// Creates a new `Fq2` in a form `1+0*u` + pub fn one(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let one = NN::allocated_constant(cs, T::one(), params); + let zero = NN::allocated_constant(cs, T::zero(), params); + + Self::new(one, zero) + } + + /// Adds two elements of `Fq2` by adding their components elementwise. + #[must_use] + pub fn add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.add(cs, &mut other.c0); + let c1 = self.c1.add(cs, &mut other.c1); + Self::new(c0, c1) + } + + /// Returns whether the element of `Fq2` is zero. + pub fn is_zero(&mut self, cs: &mut CS) -> Boolean + where + CS: ConstraintSystem, + { + let is_c0_zero = self.c0.is_zero(cs); + let is_c1_zero = self.c1.is_zero(cs); + is_c0_zero.and(cs, is_c1_zero) + } + + /// Doubles the element of `Fq2` by doubling its components. + #[must_use] + pub fn double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.double(cs); + let c1 = self.c1.double(cs); + Self::new(c0, c1) + } + + /// Negates the element of `Fq2` by negating its components. + #[must_use] + pub fn negated(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.negated(cs); + let c1 = self.c1.negated(cs); + Self::new(c0, c1) + } + + /// Conjugates the element `c=c0+c1*u` by computing `c=c0-c1*u`. + #[must_use] + pub fn conjugate(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c1 = self.c1.negated(cs); + Self::new(self.c0.clone(), c1) + } + + /// Subtracts two elements of `Fq2` by subtracting their components elementwise. + #[must_use] + pub fn sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.sub(cs, &mut other.c0); + let c1 = self.c1.sub(cs, &mut other.c1); + Self::new(c0, c1) + } + + /// Multiply the element `a=a0+a1*u` by the element `b=b0+b1*u` using the Karatsuba method. 
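+ /// Here `u^2 = -1`, so the identity below uses three base-field multiplications
+ /// instead of four: `v0 = a0*b0`, `v1 = a1*b1`, then
+ /// `c0 = v0 - v1` and `c1 = (a0 + a1)*(b0 + b1) - v0 - v1`.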
+ #[must_use] + pub fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + // v0 <- a0*b0, v1 <- a1*b1 + let mut v0 = self.c0.mul(cs, &mut other.c0); + let mut v1 = self.c1.mul(cs, &mut other.c1); + + // c0 <- v0 + beta*v1 + let c0 = v0.sub(cs, &mut v1); + + // c1 <- (a0 + a1)(b0 + b1) - v0 - v1 + let mut a0_plus_a1 = self.c0.add(cs, &mut self.c1); + let mut b0_plus_b1 = other.c0.add(cs, &mut other.c1); + let mut c1 = a0_plus_a1.mul(cs, &mut b0_plus_b1); + let mut c1 = c1.sub(cs, &mut v0); + let c1 = c1.sub(cs, &mut v1); + + Self::new(c0, c1) + } + + /// Square the element `a=a0+a1*u` by using the Karatsuba method. + #[must_use] + pub fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + // v0 <- a0^2, v1 <- a1^2 + let mut v0 = self.c0.square(cs); + let mut v1 = self.c1.square(cs); + + // c0 <- v0 + beta*v1 + let c0 = v0.sub(cs, &mut v1); + + // c1 <- (a0 + a1)^2 - v0 - v1 + let mut a0_plus_a1 = self.c0.add(cs, &mut self.c1); + let mut c1 = a0_plus_a1.square(cs); + let mut c1 = c1.sub(cs, &mut v0); + let c1 = c1.sub(cs, &mut v1); + + Self::new(c0, c1) + } + + /// Multiply the element `a=a0+a1*u` by the element in the base field `Fq`. + #[must_use] + pub fn mul_c0(&mut self, cs: &mut CS, c0: &mut NN) -> Self + where + CS: ConstraintSystem, + { + // a*f = (a0 + a1*u)*f = (a0*f) + (a1*f)*u + let new_c0 = self.c0.mul(cs, c0); + let new_c1 = self.c1.mul(cs, c0); + Self::new(new_c0, new_c1) + } + + /// Finds the inverse of the element `a=a0+a1*u` in the extension field `Fq2`. + #[must_use] + pub fn inverse(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let mut t0 = self.c0.square(cs); + let mut t1 = self.c1.square(cs); + let mut t0 = t0.add(cs, &mut t1); + let mut t = t0.inverse_unchecked(cs); + + let c0 = self.c0.mul(cs, &mut t); + let mut c1 = self.c1.mul(cs, &mut t); + let c1 = c1.negated(cs); + + Self::new(c0, c1) + } + + /// Divides the element `a=a0+a1*u` by the element `b=b0+b1*u` in the extension field `Fq2`. + #[must_use] + pub fn div(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let mut inv = other.inverse(cs); + self.mul(cs, &mut inv) + } + + /// Multiply this element by quadratic nonresidue 9 + u. + pub fn mul_by_nonresidue(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + // Finding 8(a0 + a1*u) + let mut new = self.double(cs); + new = new.double(cs); + new = new.double(cs); + + // c0 <- 9*c0 - c1 + let mut c0 = new.c0.add(cs, &mut self.c0); + let c0 = c0.sub(cs, &mut self.c1); + + // c1 <- c0 + 9*c1 + let mut c1 = new.c1.add(cs, &mut self.c1); + let c1 = c1.add(cs, &mut self.c0); + + Self::new(c0, c1) + } + + /// Compute the Frobenius map - raise this element to power. + pub fn frobenius_map(&mut self, cs: &mut CS, power: usize) -> Self + where + CS: ConstraintSystem, + { + let is_even = Boolean::allocated_constant(cs, power % 2 == 0); + + // TODO: check what non-residue == -1. + + let c0 = self.c0.clone(); + let c1 = self.c1.negated(cs); + + // TODO: assert what Fp2 under CS computes frobenius map same as without CS and this optimizational hack. + + as NonNativeField>::conditionally_select( + cs, + is_even, + &self.clone(), + &Self::new(c0, c1), + ) + } + + /// Allocate `Fq2` tower extension element from the Witness represented in two PrimeField components `c0` and `c1`. 
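+ /// Unlike [`Self::allocate_from_witness`], both components are embedded as
+ /// circuit constants rather than checked allocations.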
+ pub fn constant(cs: &mut CS, wit: P::Witness, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1) = P::convert_from_structured_witness(wit); + + let c0 = NN::allocated_constant(cs, c0, params); + let c1 = NN::allocated_constant(cs, c1, params); + + Self::new(c0, c1) + } + + /// Allocate `Fq2` tower extension element from the Witness represented in two PrimeField components `c0` and `c1`. + pub fn allocate_from_witness(cs: &mut CS, wit: P::Witness, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1) = P::convert_from_structured_witness(wit); + + let c0 = NN::allocate_checked(cs, c0, params); + let c1 = NN::allocate_checked(cs, c1, params); + + Self::new(c0, c1) + } +} + +impl CSAllocatable for Fq2 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension2Params, +{ + type Witness = (NN::Witness, NN::Witness); + + #[inline(always)] + fn placeholder_witness() -> Self::Witness { + (NN::placeholder_witness(), NN::placeholder_witness()) + } + + #[inline(always)] + fn allocate_without_value(cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = NN::allocate_without_value(cs); + let c1 = NN::allocate_without_value(cs); + + Self::new(c0, c1) + } + + #[inline(always)] + fn allocate(cs: &mut CS, witness: Self::Witness) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1) = witness; + + let c0 = NN::allocate(cs, c0); + let c1 = NN::allocate(cs, c1); + + Self::new(c0, c1) + } + + #[inline(always)] + fn allocate_constant(cs: &mut CS, witness: Self::Witness) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1) = witness; + + let c0 = NN::allocate_constant(cs, c0); + let c1 = NN::allocate_constant(cs, c1); + + Self::new(c0, c1) + } +} + +impl WitnessHookable for Fq2 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension2Params, +{ + fn witness_hook(&self, cs: &CS) -> Box Option + 'static> + where + CS: ConstraintSystem, + { + let c0 = self.c0.witness_hook(cs); + let c1 = self.c1.witness_hook(cs); + + Box::new(move || { + let c0 = c0()?; + let c1 = c1()?; + + Some((c0, c1)) + }) + } +} + +impl CSPlaceholder for Fq2 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField + CSPlaceholder, + P: Extension2Params, +{ + fn placeholder>(cs: &mut CS) -> Self { + let c0 = NN::placeholder(cs); + let c1 = NN::placeholder(cs); + + Self::new(c0, c1) + } +} + +impl CircuitVarLengthEncodable for Fq2 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField + CircuitVarLengthEncodable, + P: Extension2Params, +{ + fn encoding_length(&self) -> usize { + self.c0.encoding_length() + self.c1.encoding_length() + } + + fn encode_to_buffer>(&self, cs: &mut CS, dst: &mut Vec) { + self.c0.encode_to_buffer(cs, dst); + self.c1.encode_to_buffer(cs, dst); + } +} + +impl NonNativeField for Fq2 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension2Params, +{ + type Params = NN::Params; + + fn get_params(&self) -> &Arc { + self.c0.get_params() + } + + fn allocated_constant(cs: &mut CS, value: T, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let c0 = NN::allocated_constant(cs, value, params); + let c1 = NN::allocated_constant(cs, T::zero(), params); + + Self::new(c0, c1) + } + + fn allocate_checked(cs: &mut CS, witness: T, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let c0 = NN::allocate_checked(cs, witness, params); + let c1 = NN::allocate_checked(cs, witness, params); + + Self::new(c0, c1) + } + + fn allocate_checked_without_value(cs: &mut CS, params: &Arc) -> Self 
+ where + CS: ConstraintSystem, + { + let c0 = NN::allocate_checked_without_value(cs, params); + let c1 = NN::allocate_checked_without_value(cs, params); + + Self::new(c0, c1) + } + + fn is_zero(&mut self, cs: &mut CS) -> Boolean + where + CS: ConstraintSystem, + { + self.is_zero(cs) + } + + fn negated(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.negated(cs) + } + + fn equals(&mut self, cs: &mut CS, other: &mut Self) -> Boolean + where + CS: ConstraintSystem, + { + let is_c0_equal = self.c0.equals(cs, &mut other.c0); + let is_c1_equal = self.c1.equals(cs, &mut other.c1); + is_c0_equal.and(cs, is_c1_equal) + } + + fn add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.add(cs, other) + } + + fn lazy_add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.add(cs, other) + } + + fn add_many_lazy(cs: &mut CS, inputs: [&mut Self; M]) -> Self + where + CS: ConstraintSystem, + { + assert!(M != 0, "add_many_lazy: inputs must not be empty"); + + let params = inputs[0].get_params(); + let mut result = Self::zero(cs, params); + + for i in 0..M { + result = result.add(cs, inputs[i]); + } + + result + } + + fn sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.sub(cs, other) + } + + fn lazy_sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.sub(cs, other) + } + + fn double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.double(cs) + } + + fn lazy_double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.double(cs) + } + + fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.mul(cs, other) + } + + fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.square(cs) + } + + fn div_unchecked(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.div(cs, other) + } + + fn conditionally_select>( + cs: &mut CS, + flag: Boolean, + a: &Self, + b: &Self, + ) -> Self { + let c0 = NN::conditionally_select(cs, flag, &a.c0, &b.c0); + let c1 = NN::conditionally_select(cs, flag, &a.c1, &b.c1); + + Self::new(c0, c1) + } + + #[allow(unused_variables)] + fn allocate_inverse_or_zero(&self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + // TODO: Make check for zero. 
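+ // NOTE: until that check is added, a zero input does not produce zero:
+ // the inversion below is only sound for non-zero elements.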
+ let mut self_cloned = self.clone(); + self_cloned.inverse(cs) + } + + fn inverse_unchecked(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.inverse(cs) + } + + #[allow(unused_variables)] + fn normalize(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.c0.normalize(cs); + self.c1.normalize(cs); + } + + fn mask(&self, cs: &mut CS, masking_bit: Boolean) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.mask(cs, masking_bit); + let c1 = self.c1.mask(cs, masking_bit); + + Self::new(c0, c1) + } + + fn mask_negated(&self, cs: &mut CS, masking_bit: Boolean) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.mask_negated(cs, masking_bit); + let c1 = self.c1.mask_negated(cs, masking_bit); + + Self::new(c0, c1) + } + + fn enforce_reduced(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.c0.enforce_reduced(cs); + self.c1.enforce_reduced(cs); + } + + fn enforce_equal(cs: &mut CS, a: &Self, b: &Self) + where + CS: ConstraintSystem, + { + NN::enforce_equal(cs, &a.c0, &b.c0); + NN::enforce_equal(cs, &a.c1, &b.c1); + } +} + +impl Selectable for Fq2 +where + F: SmallField, + NN: NonNativeField, +{ + fn conditionally_select(cs: &mut CS, flag: Boolean, a: &Self, b: &Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = NN::conditionally_select(cs, flag, &a.c0, &b.c0); + let c1 = NN::conditionally_select(cs, flag, &a.c1, &b.c1); + + Self::new(c0, c1) + } +} + +impl CurveCompatibleNonNativeField + for Fq2 +where + F: SmallField, + NN: NonNativeField, +{ + fn from_curve_base(cs: &mut CS, point: &BN256Fq2, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let c0 = NN::allocated_constant(cs, point.c0, params); + let c1 = NN::allocated_constant(cs, point.c1, params); + + Self::new(c0, c1) + } +} diff --git a/src/gadgets/tower_extension/fq6.rs b/src/gadgets/tower_extension/fq6.rs new file mode 100644 index 0000000..e68ca98 --- /dev/null +++ b/src/gadgets/tower_extension/fq6.rs @@ -0,0 +1,860 @@ +use std::sync::Arc; + +use pairing::{bn256::Fq as BN256Fq, ff::PrimeField}; + +use super::{ + fq2::Fq2, + params::{ + bn256::{BN256Extension2Params, BN256Extension6Params}, + Extension6Params, + }, +}; + +use crate::cs::Variable; +use crate::gadgets::traits::allocatable::CSPlaceholder; +use crate::gadgets::traits::encodable::CircuitVarLengthEncodable; +use crate::{ + cs::traits::cs::ConstraintSystem, + field::SmallField, + gadgets::{ + boolean::Boolean, + non_native_field::traits::NonNativeField, + traits::{ + allocatable::CSAllocatable, selectable::Selectable, witnessable::WitnessHookable, + }, + }, +}; + +/// `Fq6` field extension implementation in the constraint system. It is implemented +/// as `Fq2[v]/(v^3-xi)` where `xi=9+u`. In other words, +/// it is a set of quadratic polynomials of a form `c0+c1*v+c2*v^2`, +/// where `c0`, `c1`, `c2` are elements of `Fq2`. +/// See https://hackmd.io/@jpw/bn254#Field-extension-towers for reference. For +/// implementation reference, see https://eprint.iacr.org/2006/471.pdf. +#[derive(Clone, Debug, Copy)] +pub struct Fq6 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension6Params, +{ + pub c0: Fq2, + pub c1: Fq2, + pub c2: Fq2, + _marker: std::marker::PhantomData<(F, T)>, +} + +impl Fq6 +where + F: SmallField, + T: pairing::ff::PrimeField, + NN: NonNativeField, + P: Extension6Params, +{ + /// Creates a new `Fq6` element from three `Fq2` components. 
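+ /// The result represents `c0 + c1*v + c2*v^2`.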
+ pub fn new( + c0: Fq2, + c1: Fq2, + c2: Fq2, + ) -> Self { + Self { + c0, + c1, + c2, + _marker: std::marker::PhantomData::<(F, T)>, + } + } + + /// Creates a new zero `Fq6` in a form `0+0*v+0*v^2` + pub fn zero(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let zero = Fq2::zero(cs, params); + Self::new(zero.clone(), zero.clone(), zero) + } + + /// Creates a unit `Fq6` in a form `1+0*v+0*v^2` + pub fn one(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let one = Fq2::one(cs, params); + let zero = Fq2::zero(cs, params); + Self::new(one, zero.clone(), zero) + } + + /// Returns the `\gamma`: square root of `w`, being just a `0+1*v+0*v^2` element. + pub fn gamma(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let one = Fq2::one(cs, params); + let zero = Fq2::zero(cs, params); + Self::new(zero.clone(), one, zero) + } + + /// Returns `Fq6::one()` if `b` is true, and `Fq6::zero()` if `b` is false. + pub fn from_boolean(cs: &mut CS, b: Boolean, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let zero = Self::zero(cs, params); + let one = Self::one(cs, params); + Self::conditionally_select(cs, b, &one, &zero) + } + + /// Returns true if the `Fq6` element is zero. + pub fn is_zero(&mut self, cs: &mut CS) -> Boolean + where + CS: ConstraintSystem, + { + let is_c0_zero = self.c0.is_zero(cs); + let is_c1_zero = self.c1.is_zero(cs); + let is_c2_zero = self.c2.is_zero(cs); + Boolean::multi_and(cs, &[is_c0_zero, is_c1_zero, is_c2_zero]) + } + + /// Adds two elements of `Fq6` by adding their components elementwise. + #[must_use] + pub fn add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.add(cs, &mut other.c0); + let c1 = self.c1.add(cs, &mut other.c1); + let c2 = self.c2.add(cs, &mut other.c2); + Self::new(c0, c1, c2) + } + + /// Doubles the element of `Fq6` by doubling its components. + #[must_use] + pub fn double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.double(cs); + let c1 = self.c1.double(cs); + let c2 = self.c2.double(cs); + Self::new(c0, c1, c2) + } + + /// Negates the element of `Fq6` by negating its components. + #[must_use] + pub fn negated(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.negated(cs); + let c1 = self.c1.negated(cs); + let c2 = self.c2.negated(cs); + Self::new(c0, c1, c2) + } + + /// Subtracts two elements of `Fq6` by subtracting their components elementwise. + #[must_use] + pub fn sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.sub(cs, &mut other.c0); + let c1 = self.c1.sub(cs, &mut other.c1); + let c2 = self.c2.sub(cs, &mut other.c2); + Self::new(c0, c1, c2) + } + + /// Multiplies the element in `Fq6` by a non-residue `v`. + pub fn mul_by_nonresidue(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + // c0, c1, c2 -> c2, c0, c1 + let new_c2 = self.c2.mul_by_nonresidue(cs); + Self::new(new_c2, self.c0.clone(), self.c1.clone()) + } + + /// Multiplies the element in `Fq6` by a non-residue `\xi=9+u`. 
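+ /// Since `\xi` lies in `Fq2`, the product distributes over the coefficients:
+ /// `\xi*(a0 + a1*v + a2*v^2) = (\xi*a0) + (\xi*a1)*v + (\xi*a2)*v^2`.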
+ pub fn mul_by_xi(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let new_c0 = self.c0.mul_by_nonresidue(cs); + let new_c1 = self.c1.mul_by_nonresidue(cs); + let new_c2 = self.c2.mul_by_nonresidue(cs); + + Self::new(new_c0, new_c1, new_c2) + } + + /// Multiplies two elements `a=a0+a1*v+a2*v^2` + /// and `b=b0+b1*v+b2*v^2` in `Fq6` using Karatsuba multiplication. + #[must_use] + pub fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let mut v0 = self.c0.mul(cs, &mut other.c0); + let mut v1 = self.c1.mul(cs, &mut other.c1); + let mut v2 = self.c2.mul(cs, &mut other.c2); + + let mut t1 = other.c1.add(cs, &mut other.c2); + let mut tmp = self.c1.add(cs, &mut self.c2); + + let mut t1 = t1.mul(cs, &mut tmp); + let mut t1 = t1.sub(cs, &mut v1); + let mut t1 = t1.sub(cs, &mut v2); + let mut t1 = t1.mul_by_nonresidue(cs); + let t1 = t1.add(cs, &mut v0); + + let mut t3 = other.c0.add(cs, &mut other.c2); + let mut tmp = self.c0.add(cs, &mut self.c2); + let mut t3 = t3.mul(cs, &mut tmp); + let mut t3 = t3.sub(cs, &mut v0); + let mut t3 = t3.add(cs, &mut v1); + let t3 = t3.sub(cs, &mut v2); + + let mut t2 = other.c0.add(cs, &mut other.c1); + let mut tmp = self.c0.add(cs, &mut self.c1); + let mut t2 = t2.mul(cs, &mut tmp); + let mut t2 = t2.sub(cs, &mut v0); + let mut t2 = t2.sub(cs, &mut v1); + let mut v2 = v2.mul_by_nonresidue(cs); + let t2 = t2.add(cs, &mut v2); + + Self::new(t1, t2, t3) + } + + /// Squares the element `a=a0+a1*v+a2*v^2` in `Fq6` using Karatsuba squaring. + #[must_use] + pub fn square>(&mut self, cs: &mut CS) -> Self { + // v0 <- a0^2, v1 <- a1^2, v2 <- a2^2 + let mut v0 = self.c0.square(cs); + let mut v1 = self.c1.square(cs); + let mut v2 = self.c2.square(cs); + + // c0 <- v0 + xi*((a1 + a2)^2 - v1 - v2) + let mut a1_plus_a2 = self.c1.add(cs, &mut self.c2); + let mut c0 = a1_plus_a2.square(cs); + let mut c0 = c0.sub(cs, &mut v1); + let mut c0 = c0.sub(cs, &mut v2); + let mut c0 = c0.mul_by_nonresidue(cs); + let c0 = c0.add(cs, &mut v0); + + // c1 <- (a0 + a1)^2 - v0 - v1 + xi*v2 + let mut a0_plus_a1 = self.c0.add(cs, &mut self.c1); + let mut c1 = a0_plus_a1.square(cs); + let mut c1 = c1.sub(cs, &mut v0); + let mut c1 = c1.sub(cs, &mut v1); + let mut xi_v2 = v2.mul_by_nonresidue(cs); + let c1 = c1.add(cs, &mut xi_v2); + + // c2 <- (a0 + a2)^2 - v0 + v1 - v2 + let mut a0_plus_a2 = self.c0.add(cs, &mut self.c2); + let mut c2 = a0_plus_a2.square(cs); + let mut c2 = c2.sub(cs, &mut v0); + let mut c2 = c2.add(cs, &mut v1); + let c2 = c2.sub(cs, &mut v2); + + Self::new(c0, c1, c2) + } + + /// Multiplies the element `a=a0+a1*v+a2*v^2` in `Fq6` by the element `b = b1*v` + pub fn mul_by_c1(&mut self, cs: &mut CS, c1: &mut Fq2) -> Self + where + CS: ConstraintSystem, + { + let mut b_b = self.c1.mul(cs, c1); + let mut tmp = self.c1.add(cs, &mut self.c2); + + let mut t1 = c1.mul(cs, &mut tmp); + let mut t1 = t1.sub(cs, &mut b_b); + let t1 = t1.mul_by_nonresidue(cs); + + let mut tmp = self.c0.add(cs, &mut self.c1); + let mut t2 = c1.mul(cs, &mut tmp); + let t2 = t2.sub(cs, &mut b_b); + + Self::new(t1, t2, b_b) + } + + /// Multiplies the element `a=a0+a1*v+a2*v^2` in `Fq6` by the element in `NonNativeField` + pub fn mul_by_fq(&mut self, cs: &mut CS, c0: &mut NN) -> Self + where + CS: ConstraintSystem, + { + // Simply multiply element-wise + let t0 = self.c0.mul_c0(cs, c0); + let t1 = self.c1.mul_c0(cs, c0); + let t2 = self.c2.mul_c0(cs, c0); + + Self::new(t0, t1, t2) + } + + /// Multiplies the element `a=a0+a1*v+a2*v^2` in `Fq6` by 
the element `c=c0` in `Fq2` + pub fn mul_by_c0(&mut self, cs: &mut CS, c0: &mut Fq2) -> Self + where + CS: ConstraintSystem, + { + // Simply multiply element-wise + let t0 = self.c0.mul(cs, c0); + let t1 = self.c1.mul(cs, c0); + let t2 = self.c2.mul(cs, c0); + + Self::new(t0, t1, t2) + } + + /// Multiplies the element `a=a0+a1*v+a2*v^2` in `Fq6` by the element `c2*v^2` + pub fn mul_by_c2(&mut self, cs: &mut CS, c2: &mut Fq2) -> Self + where + CS: ConstraintSystem, + { + // Suppose a = a0 + a1*v + a2*v^2. In this case, + // (a0 + a1*v + a2*v^2) * c2 * v^2 = + // a1*c2*\xi + a2*c2*\xi*v + a0*c2*v^2 + // NOTE: There might be a better way to calculate three coefficients + // without using 3 multiplications and 2 mul_by_nonresidues, similarly to mul_by_c1 + + // Setting coefficients + let mut a0 = self.c0.clone(); + let mut a1 = self.c1.clone(); + let mut a2 = self.c2.clone(); + + // new_c0 <- a1*c2*\xi + let mut new_c0 = a1.mul(cs, c2); + new_c0 = new_c0.mul_by_nonresidue(cs); + + // new_c1 <- a2*c2*\xi + let mut new_c1 = a2.mul(cs, c2); + new_c1 = new_c1.mul_by_nonresidue(cs); + + // new_c2 <- a0*c2 + let new_c2 = a0.mul(cs, c2); + + Self::new(new_c0, new_c1, new_c2) + } + + /// Multiplies the element `a=a0+a1*v+a2*v^2` in `Fq6` by the element `b = b0+b1*v` + pub fn mul_by_c0c1( + &mut self, + cs: &mut CS, + c0: &mut Fq2, + c1: &mut Fq2, + ) -> Self + where + CS: ConstraintSystem, + { + let mut a_a = self.c0.mul(cs, c0); + let mut b_b = self.c1.mul(cs, c1); + + let mut tmp = self.c1.add(cs, &mut self.c2); + let mut t1 = c1.mul(cs, &mut tmp); + let mut t1 = t1.sub(cs, &mut b_b); + let mut t1 = t1.mul_by_nonresidue(cs); + let t1 = t1.add(cs, &mut a_a); + + let mut tmp = self.c0.add(cs, &mut self.c2); + let mut t3 = c0.mul(cs, &mut tmp); + let mut t3 = t3.sub(cs, &mut a_a); + let t3 = t3.add(cs, &mut b_b); + + let mut t2 = c0.add(cs, c1); + let mut tmp = self.c0.add(cs, &mut self.c1); + let mut t2 = t2.mul(cs, &mut tmp); + let mut t2 = t2.sub(cs, &mut a_a); + let t2 = t2.sub(cs, &mut b_b); + + Self::new(t1, t2, t3) + } + + /// Find the inverse element in Fq6 + pub fn inverse(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let mut c0 = self.c2.mul_by_nonresidue(cs); + let mut c0 = c0.mul(cs, &mut self.c1); + let mut c0 = c0.negated(cs); + + let mut c0s = self.c0.square(cs); + let mut c0 = c0.add(cs, &mut c0s); + + let mut c1 = self.c2.square(cs); + let mut c1 = c1.mul_by_nonresidue(cs); + + let mut c01 = self.c0.mul(cs, &mut self.c1); + let mut c1 = c1.sub(cs, &mut c01); + + let mut c2 = self.c1.square(cs); + let mut c02 = self.c0.mul(cs, &mut self.c2); + let mut c2 = c2.sub(cs, &mut c02); + + let mut tmp1 = self.c2.mul(cs, &mut c1); + let mut tmp2 = self.c1.mul(cs, &mut c2); + let mut tmp1 = tmp1.add(cs, &mut tmp2); + let mut tmp1 = tmp1.mul_by_nonresidue(cs); + let mut tmp2 = self.c0.mul(cs, &mut c0); + let mut tmp1 = tmp1.add(cs, &mut tmp2); + + let mut t = tmp1.inverse(cs); + let c0_new = t.mul(cs, &mut c0); + let c1_new = t.mul(cs, &mut c1); + let c2_new = t.mul(cs, &mut c2); + + Self::new(c0_new, c1_new, c2_new) + } + + pub fn div(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + let mut inv = other.inverse(cs); + self.mul(cs, &mut inv) + } + + /// Compute the Frobenius map - raise this element to power. 
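+ /// For `a = a0 + a1*v + a2*v^2` and `power = i`, this computes
+ /// `a0^{q^i} + C1[i mod 6]*a1^{q^i}*v + C2[i mod 6]*a2^{q^i}*v^2`,
+ /// with the constants taken from `FROBENIUS_COEFFS_C1`/`FROBENIUS_COEFFS_C2`.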
+ #[allow(unused_variables)] + pub fn frobenius_map(&mut self, cs: &mut CS, power: usize) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.frobenius_map(cs, power); + let mut c1 = self.c1.frobenius_map(cs, power); + let mut c2 = self.c2.frobenius_map(cs, power); + + let c1_frobenius_constant = P::FROBENIUS_COEFFS_C1[power % 6]; + let c2_frobenius_constant = P::FROBENIUS_COEFFS_C2[power % 6]; + + let params = c1.get_params(); + + let mut c1_frobenius_coeff = Fq2::constant(cs, c1_frobenius_constant, params); + let mut c2_frobenius_coeff = Fq2::constant(cs, c2_frobenius_constant, params); + + let c1 = c1.mul(cs, &mut c1_frobenius_coeff); + let c2 = c2.mul(cs, &mut c2_frobenius_coeff); + + Self::new(c0, c1, c2) + } + + /// Normalizes the element of `Fq6` by normalizing its components. + pub fn normalize(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.c0.normalize(cs); + self.c1.normalize(cs); + self.c2.normalize(cs); + } + + /// Allocate `Fq6` tower extension element from the Witness represented in three components + /// from the `Fq2` tower extension. + pub fn constant(cs: &mut CS, wit: P::Witness, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let constants = P::convert_from_structured_witness(wit); + let c0 = Fq2::constant(cs, constants[0], params); + let c1 = Fq2::constant(cs, constants[1], params); + let c2 = Fq2::constant(cs, constants[2], params); + + Self::new(c0, c1, c2) + } + + /// Allocate `Fq6` tower extension element from the Witness represented in three components + /// from the `Fq2` tower extension. + pub fn allocate_from_witness(cs: &mut CS, wit: P::Witness, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let components = P::convert_from_structured_witness(wit); + let c0 = Fq2::allocate_from_witness(cs, components[0], params); + let c1 = Fq2::allocate_from_witness(cs, components[1], params); + let c2 = Fq2::allocate_from_witness(cs, components[2], params); + + Self::new(c0, c1, c2) + } +} + +impl CSAllocatable for Fq6 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension6Params, +{ + type Witness = ( + as CSAllocatable>::Witness, + as CSAllocatable>::Witness, + as CSAllocatable>::Witness, + ); + + #[inline(always)] + fn placeholder_witness() -> Self::Witness { + ( + as CSAllocatable>::placeholder_witness(), + as CSAllocatable>::placeholder_witness(), + as CSAllocatable>::placeholder_witness(), + ) + } + + #[inline(always)] + fn allocate_without_value(cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + let c0 = as CSAllocatable>::allocate_without_value(cs); + let c1 = as CSAllocatable>::allocate_without_value(cs); + let c2 = as CSAllocatable>::allocate_without_value(cs); + + Self::new(c0, c1, c2) + } + + #[inline(always)] + fn allocate(cs: &mut CS, witness: Self::Witness) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1, c2) = witness; + + let c0 = as CSAllocatable>::allocate(cs, c0); + let c1 = as CSAllocatable>::allocate(cs, c1); + let c2 = as CSAllocatable>::allocate(cs, c2); + + Self::new(c0, c1, c2) + } + + #[inline(always)] + fn allocate_constant(cs: &mut CS, witness: Self::Witness) -> Self + where + CS: ConstraintSystem, + { + let (c0, c1, c2) = witness; + + let c0 = as CSAllocatable>::allocate_constant(cs, c0); + let c1 = as CSAllocatable>::allocate_constant(cs, c1); + let c2 = as CSAllocatable>::allocate_constant(cs, c2); + + Self::new(c0, c1, c2) + } +} + +impl WitnessHookable for Fq6 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension6Params, +{ 
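+ // The hook resolves to the structured `(c0, c1, c2)` witness once all three
+ // component hooks have produced values.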
+ fn witness_hook(&self, cs: &CS) -> Box Option + 'static> + where + CS: ConstraintSystem, + { + let c0 = self.c0.witness_hook(cs); + let c1 = self.c1.witness_hook(cs); + let c2 = self.c2.witness_hook(cs); + + Box::new(move || { + let c0 = c0()?; + let c1 = c1()?; + let c2 = c2()?; + + Some((c0, c1, c2)) + }) + } +} + +impl CSPlaceholder for Fq6 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField + CSPlaceholder, + P: Extension6Params, +{ + fn placeholder>(cs: &mut CS) -> Self { + let placeholder = as CSPlaceholder>::placeholder(cs); + + Self::new(placeholder.clone(), placeholder.clone(), placeholder) + } +} + +impl CircuitVarLengthEncodable for Fq6 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField + CircuitVarLengthEncodable, + P: Extension6Params, +{ + fn encoding_length(&self) -> usize { + self.c0.encoding_length() + self.c1.encoding_length() + self.c1.encoding_length() + } + + fn encode_to_buffer>(&self, cs: &mut CS, dst: &mut Vec) { + self.c0.encode_to_buffer(cs, dst); + self.c1.encode_to_buffer(cs, dst); + self.c2.encode_to_buffer(cs, dst); + } +} + +impl NonNativeField for Fq6 +where + F: SmallField, + T: PrimeField, + NN: NonNativeField, + P: Extension6Params, +{ + type Params = NN::Params; + + fn get_params(&self) -> &Arc { + self.c0.get_params() + } + + fn allocated_constant(cs: &mut CS, value: T, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let c0 = NN::allocated_constant(cs, value, params); + let c0 = Fq2::new(c0, NN::allocated_constant(cs, T::zero(), params)); + let c1 = Fq2::zero(cs, params); + let c2 = Fq2::zero(cs, params); + + Self::new(c0, c1, c2) + } + + fn allocate_checked(cs: &mut CS, witness: T, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let c0 = NN::allocate_checked(cs, witness, params); + let c0 = Fq2::new(c0, NN::allocated_constant(cs, T::zero(), params)); + let c1 = Fq2::zero(cs, params); + let c2 = Fq2::zero(cs, params); + + Self::new(c0, c1, c2) + } + + fn allocate_checked_without_value(cs: &mut CS, params: &Arc) -> Self + where + CS: ConstraintSystem, + { + let c0 = Fq2::allocate_checked_without_value(cs, params); + let c1 = Fq2::allocate_checked_without_value(cs, params); + let c2 = Fq2::allocate_checked_without_value(cs, params); + + Self::new(c0, c1, c2) + } + + fn is_zero(&mut self, cs: &mut CS) -> Boolean + where + CS: ConstraintSystem, + { + self.is_zero(cs) + } + + fn negated(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.negated(cs) + } + + fn equals(&mut self, cs: &mut CS, other: &mut Self) -> Boolean + where + CS: ConstraintSystem, + { + let is_c0_equal = self.c0.equals(cs, &mut other.c0); + let is_c1_equal = self.c1.equals(cs, &mut other.c1); + let is_c2_equal = self.c2.equals(cs, &mut other.c2); + Boolean::multi_and(cs, &[is_c0_equal, is_c1_equal, is_c2_equal]) + } + + fn add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.add(cs, other) + } + + fn lazy_add(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.add(cs, other) + } + + fn add_many_lazy(cs: &mut CS, inputs: [&mut Self; M]) -> Self + where + CS: ConstraintSystem, + { + assert!(M != 0, "add_many_lazy: inputs must not be empty"); + + let params = inputs[0].get_params(); + let mut result = Self::zero(cs, params); + + for i in 0..M { + result = result.add(cs, inputs[i]); + } + + result + } + + fn sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.sub(cs, other) + } + + fn 
lazy_sub(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.sub(cs, other) + } + + fn double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.double(cs) + } + + fn lazy_double(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.double(cs) + } + + fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.mul(cs, other) + } + + fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.square(cs) + } + + fn div_unchecked(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem, + { + self.div(cs, other) + } + + #[allow(unused_variables)] + fn conditionally_select>( + cs: &mut CS, + flag: Boolean, + a: &Self, + b: &Self, + ) -> Self { + let c0 = >::Ex2>>::conditionally_select( + cs, flag, &a.c0, &b.c0, + ); + let c1 = >::Ex2>>::conditionally_select( + cs, flag, &a.c1, &b.c1, + ); + let c2 = >::Ex2>>::conditionally_select( + cs, flag, &a.c2, &b.c2, + ); + + Self::new(c0, c1, c2) + } + + #[allow(unused_variables)] + fn allocate_inverse_or_zero(&self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + // TODO: Make check for zero. + let mut self_cloned = self.clone(); + self_cloned.inverse(cs) + } + + fn inverse_unchecked(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem, + { + self.inverse(cs) + } + + #[allow(unused_variables)] + fn normalize(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.c0.normalize(cs); + self.c1.normalize(cs); + self.c2.normalize(cs); + } + + fn mask(&self, cs: &mut CS, masking_bit: Boolean) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.mask(cs, masking_bit); + let c1 = self.c1.mask(cs, masking_bit); + let c2 = self.c2.mask(cs, masking_bit); + + Self::new(c0, c1, c2) + } + + fn mask_negated(&self, cs: &mut CS, masking_bit: Boolean) -> Self + where + CS: ConstraintSystem, + { + let c0 = self.c0.mask_negated(cs, masking_bit); + let c1 = self.c1.mask_negated(cs, masking_bit); + let c2 = self.c2.mask_negated(cs, masking_bit); + + Self::new(c0, c1, c2) + } + + fn enforce_reduced(&mut self, cs: &mut CS) + where + CS: ConstraintSystem, + { + self.c0.enforce_reduced(cs); + self.c1.enforce_reduced(cs); + self.c2.enforce_reduced(cs); + } + + fn enforce_equal(cs: &mut CS, a: &Self, b: &Self) + where + CS: ConstraintSystem, + { + Fq2::enforce_equal(cs, &a.c0, &b.c0); + Fq2::enforce_equal(cs, &a.c1, &b.c1); + Fq2::enforce_equal(cs, &a.c2, &b.c2); + } +} + +impl Selectable for Fq6 +where + F: SmallField, + NN: NonNativeField, +{ + fn conditionally_select(cs: &mut CS, flag: Boolean, a: &Self, b: &Self) -> Self + where + CS: ConstraintSystem, + { + let c0 = + as Selectable>::conditionally_select( + cs, flag, &a.c0, &b.c0, + ); + let c1 = + as Selectable>::conditionally_select( + cs, flag, &a.c1, &b.c1, + ); + let c2 = + as Selectable>::conditionally_select( + cs, flag, &a.c2, &b.c2, + ); + + Self::new(c0, c1, c2) + } +} diff --git a/src/gadgets/tower_extension/mod.rs b/src/gadgets/tower_extension/mod.rs new file mode 100644 index 0000000..d6a83be --- /dev/null +++ b/src/gadgets/tower_extension/mod.rs @@ -0,0 +1,5 @@ +pub mod algebraic_torus; +pub mod fq12; +pub mod fq2; +pub mod fq6; +pub mod params; diff --git a/src/gadgets/tower_extension/params/bn256.rs b/src/gadgets/tower_extension/params/bn256.rs new file mode 100644 index 0000000..0a40779 --- /dev/null +++ b/src/gadgets/tower_extension/params/bn256.rs @@ -0,0 +1,253 @@ +use pairing::bn256::{fq::Fq 
as BN256Fq, Fq12 as BN256Fq12, Fq2 as BN256Fq2, Fq6 as BN256Fq6}; + +use super::*; +use pairing::bn256::fq::{ + FROBENIUS_COEFF_FQ12_C1 as BN256_FROBENIUS_COEFF_FQ12_C1, + FROBENIUS_COEFF_FQ6_C1 as BN256_FROBENIUS_COEFF_FQ6_C1, + FROBENIUS_COEFF_FQ6_C2 as BN256_FROBENIUS_COEFF_FQ6_C2, +}; + +#[derive(Clone, Debug, Copy)] +pub struct BN256Extension2Params {} +impl Extension2Params for BN256Extension2Params { + type Witness = BN256Fq2; + + fn convert_to_structured_witness(c0: BN256Fq, c1: BN256Fq) -> Self::Witness { + BN256Fq2 { c0, c1 } + } + + fn convert_from_structured_witness(wit: Self::Witness) -> (BN256Fq, BN256Fq) { + (wit.c0, wit.c1) + } +} + +#[derive(Clone, Debug, Copy)] +pub struct BN256Extension6Params {} +impl Extension6Params for BN256Extension6Params { + type Ex2 = BN256Extension2Params; + type Witness = BN256Fq6; + + const FROBENIUS_COEFFS_C1: [BN256Fq2; 6] = BN256_FROBENIUS_COEFF_FQ6_C1; + const FROBENIUS_COEFFS_C2: [BN256Fq2; 6] = BN256_FROBENIUS_COEFF_FQ6_C2; + + fn convert_to_structured_witness(c0: BN256Fq2, c1: BN256Fq2, c2: BN256Fq2) -> Self::Witness { + Self::Witness { c0, c1, c2 } + } + + fn convert_from_structured_witness(wit: Self::Witness) -> [BN256Fq2; 3] { + [wit.c0, wit.c1, wit.c2] + } +} + +#[derive(Clone, Debug, Copy)] +pub struct BN256Extension12Params {} +impl Extension12Params for BN256Extension12Params { + type Ex6 = BN256Extension6Params; + type Witness = BN256Fq12; + + // These are Fp2 because we will multiply them with c1 `Fp6`, which has underlying `Fp2`. + const FROBENIUS_COEFFS_C1: + [<>::Ex2 as Extension2Params>::Witness; 12] = + BN256_FROBENIUS_COEFF_FQ12_C1; + + fn convert_to_structured_witness(c0: BN256Fq6, c1: BN256Fq6) -> Self::Witness { + Self::Witness { c0, c1 } + } + + fn convert_from_structured_witness(wit: Self::Witness) -> (BN256Fq6, BN256Fq6) { + (wit.c0, wit.c1) + } +} + +// Constants for torus extension +const TWO_INVERSE_C0: &str = + "10944121435919637611123202872628637544348155578648911831344518947322613104292"; +const W_INVERSE_C5_C0: &str = + "21087453498479301738505683583845423561061080261299122796980902361914303298513"; +const W_INVERSE_C5_C1: &str = + "14681138511599513868579906292550611339979233093309515871315818100066920017952"; + +impl BN256Extension12Params { + /// Returns the `gamma` element in `Fq6`, + /// being simply the element `0+1*v+0*v^2` in `Fq6`. + pub(super) fn gamma() -> BN256Fq6 { + BN256Fq6 { + c0: BN256Fq2::zero(), + c1: BN256Fq2::one(), + c2: BN256Fq2::zero(), + } + } + + /// Returns the `0+1*w` element in `Fq12` + pub(super) fn w() -> BN256Fq12 { + BN256Fq12 { + c0: BN256Fq6::zero(), + c1: BN256Fq6::one(), + } + } + + /// Decompresses a torus element from Fq6 to a field element Fq12. + /// + /// `g -> (g + w) / (g - w)` + pub(super) fn decompress_torus(g: BN256Fq6) -> BN256Fq12 { + let mut one = BN256Fq6::one(); + let mut result = BN256Fq12 { + c0: g, + c1: one.clone(), + }; + one.negate(); + let denominator = BN256Fq12 { c0: g, c1: one }; + let denominator_inverse = denominator.inverse().unwrap(); + result.mul_assign(&denominator_inverse); + + result + } + + /// Compresses a field element from Fq12 to torus Fq6. 
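+ /// For elements in the image of `decompress_torus`, this is its left inverse.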
+ /// + /// `m -> (1 + m0) / m1, m = m0 + m1*w` + pub(super) fn compress_torus(m: BN256Fq12) -> BN256Fq6 { + let mut result = m.c0.clone(); + result.add_assign(&BN256Fq6::one()); + + let inverse_denominator = m.c1.inverse().unwrap(); + result.mul_assign(&inverse_denominator); + + result + } +} + +impl TorusExtension12Params for BN256Extension12Params { + fn get_two_inverse_coeffs_c0() -> BN256Fq { + BN256Fq::from_str(TWO_INVERSE_C0).unwrap() + } + + fn get_w_inverse_coeffs_c5() -> BN256Fq2 { + BN256Fq2 { + c0: BN256Fq::from_str(W_INVERSE_C5_C0).unwrap(), + c1: BN256Fq::from_str(W_INVERSE_C5_C1).unwrap(), + } + } + + /// Native computation of torus squaring on encoding in Fq6. + /// + /// `g' = 1/2 (g + \gamma / g)` + fn torus_square(g: BN256Fq6) -> BN256Fq6 { + let gamma = Self::gamma(); + + let result = if g.is_zero() { + BN256Fq6::zero() + } else { + // Decompress g + let mut decompressed = Self::decompress_torus(g); + // Now that we are in fq12, square + decompressed.square(); + // Now, compress g back onto the torus so we can use it + Self::compress_torus(decompressed) + }; + + // Constraint check + // (2g' - g) * g = \gamma + let mut lhs = result.clone(); + + lhs.double(); + lhs.sub_assign(&g); + lhs.mul_assign(&g); + + let rhs = gamma.clone(); + + if !g.is_zero() { + assert_eq!(lhs, rhs, "witness lhs == rhs"); + } else { + assert_eq!(lhs, BN256Fq6::zero(), "g is zero, witness lhs == rhs"); + } + + result + } + + /// Native computation of torus multiplication on encoding in Fq6. + /// + /// `(g, g') -> (g * g' + \gamma) / (g + g')` + fn torus_mul( + g1: >::Witness, + g2: >::Witness, + ) -> >::Witness { + let gamma = Self::gamma(); + + let mut g1_add_g2 = g1.clone(); + g1_add_g2.add_assign(&g2); + + let result = if g1_add_g2.is_zero() { + BN256Fq6::zero() + } else { + // Decompress g1 + let decompressed_g1 = Self::decompress_torus(g1); + // Decompress g2 + let decompressed_g2 = Self::decompress_torus(g2); + // Multiply + let mut decompressed_g1_times_g2 = decompressed_g1.clone(); + decompressed_g1_times_g2.mul_assign(&decompressed_g2); + // Compress the result + Self::compress_torus(decompressed_g1_times_g2) + }; + + // Since we have g12 = (g1*g2 + \gamma) / (g1+g2), we can + // constraint require: + // g12 * (g1 + g2) == g1 * g2 + \gamma + + let mut lhs = result.clone(); + lhs.mul_assign(&g1_add_g2); + + let mut g1_times_g2 = g1.clone(); + g1_times_g2.mul_assign(&g2); + let mut rhs = g1_times_g2.clone(); + rhs.add_assign(&gamma); + + if g1_add_g2.is_zero() { + assert_eq!(lhs, BN256Fq6::zero(), "g1 + g2 is zero, witness lhs == rhs"); + } else { + assert_eq!(lhs, rhs, "witness lhs == rhs"); + } + + result + } + + /// Native computation of frobenius map + /// + /// `(g,i) -> f(g,i) / (f(w,i) * w^{-1})` where `f(g,i) = g^{q^{i}}` + fn torus_frobenius_map( + g: >::Witness, + power: usize, + ) -> >::Witness { + let mut result = Self::decompress_torus(g); + result.frobenius_map(power); + let result = Self::compress_torus(result); + + // Now, we need to check the constraint. Namely, suppose + // r is our result. 
Then, + // w * f(g, i) = f(w, i) * r + + // lhs = f(g, i) * w + let w = Self::w(); + let mut lhs = g.clone(); + lhs.frobenius_map(power); + let mut lhs = BN256Fq12{ + c0: lhs, + c1: BN256Fq6::zero(), + }; + lhs.mul_assign(&w); + + // rhs = f(w, i) * r + let mut rhs = Self::w(); + rhs.frobenius_map(power); + let r = BN256Fq12{ + c0: result, + c1: BN256Fq6::zero(), + }; + rhs.mul_assign(&r); + + assert_eq!(lhs, rhs, "witness lhs == rhs"); + + result + } +} diff --git a/src/gadgets/tower_extension/params/mod.rs b/src/gadgets/tower_extension/params/mod.rs new file mode 100644 index 0000000..d820c6e --- /dev/null +++ b/src/gadgets/tower_extension/params/mod.rs @@ -0,0 +1,95 @@ +use pairing::ff::{Field, PrimeField}; + +use std::fmt::Debug; + +pub mod bn256; + +// We don't have generic unconstrained tower extensions element, so we resolve it using following. +// Besides, one may include here field-specific characteristics, such as non-residue for example, +// and branch out implementations with the help of it. + +pub trait Extension2Params: 'static + Clone + Copy + Send + Sync + Debug { + /// Witness here represents field element not under CS. + type Witness: Field; + + fn convert_to_structured_witness(c0: P, c1: P) -> Self::Witness; + fn convert_from_structured_witness(val: Self::Witness) -> (P, P); +} + +pub trait Extension6Params: 'static + Clone + Copy + Send + Sync + Debug { + type Ex2: Extension2Params
<P>
; + /// Witness here represents field element not under CS. + type Witness: Field; + + const FROBENIUS_COEFFS_C1: [>::Witness; 6]; + const FROBENIUS_COEFFS_C2: [>::Witness; 6]; + + fn convert_to_structured_witness( + c0: >::Witness, + c1: >::Witness, + c2: >::Witness, + ) -> Self::Witness; + fn convert_from_structured_witness( + wit: Self::Witness, + ) -> [>::Witness; 3]; +} + +pub trait Extension12Params: 'static + Clone + Copy + Send + Sync + Debug { + type Ex6: Extension6Params
<P>
; + /// Witness here represents field element not under CS. + type Witness: Field; + + const FROBENIUS_COEFFS_C1: [<>::Ex2 as Extension2Params< + P, + >>::Witness; 12]; + + fn convert_to_structured_witness( + c0: >::Witness, + c1: >::Witness, + ) -> Self::Witness; + + fn convert_from_structured_witness( + wit: Self::Witness, + ) -> ( + >::Witness, + >::Witness, + ); +} + +pub trait TorusExtension12Params: + 'static + Clone + Copy + Send + Sync + Debug + Extension12Params +where + T: PrimeField, +{ + // NOTE: Here, we use selectors instead of constants as BN256Fq2 does not allow to allocate constant without accessing a private field. + // TODO: Not sure whether w^{-1} is just c5*v^2*w in a general Fq12 extension, but this is the case for BN254. + /// Assuming `w^{-1} = c5*v^2*w`, returns the coefficient `c5`. + fn get_w_inverse_coeffs_c5( + ) -> <>::Ex2 as Extension2Params>::Witness; + + /// Returns the constant c0 = 1/2 + fn get_two_inverse_coeffs_c0() -> T; + + /// Computes the square of a Torus element using the formula + /// + /// `g' -> 1/2 * (g - gamma/g)` + fn torus_square( + g: >::Witness, + ) -> >::Witness; + + /// Computes the product of two Torus elements using the formula + /// + /// `(g, g') -> (g * g' + \gamma) / (g + g')` + fn torus_mul( + g1: >::Witness, + g2: >::Witness, + ) -> >::Witness; + + /// Computes the Frobenius map of a Torus element + /// + /// `(g,i) -> w*f(g,i) / f(w,i)` where `f(g,i) = g^{q^{i}}` + fn torus_frobenius_map( + g: >::Witness, + power: usize, + ) -> >::Witness; +} diff --git a/src/gadgets/traits/hardexp_compatible.rs b/src/gadgets/traits/hardexp_compatible.rs new file mode 100644 index 0000000..3d2ae43 --- /dev/null +++ b/src/gadgets/traits/hardexp_compatible.rs @@ -0,0 +1,38 @@ +use crate::cs::traits::cs::ConstraintSystem; + +use super::SmallField; + +/// This trait is used to define the requirements for an element to be compatible +/// with the hard exponentiation step +pub trait HardexpCompatible: Clone +where + F: SmallField, +{ + fn mul(&mut self, cs: &mut CS, other: &mut Self) -> Self + where + CS: ConstraintSystem; + + fn square(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem; + + fn conjugate(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem; + + fn inverse(&mut self, cs: &mut CS) -> Self + where + CS: ConstraintSystem; + + fn frobenius_map(&mut self, cs: &mut CS, power: usize) -> Self + where + CS: ConstraintSystem; + + fn pow_u32>(&mut self, cs: &mut CS, exponent: S) -> Self + where + CS: ConstraintSystem; + + fn normalize(&mut self, cs: &mut CS) + where + CS: ConstraintSystem; +} diff --git a/src/gadgets/traits/mod.rs b/src/gadgets/traits/mod.rs index 8e79402..d1c4ac3 100644 --- a/src/gadgets/traits/mod.rs +++ b/src/gadgets/traits/mod.rs @@ -5,6 +5,7 @@ pub mod auxiliary; pub mod castable; pub mod configuration; pub mod encodable; +pub mod hardexp_compatible; pub mod round_function; pub mod selectable; pub mod witnessable; diff --git a/src/gadgets/u1024/mod.rs b/src/gadgets/u1024/mod.rs new file mode 100644 index 0000000..a4c66df --- /dev/null +++ b/src/gadgets/u1024/mod.rs @@ -0,0 +1,395 @@ +use super::*; +use crate::cs::traits::cs::ConstraintSystem; +use crate::cs::traits::cs::DstBuffer; +use crate::field::SmallField; +use crate::gadgets::boolean::Boolean; +use crate::gadgets::traits::allocatable::CSAllocatable; +use crate::gadgets::traits::allocatable::CSAllocatableExt; +use crate::gadgets::traits::witnessable::CSWitnessable; +use crate::gadgets::traits::witnessable::WitnessHookable; +use 
crate::gadgets::u32::UInt32; +use crate::gadgets::u8::UInt8; +use ethereum_types::U512; +use u512::UInt512; + +use crate::config::*; + +#[derive(Derivative)] +#[derivative(Clone, Copy, Debug, Hash)] +pub struct UInt1024 { + pub inner: [UInt32; 32], +} + +pub fn decompose_u1024_as_u32x32(value: (U512, U512)) -> [u32; 32] { + let mut result = [0u32; 32]; + // Filling the low limb + for i in 0..8 { + result[i * 2] = value.0 .0[i] as u32; + result[i * 2 + 1] = (value.0 .0[i] >> 32) as u32; + } + // Filling the high limb + for i in 0..8 { + result[i * 2 + 16] = value.1 .0[i] as u32; + result[i * 2 + 1 + 16] = (value.1 .0[i] >> 32) as u32; + } + + result +} + +pub fn recompose_u1024_as_u32x32(value: [u32; 32]) -> (U512, U512) { + // Filling the low limb + let mut low = U512::zero(); + for i in 0..8 { + low.0[i] = (value[i * 2] as u64) | ((value[i * 2 + 1] as u64) << 32); + } + + // Filling the high limb + let mut high = U512::zero(); + for i in 0..8 { + high.0[i] = (value[i * 2 + 16] as u64) | ((value[i * 2 + 1 + 16] as u64) << 32); + } + + (low, high) +} + +impl CSAllocatable for UInt1024 { + type Witness = (U512, U512); + fn placeholder_witness() -> Self::Witness { + (U512::zero(), U512::zero()) + } + + #[inline(always)] + #[must_use] + fn allocate_without_value>(cs: &mut CS) -> Self { + let vars = cs.alloc_multiple_variables_without_values::<32>(); + + let as_u32 = vars.map(|el| UInt32::from_variable_checked(cs, el)); + + Self { inner: as_u32 } + } + + #[must_use] + fn allocate>(cs: &mut CS, witness: Self::Witness) -> Self { + let chunks = decompose_u1024_as_u32x32(witness); + let chunks = chunks.map(|el| UInt32::allocate_checked(cs, el)); + Self { inner: chunks } + } +} + +impl CSAllocatableExt for UInt1024 { + const INTERNAL_STRUCT_LEN: usize = 32; + + fn witness_from_set_of_values(values: [F; Self::INTERNAL_STRUCT_LEN]) -> Self::Witness { + // value + recompose_u1024_as_u32x32( + values.map(|el| >::cast_from_source(el)), + ) + } + + // we should be able to allocate without knowing values yet + fn create_without_value>(cs: &mut CS) -> Self { + Self::allocate_without_value(cs) + } + + fn flatten_as_variables(&self) -> [Variable; Self::INTERNAL_STRUCT_LEN] + where + [(); Self::INTERNAL_STRUCT_LEN]:, + { + self.inner.map(|el| el.get_variable()) + } + + fn set_internal_variables_values(witness: Self::Witness, dst: &mut DstBuffer<'_, '_, F>) { + decompose_u1024_as_u32x32(witness).map(|el| UInt32::set_internal_variables_values(el, dst)); + } +} + +use crate::gadgets::traits::selectable::Selectable; + +impl Selectable for UInt1024 { + #[must_use] + fn conditionally_select>( + cs: &mut CS, + flag: Boolean, + a: &Self, + b: &Self, + ) -> Self { + let inner = Selectable::conditionally_select(cs, flag, &a.inner, &b.inner); + + Self { inner } + } +} + +impl UInt1024 { + #[must_use] + pub fn allocated_constant>( + cs: &mut CS, + constant: (U512, U512), + ) -> Self { + debug_assert!(F::CAPACITY_BITS >= 32); + + let chunks = decompose_u1024_as_u32x32(constant); + let chunks = chunks.map(|el| UInt32::allocated_constant(cs, el)); + Self { inner: chunks } + } + + #[must_use] + pub fn allocate_from_closure_and_dependencies< + CS: ConstraintSystem, + FN: FnOnce(&[F]) -> (U512, U512) + 'static + Send + Sync, + >( + cs: &mut CS, + witness_closure: FN, + dependencies: &[Place], + ) -> Self { + let outputs = cs.alloc_multiple_variables_without_values::<32>(); + + if ::WitnessConfig::EVALUATE_WITNESS { + let value_fn = move |inputs: &[F], output_buffer: &mut DstBuffer<'_, '_, F>| { + debug_assert!(F::CAPACITY_BITS 
>= 32); + let witness = (witness_closure)(inputs); + let chunks = decompose_u1024_as_u32x32(witness); + + output_buffer.extend(chunks.map(|el| F::from_u64_unchecked(el as u64))); + }; + + cs.set_values_with_dependencies_vararg( + dependencies, + &Place::from_variables(outputs), + value_fn, + ); + } + + let chunks = outputs.map(|el| UInt32::from_variable_checked(cs, el)); + Self { inner: chunks } + } + + #[must_use] + pub fn zero>(cs: &mut CS) -> Self { + Self::allocated_constant(cs, (U512::zero(), U512::zero())) + } + + #[must_use] + pub fn overflowing_add>( + &self, + cs: &mut CS, + other: &Self, + ) -> (Self, Boolean) { + let mut carry_out = Boolean::allocated_constant(cs, false); + let mut result = *self; // any uninit would be fine too + for ((a, b), dst) in self + .inner + .iter() + .zip(other.inner.iter()) + .zip(result.inner.iter_mut()) + { + let (c, carry) = (*a).overflowing_add_with_carry_in(cs, *b, carry_out); + *dst = c; + carry_out = carry; + } + + (result, carry_out) + } + + #[must_use] + pub fn overflowing_sub>( + &self, + cs: &mut CS, + other: &Self, + ) -> (Self, Boolean) { + let mut borrow_out = Boolean::allocated_constant(cs, false); + let mut result = *self; // any uninit would be fine too + for ((a, b), dst) in self + .inner + .iter() + .zip(other.inner.iter()) + .zip(result.inner.iter_mut()) + { + let (c, borrow) = (*a).overflowing_sub_with_borrow_in(cs, *b, borrow_out); + *dst = c; + borrow_out = borrow; + } + + (result, borrow_out) + } + + /// Multiplies a number by 2^{32}. Panics if the number overflows. + #[must_use] + pub fn must_mul_by_two_pow_32>(&self, cs: &mut CS) -> Self { + let boolean_true = Boolean::allocated_constant(cs, true); + let last_limb_zero = self.inner[31].is_zero(cs); + Boolean::enforce_equal(cs, &last_limb_zero, &boolean_true); + + let mut new_inner = self.inner; + new_inner.copy_within(0..31, 1); + new_inner[0] = UInt32::zero(cs); + + Self { inner: new_inner } + } + + // Returns the value unchanges if `bit` is `true`, and 0 otherwise + #[must_use] + pub fn mask>(&self, cs: &mut CS, masking_bit: Boolean) -> Self { + let new_inner = self.inner.map(|el| el.mask(cs, masking_bit)); + Self { inner: new_inner } + } + + // Returns the value unchanges if `bit` is `false`, and 0 otherwise + #[must_use] + pub fn mask_negated>( + &self, + cs: &mut CS, + masking_bit: Boolean, + ) -> Self { + let new_inner = self.inner.map(|el| el.mask_negated(cs, masking_bit)); + Self { inner: new_inner } + } + + #[must_use] + pub fn equals>(cs: &mut CS, a: &Self, b: &Self) -> Boolean { + let equals: [_; 32] = + std::array::from_fn(|idx| UInt32::equals(cs, &a.inner[idx], &b.inner[idx])); + + Boolean::multi_and(cs, &equals) + } + + #[must_use] + pub fn from_le_bytes>(cs: &mut CS, bytes: [UInt8; 128]) -> Self { + let mut inner = [std::mem::MaybeUninit::uninit(); 32]; + for (dst, src) in inner.iter_mut().zip(bytes.array_chunks::<4>()) { + dst.write(UInt32::from_le_bytes(cs, *src)); + } + + let inner = unsafe { inner.map(|el| el.assume_init()) }; + + Self { inner } + } + + #[must_use] + pub fn from_limbs(limbs: [UInt32; 32]) -> Self { + Self { inner: limbs } + } + + #[must_use] + pub fn from_be_bytes>(cs: &mut CS, bytes: [UInt8; 128]) -> Self { + let mut inner = [std::mem::MaybeUninit::uninit(); 32]; + for (dst, src) in inner.iter_mut().rev().zip(bytes.array_chunks::<4>()) { + dst.write(UInt32::from_be_bytes(cs, *src)); + } + + let inner = unsafe { inner.map(|el| el.assume_init()) }; + + Self { inner } + } + + #[must_use] + pub fn is_zero>(&self, cs: &mut CS) -> Boolean { + 
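+ // A 1024-bit value is zero iff all 32 of its 32-bit limbs are zero.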
let limbs_are_zero = self.inner.map(|el| el.is_zero(cs)); + Boolean::multi_and(cs, &limbs_are_zero) + } + + #[must_use] + pub fn to_le_bytes>(self, cs: &mut CS) -> [UInt8; 128] { + let mut encoding = [std::mem::MaybeUninit::uninit(); 128]; + for (dst, src) in encoding + .iter_mut() + .zip(self.inner.iter().flat_map(|el| el.to_le_bytes(cs))) + { + dst.write(src); + } + + unsafe { encoding.map(|el| el.assume_init()) } + } + + #[must_use] + pub fn to_be_bytes>(self, cs: &mut CS) -> [UInt8; 128] { + let mut bytes = self.to_le_bytes(cs); + bytes.reverse(); + + bytes + } + + #[must_use] + pub fn to_low(self) -> UInt512 { + UInt512 { + inner: self.inner[..16].try_into().expect("incorrect slice size"), + } + } + + #[must_use] + pub fn to_high(self) -> UInt512 { + UInt512 { + inner: self.inner[16..].try_into().expect("incorrect slice size"), + } + } +} + +use crate::cs::Variable; +use crate::gadgets::traits::castable::Convertor; +use crate::gadgets::traits::castable::WitnessCastable; + +impl WitnessCastable for (U512, U512) { + #[inline] + fn cast_from_source(witness: [F; 32]) -> Self { + let reduced = witness.map(|el| { + let el = el.as_u64_reduced(); + debug_assert!(el <= u32::MAX as u64); + + el as u32 + }); + + recompose_u1024_as_u32x32(reduced) + } + + #[inline] + fn cast_into_source(self) -> [F; 32] { + let limbs = decompose_u1024_as_u32x32(self); + limbs.map(|el| WitnessCastable::cast_into_source(el)) + } +} + +impl CSWitnessable for UInt1024 { + type ConversionFunction = Convertor; + + fn witness_from_set_of_values(values: [F; 32]) -> Self::Witness { + WitnessCastable::cast_from_source(values) + } + + fn as_variables_set(&self) -> [Variable; 32] { + self.inner.map(|el| el.get_variable()) + } +} + +impl WitnessHookable for UInt1024 { + fn witness_hook>( + &self, + cs: &CS, + ) -> Box Option> { + let raw_witness = self.get_witness(cs); + Box::new(move || raw_witness.wait()) + } +} + +use crate::gadgets::traits::selectable::MultiSelectable; +// multiselect doesn't make much sense here because we can do parallel over chunks, +// so we degrade to default impl via normal select +impl MultiSelectable for UInt1024 {} + +use crate::gadgets::traits::encodable::CircuitVarLengthEncodable; + +impl CircuitVarLengthEncodable for UInt1024 { + #[inline(always)] + fn encoding_length(&self) -> usize { + 32 + } + fn encode_to_buffer>(&self, cs: &mut CS, dst: &mut Vec) { + CircuitVarLengthEncodable::::encode_to_buffer(&self.inner, cs, dst); + } +} + +use crate::gadgets::traits::allocatable::CSPlaceholder; + +impl CSPlaceholder for UInt1024 { + fn placeholder>(cs: &mut CS) -> Self { + Self::zero(cs) + } +} diff --git a/src/gadgets/u2048/mod.rs b/src/gadgets/u2048/mod.rs new file mode 100644 index 0000000..e47fe5a --- /dev/null +++ b/src/gadgets/u2048/mod.rs @@ -0,0 +1,485 @@ +use super::*; +use crate::cs::traits::cs::ConstraintSystem; +use crate::cs::traits::cs::DstBuffer; +use crate::field::SmallField; +use crate::gadgets::boolean::Boolean; +use crate::gadgets::traits::allocatable::CSAllocatable; +use crate::gadgets::traits::allocatable::CSAllocatableExt; +use crate::gadgets::traits::witnessable::CSWitnessable; +use crate::gadgets::traits::witnessable::WitnessHookable; +use crate::gadgets::u32::UInt32; +use crate::gadgets::u8::UInt8; +use blake2s::mixing_function::merge_byte_using_table; +use crypto_bigint::U1024; +use tables::ByteSplitTable; +use u1024::UInt1024; +use u4096::UInt4096; + +use crate::config::*; + +#[derive(Derivative)] +#[derivative(Clone, Copy, Debug, Hash)] +pub struct UInt2048 { + pub inner: 
+    pub inner: [UInt32<F>; 64],
+}
+
+pub fn decompose_u2048_as_u32x64(value: (U1024, U1024)) -> [u32; 64] {
+    let low_limbs = value.0.as_limbs();
+    let high_limbs = value.1.as_limbs();
+
+    let mut result = [0u32; 64];
+    // Filling the low half
+    for i in 0..16 {
+        result[i * 2] = low_limbs[i].0 as u32;
+        result[i * 2 + 1] = (low_limbs[i].0 >> 32) as u32;
+    }
+    // Filling the high half
+    for i in 0..16 {
+        result[i * 2 + 32] = high_limbs[i].0 as u32;
+        result[i * 2 + 1 + 32] = (high_limbs[i].0 >> 32) as u32;
+    }
+
+    result
+}
+
+pub fn recompose_u2048_as_u32x64(value: [u32; 64]) -> (U1024, U1024) {
+    // Assembling the low half
+    let mut low = [0u64; 16];
+    for i in 0..16 {
+        low[i] = (value[i * 2] as u64) | ((value[i * 2 + 1] as u64) << 32);
+    }
+
+    // Assembling the high half
+    let mut high = [0u64; 16];
+    for i in 0..16 {
+        high[i] = (value[i * 2 + 32] as u64) | ((value[i * 2 + 1 + 32] as u64) << 32);
+    }
+
+    (U1024::from_words(low), U1024::from_words(high))
+}
+
+impl<F: SmallField> CSAllocatable<F> for UInt2048<F> {
+    type Witness = (U1024, U1024);
+    fn placeholder_witness() -> Self::Witness {
+        (U1024::ZERO, U1024::ZERO)
+    }
+
+    #[inline(always)]
+    #[must_use]
+    fn allocate_without_value<CS: ConstraintSystem<F>>(cs: &mut CS) -> Self {
+        let vars = cs.alloc_multiple_variables_without_values::<64>();
+
+        let as_u32 = vars.map(|el| UInt32::from_variable_checked(cs, el));
+
+        Self { inner: as_u32 }
+    }
+
+    #[must_use]
+    fn allocate<CS: ConstraintSystem<F>>(cs: &mut CS, witness: Self::Witness) -> Self {
+        let chunks = decompose_u2048_as_u32x64(witness);
+        let chunks = chunks.map(|el| UInt32::allocate_checked(cs, el));
+        Self { inner: chunks }
+    }
+}
+
+impl<F: SmallField> CSAllocatableExt<F> for UInt2048<F> {
+    const INTERNAL_STRUCT_LEN: usize = 64;
+
+    fn witness_from_set_of_values(values: [F; Self::INTERNAL_STRUCT_LEN]) -> Self::Witness {
+        recompose_u2048_as_u32x64(
+            values.map(|el| <u32 as WitnessCastable<F, F>>::cast_from_source(el)),
+        )
+    }
+
+    // we should be able to allocate without knowing values yet
+    fn create_without_value<CS: ConstraintSystem<F>>(cs: &mut CS) -> Self {
+        Self::allocate_without_value(cs)
+    }
+
+    fn flatten_as_variables(&self) -> [Variable; Self::INTERNAL_STRUCT_LEN]
+    where
+        [(); Self::INTERNAL_STRUCT_LEN]:,
+    {
+        self.inner.map(|el| el.get_variable())
+    }
+
+    fn set_internal_variables_values(witness: Self::Witness, dst: &mut DstBuffer<'_, '_, F>) {
+        decompose_u2048_as_u32x64(witness).map(|el| UInt32::set_internal_variables_values(el, dst));
+    }
+}
+
+use crate::gadgets::traits::selectable::Selectable;
+
+impl<F: SmallField> Selectable<F> for UInt2048<F> {
+    #[must_use]
+    fn conditionally_select<CS: ConstraintSystem<F>>(
+        cs: &mut CS,
+        flag: Boolean<F>,
+        a: &Self,
+        b: &Self,
+    ) -> Self {
+        let inner = Selectable::conditionally_select(cs, flag, &a.inner, &b.inner);
+
+        Self { inner }
+    }
+}
+
+impl<F: SmallField> UInt2048<F> {
+    #[must_use]
+    pub fn allocated_constant<CS: ConstraintSystem<F>>(
+        cs: &mut CS,
+        constant: (U1024, U1024),
+    ) -> Self {
+        debug_assert!(F::CAPACITY_BITS >= 32);
+
+        let chunks = decompose_u2048_as_u32x64(constant);
+        let chunks = chunks.map(|el| UInt32::allocated_constant(cs, el));
+        Self { inner: chunks }
+    }
+
+    #[must_use]
+    pub fn allocate_from_closure_and_dependencies<
+        CS: ConstraintSystem<F>,
+        FN: FnOnce(&[F]) -> (U1024, U1024) + 'static + Send + Sync,
+    >(
+        cs: &mut CS,
+        witness_closure: FN,
+        dependencies: &[Place],
+    ) -> Self {
+        let outputs = cs.alloc_multiple_variables_without_values::<64>();
+
+        if <CS::Config as CSConfig>::WitnessConfig::EVALUATE_WITNESS {
+            let value_fn = move |inputs: &[F], output_buffer: &mut DstBuffer<'_, '_, F>| {
+                debug_assert!(F::CAPACITY_BITS >= 64);
+                let witness = (witness_closure)(inputs);
+                let chunks = decompose_u2048_as_u32x64(witness);
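+
+                // Each u32 chunk becomes one field element, in the little-endian
+                // limb order produced by decompose_u2048_as_u32x64.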
+                output_buffer.extend(chunks.map(|el| F::from_u64_unchecked(el as u64)));
+            };
+
+            cs.set_values_with_dependencies_vararg(
+                dependencies,
+                &Place::from_variables(outputs),
+                value_fn,
+            );
+        }
+
+        let chunks = outputs.map(|el| UInt32::from_variable_checked(cs, el));
+        Self { inner: chunks }
+    }
+
+    #[must_use]
+    pub fn zero<CS: ConstraintSystem<F>>(cs: &mut CS) -> Self {
+        Self::allocated_constant(cs, (U1024::ZERO, U1024::ZERO))
+    }
+
+    #[must_use]
+    pub fn overflowing_add<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        other: &Self,
+    ) -> (Self, Boolean<F>) {
+        let mut carry_out = Boolean::allocated_constant(cs, false);
+        let mut result = *self; // any uninit would be fine too
+        for ((a, b), dst) in self
+            .inner
+            .iter()
+            .zip(other.inner.iter())
+            .zip(result.inner.iter_mut())
+        {
+            let (c, carry) = (*a).overflowing_add_with_carry_in(cs, *b, carry_out);
+            *dst = c;
+            carry_out = carry;
+        }
+
+        (result, carry_out)
+    }
+
+    #[must_use]
+    pub fn overflowing_sub<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        other: &Self,
+    ) -> (Self, Boolean<F>) {
+        let mut borrow_out = Boolean::allocated_constant(cs, false);
+        let mut result = *self; // any uninit would be fine too
+        for ((a, b), dst) in self
+            .inner
+            .iter()
+            .zip(other.inner.iter())
+            .zip(result.inner.iter_mut())
+        {
+            let (c, borrow) = (*a).overflowing_sub_with_borrow_in(cs, *b, borrow_out);
+            *dst = c;
+            borrow_out = borrow;
+        }
+
+        (result, borrow_out)
+    }
+
+    #[must_use]
+    pub fn widening_mul<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        other: &Self,
+        self_limbs: usize,
+        other_limbs: usize,
+    ) -> UInt4096<F> {
+        assert!(
+            self_limbs + other_limbs <= 128,
+            "total number of limbs must be <= 128"
+        );
+
+        let zero = UInt32::allocated_constant(cs, 0);
+        let mut remainders = vec![UInt32::<F>::zero(cs); self_limbs + other_limbs];
+
+        for i in 0..self_limbs {
+            let mut carry = UInt32::allocated_constant(cs, 0);
+            for j in 0..other_limbs {
+                let res = UInt32::fma_with_carry(
+                    cs,
+                    self.inner[i],
+                    other.inner[j],
+                    if i == 0 { zero } else { remainders[i + j] },
+                    carry,
+                );
+                (remainders[i + j], carry) = (res[0].0, res[1].0);
+            }
+            remainders[i + other_limbs] = carry;
+        }
+
+        let mut inner = [UInt32::<F>::zero(cs); 128];
+        inner[..self_limbs + other_limbs].copy_from_slice(&remainders);
+        UInt4096 { inner }
+    }
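+
+    // As a small illustration of the schoolbook loop above, in base B = 2^{32}:
+    //   (a1*B + a0) * (b1*B + b0) = a0*b0 + (a0*b1 + a1*b0)*B + a1*b1*B^2,
+    // and each `fma_with_carry` call accumulates one partial product a_i*b_j
+    // into `remainders[i + j]`, with the final carry of a row landing in
+    // `remainders[i + other_limbs]`.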
+
+    /// Multiplies a number by 2^{32}. Panics if the number overflows.
+    #[must_use]
+    pub fn must_mul_by_two_pow_32<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> Self {
+        let boolean_true = Boolean::allocated_constant(cs, true);
+        let last_limb_zero = self.inner[63].is_zero(cs);
+        Boolean::enforce_equal(cs, &last_limb_zero, &boolean_true);
+
+        let mut new_inner = self.inner;
+        new_inner.copy_within(0..63, 1);
+        new_inner[0] = UInt32::zero(cs);
+
+        Self { inner: new_inner }
+    }
+
+    #[must_use]
+    pub fn div2<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> Self {
+        let byte_split_id = cs
+            .get_table_id_for_marker::<ByteSplitTable<1>>()
+            .expect("table should exist");
+        let mut bytes = self.to_le_bytes(cs);
+        let mut bit: Option<Variable> = None;
+        bytes.iter_mut().rev().for_each(|b| {
+            let res = cs.perform_lookup::<1, 2>(byte_split_id, &[b.get_variable()]);
+            let mut shifted = res[1];
+            let new_bit = res[0];
+            if let Some(top_bit) = bit {
+                shifted = merge_byte_using_table::<_, _, 7>(cs, shifted, top_bit);
+            }
+            *b = UInt8 {
+                variable: shifted,
+                _marker: std::marker::PhantomData,
+            };
+            bit = Some(new_bit);
+        });
+        Self::from_le_bytes(cs, bytes)
+    }
+
+    /// Finds the result of multiplying `self` by `other` mod `modulo`.
+    #[must_use]
+    pub fn modmul<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        other: &UInt2048<F>,
+        modulo: &UInt2048<F>,
+    ) -> UInt2048<F> {
+        // We take all 64 limbs of each factor, since the operands can be of any size
+        let product = self.widening_mul(cs, other, 64, 64);
+        let (_, remainder) = product.long_division(cs, modulo);
+        remainder
+    }
+
+    #[must_use]
+    pub fn is_odd<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> Boolean<F> {
+        self.inner[0].into_num().spread_into_bits::<CS, 32>(cs)[0]
+    }
+
+    // Returns the value unchanged if `bit` is `true`, and 0 otherwise
+    #[must_use]
+    pub fn mask<CS: ConstraintSystem<F>>(&self, cs: &mut CS, masking_bit: Boolean<F>) -> Self {
+        let new_inner = self.inner.map(|el| el.mask(cs, masking_bit));
+        Self { inner: new_inner }
+    }
+
+    // Returns the value unchanged if `bit` is `false`, and 0 otherwise
+    #[must_use]
+    pub fn mask_negated<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        masking_bit: Boolean<F>,
+    ) -> Self {
+        let new_inner = self.inner.map(|el| el.mask_negated(cs, masking_bit));
+        Self { inner: new_inner }
+    }
+
+    #[must_use]
+    pub fn to_u4096<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> UInt4096<F> {
+        let mut u4096: UInt4096<F> = UInt4096::zero(cs);
+        u4096.inner[..64].copy_from_slice(&self.inner);
+        u4096
+    }
+
+    #[must_use]
+    pub fn equals<CS: ConstraintSystem<F>>(cs: &mut CS, a: &Self, b: &Self) -> Boolean<F> {
+        let equals: [_; 64] =
+            std::array::from_fn(|idx| UInt32::equals(cs, &a.inner[idx], &b.inner[idx]));
+
+        Boolean::multi_and(cs, &equals)
+    }
+
+    #[must_use]
+    pub fn from_le_bytes<CS: ConstraintSystem<F>>(cs: &mut CS, bytes: [UInt8<F>; 256]) -> Self {
+        let mut inner = [std::mem::MaybeUninit::uninit(); 64];
+        for (dst, src) in inner.iter_mut().zip(bytes.array_chunks::<4>()) {
+            dst.write(UInt32::from_le_bytes(cs, *src));
+        }
+
+        let inner = unsafe { inner.map(|el| el.assume_init()) };
+
+        Self { inner }
+    }
+
+    #[must_use]
+    pub fn from_limbs(limbs: [UInt32<F>; 64]) -> Self {
+        Self { inner: limbs }
+    }
+
+    #[must_use]
+    pub fn from_be_bytes<CS: ConstraintSystem<F>>(cs: &mut CS, bytes: [UInt8<F>; 256]) -> Self {
+        let mut inner = [std::mem::MaybeUninit::uninit(); 64];
+        for (dst, src) in inner.iter_mut().rev().zip(bytes.array_chunks::<4>()) {
+            dst.write(UInt32::from_be_bytes(cs, *src));
+        }
+
+        let inner = unsafe { inner.map(|el| el.assume_init()) };
+
+        Self { inner }
+    }
+
+    #[must_use]
+    pub fn is_zero<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> Boolean<F> {
+        let limbs_are_zero = self.inner.map(|el| el.is_zero(cs));
+        Boolean::multi_and(cs, &limbs_are_zero)
+    }
+
+    #[must_use]
+    pub fn to_le_bytes<CS: ConstraintSystem<F>>(self, cs: &mut CS) -> [UInt8<F>; 256] {
+        let mut encoding = [std::mem::MaybeUninit::uninit(); 256];
+        for (dst, src) in encoding
+            .iter_mut()
+            .zip(self.inner.iter().flat_map(|el| el.to_le_bytes(cs)))
+        {
+            dst.write(src);
+        }
+
+        unsafe { encoding.map(|el| el.assume_init()) }
+    }
+
+    #[must_use]
+    pub fn to_be_bytes<CS: ConstraintSystem<F>>(self, cs: &mut CS) -> [UInt8<F>; 256] {
+        let mut bytes = self.to_le_bytes(cs);
+        bytes.reverse();
+
+        bytes
+    }
+
+    #[must_use]
+    pub fn to_low(self) -> UInt1024<F> {
+        UInt1024 {
+            inner: self.inner[..32].try_into().expect("incorrect slice size"),
+        }
+    }
+
+    #[must_use]
+    pub fn to_high(self) -> UInt1024<F> {
+        UInt1024 {
+            inner: self.inner[32..].try_into().expect("incorrect slice size"),
+        }
+    }
+}
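+
+// Sanity property of the helpers above (illustrative, with hypothetical values):
+// witness decomposition and recomposition are mutually inverse, e.g.
+//     let w = (U1024::from_u64(42), U1024::MAX);
+//     assert_eq!(recompose_u2048_as_u32x64(decompose_u2048_as_u32x64(w)), w);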
+
+use crate::cs::Variable;
+use crate::gadgets::traits::castable::Convertor;
+use crate::gadgets::traits::castable::WitnessCastable;
+
+impl<F: SmallField> WitnessCastable<F, [F; 64]> for (U1024, U1024) {
+    #[inline]
+    fn cast_from_source(witness: [F; 64]) -> Self {
+        let reduced = witness.map(|el| {
+            let el = el.as_u64_reduced();
+            debug_assert!(el <= u32::MAX as u64);
+
+            el as u32
+        });
+
+        recompose_u2048_as_u32x64(reduced)
+    }
+
+    #[inline]
+    fn cast_into_source(self) -> [F; 64] {
+        let limbs = decompose_u2048_as_u32x64(self);
+        limbs.map(|el| WitnessCastable::cast_into_source(el))
+    }
+}
+
+impl<F: SmallField> CSWitnessable<F, 64> for UInt2048<F> {
+    type ConversionFunction = Convertor<F, [F; 64], (U1024, U1024)>;
+
+    fn witness_from_set_of_values(values: [F; 64]) -> Self::Witness {
+        WitnessCastable::cast_from_source(values)
+    }
+
+    fn as_variables_set(&self) -> [Variable; 64] {
+        self.inner.map(|el| el.get_variable())
+    }
+}
+
+impl<F: SmallField> WitnessHookable<F> for UInt2048<F> {
+    fn witness_hook<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &CS,
+    ) -> Box<dyn FnOnce() -> Option<Self::Witness> + 'static> {
+        let raw_witness = self.get_witness(cs);
+        Box::new(move || raw_witness.wait())
+    }
+}
+
+use crate::gadgets::traits::selectable::MultiSelectable;
+// multiselect doesn't make much sense here because we can do parallel over chunks,
+// so we degrade to default impl via normal select
+impl<F: SmallField> MultiSelectable<F> for UInt2048<F> {}
+
+use crate::gadgets::traits::encodable::CircuitVarLengthEncodable;
+
+impl<F: SmallField> CircuitVarLengthEncodable<F> for UInt2048<F> {
+    #[inline(always)]
+    fn encoding_length(&self) -> usize {
+        64
+    }
+    fn encode_to_buffer<CS: ConstraintSystem<F>>(&self, cs: &mut CS, dst: &mut Vec<Variable>) {
+        CircuitVarLengthEncodable::<F>::encode_to_buffer(&self.inner, cs, dst);
+    }
+}
+
+use crate::gadgets::traits::allocatable::CSPlaceholder;
+
+impl<F: SmallField> CSPlaceholder<F> for UInt2048<F> {
+    fn placeholder<CS: ConstraintSystem<F>>(cs: &mut CS) -> Self {
+        Self::zero(cs)
+    }
+}
diff --git a/src/gadgets/u256/mod.rs b/src/gadgets/u256/mod.rs
index 5047f97..819f802 100644
--- a/src/gadgets/u256/mod.rs
+++ b/src/gadgets/u256/mod.rs
@@ -329,6 +329,13 @@ impl<F: SmallField> UInt256<F> {
         bytes
     }
 
+    #[must_use]
+    pub fn to_u512<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> UInt512<F> {
+        let mut u512 = UInt512::zero(cs);
+        u512.inner[..8].copy_from_slice(&self.inner);
+        u512
+    }
+
     #[must_use]
     pub fn div2<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> Self {
         let byte_split_id = cs
@@ -351,6 +358,19 @@ impl<F: SmallField> UInt256<F> {
         });
         Self::from_le_bytes(cs, bytes)
     }
+
+    /// Finds the result of multiplying `self` by `other` mod `modulo`.
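+    /// The full 512-bit product is computed first and then reduced with
+    /// [`UInt512::long_division`]; `modulo` is expected to be non-zero.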
+    #[must_use]
+    pub fn modmul<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        other: &UInt256<F>,
+        modulo: &UInt256<F>,
+    ) -> UInt256<F> {
+        // We take all 8 limbs of each factor, since the operands can be of any size
+        let product = self.widening_mul(cs, other, 8, 8);
+        let (_, remainder) = product.long_division(cs, modulo);
+        remainder
+    }
 }
 
 use crate::cs::Variable;
diff --git a/src/gadgets/u4096/mod.rs b/src/gadgets/u4096/mod.rs
new file mode 100644
index 0000000..310ee4b
--- /dev/null
+++ b/src/gadgets/u4096/mod.rs
@@ -0,0 +1,542 @@
+use super::*;
+use crate::cs::traits::cs::ConstraintSystem;
+use crate::cs::traits::cs::DstBuffer;
+use crate::field::SmallField;
+use crate::gadgets::boolean::Boolean;
+use crate::gadgets::traits::allocatable::CSAllocatable;
+use crate::gadgets::traits::allocatable::CSAllocatableExt;
+use crate::gadgets::traits::witnessable::CSWitnessable;
+use crate::gadgets::traits::witnessable::WitnessHookable;
+use crate::gadgets::u32::UInt32;
+use crate::gadgets::u8::UInt8;
+use crypto_bigint::U1024;
+use crypto_bigint::U2048;
+use u2048::UInt2048;
+
+use crate::config::*;
+
+#[derive(Derivative)]
+#[derivative(Clone, Copy, Debug, Hash)]
+pub struct UInt4096<F: SmallField> {
+    pub inner: [UInt32<F>; 128],
+}
+
+pub fn decompose_u4096_as_u32x128(value: (U2048, U2048)) -> [u32; 128] {
+    let low_limbs = value.0.as_limbs();
+    let high_limbs = value.1.as_limbs();
+
+    let mut result = [0u32; 128];
+    // Filling the low half
+    for i in 0..32 {
+        result[i * 2] = low_limbs[i].0 as u32;
+        result[i * 2 + 1] = (low_limbs[i].0 >> 32) as u32;
+    }
+    // Filling the high half
+    for i in 0..32 {
+        result[i * 2 + 64] = high_limbs[i].0 as u32;
+        result[i * 2 + 1 + 64] = (high_limbs[i].0 >> 32) as u32;
+    }
+
+    result
+}
+
+pub fn recompose_u4096_as_u32x128(value: [u32; 128]) -> (U2048, U2048) {
+    // Assembling the low half
+    let mut low = [0u64; 32];
+    for i in 0..32 {
+        low[i] = (value[i * 2] as u64) | ((value[i * 2 + 1] as u64) << 32);
+    }
+
+    // Assembling the high half; the high u32 chunks start at offset 64,
+    // matching decompose_u4096_as_u32x128 above
+    let mut high = [0u64; 32];
+    for i in 0..32 {
+        high[i] = (value[i * 2 + 64] as u64) | ((value[i * 2 + 1 + 64] as u64) << 32);
+    }
+
+    (U2048::from_words(low), U2048::from_words(high))
+}
+
+pub fn convert_limb_to_u4096<F, CS>(cs: &mut CS, limb: &UInt32<F>) -> UInt4096<F>
+where
+    F: SmallField,
+    CS: ConstraintSystem<F>,
+{
+    let mut u4096 = UInt4096::zero(cs);
+    u4096.inner[0] = *limb;
+    u4096
+}
+
+impl<F: SmallField> CSAllocatable<F> for UInt4096<F> {
+    type Witness = (U2048, U2048);
+    fn placeholder_witness() -> Self::Witness {
+        (U2048::ZERO, U2048::ZERO)
+    }
+
+    #[inline(always)]
+    #[must_use]
+    fn allocate_without_value<CS: ConstraintSystem<F>>(cs: &mut CS) -> Self {
+        let vars = cs.alloc_multiple_variables_without_values::<128>();
+
+        let as_u32 = vars.map(|el| UInt32::from_variable_checked(cs, el));
+
+        Self { inner: as_u32 }
+    }
+
+    #[must_use]
+    fn allocate<CS: ConstraintSystem<F>>(cs: &mut CS, witness: Self::Witness) -> Self {
+        let chunks = decompose_u4096_as_u32x128(witness);
+        let chunks = chunks.map(|el| UInt32::allocate_checked(cs, el));
+        Self { inner: chunks }
+    }
+}
+
+impl<F: SmallField> CSAllocatableExt<F> for UInt4096<F> {
+    const INTERNAL_STRUCT_LEN: usize = 128;
+
+    fn witness_from_set_of_values(values: [F; Self::INTERNAL_STRUCT_LEN]) -> Self::Witness {
+        recompose_u4096_as_u32x128(
+            values.map(|el| <u32 as WitnessCastable<F, F>>::cast_from_source(el)),
+        )
+    }
+
+    // we should be able to allocate without knowing values yet
+    fn create_without_value<CS: ConstraintSystem<F>>(cs: &mut CS) -> Self {
+        Self::allocate_without_value(cs)
+    }
+
+    fn flatten_as_variables(&self) -> [Variable; Self::INTERNAL_STRUCT_LEN]
+    where
+        [(); Self::INTERNAL_STRUCT_LEN]:,
+    {
+        self.inner.map(|el| el.get_variable())
+    }
+
+    fn set_internal_variables_values(witness: Self::Witness, dst: &mut DstBuffer<'_, '_, F>) {
+        decompose_u4096_as_u32x128(witness)
+            .map(|el| UInt32::set_internal_variables_values(el, dst));
+    }
+}
+
+use crate::gadgets::traits::selectable::Selectable;
+
+impl<F: SmallField> Selectable<F> for UInt4096<F> {
+    #[must_use]
+    fn conditionally_select<CS: ConstraintSystem<F>>(
+        cs: &mut CS,
+        flag: Boolean<F>,
+        a: &Self,
+        b: &Self,
+    ) -> Self {
+        let inner = Selectable::conditionally_select(cs, flag, &a.inner, &b.inner);
+
+        Self { inner }
+    }
+}
+
+impl<F: SmallField> UInt4096<F> {
+    #[must_use]
+    pub fn allocated_constant<CS: ConstraintSystem<F>>(
+        cs: &mut CS,
+        constant: (U2048, U2048),
+    ) -> Self {
+        debug_assert!(F::CAPACITY_BITS >= 32);
+
+        let chunks = decompose_u4096_as_u32x128(constant);
+        let chunks = chunks.map(|el| UInt32::allocated_constant(cs, el));
+        Self { inner: chunks }
+    }
+
+    #[must_use]
+    pub fn allocate_from_closure_and_dependencies<
+        CS: ConstraintSystem<F>,
+        FN: FnOnce(&[F]) -> (U2048, U2048) + 'static + Send + Sync,
+    >(
+        cs: &mut CS,
+        witness_closure: FN,
+        dependencies: &[Place],
+    ) -> Self {
+        let outputs = cs.alloc_multiple_variables_without_values::<128>();
+
+        if <CS::Config as CSConfig>::WitnessConfig::EVALUATE_WITNESS {
+            let value_fn = move |inputs: &[F], output_buffer: &mut DstBuffer<'_, '_, F>| {
+                debug_assert!(F::CAPACITY_BITS >= 64);
+                let witness = (witness_closure)(inputs);
+                let chunks = decompose_u4096_as_u32x128(witness);
+
+                output_buffer.extend(chunks.map(|el| F::from_u64_unchecked(el as u64)));
+            };
+
+            cs.set_values_with_dependencies_vararg(
+                dependencies,
+                &Place::from_variables(outputs),
+                value_fn,
+            );
+        }
+
+        let chunks = outputs.map(|el| UInt32::from_variable_checked(cs, el));
+        Self { inner: chunks }
+    }
+
+    #[must_use]
+    pub fn zero<CS: ConstraintSystem<F>>(cs: &mut CS) -> Self {
+        Self::allocated_constant(cs, (U2048::ZERO, U2048::ZERO))
+    }
+
+    /// Returns `true` if `self >= other`, and `false` otherwise.
+    /// Here, `self` and `other` are represented as [`UInt4096`] and [`UInt2048`] respectively.
+    #[must_use]
+    pub fn geq_than_u2048<CS>(&self, cs: &mut CS, other: &UInt2048<F>) -> Boolean<F>
+    where
+        CS: ConstraintSystem<F>,
+    {
+        let high = self.to_high();
+        let under_2048 = high.is_zero(cs);
+        let over_2048 = under_2048.negated(cs);
+        let low = self.to_low();
+        let (sub, overflow) = other.overflowing_sub(cs, &low);
+        let a_equal_b = sub.is_zero(cs);
+        Boolean::multi_or(cs, &[overflow, a_equal_b, over_2048])
+    }
+
+    #[must_use]
+    pub fn overflowing_add<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        other: &Self,
+    ) -> (Self, Boolean<F>) {
+        let mut carry_out = Boolean::allocated_constant(cs, false);
+        let mut result = *self; // any uninit would be fine too
+        for ((a, b), dst) in self
+            .inner
+            .iter()
+            .zip(other.inner.iter())
+            .zip(result.inner.iter_mut())
+        {
+            let (c, carry) = (*a).overflowing_add_with_carry_in(cs, *b, carry_out);
+            *dst = c;
+            carry_out = carry;
+        }
+
+        (result, carry_out)
+    }
+
+    #[must_use]
+    pub fn overflowing_sub<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        other: &Self,
+    ) -> (Self, Boolean<F>) {
+        let mut borrow_out = Boolean::allocated_constant(cs, false);
+        let mut result = *self; // any uninit would be fine too
+        for ((a, b), dst) in self
+            .inner
+            .iter()
+            .zip(other.inner.iter())
+            .zip(result.inner.iter_mut())
+        {
+            let (c, borrow) = (*a).overflowing_sub_with_borrow_in(cs, *b, borrow_out);
+            *dst = c;
+            borrow_out = borrow;
+        }
+
+        (result, borrow_out)
+    }
+
+    /// Multiplies a number by 2^{32}. Panics if the number overflows.
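+    /// Internally this shifts the limb array up by one 32-bit limb after
+    /// enforcing that the most significant limb is already zero.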
+    #[must_use]
+    pub fn must_mul_by_two_pow_32<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> Self {
+        let boolean_true = Boolean::allocated_constant(cs, true);
+        let last_limb_zero = self.inner[127].is_zero(cs);
+        Boolean::enforce_equal(cs, &last_limb_zero, &boolean_true);
+
+        let mut new_inner = self.inner;
+        new_inner.copy_within(0..127, 1);
+        new_inner[0] = UInt32::zero(cs);
+
+        Self { inner: new_inner }
+    }
+
+    /// Find quotient and remainder of division of `self` by `other` using the naive long
+    /// division algorithm in base `2^{32}`, since both [`UInt4096`] and [`UInt2048`] are
+    /// represented as arrays of [`UInt32`]. The implementation is based on the algorithm
+    /// https://en.wikipedia.org/wiki/Long_division#Algorithm_for_arbitrary_base,
+    /// where `k=128`, `l=64`, and base `b=2^{32}`.
+    #[must_use]
+    pub fn long_division<CS>(&self, cs: &mut CS, other: &UInt2048<F>) -> (UInt4096<F>, UInt2048<F>)
+    where
+        CS: ConstraintSystem<F>,
+    {
+        const U2048_MAX_LIMBS: usize = 64;
+        const U4096_MAX_LIMBS: usize = 128;
+        // The digit beta lies in [0, 2^{32}], so binary search converges in at
+        // most 33 iterations, exactly as in the UInt512 version of this routine
+        const MAX_BINARY_SEARCH_ITERATIONS: usize = 33;
+
+        // Initializing constants; the base is b = 2^{32}
+        let base = U1024::from_u64(1u64 << 32);
+        let base = UInt2048::allocated_constant(cs, (base, U1024::ZERO));
+        let boolean_false = Boolean::allocated_constant(cs, false);
+        let one = UInt2048::allocated_constant(cs, (U1024::ONE, U1024::ZERO));
+
+        // q <- 0
+        let mut q = UInt4096::zero(cs);
+
+        // r <- first 63 limbs of self, thus it fits in UInt2048
+        let mut r = self.to_high();
+        r.inner[0] = UInt32::zero(cs);
+        r.inner.copy_within(1..U2048_MAX_LIMBS, 0);
+        r.inner[U2048_MAX_LIMBS - 1] = UInt32::zero(cs);
+
+        for i in 0..U2048_MAX_LIMBS + 1 {
+            // \alpha_{i+l-1} is the (k-l-i)-th limb of n
+            let alpha = self.inner[U2048_MAX_LIMBS - i];
+            let alpha = convert_limb_to_u4096(cs, &alpha);
+
+            // d_i <- b*r_{i-1} + \alpha_{i+l-1}
+            // d_i can safely be UInt4096 in size.
+            // r can have any number of limbs up to 64.
+            // base is 2 limbs wide since b=(2^{32}-1)+1
+            // TODO: Mul by base might be optimized
+            let d = base.widening_mul(cs, &r, 2, 64);
+            let (d_plus_alpha, overflow) = d.overflowing_add(cs, &alpha);
+            // d_i cannot overflow UInt4096
+            Boolean::enforce_equal(cs, &overflow, &boolean_false);
+            let d = d_plus_alpha;
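+
+            // Worked example of one digit step, in base 10 instead of 2^{32} for
+            // readability: dividing 7489 by 32, the first partial remainder is
+            // r = 7, so d = 10*7 + 4 = 74, and the search below settles on the
+            // digit beta = 2, leaving the new remainder 74 - 2*32 = 10.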
+
+            // beta_i <- next digit of the quotient. We use binary search to
+            // find a suitable beta_i
+            let mut beta = UInt2048::zero(cs);
+            let mut left = UInt2048::zero(cs);
+            let mut right = base;
+
+            // Preparing new_r to further update r
+            let mut new_r = UInt4096::zero(cs);
+
+            for _ in 0..MAX_BINARY_SEARCH_ITERATIONS {
+                // beta <- ceil((right + left) / 2)
+                let (new_beta, overflow) = right.overflowing_add(cs, &left);
+                // Cannot overflow since right and left are at most b=2^{32}
+                Boolean::enforce_equal(cs, &overflow, &boolean_false);
+
+                // Since new_beta.div2 gives floor, we need to add 1 if new_beta is odd to get ceil
+                let odd = new_beta.is_odd(cs);
+                let beta_div_2 = new_beta.div2(cs);
+                let (beta_div_2_plus_1, overflow) = beta_div_2.overflowing_add(cs, &one);
+                // Cannot overflow since beta_div_2 + one is at most b=2^{32}
+                Boolean::enforce_equal(cs, &overflow, &boolean_false);
+                beta = UInt2048::conditionally_select(cs, odd, &beta_div_2_plus_1, &beta_div_2);
+
+                // r <- d - m * beta
+                // beta can fit in 2 limbs since it is less than or equal to b=2^{32}
+                let m_beta = other.widening_mul(cs, &beta, 64, 2);
+                let (r, r_negative) = d.overflowing_sub(cs, &m_beta);
+
+                // if r < 0 (that is, overflow occurred), then right <- beta - 1
+                // beta - 1 might overflow at the last step, but we don't care about it
+                let (beta_minus_1, _) = beta.overflowing_sub(cs, &one);
+                right = UInt2048::conditionally_select(cs, r_negative, &beta_minus_1, &right);
+
+                // if r >= m, then left <- beta + 1
+                let r_geq_m = r.geq_than_u2048(cs, other);
+                // We should handle the case when r overflowed
+                let r_positive = r_negative.negated(cs);
+                let r_greater_m = r_geq_m.and(cs, r_positive);
+                let (beta_plus_1, overflow) = beta.overflowing_add(cs, &one);
+                // Cannot overflow since beta < b=2^{32}
+                Boolean::enforce_equal(cs, &overflow, &boolean_false);
+                left = UInt2048::conditionally_select(cs, r_greater_m, &beta_plus_1, &left);
+
+                // Updating r
+                new_r = r
+            }
+
+            // Assert that new_r indeed fits in UInt2048
+            let boolean_true = Boolean::allocated_constant(cs, true);
+            for limb in new_r.inner[U2048_MAX_LIMBS..].iter() {
+                let limb_is_zero = limb.is_zero(cs);
+                Boolean::enforce_equal(cs, &limb_is_zero, &boolean_true);
+            }
+            // Update r
+            r = new_r.to_low();
+
+            // Assert that r < m
+            let (_, overflow) = other.overflowing_sub(cs, &r);
+            Boolean::enforce_equal(cs, &overflow, &boolean_false);
+
+            // q_i <- b*q_{i-1} + beta_i
+            let beta_u4096 = beta.to_u4096(cs);
+            q = q.must_mul_by_two_pow_32(cs);
+            let (new_q, overflow) = q.overflowing_add(cs, &beta_u4096);
+            // Cannot overflow since the quotient cannot exceed 2^{4096}
+            Boolean::enforce_equal(cs, &overflow, &boolean_false);
+            q = new_q;
+        }
+
+        (q, r)
+    }
+
+    // Returns the value unchanged if `bit` is `true`, and 0 otherwise
+    #[must_use]
+    pub fn mask<CS: ConstraintSystem<F>>(&self, cs: &mut CS, masking_bit: Boolean<F>) -> Self {
+        let new_inner = self.inner.map(|el| el.mask(cs, masking_bit));
+        Self { inner: new_inner }
+    }
+
+    // Returns the value unchanged if `bit` is `false`, and 0 otherwise
+    #[must_use]
+    pub fn mask_negated<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &mut CS,
+        masking_bit: Boolean<F>,
+    ) -> Self {
+        let new_inner = self.inner.map(|el| el.mask_negated(cs, masking_bit));
+        Self { inner: new_inner }
+    }
+
+    #[must_use]
+    pub fn equals<CS: ConstraintSystem<F>>(cs: &mut CS, a: &Self, b: &Self) -> Boolean<F> {
+        let equals: [_; 128] =
+            std::array::from_fn(|idx| UInt32::equals(cs, &a.inner[idx], &b.inner[idx]));
+
+        Boolean::multi_and(cs, &equals)
+    }
+
+    #[must_use]
+    pub fn from_le_bytes<CS: ConstraintSystem<F>>(cs: &mut CS, bytes: [UInt8<F>; 512]) -> Self {
+        let mut inner = [std::mem::MaybeUninit::uninit(); 128];
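+        // Little-endian layout: the first 4-byte chunk becomes the least
+        // significant limb.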
+        for (dst, src) in inner.iter_mut().zip(bytes.array_chunks::<4>()) {
+            dst.write(UInt32::from_le_bytes(cs, *src));
+        }
+
+        let inner = unsafe { inner.map(|el| el.assume_init()) };
+
+        Self { inner }
+    }
+
+    #[must_use]
+    pub fn from_limbs(limbs: [UInt32<F>; 128]) -> Self {
+        Self { inner: limbs }
+    }
+
+    #[must_use]
+    pub fn from_be_bytes<CS: ConstraintSystem<F>>(cs: &mut CS, bytes: [UInt8<F>; 512]) -> Self {
+        let mut inner = [std::mem::MaybeUninit::uninit(); 128];
+        for (dst, src) in inner.iter_mut().rev().zip(bytes.array_chunks::<4>()) {
+            dst.write(UInt32::from_be_bytes(cs, *src));
+        }
+
+        let inner = unsafe { inner.map(|el| el.assume_init()) };
+
+        Self { inner }
+    }
+
+    #[must_use]
+    pub fn is_zero<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> Boolean<F> {
+        let limbs_are_zero = self.inner.map(|el| el.is_zero(cs));
+        Boolean::multi_and(cs, &limbs_are_zero)
+    }
+
+    #[must_use]
+    pub fn to_le_bytes<CS: ConstraintSystem<F>>(self, cs: &mut CS) -> [UInt8<F>; 512] {
+        let mut encoding = [std::mem::MaybeUninit::uninit(); 512];
+        for (dst, src) in encoding
+            .iter_mut()
+            .zip(self.inner.iter().flat_map(|el| el.to_le_bytes(cs)))
+        {
+            dst.write(src);
+        }
+
+        unsafe { encoding.map(|el| el.assume_init()) }
+    }
+
+    #[must_use]
+    pub fn to_be_bytes<CS: ConstraintSystem<F>>(self, cs: &mut CS) -> [UInt8<F>; 512] {
+        let mut bytes = self.to_le_bytes(cs);
+        bytes.reverse();
+
+        bytes
+    }
+
+    #[must_use]
+    pub fn to_low(self) -> UInt2048<F> {
+        UInt2048 {
+            inner: self.inner[..64].try_into().expect("incorrect slice size"),
+        }
+    }
+
+    #[must_use]
+    pub fn to_high(self) -> UInt2048<F> {
+        UInt2048 {
+            inner: self.inner[64..].try_into().expect("incorrect slice size"),
+        }
+    }
+}
+
+use crate::cs::Variable;
+use crate::gadgets::traits::castable::Convertor;
+use crate::gadgets::traits::castable::WitnessCastable;
+
+impl<F: SmallField> WitnessCastable<F, [F; 128]> for (U2048, U2048) {
+    #[inline]
+    fn cast_from_source(witness: [F; 128]) -> Self {
+        let reduced = witness.map(|el| {
+            let el = el.as_u64_reduced();
+            debug_assert!(el <= u32::MAX as u64);
+
+            el as u32
+        });
+
+        recompose_u4096_as_u32x128(reduced)
+    }
+
+    #[inline]
+    fn cast_into_source(self) -> [F; 128] {
+        let limbs = decompose_u4096_as_u32x128(self);
+        limbs.map(|el| WitnessCastable::cast_into_source(el))
+    }
+}
+
+impl<F: SmallField> CSWitnessable<F, 128> for UInt4096<F> {
+    type ConversionFunction = Convertor<F, [F; 128], (U2048, U2048)>;
+
+    fn witness_from_set_of_values(values: [F; 128]) -> Self::Witness {
+        WitnessCastable::cast_from_source(values)
+    }
+
+    fn as_variables_set(&self) -> [Variable; 128] {
+        self.inner.map(|el| el.get_variable())
+    }
+}
+
+impl<F: SmallField> WitnessHookable<F> for UInt4096<F> {
+    fn witness_hook<CS: ConstraintSystem<F>>(
+        &self,
+        cs: &CS,
+    ) -> Box<dyn FnOnce() -> Option<Self::Witness> + 'static> {
+        let raw_witness = self.get_witness(cs);
+        Box::new(move || raw_witness.wait())
+    }
+}
+
+use crate::gadgets::traits::selectable::MultiSelectable;
+// multiselect doesn't make much sense here because we can do parallel over chunks,
+// so we degrade to default impl via normal select
+impl<F: SmallField> MultiSelectable<F> for UInt4096<F> {}
+
+use crate::gadgets::traits::encodable::CircuitVarLengthEncodable;
+
+impl<F: SmallField> CircuitVarLengthEncodable<F> for UInt4096<F> {
+    #[inline(always)]
+    fn encoding_length(&self) -> usize {
+        // UInt4096 carries 128 u32 limbs, so its encoding is 128 variables long
+        128
+    }
+    fn encode_to_buffer<CS: ConstraintSystem<F>>(&self, cs: &mut CS, dst: &mut Vec<Variable>) {
+        CircuitVarLengthEncodable::<F>::encode_to_buffer(&self.inner, cs, dst);
+    }
+}
+
+use crate::gadgets::traits::allocatable::CSPlaceholder;
+
+impl<F: SmallField> CSPlaceholder<F> for UInt4096<F> {
+    fn placeholder<CS: ConstraintSystem<F>>(cs: &mut CS) -> Self {
+        Self::zero(cs)
+    }
+}
diff --git a/src/gadgets/u512/mod.rs b/src/gadgets/u512/mod.rs
index bb3781b..43b435f 100644
--- a/src/gadgets/u512/mod.rs
+++ b/src/gadgets/u512/mod.rs
@@ -56,6 +56,16 @@ pub fn recompose_u512_as_u32x16(value: [u32; 16]) -> (U256, U256) {
     (result_1, result_2)
 }
 
+pub fn convert_limb_to_u512<F, CS>(cs: &mut CS, limb: &UInt32<F>) -> UInt512<F>
+where
+    F: SmallField,
+    CS: ConstraintSystem<F>,
+{
+    let mut u512 = UInt512::zero(cs);
+    u512.inner[0] = *limb;
+    u512
+}
+
 impl<F: SmallField> CSAllocatable<F> for UInt512<F> {
     type Witness = (U256, U256);
     fn placeholder_witness() -> Self::Witness {
@@ -216,6 +226,153 @@ impl<F: SmallField> UInt512<F> {
         (result, borrow_out)
     }
 
+    /// Multiplies a number by 2^{32}. Panics if the number overflows.
+    #[must_use]
+    pub fn must_mul_by_2_pow_32<CS: ConstraintSystem<F>>(&self, cs: &mut CS) -> Self {
+        let boolean_true = Boolean::allocated_constant(cs, true);
+        let last_limb_zero = self.inner[15].is_zero(cs);
+        Boolean::enforce_equal(cs, &last_limb_zero, &boolean_true);
+
+        let mut new_inner = self.inner;
+        new_inner.copy_within(0..15, 1);
+        new_inner[0] = UInt32::zero(cs);
+
+        Self { inner: new_inner }
+    }
+
+    /// Returns `true` if `self >= other`, and `false` otherwise.
+    /// Here, `self` and `other` are represented as `UInt512` and `UInt256` respectively.
+    pub fn geq_than_u256<CS>(&self, cs: &mut CS, other: &UInt256<F>) -> Boolean<F>
+    where
+        CS: ConstraintSystem<F>,
+    {
+        let high = self.to_high();
+        let under_256 = high.is_zero(cs);
+        let over_256 = under_256.negated(cs);
+        let low = self.to_low();
+        let (sub, overflow) = other.overflowing_sub(cs, &low);
+        let a_equal_b = sub.is_zero(cs);
+        Boolean::multi_or(cs, &[overflow, a_equal_b, over_256])
+    }
+
+    /// Find quotient and remainder of division of `self` by `other` using the naive long
+    /// division algorithm in base `2^{32}`, since both [`UInt512`] and [`UInt256`] are
+    /// represented as arrays of [`UInt32`]. The implementation is based on the algorithm
+    /// https://en.wikipedia.org/wiki/Long_division#Algorithm_for_arbitrary_base,
+    /// where `k=16`, `l=8`, and base `b=2^{32}`.
+    ///
+    /// Currently, only the division by [`UInt256`] is supported.
+    #[must_use]
+    pub fn long_division<CS>(&self, cs: &mut CS, other: &UInt256<F>) -> (UInt512<F>, UInt256<F>)
+    where
+        CS: ConstraintSystem<F>,
+    {
+        const U256_MAX_LIMBS: usize = 8;
+        const U512_MAX_LIMBS: usize = 16;
+        const MAX_BINARY_SEARCH_ITERATIONS: usize = 33;
+
+        // Initializing constants
+        let base = U256::from_str_radix("0x100000000", 16).unwrap();
+        let base = UInt256::allocated_constant(cs, base);
+        let boolean_false = Boolean::allocated_constant(cs, false);
+        let one = UInt256::allocated_constant(cs, U256::one());
+
+        // q <- 0
+        let mut q = UInt512::zero(cs);
+
+        // r <- first 7 limbs of n, thus it fits in UInt256
+        let mut r = self.to_high();
+        r.inner[0] = UInt32::zero(cs);
+        r.inner.copy_within(1..U256_MAX_LIMBS, 0);
+        r.inner[U256_MAX_LIMBS - 1] = UInt32::zero(cs);
+
+        for i in 0..U256_MAX_LIMBS + 1 {
+            // \alpha_{i+l-1} is the (k-l-i)-th limb of n
+            let alpha = self.inner[U256_MAX_LIMBS - i];
+            let alpha = convert_limb_to_u512(cs, &alpha);
+
+            // d_i <- b*r_{i-1} + \alpha_{i+l-1}
+            // d_i can safely be UInt512 in size.
+            // r can have any number of limbs up to 8.
+            // base is 2 limbs wide since b=(2^{32}-1)+1
+            // TODO: Mul by base might be optimized
+            let d = base.widening_mul(cs, &r, 2, 8);
+            let (d_plus_alpha, overflow) = d.overflowing_add(cs, &alpha);
+            // d_i cannot overflow UInt512
+            Boolean::enforce_equal(cs, &overflow, &boolean_false);
+            let d = d_plus_alpha;
+
+            // beta_i <- next digit of the quotient. We use binary search to
+            // find a suitable beta_i
+            let mut beta = UInt256::zero(cs);
+            let mut left = UInt256::zero(cs);
+            let mut right = base;
+
+            // Preparing new_r to further update r
+            let mut new_r = UInt512::zero(cs);
+
+            for _ in 0..MAX_BINARY_SEARCH_ITERATIONS {
+                // beta <- ceil((right + left) / 2)
+                let (new_beta, overflow) = right.overflowing_add(cs, &left);
+                // Cannot overflow since right and left are at most b=2^{32}
+                Boolean::enforce_equal(cs, &overflow, &boolean_false);
+
+                // Since new_beta.div2 gives floor, we need to add 1 if new_beta is odd to get ceil
+                let odd = new_beta.is_odd(cs);
+                let beta_div_2 = new_beta.div2(cs);
+                let (beta_div_2_plus_1, overflow) = beta_div_2.overflowing_add(cs, &one);
+                // Cannot overflow since beta_div_2 + one is at most b=2^{32}
+                Boolean::enforce_equal(cs, &overflow, &boolean_false);
+                beta = UInt256::conditionally_select(cs, odd, &beta_div_2_plus_1, &beta_div_2);
+
+                // r <- d - m * beta
+                // beta can fit in 2 limbs since it is less than or equal to b=2^{32}
+                let m_beta = other.widening_mul(cs, &beta, 8, 2);
+                let (r, r_negative) = d.overflowing_sub(cs, &m_beta);
+
+                // if r < 0 (that is, overflow occurred), then right <- beta - 1
+                // beta - 1 might overflow at step 33, but we don't care about it
+                let (beta_minus_1, _) = beta.overflowing_sub(cs, &one);
+                right = UInt256::conditionally_select(cs, r_negative, &beta_minus_1, &right);
+
+                // if r >= m, then left <- beta + 1
+                let r_geq_m = r.geq_than_u256(cs, other);
+                // We should handle the case when r overflowed
+                let r_positive = r_negative.negated(cs);
+                let r_greater_m = r_geq_m.and(cs, r_positive);
+                let (beta_plus_1, overflow) = beta.overflowing_add(cs, &one);
+                // Cannot overflow since beta < b=2^{32}
+                Boolean::enforce_equal(cs, &overflow, &boolean_false);
+                left = UInt256::conditionally_select(cs, r_greater_m, &beta_plus_1, &left);
+
+                // Updating r
+                new_r = r
+            }
+
+            // Assert that new_r indeed fits in UInt256
+            let boolean_true = Boolean::allocated_constant(cs, true);
+            for limb in new_r.inner[8..].iter() {
+                let limb_is_zero = limb.is_zero(cs);
+                Boolean::enforce_equal(cs, &limb_is_zero, &boolean_true);
+            }
+            // Update r
+            r = new_r.to_low();
+
+            // Assert that r < m
+            let (_, overflow) = other.overflowing_sub(cs, &r);
+            Boolean::enforce_equal(cs, &overflow, &boolean_false);
+
+            // q_i <- b*q_{i-1} + beta_i
+            let beta_u512 = beta.to_u512(cs);
+            q = q.must_mul_by_2_pow_32(cs);
+            let (new_q, overflow) = q.overflowing_add(cs, &beta_u512);
+            // Cannot overflow since the quotient cannot exceed 2^{512}
+            Boolean::enforce_equal(cs, &overflow, &boolean_false);
+            q = new_q;
+        }
+
+        (q, r)
+    }
+
     // Returns the value unchanges if `bit` is `true`, and 0 otherwise
     #[must_use]
     pub fn mask<CS: ConstraintSystem<F>>(&self, cs: &mut CS, masking_bit: Boolean<F>) -> Self {
@@ -254,6 +411,11 @@ impl<F: SmallField> UInt512<F> {
         Self { inner }
     }
 
+    #[must_use]
+    pub fn from_limbs(limbs: [UInt32<F>; 16]) -> Self {
+        Self { inner: limbs }
+    }
+
     #[must_use]
     pub fn from_be_bytes<CS: ConstraintSystem<F>>(cs: &mut CS, bytes: [UInt8<F>; 64]) -> Self {
         let mut inner = [std::mem::MaybeUninit::uninit(); 16];
diff --git a/src/implementations/poseidon2/mod.rs b/src/implementations/poseidon2/mod.rs
index ecb1326..6dbb7e0 100644
--- a/src/implementations/poseidon2/mod.rs
+++ b/src/implementations/poseidon2/mod.rs
@@ -4,18 +4,25 @@ use crate::field::goldilocks::GoldilocksField;
 pub mod params;
 pub mod state_generic_impl;
 
-#[cfg(not(any(
-    target_feature = "neon",
-    target_feature = "avx2",
-    target_feature = "avx512bw",
-    target_feature = "avx512cd",
-    target_feature = "avx512dq",
-    target_feature = "avx512f",
-    target_feature = "avx512vl"
+#[cfg(not(all(
+    feature = "include_packed_simd",
+    any(
+        target_feature = "neon",
+        target_feature = "avx2",
+        target_feature = "avx512bw",
+        target_feature = "avx512cd",
+        target_feature = "avx512dq",
+        target_feature = "avx512f",
+        target_feature = "avx512vl",
+    )
 )))]
 pub use state_generic_impl::*;
 
+// Other poseidon implementations depend on packed_simd 128,
+// which is no longer available in std::simd (and packed_simd is no longer
+// supported on the newest rust nightly).
 #[cfg(all(
+    feature = "include_packed_simd",
     any(target_feature = "neon", target_feature = "avx2"),
     not(any(
         target_feature = "avx512bw",
@@ -28,6 +35,7 @@ pub use state_generic_impl::*;
 pub mod state_vectorized_double;
 
 #[cfg(all(
+    feature = "include_packed_simd",
     any(target_feature = "neon", target_feature = "avx2"),
     not(any(
         target_feature = "avx512bw",
@@ -40,6 +48,7 @@ pub mod state_vectorized_double;
 pub use state_vectorized_double::*;
 
 #[cfg(all(
+    feature = "include_packed_simd",
     target_feature = "avx512bw",
     target_feature = "avx512cd",
     target_feature = "avx512dq",
@@ -49,6 +58,7 @@ pub use state_vectorized_double::*;
 pub mod state_avx512;
 
 #[cfg(all(
+    feature = "include_packed_simd",
     target_feature = "avx512bw",
     target_feature = "avx512cd",
     target_feature = "avx512dq",
diff --git a/src/implementations/poseidon2/state_generic_impl.rs b/src/implementations/poseidon2/state_generic_impl.rs
index 02cb079..c9b74e8 100644
--- a/src/implementations/poseidon2/state_generic_impl.rs
+++ b/src/implementations/poseidon2/state_generic_impl.rs
@@ -29,7 +29,9 @@ impl State {
     pub const T: u64 = (Self::ORDER - 1) >> Self::TWO_ADICITY;
     pub const BARRETT: u128 = 18446744078004518912; // 0x10000000100000000
     pub const EPSILON: u64 = (1 << 32) - 1;
+    #[cfg(feature = "include_packed_simd")]
     pub const EPSILON_VECTOR: packed_simd::u64x4 = packed_simd::u64x4::splat(Self::EPSILON);
+    #[cfg(feature = "include_packed_simd")]
     pub const EPSILON_VECTOR_D: packed_simd::u64x8 = packed_simd::u64x8::splat(Self::EPSILON);
 
     pub const RATE: usize = poseidon_goldilocks_params::RATE;
diff --git a/src/lib.rs b/src/lib.rs
index 72775d5..4f2e1ee 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,6 +21,9 @@
 #![allow(dead_code)]
 #![allow(dropping_references)] // Required to explicitly show that mutable references are dropped.
 #![allow(incomplete_features)]
+#![allow(internal_features)] // Required for core_intrinsics
+#![allow(stable_features)]
+#![allow(unused_unsafe)]
 // Enabled features
 #![feature(allocator_api)]
 #![feature(const_mut_refs)]
@@ -43,7 +46,6 @@
 #![feature(generic_const_exprs)]
 #![feature(iter_array_chunks)]
 // #![recursion_limit = "1024"]
-#![feature(stdsimd)]
 #![feature(avx512_target_feature)]
 #![feature(associated_type_defaults)]
 #![feature(trait_alias)]
@@ -51,6 +53,7 @@
 #![feature(return_position_impl_trait_in_trait)]
 #![feature(type_changing_struct_update)]
 #![feature(slice_flatten)]
+#![cfg_attr(feature = "include_packed_simd", feature(stdsimd))]
 
 pub mod algebraic_props;
 pub mod config;
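
With this gating, the packed_simd code paths are compiled only when the `include_packed_simd` feature is enabled together with a matching target feature; everything else falls back to the generic implementation. An illustrative invocation (hypothetical, assuming a nightly toolchain on which `packed_simd` still builds):

    RUSTFLAGS="-C target-feature=+avx2" cargo build --features include_packed_simd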