Implement endomorphism acceleration for BLS12-381 (needed cofactor clearing of the point)
mratsim · Jun 14, 2020 · e9e84ab · e9e84ab
1 parent 10ded15
commit e9e84ab
Show file tree

Hide file tree

Showing 13 changed files with 478 additions and 160 deletions.
diff --git a/benchmarks/bench_ec_swei_proj_g1.nim b/benchmarks/bench_ec_g1.nim
@@ -10,7 +10,7 @@ import
   # Internals
   ../constantine/config/curves,
   ../constantine/arithmetic,
-  ../constantine/elliptic/[ec_weierstrass_projective, ec_scalar_mul],
+  ../constantine/elliptic/ec_weierstrass_projective,
   # Helpers
   ../helpers/static_for,
   ./bench_elliptic_template,
@@ -51,14 +51,16 @@ proc main() =
     separator()
     doublingBench(ECP_SWei_Proj[Fp[curve]], Iters)
     separator()
+    scalarMulUnsafeDoubleAddBench(ECP_SWei_Proj[Fp[curve]], MulIters)
+    separator()
     scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 2, MulIters)
     separator()
     scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 3, MulIters)
     separator()
     scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 4, MulIters)
     separator()
-    # scalarMulUnsafeDoubleAddBench(ECP_SWei_Proj[Fp[curve]], MulIters)
-    # separator()
+    scalarMulGLV(ECP_SWei_Proj[Fp[curve]], MulIters)
+    separator()
   separator()
 
 main()

diff --git a/benchmarks/bench_elliptic_template.nim b/benchmarks/bench_elliptic_template.nim
@@ -17,11 +17,14 @@ import
   ../constantine/config/curves,
   ../constantine/arithmetic,
   ../constantine/io/io_bigints,
+  ../constantine/elliptic/[ec_weierstrass_projective, ec_scalar_mul, ec_endomorphism_accel],
   # Helpers
   ../helpers/[prng_unsafe, static_for],
   ./platforms,
   # Standard library
-  std/[monotimes, times, strformat, strutils, macros]
+  std/[monotimes, times, strformat, strutils, macros],
+  # Reference unsafe scalar multiplication
+  ../tests/support/ec_reference_scalar_mult
 
 var rng: RngState
 let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
@@ -71,15 +74,15 @@ when SupportsGetTicks:
 echo "\n=================================================================================================================\n"
 
 proc separator*() =
-  echo "-".repeat(157)
+  echo "-".repeat(177)
 
 proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
   let ns = inNanoseconds((stop-start) div iters)
   let throughput = 1e9 / float64(ns)
   when SupportsGetTicks:
-    echo &"{op:<40} {elliptic:<40} {throughput:>15.3f} ops/s     {ns:>9} ns/op     {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
+    echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s     {ns:>9} ns/op     {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
   else:
-    echo &"{op:<40} {elliptic:<40} {throughput:>15.3f} ops/s     {ns:>9} ns/op"
+    echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s     {ns:>9} ns/op"
 
 macro fixEllipticDisplay(T: typedesc): untyped =
   # At compile-time, enums are integers and their display is buggy
@@ -124,7 +127,7 @@ proc scalarMulGenericBench*(T: typedesc, scratchSpaceSize: static int, iters: in
   const bits = T.F.C.getCurveOrderBitwidth()
 
   var r {.noInit.}: T
-  let P = rng.random_unsafe(T)
+  let P = rng.random_unsafe(T) # TODO: clear cofactor
 
   let exponent = rng.random_unsafe(BigInt[bits])
   var exponentCanonical{.noInit.}: array[(bits+7) div 8, byte]
@@ -136,18 +139,28 @@ proc scalarMulGenericBench*(T: typedesc, scratchSpaceSize: static int, iters: in
     r = P
     r.scalarMulGeneric(exponentCanonical, scratchSpace)
 
-# import ../tests/support/ec_reference_scalar_mult
-#
-# proc scalarMulUnsafeDoubleAddBench*(T: typedesc, iters: int) =
-#   const bits = T.F.C.getCurveOrderBitwidth()
-#
-#   var r {.noInit.}: T
-#   let P = rng.random_unsafe(T)
-#
-#   let exponent = rng.random_unsafe(BigInt[bits])
-#   var exponentCanonical{.noInit.}: array[(bits+7) div 8, byte]
-#   exponentCanonical.exportRawUint(exponent, bigEndian)
-#
-#   bench("EC ScalarMul G1 (unsafe DoubleAdd)", T, iters):
-#     r = P
-#     r.unsafe_ECmul_double_add(exponentCanonical)
+proc scalarMulGLV*(T: typedesc, iters: int) =
+  const bits = T.F.C.getCurveOrderBitwidth()
+
+  var r {.noInit.}: T
+  let P = rng.random_unsafe(T) # TODO: clear cofactor
+
+  let exponent = rng.random_unsafe(BigInt[bits])
+
+  bench("EC ScalarMul G1 (GLV endomorphism accelerated)", T, iters):
+    r = P
+    r.scalarMulGLV(exponent)
+
+proc scalarMulUnsafeDoubleAddBench*(T: typedesc, iters: int) =
+  const bits = T.F.C.getCurveOrderBitwidth()
+
+  var r {.noInit.}: T
+  let P = rng.random_unsafe(T) # TODO: clear cofactor
+
+  let exponent = rng.random_unsafe(BigInt[bits])
+  var exponentCanonical{.noInit.}: array[(bits+7) div 8, byte]
+  exponentCanonical.exportRawUint(exponent, bigEndian)
+
+  bench("EC ScalarMul G1 (unsafe reference DoubleAdd)", T, iters):
+    r = P
+    r.unsafe_ECmul_double_add(exponentCanonical)
diff --git a/constantine.nimble b/constantine.nimble
@@ -124,7 +124,7 @@ task test, "Run all tests":
     runBench("bench_fp2")
     runBench("bench_fp6")
     runBench("bench_fp12")
-    runBench("bench_ec_swei_proj_g1")
+    runBench("bench_ec_g1")
 
 task test_no_gmp, "Run tests that don't require GMP":
   # -d:testingCurves is configured in a *.nim.cfg for convenience
@@ -320,11 +320,11 @@ task bench_fp12_gcc, "Run benchmark 𝔽p12 with gcc":
 task bench_fp12_clang, "Run benchmark 𝔽p12 with clang":
   runBench("bench_fp12", "clang")
 
-task bench_ec_swei_proj_g1, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC":
-  runBench("bench_ec_swei_proj_g1")
+task bench_ec_g1, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC":
+  runBench("bench_ec_g1")
 
-task bench_ec_swei_proj_g1_gcc, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC":
-  runBench("bench_ec_swei_proj_g1", "gcc")
+task bench_ec_gcc, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC":
+  runBench("bench_ec_g1", "gcc")
 
-task bench_ec_swei_proj_g1_clang, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Clang":
-  runBench("bench_ec_swei_proj_g1", "clang")
+task bench_ec_g1_clang, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Clang":
+  runBench("bench_ec_g1", "clang")
diff --git a/constantine/config/curves_declaration.nim b/constantine/config/curves_declaration.nim
@@ -144,7 +144,7 @@ declareCurves:
     modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab"
     family: BarretoLynnScott
     # u: -(2^63 + 2^62 + 2^60 + 2^57 + 2^48 + 2^16)
-    cubicRootOfUnity_mod_p: "0x5f19672fdf76ce51ba69c6076a0f77eaddb3a93be6f89688de17d813620a00022e01fffffffefffe"
+    cubicRootOfUnity_mod_p: "0x1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4897d29650fb85f9b409427eb4f49fffd8bfd00000000aaac"
 
     # G1 Equation: y² = x³ + 4
     # G2 Equation: y² = x³ + 4 (1+i)

diff --git a/constantine/elliptic/ec_endomorphism_accel.nim b/constantine/elliptic/ec_endomorphism_accel.nim
@@ -17,7 +17,8 @@ import
   ../io/io_bigints,
   ../towers,
   ./ec_weierstrass_affine,
-  ./ec_weierstrass_projective
+  ./ec_weierstrass_projective,
+  ./ec_endomorphism_params
 
 # ############################################################
 #
@@ -71,9 +72,6 @@ type
     ##
     ## Digit-Endianness is bigEndian
 
-  MultiScalar[M, LengthInBits: static int] = array[M, BigInt[LengthInBits]]
-    ## Decomposition of a secret scalar in multiple scalars
-
 const
   BitSize   = 2
   Shift     = 2    # log2(4) - we can store 4 digit per byte
@@ -149,9 +147,9 @@ proc `[]=`(recoding: var Recoded,
   slot[] = slot[] or shifted
 
 
-func nDimMultiScalarRecoding[M, LengthInBits, LengthInDigits: static int](
-    dst: var GLV_SAC[M, LengthInDigits],
-    src: MultiScalar[M, LengthInBits]
+func nDimMultiScalarRecoding[M, L: static int](
+    dst: var GLV_SAC[M, L],
+    src: MultiScalar[M, L]
   ) =
   ## This recodes N scalar for GLV multi-scalar multiplication
   ## with side-channel resistance.
@@ -203,19 +201,17 @@ func nDimMultiScalarRecoding[M, LengthInBits, LengthInDigits: static int](
   #   For that floored division, bji may be negative!!!
   # In particular floored division of -1 is -1 not 0.
   # This means that arithmetic right shift must be used instead of logical right shift
-  static: doAssert LengthInDigits == LengthInBits+1,
-    "Length in digits: " & $LengthInDigits & " Length in bits: " & $LengthInBits
-  # " # VScode broken highlight
+
   # assert src[0].isOdd - Only happen on implementation error, we don't want to leak a single bit
 
   var k = src # Keep the source multiscalar in registers
   template b: untyped {.dirty.} = dst
 
-  b[0][LengthInDigits-1] = 1
-  for i in 0 .. LengthInDigits-2:
+  b[0][L-1] = 1
+  for i in 0 .. L-2:
     b[0][i] = 2 * k[0].bit(i+1).int8 - 1
   for j in 1 .. M-1:
-    for i in 0 .. LengthInDigits-1:
+    for i in 0 .. L-1:
       let bji = b[0][i] * k[j].bit0.int8
       b[j][i] = bji
       # In the following equation
@@ -276,61 +272,6 @@ func buildLookupTable[M: static int, F](
     lut[u].sum(lut[u.clearBit(msb)], endomorphisms[msb])
     # } # highlight bug, ...
 
-# Chapter 6.3.1 - Guide to Pairing-based Cryptography
-const Lattice_BN254_Snarks_G1: array[2, array[2, tuple[b: BigInt[127], isNeg: bool]]] = [
-  # Curve of order 254 -> mini scalars of size 127
-  # u = 0x44E992B44A6909F1
-  [(BigInt[127].fromHex"0x89d3256894d213e3", false),                  # 2u + 1
-   (BigInt[127].fromHex"0x6f4d8248eeb859fd0be4e1541221250b", false)], # 6u² + 4u + 1
-  [(BigInt[127].fromHex"0x6f4d8248eeb859fc8211bbeb7d4f1128", false),  # 6u² + 2u
-   (BigInt[127].fromHex"0x89d3256894d213e3", true)]                   # -2u - 1
-]
-
-const Babai_BN254_Snarks_G1 = [
-  # Vector for Babai rounding
-  BigInt[127].fromHex"0x89d3256894d213e3",                            # 2u + 1
-  BigInt[127].fromHex"0x6f4d8248eeb859fd0be4e1541221250b"             # 6u² + 4u + 1
-]
-
-func decomposeScalar_BN254_Snarks_G1[M, scalBits, miniBits: static int](
-       scalar: BigInt[scalBits],
-       miniScalars: var MultiScalar[M, miniBits]
-     ) =
-  ## Decompose a secret scalar into mini-scalar exploiting
-  ## BN254_Snarks specificities.
-  ##
-  ## TODO: Generalize to all BN curves
-  ##       - needs a Lattice type
-  ##       - needs to better support negative bigints, (extra bit for sign?)
-
-  static: doAssert miniBits == (scalBits + M - 1) div M
-  # 𝛼0 = (0x2d91d232ec7e0b3d7 * s) >> 256
-  # 𝛼1 = (0x24ccef014a773d2d25398fd0300ff6565 * s) >> 256
-  const
-    w = BN254_Snarks.getCurveOrderBitwidth().wordsRequired()
-    alphaHats = (BigInt[66].fromHex"0x2d91d232ec7e0b3d7",
-                 BigInt[130].fromHex"0x24ccef014a773d2d25398fd0300ff6565")
-
-  var alphas{.noInit.}: array[M, BigInt[scalBits]] # TODO size 66+254 and 130+254
-
-  staticFor i, 0, M:
-    alphas[i].prod_high_words(alphaHats[i], scalar, w)
-
-  # We have k0 = s - 𝛼0 b00 - 𝛼1 b10
-  # and kj = 0 - 𝛼j b0j - 𝛼1 b1j
-  var k: array[M, BigInt[scalBits]]
-  k[0] = scalar
-  for miniScalarIdx in 0 ..< M:
-    for basisIdx in 0 ..< M:
-      var alphaB {.noInit.}: BigInt[scalBits]
-      alphaB.prod(alphas[basisIdx], Lattice_BN254_Snarks_G1[basisIdx][miniScalarIdx].b) # TODO small lattice size
-      if Lattice_BN254_Snarks_G1[basisIdx][miniScalarIdx].isNeg:
-        k[miniScalarIdx] += alphaB
-      else:
-        k[miniScalarIdx] -= alphaB
-
-    miniScalars[miniScalarIdx].copyTruncatedFrom(k[miniScalarIdx])
-
 func tableIndex(glv: GLV_SAC, bit: int): SecretWord =
   ## Compose the secret table index from
   ## the GLV-SAC representation and the "bit" accessed
@@ -353,29 +294,39 @@ func secretLookup[T](dst: var T, table: openArray[T], index: SecretWord) =
     let selector = SecretWord(i) == index
     dst.ccopy(table[i], selector)
 
-func scalarMulGLV_BN254*(
+func scalarMulGLV*[scalBits](
        P: var ECP_SWei_Proj,
-       scalar: BigInt[BN254_Snarks.getCurveOrderBitwidth()]
+       scalar: BigInt[scalBits]
      ) =
   ## Elliptic Curve Scalar Multiplication
   ##
   ##   P <- [k] P
   ##
   ## This is a scalar multiplication accelerated by an endomorphism
   ## via the GLV (Gallant-lambert-Vanstone) decomposition.
-  const M = 2
+  const C = P.F.C # curve
+  static: doAssert: scalBits == C.getCurveOrderBitwidth()
+  when P.F is Fp:
+    const M = 2
 
   # 1. Compute endomorphisms
   var endomorphisms: array[M-1, typeof(P)] # TODO: zero-init not required
   endomorphisms[0] = P
-  endomorphisms[0].x *= BN254_Snarks.getCubicRootOfUnity_mod_p()
+  endomorphisms[0].x *= C.getCubicRootOfUnity_mod_p()
 
   # 2. Decompose scalar into mini-scalars
-  const L = (BN254_Snarks.getCurveOrderBitwidth() + M - 1) div M + 1
-  var miniScalars: array[M, BigInt[L-1]] # TODO: zero-init not required
-  scalar.decomposeScalar_BN254_Snarks_G1(
-    miniScalars
-  )
+  const L = (C.getCurveOrderBitwidth() + M - 1) div M + 1
+  var miniScalars: array[M, BigInt[L]] # TODO: zero-init not required
+  when C == BN254_Snarks:
+    scalar.decomposeScalar_BN254_Snarks_G1(
+      miniScalars
+    )
+  elif C == BLS12_381:
+    scalar.decomposeScalar_BLS12_381_G1(
+      miniScalars
+    )
+  else:
+    {.error: "Unsupported curve for GLV acceleration".}
 
   # 3. TODO: handle negative mini-scalars
   #    Either negate the associated base and the scalar (in the `endomorphisms` array)