Skip to content

Commit

Permalink
Merge pull request aws#36 from jargh/main
Browse files Browse the repository at this point in the history
Basic point operations for the Weierstrass curves
s2n-bignum original commit: awslabs/s2n-bignum@44003b0
  • Loading branch information
jargh authored Jul 22, 2022
2 parents 39b1a7e + e01e3be commit 2af9c6a
Show file tree
Hide file tree
Showing 15 changed files with 11,677 additions and 33 deletions.
5 changes: 4 additions & 1 deletion arm/p384/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ OBJ = bignum_add_p384.o \
bignum_optneg_p384.o \
bignum_sub_p384.o \
bignum_tomont_p384.o \
bignum_triple_p384.o
bignum_triple_p384.o \
p384_montjadd.o \
p384_montjdouble.o \
p384_montjmixadd.o

# Build each object file from the assembly source of the same name:
# run the .S file through the C preprocessor ($(CC) -E) with ../../include
# on the include search path, then pipe the preprocessed text into the
# assembler $(GAS), which writes the object to $@. The trailing "-"
# presumably tells the assembler to read from standard input.
# NOTE(review): the recipe is a shell pipeline, so its exit status is that of
# $(GAS); a preprocessor failure may go unnoticed unless pipefail is in
# effect — confirm against the SHELL settings of this Makefile.
%.o : %.S ; $(CC) -E -I../../include $< | $(GAS) -o $@ -

Expand Down
893 changes: 893 additions & 0 deletions arm/p384/p384_montjadd.S

Large diffs are not rendered by default.

963 changes: 963 additions & 0 deletions arm/p384/p384_montjdouble.S

Large diffs are not rendered by default.

884 changes: 884 additions & 0 deletions arm/p384/p384_montjmixadd.S

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion arm/p521/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ OBJ = bignum_add_p521.o \
bignum_sub_p521.o \
bignum_tolebytes_p521.o \
bignum_tomont_p521.o \
bignum_triple_p521.o
bignum_triple_p521.o \
p521_jadd.o \
p521_jdouble.o \
p521_jmixadd.o

# Build each object file from the assembly source of the same name:
# run the .S file through the C preprocessor ($(CC) -E) with ../../include
# on the include search path, then pipe the preprocessed text into the
# assembler $(GAS), which writes the object to $@. The trailing "-"
# presumably tells the assembler to read from standard input.
# NOTE(review): the recipe is a shell pipeline, so its exit status is that of
# $(GAS); a preprocessor failure may go unnoticed unless pipefail is in
# effect — confirm against the SHELL settings of this Makefile.
%.o : %.S ; $(CC) -E -I../../include $< | $(GAS) -o $@ -

Expand Down
68 changes: 37 additions & 31 deletions arm/p521/bignum_sqr_p521_alt.S
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,23 @@

#define l x10

#define u0 x11
#define u1 x12
#define u2 x13
#define u3 x14
#define u4 x15
#define u5 x16
#define u6 x17
#define u7 x19
#define u8 x20
#define u9 x21
#define u10 x22
#define u11 x23
#define u12 x24
#define u13 x25
#define u14 x26
#define u15 x27
#define u16 x29
#define u0 x2 // The same as a0
#define u1 x11
#define u2 x12
#define u3 x13
#define u4 x14
#define u5 x15
#define u6 x16
#define u7 x17
#define u8 x19
#define u9 x20
#define u10 x21
#define u11 x22
#define u12 x23
#define u13 x24
#define u14 x25
#define u15 x26
#define u16 x4 // The same as a2

S2N_BN_SYMBOL(bignum_sqr_p521_alt):

Expand All @@ -69,7 +69,6 @@ S2N_BN_SYMBOL(bignum_sqr_p521_alt):
stp x21, x22, [sp, #-16]!
stp x23, x24, [sp, #-16]!
stp x25, x26, [sp, #-16]!
stp x27, x29, [sp, #-16]!

// Load low 8 elements as [a7;a6;a5;a4;a3;a2;a1;a0], set up an initial
// window [u8;u7;u6;u5;u4;u3;u2;u1] = 10 + 20 + 30 + 40 + 50 + 60 + 70
Expand Down Expand Up @@ -231,7 +230,6 @@ S2N_BN_SYMBOL(bignum_sqr_p521_alt):
// Add the homogeneous terms 00 + 11 + 22 + 33 + 44 + 55 + 66 + 77

umulh l, a0, a0
mul u0, a0, a0
adds u1, u1, l

mul l, a1, a1
Expand Down Expand Up @@ -269,49 +267,58 @@ S2N_BN_SYMBOL(bignum_sqr_p521_alt):
umulh l, a7, a7
adc u15, u15, l

// Now load in the top digit a8, and also set up its double and square
// Now load in the top digit a8, and immediately double the register

ldr a8, [x, #64]
mul u16, a8, a8
add a8, a8, a8

// Add a8 * [a7;...;a0] into the top of the buffer
// Add (2 * a8) * [a7;...;a0] into the top of the buffer
// At the end of the first chain we form u16 = a8 ^ 2.
// This needs us to shift right the modified a8 again but it saves a
// register, and the overall performance impact seems slightly positive.

mul l, a8, a0
adds u8, u8, l
mul l, a8, a1
umulh l, a8, a0
adcs u9, u9, l
mul l, a8, a2
adcs u10, u10, l
mul l, a8, a3
umulh l, a8, a2
adcs u11, u11, l
mul l, a8, a4
adcs u12, u12, l
mul l, a8, a5
umulh l, a8, a4
adcs u13, u13, l
mul l, a8, a6
adcs u14, u14, l
mul l, a8, a7
umulh l, a8, a6
adcs u15, u15, l
lsr u16, a8, #1
mul u16, u16, u16
adc u16, u16, xzr

umulh l, a8, a0
mul l, a8, a1
adds u9, u9, l
umulh l, a8, a1
adcs u10, u10, l
umulh l, a8, a2
mul l, a8, a3
adcs u11, u11, l
umulh l, a8, a3
adcs u12, u12, l
umulh l, a8, a4
mul l, a8, a5
adcs u13, u13, l
umulh l, a8, a5
adcs u14, u14, l
umulh l, a8, a6
mul l, a8, a7
adcs u15, u15, l
umulh l, a8, a7
adc u16, u16, l

// Finally squeeze in the lowest mul. This didn't need to be involved
// in the addition chains and moreover lets us re-use u0 == a0

mul u0, a0, a0

// Now we have the full product, which we consider as
// 2^521 * h + l. Form h + l + 1

Expand Down Expand Up @@ -361,7 +368,6 @@ S2N_BN_SYMBOL(bignum_sqr_p521_alt):

// Restore registers and return

ldp x27, x29, [sp], #16
ldp x25, x26, [sp], #16
ldp x23, x24, [sp], #16
ldp x21, x22, [sp], #16
Expand Down
Loading

0 comments on commit 2af9c6a

Please sign in to comment.