Skip to content

Commit

Permalink
Inline ld32 and st32.
Browse files Browse the repository at this point in the history
Makes Salsa20 ~1.1 (FF) - 1.8 (Chrome) faster.
  • Loading branch information
dchest committed Sep 9, 2014
1 parent c7fff4a commit 1ea7e43
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 71 deletions.
223 changes: 153 additions & 70 deletions nacl-fast.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,6 @@ var gf0 = gf(),
Y = gf([0x6658, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666]),
I = gf([0xa0b0, 0x4a0e, 0x1b27, 0xc4ee, 0xe478, 0xad2f, 0x1806, 0x2f43, 0xd7a7, 0x3dfb, 0x0099, 0x2b4d, 0xdf0b, 0x4fc1, 0x2480, 0x2b83]);

function ld32(x, i) {
var u = x[i+3] & 0xff;
u = (u<<8)|(x[i+2] & 0xff);
u = (u<<8)|(x[i+1] & 0xff);
return (u<<8)|(x[i+0] & 0xff);
}

function st32(x, j, u) {
var i;
for (i = 0; i < 4; i++) { x[j+i] = u & 255; u >>>= 8; }
}

function ts64(x, i, h, l) {
x[i] = (h >> 24) & 0xff;
x[i+1] = (h >> 16) & 0xff;
Expand All @@ -67,23 +55,23 @@ function crypto_verify_32(x, xi, y, yi) {
return vn(x,xi,y,yi,32);
}

function core_salsa20(out, inp, k, c) {
var j0 = ld32(c, 0),
j1 = ld32(k, 0),
j2 = ld32(k, 4),
j3 = ld32(k, 8),
j4 = ld32(k, 12),
j5 = ld32(c, 4),
j6 = ld32(inp, 0),
j7 = ld32(inp, 4),
j8 = ld32(inp, 8),
j9 = ld32(inp, 12),
j10 = ld32(c, 8),
j11 = ld32(k, 16),
j12 = ld32(k, 20),
j13 = ld32(k, 24),
j14 = ld32(k, 28),
j15 = ld32(c, 12);
function core_salsa20(o, p, k, c) {
var j0 = c[ 0] & 0xff | (c[ 1] & 0xff)<<8 | (c[ 2] & 0xff)<<16 | (c[ 3] & 0xff)<<24,
j1 = k[ 0] & 0xff | (k[ 1] & 0xff)<<8 | (k[ 2] & 0xff)<<16 | (k[ 3] & 0xff)<<24,
j2 = k[ 4] & 0xff | (k[ 5] & 0xff)<<8 | (k[ 6] & 0xff)<<16 | (k[ 7] & 0xff)<<24,
j3 = k[ 8] & 0xff | (k[ 9] & 0xff)<<8 | (k[10] & 0xff)<<16 | (k[11] & 0xff)<<24,
j4 = k[12] & 0xff | (k[13] & 0xff)<<8 | (k[14] & 0xff)<<16 | (k[15] & 0xff)<<24,
j5 = c[ 4] & 0xff | (c[ 5] & 0xff)<<8 | (c[ 6] & 0xff)<<16 | (c[ 7] & 0xff)<<24,
j6 = p[ 0] & 0xff | (p[ 1] & 0xff)<<8 | (p[ 2] & 0xff)<<16 | (p[ 3] & 0xff)<<24,
j7 = p[ 4] & 0xff | (p[ 5] & 0xff)<<8 | (p[ 6] & 0xff)<<16 | (p[ 7] & 0xff)<<24,
j8 = p[ 8] & 0xff | (p[ 9] & 0xff)<<8 | (p[10] & 0xff)<<16 | (p[11] & 0xff)<<24,
j9 = p[12] & 0xff | (p[13] & 0xff)<<8 | (p[14] & 0xff)<<16 | (p[15] & 0xff)<<24,
j10 = c[ 8] & 0xff | (c[ 9] & 0xff)<<8 | (c[10] & 0xff)<<16 | (c[11] & 0xff)<<24,
j11 = k[16] & 0xff | (k[17] & 0xff)<<8 | (k[18] & 0xff)<<16 | (k[19] & 0xff)<<24,
j12 = k[20] & 0xff | (k[21] & 0xff)<<8 | (k[22] & 0xff)<<16 | (k[23] & 0xff)<<24,
j13 = k[24] & 0xff | (k[25] & 0xff)<<8 | (k[26] & 0xff)<<16 | (k[27] & 0xff)<<24,
j14 = k[28] & 0xff | (k[29] & 0xff)<<8 | (k[30] & 0xff)<<16 | (k[31] & 0xff)<<24,
j15 = c[12] & 0xff | (c[13] & 0xff)<<8 | (c[14] & 0xff)<<16 | (c[15] & 0xff)<<24;

var x0 = j0, x1 = j1, x2 = j2, x3 = j3, x4 = j4, x5 = j5, x6 = j6, x7 = j7,
x8 = j8, x9 = j9, x10 = j10, x11 = j11, x12 = j12, x13 = j13, x14 = j14,
Expand Down Expand Up @@ -179,41 +167,104 @@ function core_salsa20(out, inp, k, c) {
x14 = x14 + j14 | 0;
x15 = x15 + j15 | 0;

st32(out, 0, x0);
st32(out, 4, x1);
st32(out, 8, x2);
st32(out, 12, x3);
st32(out, 16, x4);
st32(out, 20, x5);
st32(out, 24, x6);
st32(out, 28, x7);
st32(out, 32, x8);
st32(out, 36, x9);
st32(out, 40, x10);
st32(out, 44, x11);
st32(out, 48, x12);
st32(out, 52, x13);
st32(out, 56, x14);
st32(out, 60, x15);
o[ 0] = x0 >>> 0 & 0xff;
o[ 1] = x0 >>> 8 & 0xff;
o[ 2] = x0 >>> 16 & 0xff;
o[ 3] = x0 >>> 24 & 0xff;

o[ 4] = x1 >>> 0 & 0xff;
o[ 5] = x1 >>> 8 & 0xff;
o[ 6] = x1 >>> 16 & 0xff;
o[ 7] = x1 >>> 24 & 0xff;

o[ 8] = x2 >>> 0 & 0xff;
o[ 9] = x2 >>> 8 & 0xff;
o[10] = x2 >>> 16 & 0xff;
o[11] = x2 >>> 24 & 0xff;

o[12] = x3 >>> 0 & 0xff;
o[13] = x3 >>> 8 & 0xff;
o[14] = x3 >>> 16 & 0xff;
o[15] = x3 >>> 24 & 0xff;

o[16] = x4 >>> 0 & 0xff;
o[17] = x4 >>> 8 & 0xff;
o[18] = x4 >>> 16 & 0xff;
o[19] = x4 >>> 24 & 0xff;

o[20] = x5 >>> 0 & 0xff;
o[21] = x5 >>> 8 & 0xff;
o[22] = x5 >>> 16 & 0xff;
o[23] = x5 >>> 24 & 0xff;

o[24] = x6 >>> 0 & 0xff;
o[25] = x6 >>> 8 & 0xff;
o[26] = x6 >>> 16 & 0xff;
o[27] = x6 >>> 24 & 0xff;

o[28] = x7 >>> 0 & 0xff;
o[29] = x7 >>> 8 & 0xff;
o[30] = x7 >>> 16 & 0xff;
o[31] = x7 >>> 24 & 0xff;

o[32] = x8 >>> 0 & 0xff;
o[33] = x8 >>> 8 & 0xff;
o[34] = x8 >>> 16 & 0xff;
o[35] = x8 >>> 24 & 0xff;

o[36] = x9 >>> 0 & 0xff;
o[37] = x9 >>> 8 & 0xff;
o[38] = x9 >>> 16 & 0xff;
o[39] = x9 >>> 24 & 0xff;

o[40] = x10 >>> 0 & 0xff;
o[41] = x10 >>> 8 & 0xff;
o[42] = x10 >>> 16 & 0xff;
o[43] = x10 >>> 24 & 0xff;

o[44] = x11 >>> 0 & 0xff;
o[45] = x11 >>> 8 & 0xff;
o[46] = x11 >>> 16 & 0xff;
o[47] = x11 >>> 24 & 0xff;

o[48] = x12 >>> 0 & 0xff;
o[49] = x12 >>> 8 & 0xff;
o[50] = x12 >>> 16 & 0xff;
o[51] = x12 >>> 24 & 0xff;

o[52] = x13 >>> 0 & 0xff;
o[53] = x13 >>> 8 & 0xff;
o[54] = x13 >>> 16 & 0xff;
o[55] = x13 >>> 24 & 0xff;

o[56] = x14 >>> 0 & 0xff;
o[57] = x14 >>> 8 & 0xff;
o[58] = x14 >>> 16 & 0xff;
o[59] = x14 >>> 24 & 0xff;

o[60] = x15 >>> 0 & 0xff;
o[61] = x15 >>> 8 & 0xff;
o[62] = x15 >>> 16 & 0xff;
o[63] = x15 >>> 24 & 0xff;
}

function core_hsalsa20(out,inp,k,c) {
var j0 = ld32(c, 0),
j1 = ld32(k, 0),
j2 = ld32(k, 4),
j3 = ld32(k, 8),
j4 = ld32(k, 12),
j5 = ld32(c, 4),
j6 = ld32(inp, 0),
j7 = ld32(inp, 4),
j8 = ld32(inp, 8),
j9 = ld32(inp, 12),
j10 = ld32(c, 8),
j11 = ld32(k, 16),
j12 = ld32(k, 20),
j13 = ld32(k, 24),
j14 = ld32(k, 28),
j15 = ld32(c, 12);
function core_hsalsa20(o,p,k,c) {
var j0 = c[ 0] & 0xff | (c[ 1] & 0xff)<<8 | (c[ 2] & 0xff)<<16 | (c[ 3] & 0xff)<<24,
j1 = k[ 0] & 0xff | (k[ 1] & 0xff)<<8 | (k[ 2] & 0xff)<<16 | (k[ 3] & 0xff)<<24,
j2 = k[ 4] & 0xff | (k[ 5] & 0xff)<<8 | (k[ 6] & 0xff)<<16 | (k[ 7] & 0xff)<<24,
j3 = k[ 8] & 0xff | (k[ 9] & 0xff)<<8 | (k[10] & 0xff)<<16 | (k[11] & 0xff)<<24,
j4 = k[12] & 0xff | (k[13] & 0xff)<<8 | (k[14] & 0xff)<<16 | (k[15] & 0xff)<<24,
j5 = c[ 4] & 0xff | (c[ 5] & 0xff)<<8 | (c[ 6] & 0xff)<<16 | (c[ 7] & 0xff)<<24,
j6 = p[ 0] & 0xff | (p[ 1] & 0xff)<<8 | (p[ 2] & 0xff)<<16 | (p[ 3] & 0xff)<<24,
j7 = p[ 4] & 0xff | (p[ 5] & 0xff)<<8 | (p[ 6] & 0xff)<<16 | (p[ 7] & 0xff)<<24,
j8 = p[ 8] & 0xff | (p[ 9] & 0xff)<<8 | (p[10] & 0xff)<<16 | (p[11] & 0xff)<<24,
j9 = p[12] & 0xff | (p[13] & 0xff)<<8 | (p[14] & 0xff)<<16 | (p[15] & 0xff)<<24,
j10 = c[ 8] & 0xff | (c[ 9] & 0xff)<<8 | (c[10] & 0xff)<<16 | (c[11] & 0xff)<<24,
j11 = k[16] & 0xff | (k[17] & 0xff)<<8 | (k[18] & 0xff)<<16 | (k[19] & 0xff)<<24,
j12 = k[20] & 0xff | (k[21] & 0xff)<<8 | (k[22] & 0xff)<<16 | (k[23] & 0xff)<<24,
j13 = k[24] & 0xff | (k[25] & 0xff)<<8 | (k[26] & 0xff)<<16 | (k[27] & 0xff)<<24,
j14 = k[28] & 0xff | (k[29] & 0xff)<<8 | (k[30] & 0xff)<<16 | (k[31] & 0xff)<<24,
j15 = c[12] & 0xff | (c[13] & 0xff)<<8 | (c[14] & 0xff)<<16 | (c[15] & 0xff)<<24;

var x0 = j0, x1 = j1, x2 = j2, x3 = j3, x4 = j4, x5 = j5, x6 = j6, x7 = j7,
x8 = j8, x9 = j9, x10 = j10, x11 = j11, x12 = j12, x13 = j13, x14 = j14,
Expand Down Expand Up @@ -292,14 +343,46 @@ function core_hsalsa20(out,inp,k,c) {
u = x14 + x13 | 0;
x15 ^= u<<18 | u>>>(32-18);
}
st32(out, 0, x0);
st32(out, 4, x5);
st32(out, 8, x10);
st32(out, 12, x15);
st32(out, 16, x6);
st32(out, 20, x7);
st32(out, 24, x8);
st32(out, 28, x9);

o[ 0] = x0 >>> 0 & 0xff;
o[ 1] = x0 >>> 8 & 0xff;
o[ 2] = x0 >>> 16 & 0xff;
o[ 3] = x0 >>> 24 & 0xff;

o[ 4] = x5 >>> 0 & 0xff;
o[ 5] = x5 >>> 8 & 0xff;
o[ 6] = x5 >>> 16 & 0xff;
o[ 7] = x5 >>> 24 & 0xff;

o[ 8] = x10 >>> 0 & 0xff;
o[ 9] = x10 >>> 8 & 0xff;
o[10] = x10 >>> 16 & 0xff;
o[11] = x10 >>> 24 & 0xff;

o[12] = x15 >>> 0 & 0xff;
o[13] = x15 >>> 8 & 0xff;
o[14] = x15 >>> 16 & 0xff;
o[15] = x15 >>> 24 & 0xff;

o[16] = x6 >>> 0 & 0xff;
o[17] = x6 >>> 8 & 0xff;
o[18] = x6 >>> 16 & 0xff;
o[19] = x6 >>> 24 & 0xff;

o[20] = x7 >>> 0 & 0xff;
o[21] = x7 >>> 8 & 0xff;
o[22] = x7 >>> 16 & 0xff;
o[23] = x7 >>> 24 & 0xff;

o[24] = x8 >>> 0 & 0xff;
o[25] = x8 >>> 8 & 0xff;
o[26] = x8 >>> 16 & 0xff;
o[27] = x8 >>> 24 & 0xff;

o[28] = x9 >>> 0 & 0xff;
o[29] = x9 >>> 8 & 0xff;
o[30] = x9 >>> 16 & 0xff;
o[31] = x9 >>> 24 & 0xff;
}

function crypto_core_salsa20(out,inp,k,c) {
Expand Down
Loading

0 comments on commit 1ea7e43

Please sign in to comment.