Skip to content

Commit

Permalink
Deal with padding in one place, in blockwise.
Browse files Browse the repository at this point in the history
We introduce two functions to assist with processing
runs of fixed bytes.  One processes a single byte value a given number
of times; the other processes a first byte, a run of middle bytes and a
last byte, which is the shape of most padding schemes.

We use this for all hashes, CMAC and CBCMAC.  This gives a good
performance improvement.
  • Loading branch information
ctz committed Sep 16, 2015
1 parent 8f2769c commit d62aa26
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 50 deletions.
67 changes: 67 additions & 0 deletions src/blockwise.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,70 @@ void cf_blockwise_xor(uint8_t *partial, size_t *npartial, size_t nblock,
inb += taken;
}
}

/* Feed `nbytes` copies of `byte` through the blockwise buffer.
 *
 * Bytes are written into `partial` after the `*npartial` bytes already
 * held there.  Each time the buffer reaches `nblock` bytes, `process` is
 * called with `ctx` and `partial`, and `*npartial` is reset to zero.
 * Any bytes left over stay in `partial` and are counted in `*npartial`.
 *
 * Does nothing when `nbytes` is zero. */
void cf_blockwise_acc_byte(uint8_t *partial, size_t *npartial,
                           size_t nblock,
                           uint8_t byte, size_t nbytes,
                           cf_blockwise_in_fn process,
                           void *ctx)
{
  /* Becomes non-zero once `partial` has been filled end-to-end with
   * `byte`.  From then on the buffer already holds the right contents
   * for every later block and for the trailing fragment, so no further
   * memset is needed. */
  int prefilled = 0;
  size_t remaining = nbytes;

  while (remaining != 0)
  {
    const size_t offset = *npartial;
    const size_t room = nblock - offset;
    const size_t take = MIN(remaining, room);

    if (!prefilled)
    {
      memset(partial + offset, byte, take);

      if (offset == 0 && take == nblock)
        prefilled = 1;
    }

    if (offset + take == nblock)
    {
      /* Buffer is complete: hand it over and start a fresh block. */
      process(ctx, partial);
      *npartial = 0;
    }
    else
    {
      *npartial = offset + take;
    }

    remaining -= take;
  }
}

/* Feed an `nbytes`-long padding pattern through the blockwise buffer.
 *
 * The pattern depends on `nbytes`:
 *   0          nothing is processed
 *   1          the single byte fbyte ^ lbyte
 *   2          fbyte, lbyte
 *   3 or more  fbyte, (nbytes - 2) copies of mbyte, lbyte
 *
 * `partial`, `npartial`, `nblock`, `process` and `ctx` are passed through
 * unchanged to cf_blockwise_accumulate / cf_blockwise_acc_byte. */
void cf_blockwise_acc_pad(uint8_t *partial, size_t *npartial,
                          size_t nblock,
                          uint8_t fbyte, uint8_t mbyte, uint8_t lbyte,
                          size_t nbytes,
                          cf_blockwise_in_fn process,
                          void *ctx)
{
  if (nbytes == 0)
    return;

  if (nbytes == 1)
  {
    /* First and last byte coincide: merge them. */
    uint8_t merged = (uint8_t) (fbyte ^ lbyte);
    cf_blockwise_accumulate(partial, npartial, nblock, &merged, 1, process, ctx);
    return;
  }

  /* Two or more bytes: the pattern always opens with fbyte. */
  cf_blockwise_accumulate(partial, npartial, nblock, &fbyte, 1, process, ctx);

  if (nbytes == 2)
  {
    cf_blockwise_accumulate(partial, npartial, nblock, &lbyte, 1, process, ctx);
    return;
  }

  if (lbyte == mbyte)
  {
    /* The last byte is indistinguishable from the middle ones, so fold it
     * into the run rather than issuing a separate one-byte call. */
    cf_blockwise_acc_byte(partial, npartial, nblock, mbyte, nbytes - 1, process, ctx);
    return;
  }

  cf_blockwise_acc_byte(partial, npartial, nblock, mbyte, nbytes - 2, process, ctx);
  cf_blockwise_accumulate(partial, npartial, nblock, &lbyte, 1, process, ctx);
}
52 changes: 52 additions & 0 deletions src/blockwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,56 @@ void cf_blockwise_xor(uint8_t *partial, size_t *npartial,
cf_blockwise_out_fn newblock,
void *ctx);

/* This function processes a single byte a number of times. It's useful
* for padding, and more efficient than calling cf_blockwise_accumulate
* a bunch of times.
*
* partial is the buffer (maintained by the caller)
* on entry, npartial is the currently valid count of used bytes on
* the front of partial.
* on exit, npartial is updated to reflect the status of partial.
* nblock is the blocksize to accumulate -- partial must be at least
* this long!
* process is the processing function, passed ctx and a pointer
* to the data to process (always exactly nblock bytes long!)
* which may not necessarily be the same as partial.
* byte is the byte to process, nbytes times.  If nbytes is zero,
* nothing happens.
*/
void cf_blockwise_acc_byte(uint8_t *partial, size_t *npartial,
size_t nblock,
uint8_t byte, size_t nbytes,
cf_blockwise_in_fn process,
void *ctx);

/* This function attempts to process patterns of bytes common in
* block cipher padding.
*
* This takes three bytes:
* - a first byte, fbyte,
* - a middle byte, mbyte,
* - a last byte, lbyte.
*
* If nbytes is zero, nothing happens.
* If nbytes is one, the byte fbyte ^ lbyte is processed.
* If nbytes is two, the fbyte then lbyte are processed.
* If nbytes is three or more, fbyte, then nbytes - 2 mbytes (one or
* more), then lbyte is processed.
*
* partial is the buffer (maintained by the caller)
* on entry, npartial is the currently valid count of used bytes on
* the front of partial.
* on exit, npartial is updated to reflect the status of partial.
* nblock is the blocksize to accumulate -- partial must be at least
* this long!
* process is the processing function, passed ctx and a pointer
* to the data to process (always exactly nblock bytes long!)
* which may not necessarily be the same as partial.
*/
void cf_blockwise_acc_pad(uint8_t *partial, size_t *npartial,
size_t nblock,
uint8_t fbyte, uint8_t mbyte, uint8_t lbyte,
size_t nbytes,
cf_blockwise_in_fn process,
void *ctx);

#endif
15 changes: 13 additions & 2 deletions src/cbcmac.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ void cf_cbcmac_stream_update(cf_cbcmac_stream *ctx, const uint8_t *data, size_t
ctx);
}

/* Complete the block currently being buffered by filling its unused tail
 * with zero bytes, process it, and mark the buffer empty.
 *
 * When no bytes are buffered (ctx->used == 0) this is a no-op. */
void cf_cbcmac_stream_finish_block_zero(cf_cbcmac_stream *ctx)
{
  if (ctx->used != 0)
  {
    /* Zero everything from the first unused byte to the end of the block. */
    memset(&ctx->buffer[ctx->used], 0, ctx->prp->blocksz - ctx->used);
    cbcmac_process(ctx, ctx->buffer);
    ctx->used = 0;
  }
}

void cf_cbcmac_stream_nopad_final(cf_cbcmac_stream *ctx, uint8_t out[CF_MAXBLOCK])
{
assert(ctx->used == 0);
Expand All @@ -62,7 +72,8 @@ void cf_cbcmac_stream_nopad_final(cf_cbcmac_stream *ctx, uint8_t out[CF_MAXBLOCK
/* Pad the buffered input up to a block boundary and output the MAC.
 *
 * The padding is npad bytes, each with value npad, where npad is the
 * number of bytes needed to complete the current block.  When no bytes
 * are buffered, npad equals the block size and a whole block of padding
 * is processed.  The result is then written to `out` by
 * cf_cbcmac_stream_nopad_final. */
void cf_cbcmac_stream_pad_final(cf_cbcmac_stream *ctx, uint8_t out[CF_MAXBLOCK])
{
  uint8_t npad = ctx->prp->blocksz - ctx->used;

  /* Apply the padding exactly once, as a single run.  Padding twice would
   * either leave bytes buffered (tripping the used == 0 requirement of
   * nopad_final) or MAC an extra block. */
  cf_blockwise_acc_byte(ctx->buffer, &ctx->used, ctx->prp->blocksz,
                        npad, npad,
                        cbcmac_process, ctx);
  cf_cbcmac_stream_nopad_final(ctx, out);
}
4 changes: 1 addition & 3 deletions src/ccm.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ static void write_be(uint8_t *out, size_t value, size_t bytes)

/* Zero-pad the CBC-MAC input up to the next block boundary.
 *
 * Delegates to cf_cbcmac_stream_finish_block_zero, which does nothing when
 * no bytes are buffered.  No byte-at-a-time loop is needed (or wanted)
 * in front of it. */
static void zero_pad(cf_cbcmac_stream *cm)
{
  cf_cbcmac_stream_finish_block_zero(cm);
}

/* nb. block is general workspace. */
Expand Down
7 changes: 3 additions & 4 deletions src/cmac.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,9 @@ void cf_cmac_stream_update(cf_cmac_stream *ctx, const uint8_t *data, size_t len,
/* Input padding */
if (needpad)
{
uint8_t pad_block[CF_MAXBLOCK] = { 0x80 };
cf_blockwise_accumulate(ctx->buffer, &ctx->used, blocksz,
pad_block, blocksz - ctx->used,
cmac_process_final_pad, ctx);
cf_blockwise_acc_pad(ctx->buffer, &ctx->used, blocksz,
0x80, 0x00, 0x00, blocksz - ctx->used,
cmac_process_final_pad, ctx);
}
}

Expand Down
11 changes: 7 additions & 4 deletions src/gcm.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,20 @@ static void ghash_block(void *vctx, const uint8_t *data)
/* Feed `n` bytes at `buf` into the GHASH state via the blockwise
 * accumulator, with ghash_block as the per-block processing callback.
 *
 * The block size is taken from the size of ctx->buffer, so it cannot
 * drift from the buffer it describes. */
static void ghash_add(ghash_ctx *ctx, const uint8_t *buf, size_t n)
{
  cf_blockwise_accumulate(ctx->buffer, &ctx->buffer_used,
                          sizeof ctx->buffer,
                          buf, n,
                          ghash_block,
                          ctx);
}

/* Zero-pad the buffered GHASH input up to a block boundary.
 *
 * When bytes are buffered, the rest of ctx->buffer is zeroed, the block
 * is passed to ghash_block, and the buffer is marked empty.  When nothing
 * is buffered this is a no-op. */
static void ghash_add_pad(ghash_ctx *ctx)
{
  if (ctx->buffer_used == 0)
    return;

  /* Fill the tail in one go rather than adding zero bytes one at a time. */
  memset(ctx->buffer + ctx->buffer_used, 0, sizeof(ctx->buffer) - ctx->buffer_used);
  ghash_block(ctx, ctx->buffer);
  ctx->buffer_used = 0;
}

static void ghash_add_aad(ghash_ctx *ctx, const uint8_t *buf, size_t n)
Expand Down
5 changes: 5 additions & 0 deletions src/modes.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ void cf_cbcmac_stream_reset(cf_cbcmac_stream *ctx);
* Process ndata bytes at data. */
void cf_cbcmac_stream_update(cf_cbcmac_stream *ctx, const uint8_t *data, size_t ndata);

/* .. c:function:: $DECL
* Finish the current block of data by adding zeroes, then process that
* block and reset the count of buffered bytes to zero.  Does nothing if
* there are no bytes awaiting processing. */
void cf_cbcmac_stream_finish_block_zero(cf_cbcmac_stream *ctx);

/* .. c:function:: $DECL
* Output the MAC to ctx->prp->blocksz bytes at out.
* ctx->used must be zero: the input message must be an exact number of
Expand Down
13 changes: 5 additions & 8 deletions src/sha1.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,15 @@ void cf_sha1_digest_final(cf_sha1_context *ctx, uint8_t hash[CF_SHA1_HASHSZ])
digested_bytes = digested_bytes * CF_SHA1_BLOCKSZ + ctx->npartial;
uint64_t digested_bits = digested_bytes * 8;

size_t zeroes = CF_SHA1_BLOCKSZ - ((digested_bytes + 1 + 8) % CF_SHA1_BLOCKSZ);
size_t padbytes = CF_SHA1_BLOCKSZ - ((digested_bytes + 8) % CF_SHA1_BLOCKSZ);

/* Hash 0x80 00 ... block first. */
uint8_t buf[8];
buf[0] = 0x80;
buf[1] = 0x00;
cf_sha1_update(ctx, &buf[0], 1);

while (zeroes--)
cf_sha1_update(ctx, &buf[1], 1);
cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, sizeof ctx->partial,
0x80, 0x00, 0x00, padbytes,
sha1_update_block, ctx);

/* Now hash length. */
uint8_t buf[8];
write64_be(digested_bits, buf);
cf_sha1_update(ctx, buf, 8);

Expand Down
13 changes: 5 additions & 8 deletions src/sha256.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,15 @@ void cf_sha256_digest_final(cf_sha256_context *ctx, uint8_t hash[CF_SHA256_HASHS
digested_bytes = digested_bytes * CF_SHA256_BLOCKSZ + ctx->npartial;
uint64_t digested_bits = digested_bytes * 8;

size_t zeroes = CF_SHA256_BLOCKSZ - ((digested_bytes + 1 + 8) % CF_SHA256_BLOCKSZ);
size_t padbytes = CF_SHA256_BLOCKSZ - ((digested_bytes + 8) % CF_SHA256_BLOCKSZ);

/* Hash 0x80 00 ... block first. */
uint8_t buf[8];
buf[0] = 0x80;
buf[1] = 0x00;
cf_sha256_update(ctx, &buf[0], 1);

while (zeroes--)
cf_sha256_update(ctx, &buf[1], 1);
cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, sizeof ctx->partial,
0x80, 0x00, 0x00, padbytes,
sha256_update_block, ctx);

/* Now hash length. */
uint8_t buf[8];
write64_be(digested_bits, buf);
cf_sha256_update(ctx, buf, 8);

Expand Down
17 changes: 4 additions & 13 deletions src/sha3.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,21 +311,12 @@ static void sha3_update(cf_sha3_context *ctx, const void *data, size_t nbytes)

/* Absorb `npad` bytes of padding into the sponge.
 *
 * The padding is `domain` first, 0x80 last, and zero bytes in between.
 * When npad is 1 the single byte processed is domain ^ 0x80 (see
 * cf_blockwise_acc_pad).  npad must be at least 1.
 *
 * The pattern is generated directly into ctx->partial by
 * cf_blockwise_acc_pad, so no temporary padding buffer is built and the
 * padding is absorbed exactly once. */
static void pad(cf_sha3_context *ctx, uint8_t domain, size_t npad)
{
  assert(npad >= 1);

  cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, ctx->rate,
                       domain, 0x00, 0x80,
                       npad,
                       sha3_block, ctx);
}

static void pad_and_squeeze(cf_sha3_context *ctx, uint8_t *out, size_t nout)
Expand Down
13 changes: 5 additions & 8 deletions src/sha512.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,18 +188,15 @@ void cf_sha512_digest_final(cf_sha512_context *ctx, uint8_t hash[CF_SHA512_HASHS
digested_bytes = digested_bytes * CF_SHA512_BLOCKSZ + ctx->npartial;
uint64_t digested_bits = digested_bytes * 8;

size_t zeroes = CF_SHA512_BLOCKSZ - ((digested_bytes + 1 + 16) % CF_SHA512_BLOCKSZ);
size_t padbytes = CF_SHA512_BLOCKSZ - ((digested_bytes + 16) % CF_SHA512_BLOCKSZ);

/* Hash 0x80 00 ... block first. */
uint8_t buf[8];
buf[0] = 0x80;
buf[1] = 0x00;
cf_sha512_update(ctx, &buf[0], 1);

while (zeroes--)
cf_sha512_update(ctx, &buf[1], 1);
cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, sizeof ctx->partial,
0x80, 0x00, 0x00, padbytes,
sha512_update_block, ctx);

/* Now hash length (this is 128 bits long). */
uint8_t buf[8];
write64_be(0, buf);
cf_sha512_update(ctx, buf, 8);
write64_be(digested_bits, buf);
Expand Down

0 comments on commit d62aa26

Please sign in to comment.