Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[aes] Also advance the masking PRNG when not used during processing #22844

Merged
merged 6 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hw/ip/aes/data/aes.hjson
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
{ name: "RndCnstMaskingLfsrPerm",
type: "aes_pkg::masking_lfsr_perm_t",
desc: '''
Permutation applied to the concatenated LFSRs of the PRNG used for masking.
Permutation applied to the output of the PRNG used for masking.
'''
randcount: "160",
randtype: "perm"
Expand Down
2 changes: 0 additions & 2 deletions hw/ip/aes/pre_sca/alma/cpp/verilator_tb_aes_sbox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@ int main(int argc, char **argv) {
tb.m_core.out_ack_i = 0;

tb.m_core.en_i = 0;
tb.m_core.prd_we_i = 1; // Present new PRD in next cycle.
tb.tick();
tb.m_core.en_i = 1;
tb.m_core.prd_we_i = 0; // Keep previous PRD.
tb.tick();

while (tb.m_core.out_req_o != 1) {
Expand Down
6 changes: 2 additions & 4 deletions hw/ip/aes/pre_sca/alma/cpp/verilator_tb_aes_sub_bytes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,9 @@ int main(int argc, char **argv) {
tb.m_core.out_ack_i = 3; // SP2V_HIGH, always ack
tb.m_core.op_i = 0; // encrypt

tb.m_core.en_i = 4; // SP2V_LOW, disable
tb.m_core.prd_we_i = 1; // Present new PRD in next cycle.
tb.m_core.en_i = 4; // SP2V_LOW, disable
tb.tick();
tb.m_core.en_i = 3; // SP2V_HIGH, enable
tb.m_core.prd_we_i = 0; // Keep previous PRD.
tb.m_core.en_i = 3; // SP2V_HIGH, enable
tb.tick();

while (tb.m_core.out_req_o != 3) {
Expand Down
34 changes: 18 additions & 16 deletions hw/ip/aes/rtl/aes_cipher_control_fsm.sv
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,11 @@ module aes_cipher_control_fsm import aes_pkg::*;
if (key_len_i != AES_256) begin
// Advance in sync with KeyExpand. Based on the S-Box implementation, it can take
// multiple cycles to finish. Wait for handshake. The DOM S-Boxes consume fresh PRD
// only in the first clock cycle. By requesting the PRNG update in any clock cycle
// other than the last one, the PRD fed into the DOM S-Boxes is guaranteed to be stable.
// This is better in terms of SCA resistance. Request the PRNG update in the first cycle.
// only in the first clock cycle and that PRD is taken from the buffer stage updated
// based on key_full_we_o. The PRNG itself is updated in every clock cycle to increase
// the noise.
advance = key_expand_out_req_i & cyc_ctr_expr;
prng_update_o = (SecSBoxImpl == SBoxImplDom) ? cyc_ctr_q == 3'd0 : SecMasking;
prng_update_o = SecMasking;
key_expand_en_o = 1'b1;
if (advance) begin
key_expand_out_ack_o = 1'b1;
Expand Down Expand Up @@ -275,12 +275,11 @@ module aes_cipher_control_fsm import aes_pkg::*;

// Advance in sync with SubBytes and KeyExpand. Based on the S-Box implementation, both can
// take multiple cycles to finish. Wait for handshake. The DOM S-Boxes consume fresh PRD
// only in the first clock cycle. By requesting the PRNG update in any clock cycle other
// than the last one, the PRD fed into the DOM S-Boxes is guaranteed to be stable. This is
// better in terms of SCA resistance. Request the PRNG update in the first cycle. Non-DOM
// S-Boxes need fresh PRD in every clock cycle.
// only in the first clock cycle and that PRD is taken from the buffer stages updated
// with state_we_o / based on key_full_we_o. The PRNG itself is updated in every clock
// cycle to increase the noise.
advance = key_expand_out_req_i & cyc_ctr_expr & (dec_key_gen_q_i | sub_bytes_out_req_i);
prng_update_o = (SecSBoxImpl == SBoxImplDom) ? cyc_ctr_q == 3'd0 : SecMasking;
prng_update_o = SecMasking;
sub_bytes_en_o = ~dec_key_gen_q_i;
key_expand_en_o = 1'b1;

Expand Down Expand Up @@ -357,14 +356,14 @@ module aes_cipher_control_fsm import aes_pkg::*;
cyc_ctr_d =
(SecSBoxImpl == SBoxImplDom) ? (!advance ? cyc_ctr_q + 3'd1 : cyc_ctr_q) : 3'd0;

// The DOM S-Boxes consume fresh PRD only in the first clock cycle. By requesting the PRNG
// update in any clock cycle other than the last one, the PRD fed into the DOM S-Boxes is
// guaranteed to be stable. This is better in terms of SCA resistance. Request the PRNG
// update in the first cycle. We update it only once and in the last cycle for non-DOM
// S-Boxes where otherwise updating the PRNG while being stalled would cause the S-Boxes
// to be re-evaluated, thereby creating additional SCA leakage.
// The DOM S-Boxes consume fresh PRD only in the first clock cycle and that PRD is taken
// from the buffer stages updated with state_we_o / based on key_full_we_o. The PRNG itself
// is updated in every processing clock cycle but the last to increase the noise. Once the
// processing is all done (e.g. if we're just waiting for the PRNG reseeding to finish or
// if we're waiting for out_ready_i), the PRNG is no longer updated to save power. In the
// very last clock cycle, we update the PRNG again to get ready for the next block.
prng_update_o =
(SecSBoxImpl == SBoxImplDom) ? cyc_ctr_q == 3'd0 : out_valid_o & out_ready_i;
((SecSBoxImpl == SBoxImplDom) ? !advance : 1'b0) | (out_valid_o & out_ready_i);

if (out_valid_o && out_ready_i) begin
sub_bytes_out_ack_o = ~dec_key_gen_q_i;
Expand All @@ -385,6 +384,9 @@ module aes_cipher_control_fsm import aes_pkg::*;
// Keep requesting PRNG reseeding until it is acknowledged.
prng_reseed_req_o = prng_reseed_q_i & ~prng_reseed_done_q;

// Don't update the cycle counter as we don't need it.
cyc_ctr_d = 3'd0;

// Once we're done, wait for handshake.
out_valid_o = prng_reseed_done_q;
if (out_valid_o && out_ready_i) begin
Expand Down
63 changes: 41 additions & 22 deletions hw/ip/aes/rtl/aes_cipher_core.sv
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ module aes_cipher_core import aes_pkg::*;
logic state_sel_err;

sp2v_e sub_bytes_en;
logic sub_bytes_prd_we;
sp2v_e sub_bytes_out_req;
sp2v_e sub_bytes_out_ack;
logic sub_bytes_err;
Expand Down Expand Up @@ -225,7 +224,8 @@ module aes_cipher_core import aes_pkg::*;

// Pseudo-random data for masking purposes
logic [WidthPRDMasking-1:0] prd_masking;
logic [3:0][3:0][WidthPRDSBox-1:0] prd_sub_bytes;
logic [3:0][3:0][WidthPRDSBox-1:0] prd_sub_bytes_d;
logic [3:0][3:0][WidthPRDSBox-1:0] prd_sub_bytes_q;
logic [WidthPRDKey-1:0] prd_key_expand;
logic prd_masking_upd;
logic prd_masking_rsd_req;
Expand Down Expand Up @@ -316,21 +316,45 @@ module aes_cipher_core import aes_pkg::*;
// Extract randomness for key expand module and SubBytes.
//
// The masking PRNG output has the following shape:
// prd_masking = { prd_key_expand, prd_sub_bytes }
assign prd_key_expand = prd_masking[WidthPRDMasking-1 -: WidthPRDKey];
assign prd_sub_bytes = prd_masking[WidthPRDData-1 -: WidthPRDData];
// prd_masking = { prd_key_expand, prd_sub_bytes_d }
assign prd_key_expand = prd_masking[WidthPRDMasking-1 -: WidthPRDKey];
assign prd_sub_bytes_d = prd_masking[WidthPRDData-1 -: WidthPRDData];

// PRD buffering
if (!SecMasking) begin : gen_no_prd_buffer
// The masks are ignored anyway.
assign prd_sub_bytes_q = prd_sub_bytes_d;

end else begin : gen_prd_buffer
// PRD buffer stage to:
// 1. Make sure the S-Boxes are always presented with new data/mask inputs together with fresh
// PRD for remasking.
// 2. Prevent glitches originating from inside the masking PRNG from propagating into the
// masked S-Boxes.
always_ff @(posedge clk_i or negedge rst_ni) begin : prd_sub_bytes_reg
if (!rst_ni) begin
prd_sub_bytes_q <= '0;
end else if (state_we == SP2V_HIGH) begin
prd_sub_bytes_q <= prd_sub_bytes_d;
end
end
andreaskurth marked this conversation as resolved.
Show resolved Hide resolved
end

// Convert the 3-dimensional prd_sub_bytes_q array to a 1-dimensional packed array for the
// aes_prd_get_lsbs() function used below.
logic [WidthPRDData-1:0] prd_sub_bytes;
assign prd_sub_bytes = prd_sub_bytes_q;

// Extract randomness for masking the input data.
//
// The masking PRNG is used for generating both the PRD for the S-Boxes/SubBytes operation as
// well as for the input data masks. When using any of the masked Canright S-Box implementations,
// it is important that the SubBytes input masks (generated by the PRNG in Round X-1) and the
// SubBytes output masks (generated by the PRNG in Round X) are independent. Inside the PRNG,
// this is achieved by using multiple, separately re-seeded LFSR chunks and by selecting the
// separate LFSR chunks in alternating fashion. Since the input data masks become the SubBytes
// input masks in the first round, we select the same 8 bit lanes for the input data masks which
// are also used to form the SubBytes output mask for the masked Canright S-Box implementations,
// i.e., the 8 LSBs of the per S-Box PRD. In particular, we have:
// SubBytes output masks (generated by the PRNG in Round X) are independent. This can be achieved
// by using e.g. an unrolled Bivium stream cipher primitive inside the PRNG. Since the input data
// masks become the SubBytes input masks in the first round, we select the same 8 bit lanes for
// the input data masks which are also used to form the SubBytes output mask for the masked
// Canright S-Box implementations, i.e., the 8 LSBs of the per S-Box PRD. In particular, we have:
//
// prd_masking = { prd_key_expand, ... , sb_prd[4], sb_out_mask[4], sb_prd[0], sb_out_mask[0] }
//
Expand All @@ -340,35 +364,30 @@ module aes_cipher_core import aes_pkg::*;
//
// When using a masked S-Box implementation other than Canright, we still select the 8 LSBs of
// the per-S-Box PRD to form the input data mask of the corresponding byte. We do this to
// distribute the input data masks over all LFSR chunks of the masking PRNG. We do the extraction
// on a row basis.
// distribute the input data masks over all output bits of the masking PRNG. We do the extraction
// on a row basis.
localparam int unsigned WidthPRDRow = 4*WidthPRDSBox;
for (genvar i = 0; i < 4; i++) begin : gen_in_mask
assign data_in_mask[i] = aes_prd_get_lsbs(prd_masking[i * WidthPRDRow +: WidthPRDRow]);
assign data_in_mask[i] = aes_prd_get_lsbs(prd_sub_bytes[i * WidthPRDRow +: WidthPRDRow]);
end

// Rotate the data input masks by two LFSR chunks to ensure the data input masks are independent
// Rotate the data input masks by 64 bits to ensure the data input masks are independent
// from the PRD fed to the S-Boxes/SubBytes operation.
assign data_in_mask_o = {data_in_mask[1], data_in_mask[0], data_in_mask[3], data_in_mask[2]};

// Make sure that whenever the data/mask inputs of the S-Boxes update, the internally buffered
// PRD is updated in sync.
assign sub_bytes_prd_we = (state_we == SP2V_HIGH) ? 1'b1 : 1'b0;

// Cipher data path
aes_sub_bytes #(
.SecSBoxImpl ( SecSBoxImpl )
) u_aes_sub_bytes (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( sub_bytes_en ),
.prd_we_i ( sub_bytes_prd_we ),
.out_req_o ( sub_bytes_out_req ),
.out_ack_i ( sub_bytes_out_ack ),
.op_i ( op_i ),
.data_i ( state_q[0] ),
.mask_i ( sb_in_mask ),
.prd_i ( prd_sub_bytes ),
.prd_i ( prd_sub_bytes_q ),
.data_o ( sub_bytes_out ),
.mask_o ( sb_out_mask ),
.err_o ( sub_bytes_err )
Expand Down Expand Up @@ -821,7 +840,7 @@ module aes_cipher_core import aes_pkg::*;
sub_bytes_en == SP2V_HIGH && ($past(sub_bytes_en) == SP2V_LOW ||
($past(sub_bytes_out_req) == SP2V_HIGH &&
$past(sub_bytes_out_ack) == SP2V_HIGH)) |=>
$past(prd_sub_bytes) != $past(prd_sub_bytes, NumCyclesPerRound + 1) ||
$past(prd_sub_bytes_q) != $past(prd_sub_bytes_q, NumCyclesPerRound + 1) ||
SecAllowForcingMasks && force_masks_i)

// Ensure that the PRNG has been updated between masking the input and starting the first
Expand Down
49 changes: 37 additions & 12 deletions hw/ip/aes/rtl/aes_key_expand.sv
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,8 @@ module aes_key_expand import aes_pkg::*;
assign sw_in_mask = use_rot_word ? rot_word_out[1] : rot_word_in[1];
end

// SubWord - individually substitute bytes.
// Every DOM S-Box instance consumes 28 bits of randomness but itself produces 20 bits for use in
// another S-Box instance. For other S-Box implementations, only the bits corresponding to prd_i
// are used. Other bits are ignored and tied to 0.
logic [3:0][WidthPRDSBox+19:0] in_prd;
logic [3:0] [19:0] out_prd;

// Make sure that whenever the data/mask inputs of the S-Boxes update, the internally buffered
// PRD is updated in sync. There are two special cases we need to handle here:
// Make sure that whenever the data/mask inputs of the S-Boxes update, the buffered PRD is
// updated in sync. There are two special cases we need to handle here:
// - For AES-256, the initial round is short (no round key computation). But the data/mask inputs
// are updated either way. Thus, we need to force a PRD update as well.
// - For AES-192 in FWD mode, the data/mask inputs aren't updated in Round 1, 4, 7 and 10. Thus,
Expand All @@ -225,18 +218,50 @@ module aes_key_expand import aes_pkg::*;
(rnd == 0 || rnd == 3 || rnd == 6 || rnd == 9);
assign prd_we = (prd_we_i & ~prd_we_inhibit) | prd_we_force;

// PRD buffering
logic [WidthPRDKey-1:0] prd_q;

if (!SecMasking) begin : gen_no_prd_buffer
// The masks are ignored anyway.
assign prd_q = prd_i;

// Tie-off unused signals.
logic unused_prd_we;
assign unused_prd_we = prd_we;

end else begin : gen_prd_buffer
// PRD buffer stage to:
// 1. Make sure the S-Boxes are always presented with new data/mask inputs together with fresh
// PRD for remasking.
// 2. Prevent glitches originating from inside the masking PRNG from propagating into the
// masked S-Boxes.
always_ff @(posedge clk_i or negedge rst_ni) begin : prd_reg
if (!rst_ni) begin
prd_q <= '0;
end else if (prd_we) begin
prd_q <= prd_i;
end
end
andreaskurth marked this conversation as resolved.
Show resolved Hide resolved
end

// SubWord - individually substitute bytes.
// Every DOM S-Box instance consumes 28 bits of randomness but itself produces 20 bits for use in
// another S-Box instance. For other S-Box implementations, only the bits corresponding to prd_q
// are used. Other bits are ignored and tied to 0.
logic [3:0][WidthPRDSBox+19:0] in_prd;
logic [3:0] [19:0] out_prd;

for (genvar i = 0; i < 4; i++) begin : gen_sbox
// Rotate the randomness produced by the S-Boxes. The LSBs are taken from the masking PRNG
// (prd_i) whereas the MSBs are produced by the other S-Box instances.
assign in_prd[i] = {out_prd[aes_rot_int(i,4)], prd_i[WidthPRDSBox*i +: WidthPRDSBox]};
// (prd_q) whereas the MSBs are produced by the other S-Box instances.
assign in_prd[i] = {out_prd[aes_rot_int(i,4)], prd_q[WidthPRDSBox*i +: WidthPRDSBox]};

aes_sbox #(
.SecSBoxImpl ( SecSBoxImpl )
) u_aes_sbox_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( en == SP2V_HIGH ),
.prd_we_i ( prd_we ),
.out_req_o ( sub_word_out_req[i] ),
.out_ack_i ( out_ack == SP2V_HIGH ),
.op_i ( CIPH_FWD ),
Expand Down
13 changes: 6 additions & 7 deletions hw/ip/aes/rtl/aes_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -554,12 +554,11 @@ endfunction
// The masking PRNG is used for generating both the PRD for the S-Boxes/SubBytes operation as
// well as for the input data masks. When using any of the masked Canright S-Box implementations,
// it is important that the SubBytes input masks (generated by the PRNG in Round X-1) and the
// SubBytes output masks (generated by the PRNG in Round X) are independent. Inside the PRNG,
// this is achieved by using multiple, separately re-seeded LFSR chunks and by selecting the
// separate LFSR chunks in alternating fashion. Since the input data masks become the SubBytes
// input masks in the first round, we select the same 8 bit lanes for the input data masks which
// are also used to form the SubBytes output mask for the masked Canright S-Box implementations,
// i.e., the 8 LSBs of the per S-Box PRD. In particular, we have:
// SubBytes output masks (generated by the PRNG in Round X) are independent. This can be achieved
// by using e.g. an unrolled Bivium stream cipher primitive inside the PRNG. Since the input data
// masks become the SubBytes input masks in the first round, we select the same 8 bit lanes for the
// input data masks which are also used to form the SubBytes output mask for the masked Canright
// S-Box implementations, i.e., the 8 LSBs of the per S-Box PRD. In particular, we have:
//
// prng_output = { prd_key_expand, ... , sb_prd[4], sb_out_mask[4], sb_prd[0], sb_out_mask[0] }
//
Expand All @@ -569,7 +568,7 @@ endfunction
//
// When using a masked S-Box implementation other than Canright, we still select the 8 LSBs of
// the per-S-Box PRD to form the input data mask of the corresponding byte. We do this to
// distribute the input data masks over all LFSR chunks of the masking PRNG.
// distribute the input data masks over all output bits of the masking PRNG.

// For one row of the state matrix, extract the 8 LSBs of the per-S-Box PRD from the PRNG output.
// These bits are used as:
Expand Down
2 changes: 0 additions & 2 deletions hw/ip/aes/rtl/aes_reduced_round.sv
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ module aes_reduced_round import aes_pkg::*;
input logic clk_i,
input logic rst_ni,
input sp2v_e en_i,
input logic prd_we_i,
output sp2v_e out_req_o,
input sp2v_e out_ack_i,
input ciph_op_e op_i,
Expand Down Expand Up @@ -41,7 +40,6 @@ module aes_reduced_round import aes_pkg::*;
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( en_i ),
.prd_we_i ( prd_we_i ),
.out_req_o ( out_req_o ),
.out_ack_i ( out_ack_i ),
.op_i ( op_i ),
Expand Down
4 changes: 0 additions & 4 deletions hw/ip/aes/rtl/aes_sbox.sv
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ module aes_sbox import aes_pkg::*;
input logic clk_i,
input logic rst_ni,
input logic en_i,
input logic prd_we_i,
output logic out_req_o,
input logic out_ack_i,
input ciph_op_e op_i,
Expand Down Expand Up @@ -73,7 +72,6 @@ module aes_sbox import aes_pkg::*;
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( en_i ),
.prd_we_i ( prd_we_i ),
.out_req_o ( out_req_o ),
.out_ack_i ( out_ack_i ),
.op_i ( op_i ),
Expand Down Expand Up @@ -137,9 +135,7 @@ module aes_sbox import aes_pkg::*;
if (SBoxSingleCycle) begin : gen_req_singlecycle
// Tie off unused inputs.
logic unused_out_ack;
logic unused_prd_we;
assign unused_out_ack = out_ack_i;
assign unused_prd_we = prd_we_i;

// Signal that we have valid output right away.
assign out_req_o = en_i;
Expand Down
Loading