Skip to content

Commit

Permalink
Merge pull request openhwgroup#860 from pascalgouedo/dev_dd_pgo_rtl-b…
Browse files Browse the repository at this point in the history
…ug_fixes

Some Core RTL bug fixes and CVFPU 0.8.1 vendorization
  • Loading branch information
davideschiavone authored Aug 30, 2023
2 parents c36466b + f3a904f commit 9470ac1
Show file tree
Hide file tree
Showing 15 changed files with 184 additions and 128 deletions.
4 changes: 2 additions & 2 deletions bhv/cv32e40p_tb_wrapper.sv
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ module cv32e40p_tb_wrapper
.apu_en_i (cv32e40p_top_i.apu_req),
.apu_singlecycle_i(cv32e40p_top_i.core_i.ex_stage_i.apu_singlecycle),
.apu_multicycle_i (cv32e40p_top_i.core_i.ex_stage_i.apu_multicycle),
.apu_rvalid_i (cv32e40p_top_i.apu_rvalid)
.apu_rvalid_i (cv32e40p_top_i.core_i.ex_stage_i.apu_valid)
);
`endif

Expand Down Expand Up @@ -344,7 +344,7 @@ module cv32e40p_tb_wrapper
// APU
.apu_req_i (cv32e40p_top_i.core_i.apu_req_o),
.apu_gnt_i (cv32e40p_top_i.core_i.apu_gnt_i),
.apu_rvalid_i(cv32e40p_top_i.core_i.apu_rvalid_i),
.apu_rvalid_i(cv32e40p_top_i.core_i.ex_stage_i.apu_valid),

// Controller FSM probes
.ctrl_fsm_cs_i(cv32e40p_top_i.core_i.id_stage_i.controller_i.ctrl_fsm_cs),
Expand Down
5 changes: 5 additions & 0 deletions rtl/cv32e40p_apu_disp.sv
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ module cv32e40p_apu_disp (
input logic [2:0][5:0] read_regs_i,
input logic [2:0] read_regs_valid_i,
output logic read_dep_o,
output logic read_dep_for_jalr_o,

input logic [1:0][5:0] write_regs_i,
input logic [1:0] write_regs_valid_i,
Expand Down Expand Up @@ -189,6 +190,10 @@ module cv32e40p_apu_disp (
assign read_dep_o = (read_dep_req | read_dep_inflight | read_dep_waiting) & is_decoding_i;
assign write_dep_o = (write_dep_req | write_dep_inflight | write_dep_waiting) & is_decoding_i;

// Read-dependency flag exported separately from read_dep_o (named for use by JALR handling).
// Like read_dep_o it is gated by is_decoding_i, but each term is built from the per-register
// read_deps_* vectors qualified by enable_i / valid_inflight / valid_waiting.
// Precedence note: the unary reduction-OR binds tighter than the binary '&', so every term
// reads as (|read_deps_x) & qualifier, i.e. "any read register matches AND that slot is valid".
// NOTE(review): presumably the three terms cover the request being issued, the in-flight
// operation and the waiting operation respectively - confirm against the dispatcher body.
assign read_dep_for_jalr_o = is_decoding_i & ((|read_deps_req & enable_i) |
(|read_deps_inflight & valid_inflight) |
(|read_deps_waiting & valid_waiting));

//
// Stall signals
//
Expand Down
9 changes: 7 additions & 2 deletions rtl/cv32e40p_controller.sv
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
module cv32e40p_controller import cv32e40p_pkg::*;
#(
parameter COREV_CLUSTER = 0,
parameter COREV_PULP = 1
parameter COREV_PULP = 0,
parameter FPU = 0
)
(
input logic clk, // Gated clock
Expand Down Expand Up @@ -104,6 +105,7 @@ module cv32e40p_controller import cv32e40p_pkg::*;
// APU dependency checks
input logic apu_en_i,
input logic apu_read_dep_i,
input logic apu_read_dep_for_jalr_i,
input logic apu_write_dep_i,

output logic apu_stall_o,
Expand Down Expand Up @@ -1338,7 +1340,10 @@ endgenerate
if ((ctrl_transfer_insn_in_dec_i == BRANCH_JALR) &&
(((regfile_we_wb_i == 1'b1) && (reg_d_wb_is_reg_a_i == 1'b1)) ||
((regfile_we_ex_i == 1'b1) && (reg_d_ex_is_reg_a_i == 1'b1)) ||
((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_a_i == 1'b1))) )
((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_a_i == 1'b1)) ||
(FPU && (apu_read_dep_for_jalr_i == 1'b1))
)
)
begin
jr_stall_o = 1'b1;
deassert_we_o = 1'b1;
Expand Down
40 changes: 22 additions & 18 deletions rtl/cv32e40p_core.sv
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ module cv32e40p_core
logic [ 2:0][ 5:0] apu_read_regs;
logic [ 2:0] apu_read_regs_valid;
logic apu_read_dep;
logic apu_read_dep_for_jalr;
logic [ 1:0][ 5:0] apu_write_regs;
logic [ 1:0] apu_write_regs_valid;
logic apu_write_dep;
Expand Down Expand Up @@ -361,7 +362,6 @@ module cv32e40p_core

// APU master signals
assign apu_flags_o = apu_flags_ex;
assign fflags_csr = apu_flags_i;

//////////////////////////////////////////////////////////////////////////////////////////////
// ____ _ _ __ __ _ //
Expand Down Expand Up @@ -621,14 +621,15 @@ module cv32e40p_core
.apu_flags_ex_o (apu_flags_ex),
.apu_waddr_ex_o (apu_waddr_ex),

.apu_read_regs_o (apu_read_regs),
.apu_read_regs_valid_o (apu_read_regs_valid),
.apu_read_dep_i (apu_read_dep),
.apu_write_regs_o (apu_write_regs),
.apu_write_regs_valid_o(apu_write_regs_valid),
.apu_write_dep_i (apu_write_dep),
.apu_perf_dep_o (perf_apu_dep),
.apu_busy_i (apu_busy),
.apu_read_regs_o (apu_read_regs),
.apu_read_regs_valid_o (apu_read_regs_valid),
.apu_read_dep_i (apu_read_dep),
.apu_read_dep_for_jalr_i(apu_read_dep_for_jalr),
.apu_write_regs_o (apu_write_regs),
.apu_write_regs_valid_o (apu_write_regs_valid),
.apu_write_dep_i (apu_write_dep),
.apu_perf_dep_o (perf_apu_dep),
.apu_busy_i (apu_busy),

// CSR ID/EX
.csr_access_ex_o (csr_access_ex),
Expand Down Expand Up @@ -779,23 +780,27 @@ module cv32e40p_core

.mult_multicycle_o(mult_multicycle), // to ID/EX pipe registers

.data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline
.data_misaligned_i (data_misaligned),

// FPU
.fpu_fflags_we_o(fflags_we),
.fpu_fflags_o (fflags_csr),

// APU
.apu_en_i (apu_en_ex),
.apu_op_i (apu_op_ex),
.apu_lat_i (apu_lat_ex),
.apu_operands_i(apu_operands_ex),
.apu_waddr_i (apu_waddr_ex),
.apu_flags_i (apu_flags_ex),

.apu_read_regs_i (apu_read_regs),
.apu_read_regs_valid_i (apu_read_regs_valid),
.apu_read_dep_o (apu_read_dep),
.apu_write_regs_i (apu_write_regs),
.apu_write_regs_valid_i(apu_write_regs_valid),
.apu_write_dep_o (apu_write_dep),
.apu_read_regs_i (apu_read_regs),
.apu_read_regs_valid_i (apu_read_regs_valid),
.apu_read_dep_o (apu_read_dep),
.apu_read_dep_for_jalr_o(apu_read_dep_for_jalr),
.apu_write_regs_i (apu_write_regs),
.apu_write_regs_valid_i (apu_write_regs_valid),
.apu_write_dep_o (apu_write_dep),

.apu_perf_type_o(perf_apu_type),
.apu_perf_cont_o(perf_apu_cont),
Expand All @@ -813,6 +818,7 @@ module cv32e40p_core
// response channel
.apu_rvalid_i (apu_rvalid_i),
.apu_result_i (apu_result_i),
.apu_flags_i (apu_flags_i),

.lsu_en_i (data_req_ex),
.lsu_rdata_i(lsu_rdata),
Expand Down Expand Up @@ -901,8 +907,6 @@ module cv32e40p_core
.data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline
.data_misaligned_o (data_misaligned),

.apu_busy_i(apu_busy),

.p_elw_start_o (p_elw_start),
.p_elw_finish_o(p_elw_finish),

Expand Down
152 changes: 92 additions & 60 deletions rtl/cv32e40p_ex_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,25 @@ module cv32e40p_ex_stage

output logic mult_multicycle_o,

input logic data_misaligned_ex_i,
input logic data_misaligned_i,

// FPU signals
output logic fpu_fflags_we_o,
output logic [APU_NUSFLAGS_CPU-1:0] fpu_fflags_o,

// APU signals
input logic apu_en_i,
input logic [ APU_WOP_CPU-1:0] apu_op_i,
input logic [ 1:0] apu_lat_i,
input logic [ APU_NARGS_CPU-1:0][31:0] apu_operands_i,
input logic [ 5:0] apu_waddr_i,
input logic [APU_NDSFLAGS_CPU-1:0] apu_flags_i,
input logic [APU_NUSFLAGS_CPU-1:0] apu_flags_i,

input logic [2:0][5:0] apu_read_regs_i,
input logic [2:0] apu_read_regs_valid_i,
output logic apu_read_dep_o,
output logic apu_read_dep_for_jalr_o,
input logic [1:0][5:0] apu_write_regs_i,
input logic [1:0] apu_write_regs_valid_i,
output logic apu_write_dep_o,
Expand Down Expand Up @@ -143,7 +148,7 @@ module cv32e40p_ex_stage
output logic branch_decision_o,

// Stall Control
input logic is_decoding_i, // Used to mask data dependency inside the APU dispatcher in case of a non-valid instruction
input logic is_decoding_i, // Used to mask data dependency inside the APU dispatcher in case of a non-valid instruction
input logic lsu_ready_ex_i, // EX part of LSU is done
input logic lsu_err_i,

Expand All @@ -152,29 +157,34 @@ module cv32e40p_ex_stage
input logic wb_ready_i // WB stage ready for new data
);

logic [31:0] alu_result;
logic [31:0] mult_result;
logic alu_cmp_result;
logic [ 31:0] alu_result;
logic [ 31:0] mult_result;
logic alu_cmp_result;

logic regfile_we_lsu;
logic [ 5:0] regfile_waddr_lsu;
logic regfile_we_lsu;
logic [ 5:0] regfile_waddr_lsu;

logic wb_contention;
logic wb_contention_lsu;
logic wb_contention;
logic wb_contention_lsu;

logic alu_ready;
logic mult_ready;
logic alu_ready;
logic mulh_active;
logic mult_ready;

// APU signals
logic apu_valid;
logic [ 5:0] apu_waddr;
logic [31:0] apu_result;
logic apu_stall;
logic apu_active;
logic apu_singlecycle;
logic apu_multicycle;
logic apu_req;
logic apu_gnt;
logic apu_valid;
logic [ 5:0] apu_waddr;
logic [ 31:0] apu_result;
logic apu_stall;
logic apu_active;
logic apu_singlecycle;
logic apu_multicycle;
logic apu_req;
logic apu_gnt;

logic apu_rvalid_q;
logic [ 31:0] apu_result_q;
logic [APU_NUSFLAGS_CPU-1:0] apu_flags_q;

// ALU write port mux
always_comb begin
Expand Down Expand Up @@ -295,9 +305,10 @@ module cv32e40p_ex_stage

.result_o(mult_result),

.multicycle_o(mult_multicycle_o),
.ready_o (mult_ready),
.ex_ready_i (ex_ready_o)
.multicycle_o (mult_multicycle_o),
.mulh_active_o(mulh_active),
.ready_o (mult_ready),
.ex_ready_i (ex_ready_o)
);

generate
Expand Down Expand Up @@ -326,13 +337,14 @@ module cv32e40p_ex_stage
.active_o(apu_active),
.stall_o (apu_stall),

.is_decoding_i (is_decoding_i),
.read_regs_i (apu_read_regs_i),
.read_regs_valid_i (apu_read_regs_valid_i),
.read_dep_o (apu_read_dep_o),
.write_regs_i (apu_write_regs_i),
.write_regs_valid_i(apu_write_regs_valid_i),
.write_dep_o (apu_write_dep_o),
.is_decoding_i (is_decoding_i),
.read_regs_i (apu_read_regs_i),
.read_regs_valid_i (apu_read_regs_valid_i),
.read_dep_o (apu_read_dep_o),
.read_dep_for_jalr_o(apu_read_dep_for_jalr_o),
.write_regs_i (apu_write_regs_i),
.write_regs_valid_i (apu_write_regs_valid_i),
.write_dep_o (apu_write_dep_o),

.perf_type_o(apu_perf_type_o),
.perf_cont_o(apu_perf_cont_o),
Expand All @@ -345,40 +357,60 @@ module cv32e40p_ex_stage
.apu_rvalid_i(apu_valid)
);

assign apu_perf_wb_o = wb_contention | wb_contention_lsu;
assign apu_ready_wb_o = ~(apu_active | apu_en_i | apu_stall) | apu_valid;
assign apu_perf_wb_o = wb_contention | wb_contention_lsu;
assign apu_ready_wb_o = ~(apu_active | apu_en_i | apu_stall) | apu_valid;

///////////////////////////////////////
// APU result memorization Register //
///////////////////////////////////////
// Captures an APU response (valid bit, result and flags) into the apu_*_q registers when it
// arrives in a cycle where it is held back, and keeps it until the hold condition goes away.
// Hold condition used below (and, gated by apu_multicycle, in the apu_valid assignment):
//   data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i ||
//   (mulh_active && (mult_operator_i == MUL_H))
// Reset is asynchronous and active-low (negedge rst_n in the sensitivity list).
always_ff @(posedge clk, negedge rst_n) begin : APU_Result_Memorization
if (~rst_n) begin
// Reset: no memorized response, result and flags cleared.
apu_rvalid_q <= 1'b0;
apu_result_q <= 'b0;
apu_flags_q <= 'b0;
end else begin
// Capture: a response arrives (apu_rvalid_i) for a multicycle APU operation while the
// hold condition is true, so store valid/result/flags for later use.
if (apu_rvalid_i && apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) begin
apu_rvalid_q <= 1'b1;
apu_result_q <= apu_result_i;
apu_flags_q <= apu_flags_i;
// Release: a response is memorized and the hold condition is no longer true.
// Only the valid bit is cleared; apu_result_q and apu_flags_q keep their last values.
end else if (apu_rvalid_q && !(data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) begin
apu_rvalid_q <= 1'b0;
end
end
end

assign apu_req_o = apu_req;
assign apu_gnt = apu_gnt_i;
assign apu_valid = apu_rvalid_i;
assign apu_operands_o = apu_operands_i;
assign apu_op_o = apu_op_i;
assign apu_result = apu_result_i;
assign apu_req_o = apu_req;
assign apu_gnt = apu_gnt_i;
assign apu_valid = (apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || regfile_alu_we_i || (mulh_active && (mult_operator_i == MUL_H)))) ? 1'b0 : (apu_rvalid_i || apu_rvalid_q);
assign apu_operands_o = apu_operands_i;
assign apu_op_o = apu_op_i;
assign apu_result = apu_rvalid_q ? apu_result_q : apu_result_i;
assign fpu_fflags_we_o = apu_valid;
assign fpu_fflags_o = apu_rvalid_q ? apu_flags_q : apu_flags_i;
end else begin : gen_no_apu
// default assignments for the case when no FPU/APU is attached.
assign apu_req_o = '0;
assign apu_operands_o[0] = '0;
assign apu_operands_o[1] = '0;
assign apu_operands_o[2] = '0;
assign apu_op_o = '0;
assign apu_req = 1'b0;
assign apu_gnt = 1'b0;
assign apu_result = 32'b0;
assign apu_valid = 1'b0;
assign apu_waddr = 6'b0;
assign apu_stall = 1'b0;
assign apu_active = 1'b0;
assign apu_ready_wb_o = 1'b1;
assign apu_perf_wb_o = 1'b0;
assign apu_perf_cont_o = 1'b0;
assign apu_perf_type_o = 1'b0;
assign apu_singlecycle = 1'b0;
assign apu_multicycle = 1'b0;
assign apu_read_dep_o = 1'b0;
assign apu_write_dep_o = 1'b0;
assign fpu_fflags_we_o = 1'b0;

assign apu_req_o = '0;
assign apu_operands_o[0] = '0;
assign apu_operands_o[1] = '0;
assign apu_operands_o[2] = '0;
assign apu_op_o = '0;
assign apu_req = 1'b0;
assign apu_gnt = 1'b0;
assign apu_result = 32'b0;
assign apu_valid = 1'b0;
assign apu_waddr = 6'b0;
assign apu_stall = 1'b0;
assign apu_active = 1'b0;
assign apu_ready_wb_o = 1'b1;
assign apu_perf_wb_o = 1'b0;
assign apu_perf_cont_o = 1'b0;
assign apu_perf_type_o = 1'b0;
assign apu_singlecycle = 1'b0;
assign apu_multicycle = 1'b0;
assign apu_read_dep_o = 1'b0;
assign apu_read_dep_for_jalr_o = 1'b0;
assign apu_write_dep_o = 1'b0;
assign fpu_fflags_o = '0;
end
endgenerate

Expand Down
Loading

0 comments on commit 9470ac1

Please sign in to comment.