Skip to content

Commit

Permalink
Integration step openhwgroup#1
Browse files Browse the repository at this point in the history
  • Loading branch information
rohan-10xe committed May 9, 2024
1 parent 43ff606 commit 4d81cbd
Show file tree
Hide file tree
Showing 11 changed files with 1,122 additions and 9 deletions.
7 changes: 7 additions & 0 deletions cv32e40p_fpu_manifest.flist
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ ${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_opgroup_fmt_slice.sv
${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_opgroup_multifmt_slice.sv
${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_opgroup_block.sv
${DESIGN_RTL_DIR}/vendor/pulp_platform_fpnew/src/fpnew_top.sv
${DESIGN_RTL_DIR}/vendor/bf16_acc/bf16fp32conv.sv
${DESIGN_RTL_DIR}/vendor/bf16_acc/fp32bf16conv.sv
${DESIGN_RTL_DIR}/vendor/bf16_acc/bf16_conv_top.sv
${DESIGN_RTL_DIR}/vendor/bf16_acc/bf16_maxmin.sv
${DESIGN_RTL_DIR}/vendor/bf16_acc/lzc.sv
${DESIGN_RTL_DIR}/vendor/bf16_acc/bf16_fma.sv
${DESIGN_RTL_DIR}/vendor/bf16_acc/bf16_accelerator_top.sv
${DESIGN_RTL_DIR}/cv32e40p_fp_wrapper.sv

${DESIGN_RTL_DIR}/cv32e40p_top.sv
Expand Down
51 changes: 45 additions & 6 deletions rtl/cv32e40p_decoder.sv
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ module cv32e40p_decoder
parameter COREV_PULP = 1, // PULP ISA Extension (including PULP specific CSRs and hardware loop, excluding cv.elw)
parameter COREV_CLUSTER = 0, // PULP ISA Extension cv.elw (need COREV_PULP = 1)
parameter A_EXTENSION = 0,
parameter FPU = 0,
parameter FPU = 1,
parameter FPU_ADDMUL_LAT = 0,
parameter FPU_OTHERS_LAT = 0,
parameter ZFINX = 0,
Expand Down Expand Up @@ -1135,13 +1135,20 @@ module cv32e40p_decoder
fpu_op = cv32e40p_fpu_pkg::F2F;
fp_op_group = CONV;
// bits [22:20] used, other bits must be 0
if (instr_rdata_i[24:23]) illegal_insn_o = 1'b1;
// if (instr_rdata_i[24:23]) illegal_insn_o = 1'b1;
// check source format
unique case (instr_rdata_i[22:20])
// Only process instruction if corresponding extension is active (static)
3'b000: begin
if (!(C_RVF && (C_XF16 || C_XF16ALT || C_XF8))) illegal_insn_o = 1'b1;
fpu_src_fmt_o = cv32e40p_fpu_pkg::FP32;
if(~C_BF16) begin
if (!(C_RVF && (C_XF16 || C_XF16ALT || C_XF8))) illegal_insn_o = 1'b1;
fpu_src_fmt_o = cv32e40p_fpu_pkg::FP32;
end
else begin
fpu_src_fmt_o = cv32e40p_fpu_pkg::BF16;
// set dst format to FP32
fpu_dst_fmt_o = cv32e40p_fpu_pkg::FP32;
end
end
3'b001: begin
if (~C_RVD) illegal_insn_o = 1'b1;
Expand All @@ -1152,8 +1159,15 @@ module cv32e40p_decoder
fpu_src_fmt_o = cv32e40p_fpu_pkg::FP16;
end
3'b110: begin
if (~C_XF16ALT) illegal_insn_o = 1'b1;
fpu_src_fmt_o = cv32e40p_fpu_pkg::FP16ALT;
if(~C_BF16) begin
if (~C_XF16ALT) illegal_insn_o = 1'b1;
fpu_src_fmt_o = cv32e40p_fpu_pkg::FP16ALT;
end
else begin
fpu_src_fmt_o = cv32e40p_fpu_pkg::FP32;
// set dst format to BF16
fpu_dst_fmt_o = cv32e40p_fpu_pkg::BF16;
end
end
3'b011: begin
if (~C_XF8) illegal_insn_o = 1'b1;
Expand Down Expand Up @@ -1185,6 +1199,31 @@ module cv32e40p_decoder
// set dst format to FP32
fpu_dst_fmt_o = cv32e40p_fpu_pkg::FP32;
end
// bf16_min/max
5'b01101, 5'b01111: begin
if(C_BF16)begin
unique case (instr_rdata_i[28])
1'b0: begin
fpu_op = cv32e40p_fpu_pkg::MINMAX;
fp_rnd_mode_o = 3'b000; // min
fp_op_group = NONCOMP;
check_fprm = 1'b0; // instruction encoded in rm
fpu_dst_fmt_o = cv32e40p_fpu_pkg::BF16;
fpu_src_fmt_o = cv32e40p_fpu_pkg::BF16;
end
1'b1: begin
fpu_op = cv32e40p_fpu_pkg::MINMAX;
fp_rnd_mode_o = 3'b001; // max
fp_op_group = NONCOMP;
check_fprm = 1'b0; // instruction encoded in rm
fpu_dst_fmt_o = cv32e40p_fpu_pkg::BF16;
fpu_src_fmt_o = cv32e40p_fpu_pkg::BF16;
end
endcase
end
else
illegal_insn_o = 1;
end
// feq/flt/fle.fmt - FP Comparisons
5'b10100: begin
fpu_op = cv32e40p_fpu_pkg::CMP;
Expand Down
5 changes: 3 additions & 2 deletions rtl/include/cv32e40p_fpu_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ package cv32e40p_fpu_pkg;
// *NOTE:* Add new formats only at the end of the enumeration for backwards compatibility!


localparam int unsigned NUM_FP_FORMATS = 5; // change me to add formats
localparam int unsigned NUM_FP_FORMATS = 6; // change me to add formats
localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS);

// FP formats
Expand All @@ -59,7 +59,8 @@ package cv32e40p_fpu_pkg;
FP64 = 'd1,
FP16 = 'd2,
FP8 = 'd3,
FP16ALT = 'd4
FP16ALT = 'd4,
BF16 = 'd5
// add new formats here
} fp_format_e;

Expand Down
4 changes: 3 additions & 1 deletion rtl/include/cv32e40p_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ package cv32e40p_pkg;
parameter OPCODE_JAL = 7'h6f;
parameter OPCODE_AUIPC = 7'h17;
parameter OPCODE_LUI = 7'h37;
parameter OPCODE_OP_FP = 7'h53;
parameter OPCODE_OP_FP = 7'h53; // to be used for bf16_min, bf16_max, bf16_fp32_conv, and fp32_bf16_conv instr
parameter OPCODE_OP_FMADD = 7'h43;
parameter OPCODE_OP_FNMADD = 7'h4f;
parameter OPCODE_OP_FMSUB = 7'h47;
Expand Down Expand Up @@ -771,6 +771,7 @@ package cv32e40p_pkg;
parameter bit C_XF16 = 1'b0; // Is half-precision float extension (Xf16) enabled
parameter bit C_XF16ALT = 1'b0; // Is alternative half-precision float extension (Xf16alt) enabled
parameter bit C_XF8 = 1'b0; // Is quarter-precision float extension (Xf8) enabled
parameter bit C_BF16 = 1'b1; // Is half-precision float extension (bf16) enabled
parameter bit C_XFVEC = 1'b0; // Is vectorial float extension (Xfvec) enabled

// Latency of FP operations: 0 = no pipe registers, 1 = 1 pipe register etc.
Expand All @@ -790,6 +791,7 @@ package cv32e40p_pkg;
C_RVF ? 32 : // F ext.
C_XF16 ? 16 : // Xf16 ext.
C_XF16ALT ? 16 : // Xf16alt ext.
C_BF16 ? 16 : // bf16 ext.
C_XF8 ? 8 : // Xf8 ext.
0; // Unused in case of no FP

Expand Down
132 changes: 132 additions & 0 deletions rtl/vendor/bf16_acc/bf16_accelerator_top.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 12/06/2023 03:56:06 PM
// Design Name:
// Module Name: bf16_accelerator_top
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////


// Top level of the BF16 accelerator.
//
// Decodes the 4-bit `operation` code, enables exactly one of three
// submodules (conversion, max/min, add/mul/FMA) and multiplexes the selected
// submodule's result and status flags onto `result` / `fpcsr`.
//
// Decode implemented below (operation[3:0]):
//   4'b000x          -> conversion unit
//   4'b001x          -> max/min unit
//   4'b01xx, 4'b1xxx -> add/mul/FMA unit
//
// Opcode assignments as listed by the original author:
//   4'b0000  BF16 to FP32 conversion
//   4'b0001  FP32 to BF16 conversion
//   4'b0010  Max
//   4'b0011  Min
//   4'b0100  Add
//   4'b0101  Mul
//   4'b0110  Sub
//   4'b0111  Fused Multiply-Add           (FMADD)
//   4'b1000  Fused Multiply-Subtract      (FMSUB)
//   4'b1001  Fused Negative Multiply-Add  (FNMADD)
//   4'b1010  Fused Negative Multiply-Sub  (FNMSUB)
// Codes 4'b1011..4'b1111 are not in that list but still select the add/mul
// unit, because addmul_enable is simply operation[3] | operation[2].
module bf16_accelerator_top(
    input  logic        clk,
    input  logic        reset,
    input  logic        enable,     // Enable signal for the accelerator
    input  logic [31:0] operand_a,  // First operand
    input  logic [15:0] operand_b,  // Second operand
    input  logic [31:0] operand_c,  // Third operand for FMA operations
    input  logic [3:0]  operation,  // Operation type
    output logic [31:0] result,     // Result of the operation
    output logic [3:0]  fpcsr,      // Floating-point control and status register
    output logic        valid       // Output valid signal
);

    // Per-submodule select/enable signals, derived from `operation` only.
    // NOTE(review): these are not qualified by the top-level `enable`, so a
    // submodule sees its enable asserted whenever the opcode matches, even
    // while `enable` is low. Only `valid` is gated. Confirm this is intended.
    logic conv_enable, maxmin_enable, addmul_enable;

    // Results and status flags returned by the submodules.
    logic [15:0] maxmin_result;
    logic [31:0] conv_result, addmul_result;
    logic [3:0]  conv_fpcsr, maxmin_fpcsr, addmul_fpcsr;

    // Conversion unit (BF16 <-> FP32); operates on operand_a only.
    bf16_conversion bf16_fp32_conversion_inst (
        .clk       (clk),
        .reset     (reset),
        .enable    (conv_enable),
        .operation (operation),    // Pass the operation code
        .operand   (operand_a),    // Pass the operand
        .result    (conv_result),  // Receive the result
        .fpcsr     (conv_fpcsr)    // Receive the FPCSR status
    );

    // Max/min unit; uses the low 16 bits of operand_a and operand_b.
    bf16_minmax maxmin_module (
        .clk       (clk),
        .reset     (reset),
        .enable    (maxmin_enable),
        .operand_a (operand_a[15:0]),
        .operand_b (operand_b[15:0]),
        .operation (operation),
        .result    (maxmin_result),
        .fpcsr     (maxmin_fpcsr)
    );

    // Add/mul/FMA unit.
    bf16_fma_op addmul_module (
        .clk    (clk),
        .reset  (reset),
        .en     (addmul_enable),
        .op_a   (operand_a),
        .op_b   (operand_b),
        .op_c   (operand_c),
        .oper   (operation),
        .result (addmul_result),
        .fpcsr  (addmul_fpcsr)
    );

    // Opcode decode: the three selects are mutually exclusive and together
    // cover every value of operation[3:0].
    assign conv_enable   = !operation[3] & !operation[2] & !operation[1];
    assign maxmin_enable = !operation[3] & !operation[2] &  operation[1];
    assign addmul_enable =  operation[3] |  operation[2];

    // AND-OR result mux. The 16-bit max/min result is explicitly
    // zero-extended to the 32-bit result width.
    assign result = ({32{conv_enable}}   & conv_result)
                  | ({32{maxmin_enable}} & {16'h0000, maxmin_result})
                  | ({32{addmul_enable}} & addmul_result);

    // AND-OR status mux. The masks are 4 bits wide to match the 4-bit status
    // vectors; the previous 32-bit replication relied on implicit truncation
    // when assigning to the 4-bit `fpcsr` port.
    assign fpcsr = ({4{conv_enable}}   & conv_fpcsr)
                 | ({4{maxmin_enable}} & maxmin_fpcsr)
                 | ({4{addmul_enable}} & addmul_fpcsr);

    // Combinational valid: asserted while `enable` is high and any submodule
    // is selected.
    assign valid = enable && (conv_enable || maxmin_enable || addmul_enable);

endmodule

108 changes: 108 additions & 0 deletions rtl/vendor/bf16_acc/bf16_conv_top.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company:
// Engineer:
//
// Create Date: 12/06/2023 06:19:26 PM
// Design Name:
// Module Name: bf16_conversion
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//////////////////////////////////////////////////////////////////////////////////


// Conversion wrapper around the two format converters.
//
// `operation` selects the direction:
//   BF16_TO_FP32_OP (4'b0000): operand[15:0] goes to bf16_to_fp32 and its
//                              32-bit output is driven on `result`.
//   FP32_TO_BF16_OP (4'b0001): operand[31:0] goes to fp32_to_bf16 and its
//                              16-bit output is zero-extended onto `result`.
//   any other code           : `result` and `fpcsr` are driven to zero.
//
// `enable` only qualifies the per-converter instruction enables; the output
// multiplexers depend on `operation` alone.
module bf16_conversion(
    input  logic        clk,
    input  logic        reset,
    input  logic        enable,     // Enable signal for conversion operations
    input  logic [3:0]  operation,  // 4-bit operation code
    input  logic [31:0] operand,    // Universal operand for both BF16 and FP32
    output logic [31:0] result,     // Result, either BF16 or FP32
    output logic [3:0]  fpcsr       // Floating Point Control and Status Register
);

    // Operation codes handled by this module.
    localparam BF16_TO_FP32_OP = 4'b0000;
    localparam FP32_TO_BF16_OP = 4'b0001;

    // Opcode match for each conversion direction.
    wire sel_widen;   // BF16 -> FP32 requested
    wire sel_narrow;  // FP32 -> BF16 requested

    // Enables handed to the converters (opcode match qualified by `enable`).
    wire widen_en;
    wire narrow_en;

    // Converter outputs.
    wire [31:0] widened_fp32;   // result of bf16_to_fp32
    wire [15:0] narrowed_bf16;  // result of fp32_to_bf16
    wire [3:0]  widen_flags;    // status flags of bf16_to_fp32
    wire [3:0]  narrow_flags;   // status flags of fp32_to_bf16

    assign sel_widen  = (operation == BF16_TO_FP32_OP);
    assign sel_narrow = (operation == FP32_TO_BF16_OP);

    assign widen_en  = enable & sel_widen;
    assign narrow_en = enable & sel_narrow;

    // BF16 -> FP32 converter: consumes the low half of the operand.
    bf16_to_fp32 bf16_to_fp32_inst (
        .clk                (clk),
        .reset              (reset),
        .instruction_enable (widen_en),
        .operand_a          (operand[15:0]),
        .result             (widened_fp32),
        .fpcsr              (widen_flags)
    );

    // FP32 -> BF16 converter: consumes the full operand.
    fp32_to_bf16 fp32_to_bf16_inst (
        .clk                (clk),
        .reset              (reset),
        .instruction_enable (narrow_en),
        .operand_a          (operand),
        .result             (narrowed_bf16),
        .fpcsr              (narrow_flags)
    );

    // Output selection: the BF16 result is zero-extended to 32 bits, and
    // unrecognised opcodes yield all zeros.
    assign result = sel_widen  ? widened_fp32 :
                    sel_narrow ? {16'h0000, narrowed_bf16} :
                                 32'h00000000;

    assign fpcsr  = sel_widen  ? widen_flags :
                    sel_narrow ? narrow_flags :
                                 4'b0000;

endmodule



Loading

0 comments on commit 4d81cbd

Please sign in to comment.