Skip to content

Commit

Permalink
Added bcd support for adc/sbc instructions.
Browse files Browse the repository at this point in the history
Fixed various timing paths.
  • Loading branch information
RedGuyyyy committed Aug 8, 2018
1 parent 22e098a commit 37c5103
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 64 deletions.
168 changes: 109 additions & 59 deletions verilog/sd2snes_sa1/sa1.v
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ module sa1(
`define DMA_TYPE1_ENABLE
`define DMA_TYPE2_ENABLE
`define VBD_ENABLE
//`define BCD_ENABLE
`define BCD_ENABLE

`define EXE_FAST_FETCH

Expand Down Expand Up @@ -1096,7 +1096,7 @@ wire exe_mmc_int;

wire exe_fetch_byte_val;
wire [7:0] exe_fetch_byte;
wire [31:0] exe_data;
wire [7:0] exe_fetch_data;

//-------------------------------------------------------------------
// COMMON PIPELINE
Expand Down Expand Up @@ -2519,7 +2519,7 @@ end

// need to take from the input so we get a clock
//wire [7:0] dec_addr = MMC_STATE[clog2(ST_MMC_ROM)] ? ROM_BUS_RDDATA[7:0] : mmc_data_r[7:0];
wire [7:0] dec_addr = exe_mmc_int ? 8'h00 : exe_fetch_byte_val ? exe_fetch_byte[7:0] : mmc_exe_end ? exe_mmc_rddata[7:0] : exe_data[7:0];
wire [7:0] dec_addr = exe_mmc_int ? 8'h00 : exe_fetch_byte_val ? exe_fetch_byte[7:0] : mmc_exe_end ? exe_mmc_rddata[7:0] : exe_fetch_data[7:0];
wire [31:0] dec_data;

dec_table dec (
Expand Down Expand Up @@ -2607,11 +2607,70 @@ end
assign exe_wai = EXE_STATE[clog2(ST_EXE_EXECUTE)] & int_wai;
assign exe_mmc_int = int_pending_r;

//-------------------------------------------------------------------
// BCD
//-------------------------------------------------------------------
`ifdef BCD_ENABLE
// Serial decimal adder. A 16-bit addition is processed one 4-bit digit per
// clock, least-significant digit first, so only a single nibble add plus
// its correction sits between registers.
//
// Handshake with the execution pipeline:
//   - exe presents operands in exe_bcd_a_r / exe_bcd_b_r / exe_bcd_c_r and
//     raises exe_bcd_val_r; the request is sampled only while idle.
//   - four clocks are spent in the busy state (bcd_cnt_r counts 3..0).
//   - bcd_done_r is high for the single idle clock that follows the last
//     digit, then clears.
// After the fourth digit, bcd_o_r[4n+3:4n] holds result digit n and
// bcd_c_r[n] holds the carry out of digit n.

// operand and result shift registers (current digit is always in [3:0])
reg [15:0] bcd_a_r;
reg [15:0] bcd_b_r;
reg [15:0] bcd_o_r;
reg [3:0]  bcd_c_r;   // [3] doubles as the carry-in for the current digit

// sequencer
reg [1:0]  bcd_cnt_r;     initial bcd_cnt_r     = 2'd3;
reg        bcd_state_r;   initial bcd_state_r   = 1'b0;   // 0 = idle, 1 = busy
reg        bcd_done_r;    initial bcd_done_r    = 1'b0;

// request registers, written by the execution pipeline
reg        exe_bcd_val_r; initial exe_bcd_val_r = 1'b0;
reg [15:0] exe_bcd_a_r;   initial exe_bcd_a_r   = 16'h0000;
reg [15:0] exe_bcd_b_r;   initial exe_bcd_b_r   = 16'h0000;
reg        exe_bcd_c_r;   initial exe_bcd_c_r   = 1'b0;

// binary sum of the current digit pair plus the incoming carry
wire [4:0] bcd_result = bcd_a_r[3:0] + bcd_b_r[3:0] + bcd_c_r[3];

// digit sum after adding the value returned by bcd_adder (declared outside
// this section; presumably the decimal correction term - confirm there).
// Bit 4 is the carry into the next digit.
wire [4:0] bcd_result_adj = bcd_result[4:0] + bcd_adder(bcd_result[4], bcd_result[3:0]);

always @(posedge CLK) begin
  if (RST) begin
    bcd_cnt_r   <= 2'd3;
    bcd_done_r  <= 1'b0;
    bcd_state_r <= 1'b0;
  end
  else if (bcd_state_r) begin
    // busy: consume one digit and rotate the next one into [3:0]
    bcd_a_r <= {bcd_a_r[3:0], bcd_a_r[15:4]};
    bcd_b_r <= {bcd_b_r[3:0], bcd_b_r[15:4]};

    // the new digit and its carry enter at the top and shift down each step
    bcd_o_r <= {bcd_result_adj[3:0], bcd_o_r[15:4]};
    bcd_c_r <= {bcd_result_adj[4],   bcd_c_r[3:1]};

    // the counter wraps from 0 back to 3, which re-arms it for the next request
    bcd_cnt_r   <= bcd_cnt_r - 2'd1;
    bcd_state_r <= |bcd_cnt_r;
    bcd_done_r  <= ~|bcd_cnt_r;
  end
  else begin
    // idle: drop the done pulse and accept a new request if one is pending
    bcd_done_r <= 1'b0;

    if (exe_bcd_val_r) begin
      bcd_a_r     <= exe_bcd_a_r;
      bcd_b_r     <= exe_bcd_b_r;
      bcd_c_r[3]  <= exe_bcd_c_r;
      bcd_state_r <= 1'b1;
    end
  end
end
`endif

//-------------------------------------------------------------------
// EXECUTION PIPELINE
//-------------------------------------------------------------------

reg [31:0] exe_data_r;
reg [15:0] exe_fetch_data_r;
reg [23:0] exe_addr_r; initial exe_addr_r = 0;
reg [23:0] exe_mmc_addr_r; initial exe_mmc_addr_r = 0;

Expand Down Expand Up @@ -2655,20 +2714,20 @@ wire exe_dpe = ~|D_r[7:0] & E_r;
wire exe_data_word = |({~P_r[`P_X],~P_r[`P_M]}&dec_data[`DEC_PRC]) | &dec_data[`DEC_PRC];
wire exe_dec_imm16 = (|({~P_r[`P_X],~P_r[`P_M]}&dec_data[`DEC_PRC]) & dec_data[`ADD_IMM]);

`ifdef BCD_ENABLE
// bcd
reg [2:0] exe_bcd_mode_r;
reg [15:0] exe_bcd_src_r;
reg [15:0] exe_bcd_result_r;
reg exe_bcd_carry_r;
reg [4:0] exe_bcd_result;
`endif

// temporary
reg [16:0] exe_result;
reg [16:0] add_result;

always @(posedge CLK) begin

`ifdef BCD_ENABLE
// drive BCD inputs
exe_bcd_a_r <= exe_src_r[15:0];
// invert for SBC
exe_bcd_b_r <= exe_opcode_r[7] ? ~exe_data_r[15:0] : exe_data_r[15:0];
exe_bcd_c_r <= P_r[`P_C];
`endif

if (RST) begin
EXE_STATE <= ST_EXE_IDLE;

Expand All @@ -2683,18 +2742,18 @@ always @(posedge CLK) begin
P_r <= 8'h34;
E_r <= 1;

exe_fetch_addr_r <= 0;
exe_addr_r <= 0;
exe_mmc_addr_r <= 0;
//exe_fetch_addr_r <= 0;
//exe_addr_r <= 0;
//exe_mmc_addr_r <= 0;

exe_opsize_r <= 0;
exe_fetch_size_r <= 0;
exe_opcode_r <= 0;
exe_operand_r <= 0;
exe_decode_r <= 0;

exe_src_r <= 16'h0BAD;
exe_dst_r <= 16'h0BAD;
//exe_src_r <= 16'h0BAD;
//exe_dst_r <= 16'h0BAD;

exe_control_r <= 0;
exe_pbr_r <= 0;
Expand All @@ -2704,7 +2763,7 @@ always @(posedge CLK) begin

exe_mmc_rd_r <= 0;
exe_mmc_wr_r <= 0;
exe_mmc_data_r<= 0;
//exe_mmc_data_r<= 0;
exe_mmc_long_r<= 0;
exe_mmc_byte_total_r <= 0;

Expand All @@ -2714,6 +2773,8 @@ always @(posedge CLK) begin

exe_prefetch_val_r <= 0;

exe_bcd_val_r <= 0;

e2c_waitcnt_r <= 0;
end
else begin
Expand Down Expand Up @@ -2756,18 +2817,18 @@ always @(posedge CLK) begin
// always stop the read at END
if (mmc_exe_end) begin
exe_mmc_rd_r <= 0;
exe_data_r <= exe_mmc_rddata;
exe_fetch_data_r <= exe_mmc_rddata[15:0];
end

// TODO: fill in other data sources
exe_mmc_state_exe_end_r <= mmc_exe_end | int_pending_r | exe_prefetch_val_r;

// The decode rom takes an additional clock.
if (exe_mmc_state_exe_end_r) begin
exe_data_r <= int_pending_r ? 8'h00 : exe_prefetch_val_r ? exe_prefetch_r : exe_data_r;
//exe_fetch_data_r <= int_pending_r ? 8'h00 : exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r;

if (~|exe_opsize_r) begin
exe_opcode_r <= exe_mmc_int ? 8'h00 : exe_prefetch_val_r ? exe_prefetch_r : exe_data_r[7:0];
exe_opcode_r <= exe_mmc_int ? 8'h00 : exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r[7:0];
// word size only affects immediate for fetch
exe_opsize_r <= dec_data[`DEC_SIZE] ^ {2{exe_dec_imm16}};
exe_control_r <= dec_data[`DEC_CONTROL];
Expand Down Expand Up @@ -2800,18 +2861,12 @@ always @(posedge CLK) begin
exe_p_r <= P_r;
exe_e_r <= E_r;

`ifdef BCD_ENABLE
exe_bcd_mode_r <= 0;
exe_bcd_carry_r <= P_r[`P_C];
exe_bcd_src_r <= A_r;
`endif

`ifdef EXE_FAST_FETCH
// next state, address, and prefetch logic.
if (~|exe_opsize_r) begin
// initial decode
// `define DEC_SIZE 16:15
exe_operand_r[7:0] <= exe_data_r[15:8];
exe_operand_r[7:0] <= exe_fetch_data_r[15:8];

if (dec_data[16] | exe_dec_imm16 | (exe_fetch_addr_r[0] & dec_data[15])) begin
// 3,4 bytes or 2 misaligned bytes
Expand All @@ -2830,7 +2885,7 @@ always @(posedge CLK) begin

// prefetch is valid if aligned 1 byte
exe_prefetch_val_r <= ~exe_fetch_addr_r[0] && (dec_data[`DEC_SIZE] == `SZE_1);
exe_prefetch_r <= exe_data_r[15:8];
exe_prefetch_r <= exe_fetch_data_r[15:8];

EXE_STATE <= ST_EXE_ADDRESS;
end
Expand All @@ -2841,11 +2896,11 @@ always @(posedge CLK) begin

case (exe_fetch_size_r)
// have 1 byte
`SZE_1: exe_operand_r[15:0] <= exe_data_r[15:0];
`SZE_1: exe_operand_r[15:0] <= exe_fetch_data_r[15:0];
// have 2 bytes
`SZE_2: exe_operand_r[23:8] <= exe_data_r[15:0];
`SZE_2: exe_operand_r[23:8] <= exe_fetch_data_r[15:0];
// have 3 bytes
`SZE_3: exe_operand_r[23:16] <= exe_data_r[7:0];
`SZE_3: exe_operand_r[23:16] <= exe_fetch_data_r[7:0];
// have 4 bytes. not possible
`SZE_4: exe_operand_r[23:0] <= 24'hBADBAD;
endcase
Expand All @@ -2867,7 +2922,7 @@ always @(posedge CLK) begin

// check if prefetch available (overfetch)
exe_prefetch_val_r <= exe_fetch_size_r[0] ^ exe_opsize_r[0];
exe_prefetch_r <= exe_data_r[15:8];
exe_prefetch_r <= exe_fetch_data_r[15:8];

EXE_STATE <= ST_EXE_ADDRESS;
end
Expand All @@ -2879,14 +2934,14 @@ always @(posedge CLK) begin

case (exe_fetch_size_r)
`SZE_1: begin end
`SZE_2: exe_operand_r[7 : 0] <= exe_prefetch_val_r ? exe_prefetch_r : exe_data_r[7:0];
`SZE_3: exe_operand_r[15: 8] <= exe_prefetch_val_r ? exe_prefetch_r : exe_data_r[7:0];
`SZE_4: exe_operand_r[23:16] <= exe_prefetch_val_r ? exe_prefetch_r : exe_data_r[7:0];
`SZE_2: exe_operand_r[7 : 0] <= exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r[7:0];
`SZE_3: exe_operand_r[15: 8] <= exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r[7:0];
`SZE_4: exe_operand_r[23:16] <= exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r[7:0];
endcase

// TODO: the memory controller actually returns 2 sequential bytes independent of source, but we still want to force alignment.
exe_prefetch_val_r <= ~exe_prefetch_val_r & (~exe_fetch_addr_r[0] | ~`IS_ROM(exe_fetch_addr_r));
exe_prefetch_r <= exe_data_r[15:8];
exe_prefetch_r <= exe_fetch_data_r[15:8];

EXE_STATE <= ~|exe_opsize_r ? (~|dec_data[`DEC_SIZE] ? ST_EXE_ADDRESS : ST_EXE_FETCH) : (exe_fetch_size_r == exe_opsize_r ? ST_EXE_ADDRESS : ST_EXE_FETCH);
`endif
Expand Down Expand Up @@ -2977,27 +3032,18 @@ always @(posedge CLK) begin
3: begin
`ifdef BCD_ENABLE
if (P_r[`P_D]) begin
// do a nibble per clock to ease timing.
if (~exe_load_r & ~exe_bcd_mode_r[2]) begin
exe_bcd_src_r <= {exe_bcd_src_r[3:0],exe_bcd_src_r[15:4]};
exe_data_r <= {exe_data_r[3:0],exe_data_r[15:4]};

exe_bcd_result_r[11:0] <= exe_bcd_result_r[15:4];
if (~exe_load_r) begin
exe_bcd_val_r <= |bcd_cnt_r & ~bcd_done_r;

if (exe_data_word_r | ~exe_bcd_mode_r[1]) begin
exe_bcd_result[4:0] = exe_bcd_src_r[3:0] + exe_data_r[3:0] + exe_bcd_carry_r;
{exe_bcd_carry_r,exe_bcd_result_r[15:12]} <= exe_bcd_result[4:0] + bcd_adder(exe_bcd_result[4],exe_bcd_result[3:0]);
if (~bcd_done_r) begin
// wait on bcd state machine if not done
exe_mmc_wr_r <= 0;
EXE_STATE <= ST_EXE_EXECUTE;
end

exe_bcd_mode_r <= exe_bcd_mode_r + 1;

// cancel transition
exe_mmc_wr_r <= 0;
EXE_STATE <= ST_EXE_EXECUTE;
end
end

// NOTE: this won't set the overflow flag properly
exe_result[16:0] = exe_data_word_r ? {exe_bcd_carry_r,exe_bcd_result_r[15:0]} : {8'h00,exe_bcd_carry_r,exe_bcd_result_r[7:0]};
exe_result[16:0] = exe_data_word_r ? {bcd_c_r[3],bcd_o_r[15:0]} : {8'h00,bcd_c_r[1],bcd_o_r[7:0]};
end
else
`endif
Expand Down Expand Up @@ -3025,7 +3071,7 @@ always @(posedge CLK) begin
exe_result[16] = 0;
case (exe_opcode_r[7:5])
0: exe_result[16:0] = {exe_data_r[15:0],1'b0}; // ASL
1: exe_result[16:0] = exe_data_word_r ? {exe_data_r[15:0],P_r[`P_C]} : {8'h00,exe_data_r[7:0],P_r[`P_C]}; // ROL
1: exe_result[16:0] = {exe_data_r[15:0],P_r[`P_C]}; // ROL
2: exe_result[16:0] = exe_data_word_r ? {exe_data_r[0],1'b0,exe_data_r[15:1]} : {8'h00,exe_data_r[0],1'b0,exe_data_r[7:1]}; // LSR
3: exe_result[16:0] = exe_data_word_r ? {exe_data_r[0],P_r[`P_C],exe_data_r[15:1]} : {8'h00,exe_data_r[0],P_r[`P_C],exe_data_r[7:1]}; // ROR
//4: // STX,STY
Expand Down Expand Up @@ -3363,7 +3409,7 @@ assign int_wai = (exe_opcode_r == 8'hCB);
assign exe_mmc_addr = EXE_STATE[clog2(ST_EXE_FETCH_END)] ? exe_fetch_addr_r : EXE_STATE[clog2(ST_EXE_ADDRESS_END)] ? exe_addr_r : exe_mmc_addr_r;
assign exe_fetch_byte_val = exe_prefetch_val_r;
assign exe_fetch_byte = exe_prefetch_r;
assign exe_data = exe_data_r;
assign exe_fetch_data = exe_fetch_data_r[7:0];

`ifdef DEBUG
// breakpoints
Expand Down Expand Up @@ -3417,13 +3463,14 @@ always @(posedge CLK) begin
end
`endif

assign pipeline_advance = sa1_clock_en & ~|exe_waitcnt_r & EXE_STATE[clog2(ST_EXE_WAIT)] & step_r & ~dma_cc1_active_r & ~dma_normal_pri_active_r & ~vbd_active_r & ~WAI_r;
assign op_complete = 1;

// performance counter
reg cycle_wait_r;
reg dma_active_r;

always @(posedge CLK) begin
dma_active_r <= dma_cc1_active_r | dma_normal_pri_active_r | vbd_active_r;

`ifdef DEBUG
if (sa1_clock_en & ~|exe_waitcnt_r & EXE_STATE[clog2(ST_EXE_WAIT)] & ~step_r) cycle_wait_r <= 1;
else if (pipeline_advance) cycle_wait_r <= 0;

Expand All @@ -3433,8 +3480,11 @@ always @(posedge CLK) begin
else if ((~EXE_STATE[clog2(ST_EXE_WAIT)] | ~cycle_wait_r) & ~EXE_STATE[clog2(ST_EXE_IDLE)]) begin
sa1_cycle_cnt_r <= sa1_cycle_cnt_r + 1;
end
`endif
end

assign pipeline_advance = sa1_clock_en & ~|exe_waitcnt_r & EXE_STATE[clog2(ST_EXE_WAIT)] & step_r & ~dma_active_r & ~WAI_r;

//-------------------------------------------------------------------
// DEBUG OUTPUT
//-------------------------------------------------------------------
Expand Down
10 changes: 5 additions & 5 deletions verilog/sd2snes_sa1/sd2snes.xise
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@
<property xil_pn:name="Evaluation Development Board" xil_pn:value="None Specified" xil_pn:valueState="default"/>
<property xil_pn:name="Exclude Compilation of Deprecated EDK Cores" xil_pn:value="true" xil_pn:valueState="default"/>
<property xil_pn:name="Exclude Compilation of EDK Sub-Libraries" xil_pn:value="false" xil_pn:valueState="default"/>
<property xil_pn:name="Extra Effort" xil_pn:value="Normal" xil_pn:valueState="non-default" xil_pn:x_locked="true"/>
<property xil_pn:name="Extra Effort" xil_pn:value="None" xil_pn:valueState="default"/>
<property xil_pn:name="Extra Effort (Highest PAR level only)" xil_pn:value="None" xil_pn:valueState="default"/>
<property xil_pn:name="FPGA Start-Up Clock" xil_pn:value="CCLK" xil_pn:valueState="default"/>
<property xil_pn:name="FSM Encoding Algorithm" xil_pn:value="Auto" xil_pn:valueState="default"/>
Expand Down Expand Up @@ -272,7 +272,7 @@
<property xil_pn:name="Keep Hierarchy" xil_pn:value="No" xil_pn:valueState="default"/>
<property xil_pn:name="Language" xil_pn:value="VHDL" xil_pn:valueState="default"/>
<property xil_pn:name="Last Applied Goal" xil_pn:value="Timing Performance" xil_pn:valueState="non-default"/>
<property xil_pn:name="Last Applied Strategy" xil_pn:value="SmartXplorer - maptimingextraeffort;C:/Users/O/dev/sd2snes-sa1/verilog/sd2snes_sa1/maptimingextraeffort.xds" xil_pn:valueState="non-default"/>
<property xil_pn:name="Last Applied Strategy" xil_pn:value="SmartXplorer - maptiming2;C:/Users/O/dev/sd2snes-sa1/verilog/sd2snes_sa1/maptiming2.xds" xil_pn:valueState="non-default"/>
<property xil_pn:name="Last Unlock Status" xil_pn:value="false" xil_pn:valueState="default"/>
<property xil_pn:name="Launch SDK after Export" xil_pn:value="true" xil_pn:valueState="default"/>
<property xil_pn:name="Library for Verilog Sources" xil_pn:value="" xil_pn:valueState="default"/>
Expand Down Expand Up @@ -412,8 +412,8 @@
<property xil_pn:name="Specify Top Level Instance Names Post-Route" xil_pn:value="Default" xil_pn:valueState="default"/>
<property xil_pn:name="Specify Top Level Instance Names Post-Translate" xil_pn:value="Default" xil_pn:valueState="default"/>
<property xil_pn:name="Speed Grade" xil_pn:value="-4" xil_pn:valueState="non-default"/>
<property xil_pn:name="Starting Placer Cost Table (1-100) Map" xil_pn:value="1" xil_pn:valueState="default"/>
<property xil_pn:name="Starting Placer Cost Table (1-100) Par" xil_pn:value="1" xil_pn:valueState="default"/>
<property xil_pn:name="Starting Placer Cost Table (1-100) Map" xil_pn:value="9" xil_pn:valueState="non-default" xil_pn:x_locked="true"/>
<property xil_pn:name="Starting Placer Cost Table (1-100) Par" xil_pn:value="9" xil_pn:valueState="non-default" xil_pn:x_locked="true"/>
<property xil_pn:name="Structure window" xil_pn:value="true" xil_pn:valueState="default"/>
<property xil_pn:name="Synthesis Tool" xil_pn:value="XST (VHDL/Verilog)" xil_pn:valueState="default"/>
<property xil_pn:name="Target Simulator" xil_pn:value="Please Specify" xil_pn:valueState="default"/>
Expand Down Expand Up @@ -451,7 +451,7 @@
<property xil_pn:name="Use Synchronous Reset" xil_pn:value="Yes" xil_pn:valueState="default"/>
<property xil_pn:name="Use Synchronous Set" xil_pn:value="Yes" xil_pn:valueState="default"/>
<property xil_pn:name="Use Synthesis Constraints File" xil_pn:value="true" xil_pn:valueState="default"/>
<property xil_pn:name="User Browsed Strategy Files" xil_pn:value="C:/Users/O/dev/sd2snes-sa1/verilog/sd2snes_sa1/maptimingextraeffort.xds" xil_pn:valueState="non-default"/>
<property xil_pn:name="User Browsed Strategy Files" xil_pn:value="C:/Users/O/dev/sd2snes-sa1/verilog/sd2snes_sa1/maptiming2.xds" xil_pn:valueState="non-default"/>
<property xil_pn:name="UserID Code (8 Digit Hexadecimal)" xil_pn:value="0xFFFFFFFF" xil_pn:valueState="default"/>
<property xil_pn:name="VHDL Source Analysis Standard" xil_pn:value="VHDL-93" xil_pn:valueState="default"/>
<property xil_pn:name="VHDL Syntax" xil_pn:value="93" xil_pn:valueState="default"/>
Expand Down

0 comments on commit 37c5103

Please sign in to comment.