diff --git a/verilog/sd2snes_sa1/sa1.v b/verilog/sd2snes_sa1/sa1.v index 18031fd4..5b316b3b 100644 --- a/verilog/sd2snes_sa1/sa1.v +++ b/verilog/sd2snes_sa1/sa1.v @@ -142,7 +142,7 @@ module sa1( `define DMA_TYPE1_ENABLE `define DMA_TYPE2_ENABLE `define VBD_ENABLE -//`define BCD_ENABLE +`define BCD_ENABLE `define EXE_FAST_FETCH @@ -1096,7 +1096,7 @@ wire exe_mmc_int; wire exe_fetch_byte_val; wire [7:0] exe_fetch_byte; -wire [31:0] exe_data; +wire [7:0] exe_fetch_data; //------------------------------------------------------------------- // COMMON PIPELINE @@ -2519,7 +2519,7 @@ end // need to take from the input so we get a clock //wire [7:0] dec_addr = MMC_STATE[clog2(ST_MMC_ROM)] ? ROM_BUS_RDDATA[7:0] : mmc_data_r[7:0]; -wire [7:0] dec_addr = exe_mmc_int ? 8'h00 : exe_fetch_byte_val ? exe_fetch_byte[7:0] : mmc_exe_end ? exe_mmc_rddata[7:0] : exe_data[7:0]; +wire [7:0] dec_addr = exe_mmc_int ? 8'h00 : exe_fetch_byte_val ? exe_fetch_byte[7:0] : mmc_exe_end ? exe_mmc_rddata[7:0] : exe_fetch_data[7:0]; wire [31:0] dec_data; dec_table dec ( @@ -2607,11 +2607,70 @@ end assign exe_wai = EXE_STATE[clog2(ST_EXE_EXECUTE)] & int_wai; assign exe_mmc_int = int_pending_r; +//------------------------------------------------------------------- +// BCD +//------------------------------------------------------------------- +`ifdef BCD_ENABLE +reg [15:0] bcd_a_r; +reg [15:0] bcd_b_r; +reg [15:0] bcd_o_r; +reg [3:0] bcd_c_r; + +reg [1:0] bcd_cnt_r; initial bcd_cnt_r = 3; +reg bcd_state_r; initial bcd_state_r = 0; +reg bcd_done_r; initial bcd_done_r = 0; + +// exe inputs +reg exe_bcd_val_r; initial exe_bcd_val_r = 0; +reg [15:0] exe_bcd_a_r; initial exe_bcd_a_r = 0; +reg [15:0] exe_bcd_b_r; initial exe_bcd_b_r = 0; +reg exe_bcd_c_r; initial exe_bcd_c_r = 0; + +wire [4:0] bcd_result; + +always @(posedge CLK) begin + if (RST) begin + bcd_state_r <= 0; + bcd_done_r <= 0; + bcd_cnt_r <= 3; + end + else begin + if (~bcd_state_r) begin + bcd_done_r <= 0; + + if (exe_bcd_val_r) begin + bcd_a_r <= exe_bcd_a_r; + bcd_b_r <= exe_bcd_b_r; + bcd_c_r[3] <= exe_bcd_c_r; + + bcd_state_r <= 1; + end + end + else begin + bcd_a_r <= {bcd_a_r[3:0],bcd_a_r[15:4]}; + bcd_b_r <= {bcd_b_r[3:0],bcd_b_r[15:4]}; + + bcd_o_r[11:0] <= bcd_o_r[15:4]; + bcd_c_r[2:0] <= bcd_c_r[3:1]; + + {bcd_c_r[3],bcd_o_r[15:12]} <= bcd_result[4:0] + bcd_adder(bcd_result[4],bcd_result[3:0]); + + bcd_cnt_r <= bcd_cnt_r - 1; + bcd_done_r <= ~|bcd_cnt_r; + bcd_state_r <= |bcd_cnt_r; + end + end +end + +assign bcd_result = bcd_a_r[3:0] + bcd_b_r[3:0] + bcd_c_r[3]; +`endif + //------------------------------------------------------------------- // EXECUTION PIPELINE //------------------------------------------------------------------- reg [31:0] exe_data_r; +reg [15:0] exe_fetch_data_r; reg [23:0] exe_addr_r; initial exe_addr_r = 0; reg [23:0] exe_mmc_addr_r; initial exe_mmc_addr_r = 0; @@ -2655,20 +2714,20 @@ wire exe_dpe = ~|D_r[7:0] & E_r; wire exe_data_word = |({~P_r[`P_X],~P_r[`P_M]}&dec_data[`DEC_PRC]) | &dec_data[`DEC_PRC]; wire exe_dec_imm16 = (|({~P_r[`P_X],~P_r[`P_M]}&dec_data[`DEC_PRC]) & dec_data[`ADD_IMM]); -`ifdef BCD_ENABLE -// bcd -reg [2:0] exe_bcd_mode_r; -reg [15:0] exe_bcd_src_r; -reg [15:0] exe_bcd_result_r; -reg exe_bcd_carry_r; -reg [4:0] exe_bcd_result; -`endif - // temporary reg [16:0] exe_result; reg [16:0] add_result; always @(posedge CLK) begin + +`ifdef BCD_ENABLE + // drive BCD inputs + exe_bcd_a_r <= exe_src_r[15:0]; + // invert for SBC + exe_bcd_b_r <= exe_opcode_r[7] ? ~exe_data_r[15:0] : exe_data_r[15:0]; + exe_bcd_c_r <= P_r[`P_C]; +`endif + if (RST) begin EXE_STATE <= ST_EXE_IDLE; @@ -2683,9 +2742,9 @@ always @(posedge CLK) begin P_r <= 8'h34; E_r <= 1; - exe_fetch_addr_r <= 0; - exe_addr_r <= 0; - exe_mmc_addr_r <= 0; + //exe_fetch_addr_r <= 0; + //exe_addr_r <= 0; + //exe_mmc_addr_r <= 0; exe_opsize_r <= 0; exe_fetch_size_r <= 0; @@ -2693,8 +2752,8 @@ always @(posedge CLK) begin exe_operand_r <= 0; exe_decode_r <= 0; - exe_src_r <= 16'h0BAD; - exe_dst_r <= 16'h0BAD; + //exe_src_r <= 16'h0BAD; + //exe_dst_r <= 16'h0BAD; exe_control_r <= 0; exe_pbr_r <= 0; @@ -2704,7 +2763,7 @@ always @(posedge CLK) begin exe_mmc_rd_r <= 0; exe_mmc_wr_r <= 0; - exe_mmc_data_r<= 0; + //exe_mmc_data_r<= 0; exe_mmc_long_r<= 0; exe_mmc_byte_total_r <= 0; @@ -2714,6 +2773,8 @@ always @(posedge CLK) begin exe_prefetch_val_r <= 0; + exe_bcd_val_r <= 0; + e2c_waitcnt_r <= 0; end else begin @@ -2756,7 +2817,7 @@ always @(posedge CLK) begin // always stop the read at END if (mmc_exe_end) begin exe_mmc_rd_r <= 0; - exe_data_r <= exe_mmc_rddata; + exe_fetch_data_r <= exe_mmc_rddata[15:0]; end // TODO: fill in other data sources @@ -2764,10 +2825,10 @@ always @(posedge CLK) begin // The decode rom takes an additional clock. if (exe_mmc_state_exe_end_r) begin - exe_data_r <= int_pending_r ? 8'h00 : exe_prefetch_val_r ? exe_prefetch_r : exe_data_r; + //exe_fetch_data_r <= int_pending_r ? 8'h00 : exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r; if (~|exe_opsize_r) begin - exe_opcode_r <= exe_mmc_int ? 8'h00 : exe_prefetch_val_r ? exe_prefetch_r : exe_data_r[7:0]; + exe_opcode_r <= exe_mmc_int ? 8'h00 : exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r[7:0]; // word size only affects immediate for fetch exe_opsize_r <= dec_data[`DEC_SIZE] ^ {2{exe_dec_imm16}}; exe_control_r <= dec_data[`DEC_CONTROL]; @@ -2800,18 +2861,12 @@ always @(posedge CLK) begin exe_p_r <= P_r; exe_e_r <= E_r; -`ifdef BCD_ENABLE - exe_bcd_mode_r <= 0; - exe_bcd_carry_r <= P_r[`P_C]; - exe_bcd_src_r <= A_r; -`endif - `ifdef EXE_FAST_FETCH // next state, address, and prefetch logic. if (~|exe_opsize_r) begin // initial decode // `define DEC_SIZE 16:15 - exe_operand_r[7:0] <= exe_data_r[15:8]; + exe_operand_r[7:0] <= exe_fetch_data_r[15:8]; if (dec_data[16] | exe_dec_imm16 | (exe_fetch_addr_r[0] & dec_data[15])) begin // 3,4 bytes or 2 misaligned bytes @@ -2830,7 +2885,7 @@ always @(posedge CLK) begin // prefetch is valid if aligned 1 byte exe_prefetch_val_r <= ~exe_fetch_addr_r[0] && (dec_data[`DEC_SIZE] == `SZE_1); - exe_prefetch_r <= exe_data_r[15:8]; + exe_prefetch_r <= exe_fetch_data_r[15:8]; EXE_STATE <= ST_EXE_ADDRESS; end @@ -2841,11 +2896,11 @@ always @(posedge CLK) begin case (exe_fetch_size_r) // have 1 byte - `SZE_1: exe_operand_r[15:0] <= exe_data_r[15:0]; + `SZE_1: exe_operand_r[15:0] <= exe_fetch_data_r[15:0]; // have 2 bytes - `SZE_2: exe_operand_r[23:8] <= exe_data_r[15:0]; + `SZE_2: exe_operand_r[23:8] <= exe_fetch_data_r[15:0]; // have 3 bytes - `SZE_3: exe_operand_r[23:16] <= exe_data_r[7:0]; + `SZE_3: exe_operand_r[23:16] <= exe_fetch_data_r[7:0]; // have 4 bytes. not possible `SZE_4: exe_operand_r[23:0] <= 24'hBADBAD; endcase @@ -2867,7 +2922,7 @@ always @(posedge CLK) begin // check if prefetch available (overfetch) exe_prefetch_val_r <= exe_fetch_size_r[0] ^ exe_opsize_r[0]; - exe_prefetch_r <= exe_data_r[15:8]; + exe_prefetch_r <= exe_fetch_data_r[15:8]; EXE_STATE <= ST_EXE_ADDRESS; end @@ -2879,14 +2934,14 @@ always @(posedge CLK) begin case (exe_fetch_size_r) `SZE_1: begin end - `SZE_2: exe_operand_r[7 : 0] <= exe_prefetch_val_r ? exe_prefetch_r : exe_data_r[7:0]; - `SZE_3: exe_operand_r[15: 8] <= exe_prefetch_val_r ? exe_prefetch_r : exe_data_r[7:0]; - `SZE_4: exe_operand_r[23:16] <= exe_prefetch_val_r ? exe_prefetch_r : exe_data_r[7:0]; + `SZE_2: exe_operand_r[7 : 0] <= exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r[7:0]; + `SZE_3: exe_operand_r[15: 8] <= exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r[7:0]; + `SZE_4: exe_operand_r[23:16] <= exe_prefetch_val_r ? exe_prefetch_r : exe_fetch_data_r[7:0]; endcase // TODO: the memory controller actually returns 2 sequential bytes independent of source, but we still want to force alignment. exe_prefetch_val_r <= ~exe_prefetch_val_r & (~exe_fetch_addr_r[0] | ~`IS_ROM(exe_fetch_addr_r)); - exe_prefetch_r <= exe_data_r[15:8]; + exe_prefetch_r <= exe_fetch_data_r[15:8]; EXE_STATE <= ~|exe_opsize_r ? (~|dec_data[`DEC_SIZE] ? ST_EXE_ADDRESS : ST_EXE_FETCH) : (exe_fetch_size_r == exe_opsize_r ? ST_EXE_ADDRESS : ST_EXE_FETCH); `endif @@ -2977,27 +3032,18 @@ always @(posedge CLK) begin 3: begin `ifdef BCD_ENABLE if (P_r[`P_D]) begin - // do a nibble per clock to ease timing. - if (~exe_load_r & ~exe_bcd_mode_r[2]) begin - exe_bcd_src_r <= {exe_bcd_src_r[3:0],exe_bcd_src_r[15:4]}; - exe_data_r <= {exe_data_r[3:0],exe_data_r[15:4]}; - - exe_bcd_result_r[11:0] <= exe_bcd_result_r[15:4]; + if (~exe_load_r) begin + exe_bcd_val_r <= |bcd_cnt_r & ~bcd_done_r; - if (exe_data_word_r | ~exe_bcd_mode_r[1]) begin - exe_bcd_result[4:0] = exe_bcd_src_r[3:0] + exe_data_r[3:0] + exe_bcd_carry_r; - {exe_bcd_carry_r,exe_bcd_result_r[15:12]} <= exe_bcd_result[4:0] + bcd_adder(exe_bcd_result[4],exe_bcd_result[3:0]); + if (~bcd_done_r) begin + // wait on bcd state machine if not done + exe_mmc_wr_r <= 0; + EXE_STATE <= ST_EXE_EXECUTE; end - - exe_bcd_mode_r <= exe_bcd_mode_r + 1; - - // cancel transition - exe_mmc_wr_r <= 0; - EXE_STATE <= ST_EXE_EXECUTE; - end + end // NOTE: this won't set the overflow flag properly - exe_result[16:0] = exe_data_word_r ? {exe_bcd_carry_r,exe_bcd_result_r[15:0]} : {8'h00,exe_bcd_carry_r,exe_bcd_result_r[7:0]}; + exe_result[16:0] = exe_data_word_r ? {bcd_c_r[3],bcd_o_r[15:0]} : {8'h00,bcd_c_r[1],bcd_o_r[7:0]}; end else `endif @@ -3025,7 +3071,7 @@ always @(posedge CLK) begin exe_result[16] = 0; case (exe_opcode_r[7:5]) 0: exe_result[16:0] = {exe_data_r[15:0],1'b0}; // ASL - 1: exe_result[16:0] = exe_data_word_r ? {exe_data_r[15:0],P_r[`P_C]} : {8'h00,exe_data_r[7:0],P_r[`P_C]}; // ROL + 1: exe_result[16:0] = {exe_data_r[15:0],P_r[`P_C]}; // ROL 2: exe_result[16:0] = exe_data_word_r ? {exe_data_r[0],1'b0,exe_data_r[15:1]} : {8'h00,exe_data_r[0],1'b0,exe_data_r[7:1]}; // LSR 3: exe_result[16:0] = exe_data_word_r ? {exe_data_r[0],P_r[`P_C],exe_data_r[15:1]} : {8'h00,exe_data_r[0],P_r[`P_C],exe_data_r[7:1]}; // ROR //4: // STX,STY @@ -3363,7 +3409,7 @@ assign int_wai = (exe_opcode_r == 8'hCB); assign exe_mmc_addr = EXE_STATE[clog2(ST_EXE_FETCH_END)] ? exe_fetch_addr_r : EXE_STATE[clog2(ST_EXE_ADDRESS_END)] ? exe_addr_r : exe_mmc_addr_r; assign exe_fetch_byte_val = exe_prefetch_val_r; assign exe_fetch_byte = exe_prefetch_r; -assign exe_data = exe_data_r; +assign exe_fetch_data = exe_fetch_data_r[7:0]; `ifdef DEBUG // breakpoints @@ -3417,13 +3463,14 @@ always @(posedge CLK) begin end `endif -assign pipeline_advance = sa1_clock_en & ~|exe_waitcnt_r & EXE_STATE[clog2(ST_EXE_WAIT)] & step_r & ~dma_cc1_active_r & ~dma_normal_pri_active_r & ~vbd_active_r & ~WAI_r; -assign op_complete = 1; - // performance counter reg cycle_wait_r; +reg dma_active_r; always @(posedge CLK) begin + dma_active_r <= dma_cc1_active_r | dma_normal_pri_active_r | vbd_active_r; + +`ifdef DEBUG if (sa1_clock_en & ~|exe_waitcnt_r & EXE_STATE[clog2(ST_EXE_WAIT)] & ~step_r) cycle_wait_r <= 1; else if (pipeline_advance) cycle_wait_r <= 0; @@ -3433,8 +3480,11 @@ always @(posedge CLK) begin else if ((~EXE_STATE[clog2(ST_EXE_WAIT)] | ~cycle_wait_r) & ~EXE_STATE[clog2(ST_EXE_IDLE)]) begin sa1_cycle_cnt_r <= sa1_cycle_cnt_r + 1; end +`endif end +assign pipeline_advance = sa1_clock_en & ~|exe_waitcnt_r & EXE_STATE[clog2(ST_EXE_WAIT)] & step_r & ~dma_active_r & ~WAI_r; + //------------------------------------------------------------------- // DEBUG OUTPUT //------------------------------------------------------------------- diff --git a/verilog/sd2snes_sa1/sd2snes.xise b/verilog/sd2snes_sa1/sd2snes.xise index 370fe54d..fc304e3f 100644 --- a/verilog/sd2snes_sa1/sd2snes.xise +++ b/verilog/sd2snes_sa1/sd2snes.xise @@ -217,7 +217,7 @@ - + @@ -272,7 +272,7 @@ - + @@ -412,8 +412,8 @@ - - + + @@ -451,7 +451,7 @@ - +