Skip to content

Commit

Permalink
Use TCO of C compiler to speed up emulation
Browse files Browse the repository at this point in the history
To meet the requirements of tail-call optimization (TCO), the function
emulate must be rewritten in a recursive form. To achieve this, I add a
field is_tail to the struct rv_insn_t that tells us whether an
instruction is the last one in its basic block. Using this field, we
can rewrite the function emulate as a self-recursive function.

Running the coremark and dhrystone benchmarks now produces faster
results than before; the test results are shown below.

Test environment: Intel® Core™ i7-8700 CPU @ 3.20GHz
Coremark test result:
Previous: 476.792332 Iterations/Sec
Now: 911.991797 Iterations/Sec
Dhrystone test result:
Previous: 414 DMIPS
Now: 775 DMIPS

Previously, when the function emulate terminated, it returned to the
function block_emulate, because the calling route was rv_step ->
block_emulate -> emulate -> block_emulate -> emulate -> … .
So, each time the function emulate was called, a function stack frame
was created. However, the current calling route is rv_step -> emulate ->
emulate -> …, so the function emulate can now reuse the same function
stack frame thanks to TCO. That is, every instruction in a basic block
is executed by emulate within the same function stack frame, which saves
the overhead of creating a new stack frame for each instruction.
  • Loading branch information
qwe661234 committed Dec 12, 2022
1 parent 285a988 commit 24cd6c0
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 23 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ OUT ?= build
BIN := $(OUT)/rv32emu

CFLAGS = -std=gnu99 -O2 -Wall -Wextra
CFLAGS += -fomit-frame-pointer -fno-stack-check -fno-stack-protector
CFLAGS += -include src/common.h

# Set the default stack pointer
Expand Down
1 change: 1 addition & 0 deletions src/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ typedef struct {

/* instruction length */
uint8_t insn_len;
bool is_tail;
} rv_insn_t;

/* translated basic block */
Expand Down
48 changes: 25 additions & 23 deletions src/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ static bool insn_is_misaligned(uint32_t pc)

static bool emulate(riscv_t *rv, const rv_insn_t *ir)
{
rv->X[rv_reg_zero] = 0;
switch (ir->opcode) {
/* RV32I Base Instruction Set */
case rv_insn_lui: /* LUI: Load Upper Immediate */
Expand Down Expand Up @@ -292,6 +293,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
case rv_insn_jalr: { /* JALR: Jump and Link Register */
Expand All @@ -315,6 +317,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
case rv_insn_beq: { /* BEQ: Branch if Equal */
Expand All @@ -328,6 +331,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
break;
Expand All @@ -343,6 +347,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
break;
Expand All @@ -358,6 +363,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
break;
Expand All @@ -373,6 +379,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
break;
Expand All @@ -388,6 +395,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
break;
Expand All @@ -403,6 +411,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
break;
Expand Down Expand Up @@ -553,10 +562,12 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
case rv_insn_ecall: /* ECALL: Environment Call */
rv->compressed = false;
rv->io.on_ecall(rv);
rv->csr_cycle++;
return true;
case rv_insn_ebreak: /* EBREAK: Environment Break */
rv->compressed = false;
rv->io.on_ebreak(rv);
rv->csr_cycle++;
return true;
case rv_insn_wfi: /* WFI: Wait for Interrupt */
case rv_insn_uret: /* URET: return from traps in U-mode */
Expand All @@ -567,6 +578,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
case rv_insn_mret: /* MRET: return from traps in U-mode */
rv->PC = rv->csr_mepc;
/* this is a branch */
rv->csr_cycle++;
return true;

#if RV32_HAS(Zifencei)
Expand Down Expand Up @@ -986,6 +998,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
case rv_insn_cli: /* C.LI */
/* C.LI loads the sign-extended 6-bit immediate, imm, into register rd.
Expand Down Expand Up @@ -1064,6 +1077,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
case rv_insn_cbeqz: /* C.BEQZ */
/* BEQZ performs conditional control transfers. The offset is
Expand All @@ -1074,10 +1088,12 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
*/
rv->PC += (!rv->X[ir->rs1]) ? (uint32_t) ir->imm : ir->insn_len;
/* can branch */
rv->csr_cycle++;
return true;
case rv_insn_cbnez: /* C.BEQZ */
rv->PC += (rv->X[ir->rs1]) ? (uint32_t) ir->imm : ir->insn_len;
/* can branch */
rv->csr_cycle++;
return true;
case rv_insn_cslli: /* C.SLLI */
/* C.SLLI is a CI-format instruction that performs a logical left shift
Expand All @@ -1100,6 +1116,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
case rv_insn_cjr: /* C.JR */
rv->PC = rv->X[ir->rs1];
/* can branch */
rv->csr_cycle++;
return true;
case rv_insn_cmv: /* C.MV */
rv->X[ir->rd] = rv->X[ir->rs2];
Expand All @@ -1108,6 +1125,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
rv->compressed = true;
rv->io.on_ebreak(rv);
/* can branch */
rv->csr_cycle++;
return true;
case rv_insn_cjalr: { /* C.JALR */
/* Unconditional jump and store PC+2 to ra */
Expand All @@ -1120,6 +1138,7 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)
return false;
}
/* can branch */
rv->csr_cycle++;
return true;
}
case rv_insn_cadd: /* C.ADD */
Expand Down Expand Up @@ -1147,7 +1166,10 @@ static bool emulate(riscv_t *rv, const rv_insn_t *ir)

/* step over instruction */
rv->PC += ir->insn_len;
return true;
rv->csr_cycle++;
if (ir->is_tail)
return true;
return emulate(rv, ir + 1);
}

static bool insn_is_branch(uint8_t opcode)
Expand Down Expand Up @@ -1240,27 +1262,6 @@ static block_t *block_find(const block_map_t *map, const uint32_t addr)
return NULL;
}

/* Run the instructions of a translated basic block in order.
 *
 * Returns false as soon as emulate() fails for an instruction; returns
 * true once every instruction in the block has been executed.
 */
static bool block_emulate(riscv_t *rv, const block_t *block)
{
    const uint32_t count = block->n_insn;
    const rv_insn_t *cur = block->ir;
    const rv_insn_t *const end = cur + count;

    while (cur != end) {
        /* reset the zero register before each instruction */
        rv->X[rv_reg_zero] = 0;

        /* stop the block at the first instruction that fails */
        if (!emulate(rv, cur))
            return false;

        /* count one cycle per successfully executed instruction */
        rv->csr_cycle++;
        cur++;
    }
    return true;
}

static void block_translate(riscv_t *rv, block_t *block)
{
block->pc_start = block->pc_end = rv->PC;
Expand Down Expand Up @@ -1288,6 +1289,7 @@ static void block_translate(riscv_t *rv, block_t *block)
if (insn_is_branch(ir->opcode))
break;
}
(block->ir + block->n_insn - 1)->is_tail = true;
}

static block_t *block_find_or_translate(riscv_t *rv, block_t *prev)
Expand Down Expand Up @@ -1350,7 +1352,7 @@ void rv_step(riscv_t *rv, int32_t cycles)
assert(block);

/* execute the block */
if (!block_emulate(rv, block))
if (!emulate(rv, block->ir))
break;

prev = block;
Expand Down

0 comments on commit 24cd6c0

Please sign in to comment.