Skip to content

Commit

Permalink
JIT compiler update
Browse files Browse the repository at this point in the history
  • Loading branch information
Zoltan Herczeg committed Dec 3, 2022
1 parent 8d56880 commit 8d99540
Show file tree
Hide file tree
Showing 16 changed files with 1,850 additions and 350 deletions.
57 changes: 53 additions & 4 deletions src/sljit/sljitLir.c
Original file line number Diff line number Diff line change
Expand Up @@ -993,14 +993,14 @@ static const char* op0_names[] = {
/* Printable names / suffixes for the single-operand opcodes.
   NOTE(review): presumably indexed by GET_OPCODE(op) - SLJIT_OP1_BASE in the
   verbose dump, mirroring how op2_names is indexed - confirm at the use site.
   The entry order must match the numeric order of the opcodes (e.g. "ctz"
   follows "clz" because SLJIT_CTZ is SLJIT_OP1_BASE + 11). */
static const char* op1_names[] = {
"", ".u8", ".s8", ".u16",
".s16", ".u32", ".s32", "32",
".p", "not", "clz",
".p", "not", "clz", "ctz"
};

/* Printable mnemonics for the two-operand opcodes, used by the verbose
   dump and indexed by GET_OPCODE(op) - SLJIT_OP2_BASE. The entry order must
   therefore match the numeric order of the opcodes ("rotl" and "rotr" are
   the entries for SLJIT_OP2_BASE + 14 and SLJIT_OP2_BASE + 15). */
static const char* op2_names[] = {
"add", "addc", "sub", "subc",
"mul", "and", "or", "xor",
"shl", "mshl", "lshr", "mlshr",
"ashr", "mashr"
"ashr", "mashr", "rotl", "rotr"
};

static const char* op_src_names[] = {
Expand Down Expand Up @@ -1326,7 +1326,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
}

#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ);
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CTZ);

switch (GET_OPCODE(op)) {
case SLJIT_NOT:
Expand Down Expand Up @@ -1387,7 +1387,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
}

#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_MASHR);
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ROTR);

switch (GET_OPCODE(op)) {
case SLJIT_AND:
Expand Down Expand Up @@ -1423,6 +1423,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
CHECK_ARGUMENT((op & SLJIT_32) == (compiler->last_flags & SLJIT_32));
break;
case SLJIT_ROTL:
case SLJIT_ROTR:
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
break;
default:
SLJIT_UNREACHABLE();
break;
Expand Down Expand Up @@ -1456,6 +1460,35 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
CHECK_RETURN_OK;
}

/* Argument checker and verbose logger for sljit_emit_shift_into.

   When SLJIT_ARGUMENT_CHECKS is enabled it verifies that:
     - the opcode is SLJIT_SHL, SLJIT_LSHR, SLJIT_MSHL or SLJIT_MLSHR,
     - no bits are set in op other than the low 8 opcode bits, SLJIT_32
       and SLJIT_SHIFT_INTO_NON_ZERO,
     - src_dst is a register,
     - src1 / src2 pass FUNCTION_CHECK_SRC (presumably "valid source
       operand" - the macro is defined outside this view).

   When SLJIT_VERBOSE is enabled and compiler->verbose is set, it prints
   one line describing the operation to compiler->verbose.

   Finishes with CHECK_RETURN_OK. */
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src_dst,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
/* Only the logical shifts (plain and masked) are accepted. */
CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_LSHR
|| GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR);
/* Reject any modifier bit other than SLJIT_32 and SLJIT_SHIFT_INTO_NON_ZERO. */
CHECK_ARGUMENT((op & ~(0xff | SLJIT_32 | SLJIT_SHIFT_INTO_NON_ZERO)) == 0);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src_dst));
FUNCTION_CHECK_SRC(src1, src1w);
FUNCTION_CHECK_SRC(src2, src2w);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
/* Mnemonic, optional "32" suffix, ".into", and ".nz" when the caller
   set SLJIT_SHIFT_INTO_NON_ZERO. */
fprintf(compiler->verbose, " %s%s.into%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32",
(op & SLJIT_SHIFT_INTO_NON_ZERO) ? ".nz" : "");

/* Operands are printed in the order: src_dst, src1, src2. */
sljit_verbose_reg(compiler, src_dst);
fprintf(compiler->verbose, ", ");
sljit_verbose_param(compiler, src1, src1w);
fprintf(compiler->verbose, ", ");
sljit_verbose_param(compiler, src2, src2w);
fprintf(compiler->verbose, "\n");
}
#endif
CHECK_RETURN_OK;
}

static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw)
{
Expand Down Expand Up @@ -2809,6 +2842,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
return SLJIT_ERR_UNSUPPORTED;
}

/* Placeholder implementation of sljit_emit_shift_into: it emits nothing.
   Every argument is marked as unused, SLJIT_UNREACHABLE() flags that this
   path is not expected to run, and SLJIT_ERR_UNSUPPORTED is returned.
   NOTE(review): presumably this lives in the "unsupported configuration"
   section next to the other stubs that return SLJIT_ERR_UNSUPPORTED -
   the enclosing #if is outside this view. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src_dst,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(op);
SLJIT_UNUSED_ARG(src_dst);
SLJIT_UNUSED_ARG(src1);
SLJIT_UNUSED_ARG(src1w);
SLJIT_UNUSED_ARG(src2);
SLJIT_UNUSED_ARG(src2w);
SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw)
{
Expand Down
69 changes: 66 additions & 3 deletions src/sljit/sljitLir.h
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,8 @@ static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *
static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }

/* Returns with non-zero if the feature or limitation type passed as its
argument is present on the current CPU.
argument is present on the current CPU. The return value is one, if a
feature is fully supported, and it is two, if partially supported.
Some features (e.g. floating point operations) require hardware (CPU)
support while others (e.g. move with update) are emulated if not available.
Expand All @@ -625,10 +626,14 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
#define SLJIT_HAS_ZERO_REGISTER 2
/* [Emulated] Count leading zero is supported. */
#define SLJIT_HAS_CLZ 3
/* [Emulated] Count trailing zero is supported. */
#define SLJIT_HAS_CTZ 4
/* [Emulated] Rotate left/right is supported. */
#define SLJIT_HAS_ROT 5
/* [Emulated] Conditional move is supported. */
#define SLJIT_HAS_CMOV 4
#define SLJIT_HAS_CMOV 6
/* [Emulated] Prefetch instruction is available (emulated as a nop). */
#define SLJIT_HAS_PREFETCH 5
#define SLJIT_HAS_PREFETCH 7

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* [Not emulated] SSE2 support is available on x86. */
Expand Down Expand Up @@ -1061,6 +1066,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
Note: immediate source argument is not supported */
#define SLJIT_CLZ (SLJIT_OP1_BASE + 10)
#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32)
/* Count trailing zeroes
Flags: - (may destroy flags)
Note: immediate source argument is not supported */
#define SLJIT_CTZ (SLJIT_OP1_BASE + 11)
#define SLJIT_CTZ32 (SLJIT_CTZ | SLJIT_32)

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
Expand Down Expand Up @@ -1132,6 +1142,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
always masked by the length of the shift operation. */
#define SLJIT_MASHR (SLJIT_OP2_BASE + 13)
#define SLJIT_MASHR32 (SLJIT_MASHR | SLJIT_32)
/* Flags: - (may destroy flags)
Let bit_length be the length of the rotate operation: 32 or 64.
The second operand is always masked by (bit_length - 1). */
#define SLJIT_ROTL (SLJIT_OP2_BASE + 14)
#define SLJIT_ROTL32 (SLJIT_ROTL | SLJIT_32)
/* Flags: - (may destroy flags)
Let bit_length be the length of the rotate operation: 32 or 64.
The second operand is always masked by (bit_length - 1). */
#define SLJIT_ROTR (SLJIT_OP2_BASE + 15)
#define SLJIT_ROTR32 (SLJIT_ROTR | SLJIT_32)

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
Expand All @@ -1145,6 +1165,49 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);

/* Emit a left or right shift operation, where the bits shifted
in come from a separate source operand. All operands are
interpreted as unsigned integers.
In the following, the value_mask variable is 31 for 32 bit
operations and word_size - 1 otherwise.
op must be one of the following operations:
SLJIT_SHL or SLJIT_SHL32:
src_dst <<= src2
src_dst |= ((src1 >> 1) >> (src2 ^ value_mask))
SLJIT_MSHL or SLJIT_MSHL32:
src2 &= value_mask
perform the SLJIT_SHL or SLJIT_SHL32 operation
SLJIT_LSHR or SLJIT_LSHR32:
src_dst >>= src2
src_dst |= ((src1 << 1) << (src2 ^ value_mask))
SLJIT_MLSHR or SLJIT_MLSHR32:
src2 &= value_mask
perform the SLJIT_LSHR or SLJIT_LSHR32 operation
op can be combined (or'ed) with SLJIT_SHIFT_INTO_NON_ZERO
src_dst must be a register whose content is updated after
the operation is completed
src1 / src1w contains the bits which are shifted into src_dst
src2 / src2w contains the shift amount
Note: a rotate operation can be performed if src_dst and
src1 are set to the same register
Flags: - (may destroy flags) */

/* The src2 contains a non-zero value. Improves the generated
code on certain architectures, which provides a small
performance improvement. */
#define SLJIT_SHIFT_INTO_NON_ZERO 0x200

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src_dst,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);

/* Starting index of opcodes for sljit_emit_op_src. */
#define SLJIT_OP_SRC_BASE 128

Expand Down
98 changes: 96 additions & 2 deletions src/sljit/sljitNativeARM_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ORR 0xe1800000
#define PUSH 0xe92d0000
#define POP 0xe8bd0000
#define RBIT 0xe6ff0f30
#define RSB 0xe0600000
#define RSC 0xe0e00000
#define SBC 0xe0c00000
Expand Down Expand Up @@ -959,12 +960,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#endif

case SLJIT_HAS_CLZ:
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
case SLJIT_HAS_CTZ:
case SLJIT_HAS_PREFETCH:
#endif
return 1;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
case SLJIT_HAS_CTZ:
return 2;
#endif

default:
return 0;
}
Expand Down Expand Up @@ -1478,11 +1486,24 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2));

case SLJIT_CLZ:
SLJIT_ASSERT(!(flags & INV_IMM));
SLJIT_ASSERT(!(src2 & SRC2_IMM));
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
return SLJIT_SUCCESS;

case SLJIT_CTZ:
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0));
FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1)));
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2)));
FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
#else /* !SLJIT_CONFIG_ARM_V5 */
FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
return push_inst(compiler, CLZ | RD(dst) | RM(dst));
#endif /* SLJIT_CONFIG_ARM_V5 */

case SLJIT_ADD:
SLJIT_ASSERT(!(flags & INV_IMM));

Expand Down Expand Up @@ -1553,6 +1574,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
is_masked = GET_OPCODE(op) == SLJIT_MASHR;
break;

case SLJIT_ROTL:
if (compiler->shift_imm == 0x20) {
FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
src2 = TMP_REG2;
} else
compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
/* fallthrough */

case SLJIT_ROTR:
shift_type = 3;
is_masked = 0;
break;

default:
SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
Expand Down Expand Up @@ -2125,6 +2159,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

case SLJIT_CLZ:
case SLJIT_CTZ:
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
}

Expand Down Expand Up @@ -2165,6 +2200,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
case SLJIT_MLSHR:
case SLJIT_ASHR:
case SLJIT_MASHR:
case SLJIT_ROTL:
case SLJIT_ROTR:
if (src2 & SLJIT_IMM) {
compiler->shift_imm = src2w & 0x1f;
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
Expand All @@ -2188,6 +2225,63 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
}

/* ARM (32 bit) implementation of sljit_emit_shift_into.

   Shifts src_dst left (SLJIT_SHL / SLJIT_MSHL) or right (SLJIT_LSHR /
   SLJIT_MLSHR) by src2 and fills the vacated bits from src1:

     left:   src_dst = (src_dst << src2) | ((src1 >> 1) >> (src2 ^ 31))
     right:  src_dst = (src_dst >> src2) | ((src1 << 1) << (src2 ^ 31))

   compiler         the compiler instance the instructions are pushed to
   op               one of the four shift opcodes, optionally or'ed with
                    modifier bits; only GET_OPCODE(op) is used here
   src_dst          register that is both shifted and updated
   src1 / src1w     operand supplying the bits shifted in (register,
                    memory or immediate)
   src2 / src2w     shift amount (register, memory or immediate)

   Returns SLJIT_SUCCESS when nothing needs to be emitted (immediate shift
   amount of zero), otherwise the result of the last emit call; failures of
   earlier emit calls are propagated through FAIL_IF.

   Encoding notes for the data-processing instructions pushed below:
   bits 5-6 select the shift type (0 = LSL, 1 = LSR), bits 7-11 hold an
   immediate shift amount, and bit 4 (0x10) selects the form where the
   shift amount is taken from the register placed by RM8(). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	/* Shifting a register into itself is a plain rotate. */
	if (src_dst == src1) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, src_dst, 0, src_dst, 0, src2, src2w);
	}

	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (src2 & SLJIT_IMM) {
		src2w &= 0x1f;

		/* A zero shift leaves src_dst unchanged: nothing to emit. */
		if (src2w == 0)
			return SLJIT_SUCCESS;
	} else if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src2, src2w, TMP_REG2));
		src2 = TMP_REG2;
	}

	/* Bring src1 into a register (TMP_REG1) when it is not one already. */
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1 = TMP_REG1;
	} else if (src1 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
	}

	if (src2 & SLJIT_IMM) {
		/* Constant shift (1..31): shift src_dst, then or in src1 shifted
		   the opposite way by (32 - amount). */
		FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM(src_dst) | ((sljit_uw)(is_left ? 0 : 1) << 5) | ((sljit_uw)src2w << 7)));
		src2w = (src2w ^ 0x1f) + 1;
		return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | ((sljit_uw)src2w << 7));
	}

	/* The shift amount is read twice below: once to shift src_dst and once
	   (after src_dst has been overwritten) to compute the complementary
	   shift. It is therefore copied into TMP_REG2 not only for the masked
	   opcodes but also when it aliases src_dst, otherwise the second read
	   would see the already shifted value. */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || src_dst == src2) {
		FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f));
		src2 = TMP_REG2;
	}

	/* src_dst = src_dst shifted by src2. */
	FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM8(src2) | ((sljit_uw)(is_left ? 0 : 1) << 5) | 0x10 | RM(src_dst)));
	/* TMP_REG1 = src1 shifted by one in the opposite direction. */
	FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | (1 << 7)));
	/* TMP_REG2 = src2 ^ 31, the remaining part of the opposite shift. */
	FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f));
	/* src_dst |= TMP_REG1 shifted by TMP_REG2 in the opposite direction. */
	return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(TMP_REG1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | 0x10 | RM8(TMP_REG2));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw)
{
Expand Down
Loading

0 comments on commit 8d99540

Please sign in to comment.