Skip to content

Commit

Permalink
arm64: implement atomic-store with CAS
Browse files Browse the repository at this point in the history
Only available with ARMv8.1 or higher, so it is kept behind a feature macro that
an ACLE-compatible compiler would define depending on -march.

As a side effect, this avoids the races that could result in an infinite loop
on M1.
  • Loading branch information
carenas committed Apr 27, 2023
1 parent b7fed8e commit fc65186
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 12 deletions.
57 changes: 48 additions & 9 deletions sljit_src/sljitNativeARM_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,18 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define BLR 0xd63f0000
#define BR 0xd61f0000
#define BRK 0xd4200000
/* Compare-and-swap opcodes. In this list the "L" (release) variants differ
   from their plain counterparts only by bit 15 (0x8000): compare CASH with
   CASLH. CASLB previously duplicated CASB, which dropped the release
   semantics from byte-sized stores. */
#define CASA 0xc8e07c00
#define CASB 0x08a07c00
#define CASH 0x48a07c00
#define CASL 0xc8a0fc00
#define CASLB 0x08a0fc00
#define CASLH 0x48a0fc00
#define CBZ 0xb4000000
#define CCMPI 0xfa400800
#define CLZ 0xdac01000
#define CSEL 0x9a800000
#define CSINC 0x9a800400
#define CLREX 0xd5033f5f
#define EOR 0xca000000
#define EORI 0xd2000000
#define EXTR 0x93c00000
Expand Down Expand Up @@ -2483,14 +2490,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst_reg,
sljit_s32 mem_reg)
static SLJIT_INLINE sljit_ins atomic_load_ins(const sljit_s32 op)
{
sljit_ins ins = 0;

CHECK_ERROR();
CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
sljit_ins ins;

switch (GET_OPCODE(op)) {
case SLJIT_MOV32:
Expand All @@ -2508,7 +2510,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler
break;
}

return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg));
return ins;
}

/* Emits the load half of an atomic load/store pair.

   compiler: code generator state; bails out early through CHECK_ERROR().
   op:       move opcode, forwarded to the selected load emitter.
   dst_reg:  register receiving the loaded value.
   mem_reg:  register holding the address to load from.

   Returns the status code of the emitter that was called. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

#if defined(__ARM_FEATURE_ATOMICS)
	/* With the atomics feature macro defined, the load is emitted as an
	   ordinary register <- memory move through sljit_emit_op1(). */
	return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
#else /* !__ARM_FEATURE_ATOMICS */
	/* Otherwise use the opcode chosen by atomic_load_ins() for this op. */
	return push_inst(compiler, atomic_load_ins(op) | RT(dst_reg) | RN(mem_reg));
#endif /* __ARM_FEATURE_ATOMICS */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
Expand All @@ -2521,6 +2537,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler
CHECK_ERROR();
CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

#if __ARM_FEATURE_ATOMICS
switch (GET_OPCODE(op)) {
case SLJIT_MOV32:
case SLJIT_MOV_U32:
ins = CASL ^ (1 << 30);
break;
case SLJIT_MOV_U8:
ins = CASLB;
break;
case SLJIT_MOV_U16:
ins = CASLH;
break;
default:
ins = CASL;
break;
}
FAIL_IF(push_inst(compiler, atomic_load_ins(op) | RN(mem_reg) | RT(TMP_REG2)));
FAIL_IF(push_inst(compiler, SUBS | RM(TMP_REG2) | RN(temp_reg) | RD(TMP_REG1)));
FAIL_IF(push_inst(compiler, B_CC | (2 << 5) | 0x1));
FAIL_IF(push_inst(compiler, ins | RM(temp_reg) | RN(mem_reg) | RD(src_reg)));
FAIL_IF(push_inst(compiler, CLREX));
#else
switch (GET_OPCODE(op)) {
case SLJIT_MOV32:
case SLJIT_MOV_U32:
Expand All @@ -2538,7 +2576,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler
}

FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg)));
return push_inst(compiler, (SUBI ^ W_OP) | (1 << 29) | RD(TMP_ZERO) | RN(TMP_REG1));
#endif /* Armv8.1 LSE */
return (op & SLJIT_SET_ATOMIC_STORED) ? push_inst(compiler, (SUBI ^ W_OP) | (1 << 29) | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
Expand Down
35 changes: 32 additions & 3 deletions test_src/sljitTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -11530,15 +11530,15 @@ static void test92(void)
struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL);
struct sljit_label *label;
struct sljit_jump *jump;
sljit_sw buf[32];
sljit_sw buf[36];
sljit_s32 i;

if (verbose)
printf("Run test92\n");

FAILED(!compiler, "cannot create compiler\n");

for (i = 0; i < 32; i++)
for (i = 0; i < 36; i++)
buf[i] = -1;

buf[0] = 4678;
Expand All @@ -11553,6 +11553,7 @@ static void test92(void)
((sljit_u8*)(buf + 26))[1] = 105;
((sljit_u8*)(buf + 28))[2] = 13;
((sljit_u16*)(buf + 30))[1] = 14876;
buf[34] = -4678;

sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 5, 5, 0, 0, 2 * sizeof(sljit_sw));

Expand Down Expand Up @@ -11707,6 +11708,29 @@ static void test92(void)
sljit_emit_atomic_store(compiler, SLJIT_MOV_U16 | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);

#if !defined(SLJIT_CONFIG_ARM) || defined(__ARM_FEATURE_ATOMICS)
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 32 * sizeof(sljit_sw));
sljit_emit_atomic_load(compiler, SLJIT_MOV, SLJIT_R0, SLJIT_R1);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 666);
/* buf[32] */
sljit_emit_atomic_store(compiler, SLJIT_MOV | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R1);
jump = sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED);
/* buf[33] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 33 * sizeof(sljit_sw), SLJIT_IMM, 0);
sljit_set_label(jump, sljit_emit_label(compiler));

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 34 * sizeof(sljit_sw));
sljit_emit_atomic_load(compiler, SLJIT_MOV, SLJIT_R0, SLJIT_R1);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_IMM, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 666);
/* buf[34] */
sljit_emit_atomic_store(compiler, SLJIT_MOV | SLJIT_SET_ATOMIC_STORED, SLJIT_R2, SLJIT_R1, SLJIT_R0);
jump = sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED);
/* buf[35] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 35 * sizeof(sljit_sw), SLJIT_IMM, 0);
sljit_set_label(jump, sljit_emit_label(compiler));
#endif /* !ARMv8+LSE */

sljit_emit_return_void(compiler);

code.code = sljit_generate_code(compiler);
Expand Down Expand Up @@ -11763,7 +11787,12 @@ static void test92(void)
FAILED(((sljit_u16*)(buf + 30))[0] != 65535, "test92 case 42 failed\n");
FAILED(((sljit_u16*)(buf + 30))[1] != 51403, "test92 case 43 failed\n");
FAILED(buf[31] != 14876, "test92 case 44 failed\n");

#if !defined(SLJIT_CONFIG_ARM) || defined(__ARM_FEATURE_ATOMICS)
FAILED(buf[32] == 666, "test92 case 45 failed\n");
FAILED(!buf[33], "test92 case 46 failed\n");
FAILED(buf[34] == 666, "test92 case 47 failed\n");
FAILED(!buf[35], "test92 case 48 failed\n");
#endif /* CAS */
sljit_free_code(code.code, NULL);
#endif
successful_tests++;
Expand Down

0 comments on commit fc65186

Please sign in to comment.