Skip to content

Commit

Permalink
[JSC] Add "AddZeroExtend64" Air opcode
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=249765
rdar://103631099

Reviewed by Mark Lam.

In ARM64, we are leveraging LDR style address, which can take 32bit index in addressing and zero-extend / sign-extend that in load/store.
This is useful since WasmAddress' index is 32bit and we need to zero-extend it. However, we cannot use this addressing when there is an
offset since this addressing cannot encode offset. As a result, we are emitting Move32 and Add64 when there is an offset.
However, ARM64 can do even better for that case since ARM64 add / sub instructions also support LDR style extension.

This patch adds AddZeroExtend64 and AddSignExtend64. They take 32bit second operand and extend it before adding. This is particularly useful
when computing WasmAddress. We also leverage this in AirIRGenerator.

In the added testb3, the generated code is changed as follows.

    Before:
        O2: testWasmAddressWithOffset()...
        Generated JIT code for Compilation:
            Code at [0x115f74980, 0x115f749a0):
                     <0> 0x115f74980:    pacibsp
                     <4> 0x115f74984:    stp      fp, lr, [sp, #-16]!
                     <8> 0x115f74988:    mov      fp, sp
                    <12> 0x115f7498c:    ubfx     x0, x0, #0, WebKit#32; emitSave
                    <16> 0x115f74990:    add      x0, x2, x0
                    <20> 0x115f74994:    sturb    w1, [x0, #1]
                    <24> 0x115f74998:    ldp      fp, lr, [sp], WebKit#16
                    <28> 0x115f7499c:    retab

    After:
        O2: testWasmAddressWithOffset()...
        Generated JIT code for Compilation:
            Code at [0x121108980, 0x1211089a0):
                     <0> 0x121108980:    pacibsp
                     <4> 0x121108984:    stp      fp, lr, [sp, #-16]!
                     <8> 0x121108988:    mov      fp, sp
                    <12> 0x12110898c:    add      x0, x2, w0, uxtw; emitSave
                    <16> 0x121108990:    sturb    w1, [x0, #1]
                    <20> 0x121108994:    ldp      fp, lr, [sp], WebKit#16
                    <24> 0x121108998:    retab

* Source/JavaScriptCore/assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::addZeroExtend64):
(JSC::MacroAssemblerARM64::addSignExtend64):
* Source/JavaScriptCore/b3/B3LowerToAir.cpp:
* Source/JavaScriptCore/b3/air/AirInstInlines.h:
(JSC::B3::Air::isAddZeroExtend64Valid):
(JSC::B3::Air::isAddSignExtend64Valid):
* Source/JavaScriptCore/b3/air/AirOpcode.opcodes:

Canonical link: https://commits.webkit.org/258259@main
  • Loading branch information
Constellation committed Dec 22, 2022
1 parent b395978 commit 90a9fbb
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 4 deletions.
12 changes: 12 additions & 0 deletions Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,18 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
store64(dataTempRegister, address.m_ptr);
}

void addZeroExtend64(RegisterID src, RegisterID srcExtend, RegisterID dest)
{
ASSERT(srcExtend != ARM64Registers::sp);
m_assembler.add<64>(dest, src, srcExtend, Assembler::UXTW, 0);
}

void addSignExtend64(RegisterID src, RegisterID srcExtend, RegisterID dest)
{
ASSERT(srcExtend != ARM64Registers::sp);
m_assembler.add<64>(dest, src, srcExtend, Assembler::SXTW, 0);
}

void addPtrNoFlags(TrustedImm32 imm, RegisterID srcDest)
{
add64(imm, srcDest);
Expand Down
19 changes: 19 additions & 0 deletions Source/JavaScriptCore/b3/B3LowerToAir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3728,6 +3728,25 @@ class LowerToAir {
case WasmAddress: {
WasmAddressValue* address = m_value->as<WasmAddressValue>();

if constexpr (isARM64()) {
Value* index = m_value->child(0);
if (canBeInternal(index)) {
// Maybe, the ideal approach is to introduce a decorator (Index@EXT) to the Air operand
// to provide an extension opportunity for the specific form under the Air opcode.
if (isMergeableValue(index, ZExt32)) {
append(AddZeroExtend64, Arg(address->pinnedGPR()), tmp(index->child(0)), tmp(address));
commitInternal(index);
return;
}

if (isMergeableValue(index, SExt32)) {
append(AddSignExtend64, Arg(address->pinnedGPR()), tmp(index->child(0)), tmp(address));
commitInternal(index);
return;
}
}
}

append(Add64, Arg(address->pinnedGPR()), tmp(m_value->child(0)), tmp(address));
return;
}
Expand Down
20 changes: 20 additions & 0 deletions Source/JavaScriptCore/b3/air/AirInstInlines.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,26 @@ inline std::optional<unsigned> Inst::shouldTryAliasingDef()
return std::nullopt;
}

inline bool isAddZeroExtend64Valid(const Inst& inst)
{
#if CPU(ARM64)
return inst.args[1] != Tmp(ARM64Registers::sp);
#else
UNUSED_PARAM(inst);
return true;
#endif
}

inline bool isAddSignExtend64Valid(const Inst& inst)
{
#if CPU(ARM64)
return inst.args[1] != Tmp(ARM64Registers::sp);
#else
UNUSED_PARAM(inst);
return true;
#endif
}

inline bool isShiftValid(const Inst& inst)
{
#if CPU(X86) || CPU(X86_64)
Expand Down
6 changes: 6 additions & 0 deletions Source/JavaScriptCore/b3/air/AirOpcode.opcodes
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ Add32 U:G:32, UZD:G:32
x86: Tmp, Addr
x86: Tmp, Index

arm64: AddZeroExtend64 U:G:64, U:G:32, D:G:64
Tmp, Tmp*, Tmp

arm64: AddSignExtend64 U:G:64, U:G:32, D:G:64
Tmp, Tmp*, Tmp

x86: Add8 U:G:8, UD:G:8
Imm, Addr
Imm, Index
Expand Down
1 change: 1 addition & 0 deletions Source/JavaScriptCore/b3/testb3.h
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,7 @@ void testFloatMaxMin();
void testDoubleMaxMin();

void testWasmAddressDoesNotCSE();
void testWasmAddressWithOffset();
void testStoreAfterClobberExitsSideways();
void testStoreAfterClobberDifferentWidth();
void testStoreAfterClobberDifferentWidthSuccessor();
Expand Down
1 change: 1 addition & 0 deletions Source/JavaScriptCore/b3/testb3_1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,7 @@ void run(const char* filter)
RUN(testWasmBoundsCheck(std::numeric_limits<unsigned>::max() - 5));

RUN(testWasmAddress());
RUN(testWasmAddressWithOffset());

RUN(testFastTLSLoad());
RUN(testFastTLSStore());
Expand Down
28 changes: 28 additions & 0 deletions Source/JavaScriptCore/b3/testb3_7.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1191,6 +1191,34 @@ void testWasmAddress()
CHECK_EQ(numToStore, value);
}

void testWasmAddressWithOffset()
{
Procedure proc;
GPRReg pinnedGPR = GPRInfo::argumentGPR2;
proc.pinRegister(pinnedGPR);

Vector<uint8_t> values(3);
values[0] = 20;
values[1] = 21;
values[2] = 22;
uint8_t numToStore = 42;

BasicBlock* root = proc.addBlock();

// Root
Value* offset = root->appendNew<Value>(proc, Trunc, Origin(), root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
Value* valueToStore = root->appendNew<Value>(proc, Trunc, Origin(), root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
Value* pointer = root->appendNew<Value>(proc, ZExt32, Origin(), offset);
root->appendNew<MemoryValue>(proc, Store8, Origin(), valueToStore, root->appendNew<WasmAddressValue>(proc, Origin(), pointer, pinnedGPR), 1);
root->appendNewControlValue(proc, Return, Origin());

auto code = compileProc(proc);
invoke<void>(*code, 1, numToStore, values.data());
CHECK_EQ(20U, values[0]);
CHECK_EQ(21U, values[1]);
CHECK_EQ(42U, values[2]);
}

void testFastTLSLoad()
{
#if ENABLE(FAST_TLS_JIT)
Expand Down
21 changes: 17 additions & 4 deletions Source/JavaScriptCore/wasm/WasmAirIRGenerator64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,6 @@ inline AirIRGenerator64::ExpressionType AirIRGenerator64::emitCheckAndPreparePoi
ASSERT(m_memoryBaseGPR);

auto result = g64();
append(Move32, pointer, result);

switch (m_mode) {
case MemoryMode::BoundsChecking: {
Expand All @@ -972,7 +971,12 @@ inline AirIRGenerator64::ExpressionType AirIRGenerator64::emitCheckAndPreparePoi
ASSERT(sizeOfOperation + offset > offset);
auto temp = g64();
append(Move, Arg::bigImm(static_cast<uint64_t>(sizeOfOperation) + offset - 1), temp);
append(Add64, result, temp);
if constexpr (isARM64())
append(AddZeroExtend64, temp, pointer, temp);
else {
append(Move32, pointer, result);
append(Add64, result, temp);
}

emitCheck([&] {
return Inst(Branch64, nullptr, Arg::relCond(MacroAssembler::AboveOrEqual), temp, Tmp(m_boundsCheckingSizeGPR));
Expand All @@ -994,11 +998,17 @@ inline AirIRGenerator64::ExpressionType AirIRGenerator64::emitCheckAndPreparePoi
// PROT_NONE region, but it's better if we use a smaller immediate because it can codegens better. We know that anything equal to or greater
// than the declared 'maximum' will trap, so we can compare against that number. If there was no declared 'maximum' then we still know that
// any access equal to or greater than 4GiB will trap, no need to add the redzone.
if constexpr (!isARM64())
append(Move32, pointer, result);
if (offset >= Memory::fastMappedRedzoneBytes()) {
uint64_t maximum = m_info.memory.maximum() ? m_info.memory.maximum().bytes() : std::numeric_limits<uint32_t>::max();
auto temp = g64();
append(Move, Arg::bigImm(static_cast<uint64_t>(sizeOfOperation) + offset - 1), temp);
append(Add64, result, temp);
if constexpr (isARM64())
append(AddZeroExtend64, temp, pointer, temp);
else
append(Add64, result, temp);

auto sizeMax = addConstant(Types::I64, maximum);

emitCheck([&] {
Expand All @@ -1012,7 +1022,10 @@ inline AirIRGenerator64::ExpressionType AirIRGenerator64::emitCheckAndPreparePoi
#endif
}

append(Add64, Tmp(m_memoryBaseGPR), result);
if constexpr (isARM64())
append(AddZeroExtend64, Tmp(m_memoryBaseGPR), pointer, result);
else
append(Add64, Tmp(m_memoryBaseGPR), result);
return result;
}

Expand Down

0 comments on commit 90a9fbb

Please sign in to comment.