diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S
new file mode 100644
index 0000000000000..dc344183e927b
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S
@@ -0,0 +1,273 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+// GC type flags
+GC_ALLOC_FINALIZE = 1
+
+//
+// Rename fields of nested structs
+//
+OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr
+OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit
+
+
+
+// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+// allocation context then automatically fallback to the slow allocation path.
+//  $a0 == MethodTable
+    LEAF_ENTRY RhpNewFast, _TEXT
+
+        // a1 = GetThread()
+#ifdef FEATURE_EMULATED_TLS
+        GETTHREAD_ETLS_1
+#else
+        INLINE_GETTHREAD $a1
+#endif
+
+        //
+        // a0 contains MethodTable pointer
+        //
+        ld.w  $a2, $a0, OFFSETOF__MethodTable__m_uBaseSize
+
+        //
+        // a0: MethodTable pointer
+        // a1: Thread pointer
+        // a2: base size
+        //
+
+        // Load potential new object address into t3.
+        ld.d  $t3, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+
+        // Determine whether the end of the object would lie outside of the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        add.d  $a2, $a2, $t3
+        ld.d  $t4, $a1, OFFSETOF__Thread__m_alloc_context__alloc_limit
+        bltu  $t4, $a2, RhpNewFast_RarePath
+
+        // Update the alloc pointer to account for the allocation.
+        st.d  $a2, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+
+        // Set the new object's MethodTable pointer.
+        st.d  $a0, $t3, OFFSETOF__Object__m_pEEType
+
+        ori  $a0, $t3, 0
+        jirl  $r0, $ra, 0
+
+RhpNewFast_RarePath:
+        ori  $a1, $zero, 0
+        b  RhpNewObject
+    LEAF_END RhpNewFast, _TEXT
+
+// Allocate non-array object with finalizer.
+//  a0 == MethodTable
+    LEAF_ENTRY RhpNewFinalizable, _TEXT
+        ori  $a1, $zero, GC_ALLOC_FINALIZE
+        b  RhpNewObject
+    LEAF_END RhpNewFinalizable, _TEXT
+
+// Allocate non-array object.
+//  a0 == MethodTable
+//  a1 == alloc flags
+    NESTED_ENTRY RhpNewObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME $a3
+
+        // a3: transition frame
+
+        // Preserve the MethodTable in s0
+        ori  $s0, $a0, 0
+
+        ori  $a2, $zero, 0 // numElements
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        bl  C_FUNC(RhpGcAlloc)
+
+        // Check for failure (null return from RhpGcAlloc).
+        beq  $a0, $zero, NewOutOfMemory
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+NewOutOfMemory:
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        ori  $a0, $s0, 0 // MethodTable pointer
+        ori  $a1, $zero, 0 // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        b  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewObject, _TEXT
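+
+// For reference, the fast path above corresponds roughly to the following C sketch (the helper and
+// field names here are illustrative; the real layout comes from AsmOffsets.inc):
+//
+//     Object* RhpNewFast(MethodTable* pEEType)
+//     {
+//         Thread*  pThread = GetThread();
+//         uint8_t* alloc   = pThread->alloc_context.alloc_ptr;
+//         size_t   size    = pEEType->m_uBaseSize;
+//         if (alloc + size <= pThread->alloc_context.alloc_limit) { // bump-pointer allocation fits
+//             pThread->alloc_context.alloc_ptr = alloc + size;
+//             ((Object*)alloc)->m_pEEType = pEEType;
+//             return (Object*)alloc;
+//         }
+//         return RhpNewObject(pEEType, /* uFlags */ 0);             // slow path via RhpGcAlloc
+//     }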
+// Allocate a string.
+//  a0 == MethodTable
+//  a1 == element/character count
+    LEAF_ENTRY RhNewString, _TEXT
+        // Make sure computing the overall allocation size won't overflow
+        lu12i.w  $a2, ((MAX_STRING_LENGTH >> 12) & 0xFFFFF)
+        ori  $a2, $a2, (MAX_STRING_LENGTH & 0xFFF)
+        bltu  $a2, $a1, StringSizeOverflow
+
+        // Compute overall allocation size (align(base size + (element size * elements), 8)).
+        ori  $a2, $zero, STRING_COMPONENT_SIZE
+        mulw.d.w  $a2, $a1, $a2                // a2 = (a1[31:0] * a2[31:0])[63:0]
+        addi.d  $a2, $a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7
+        bstrins.d  $a2, $r0, 2, 0              // clear the bits[2:0] of $a2
+
+        // a0 == MethodTable
+        // a1 == element count
+        // a2 == string size
+
+#ifdef FEATURE_EMULATED_TLS
+        GETTHREAD_ETLS_3
+#else
+        INLINE_GETTHREAD $a3
+#endif
+
+        // Load potential new object address into t3.
+        ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+
+        // Determine whether the end of the object would lie outside of the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        add.d  $a2, $a2, $t3
+        ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit
+        bltu  $t3, $a2, RhNewString_Rare
+
+        // Reload new object address into t3.
+        ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+
+        // Update the alloc pointer to account for the allocation.
+        st.d  $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+
+        // Set the new object's MethodTable pointer and element count.
+        st.d  $a0, $t3, OFFSETOF__Object__m_pEEType
+        st.d  $a1, $t3, OFFSETOF__Array__m_Length
+
+        // Return the object allocated in a0.
+        ori  $a0, $t3, 0
+
+        jirl  $r0, $ra, 0
+
+StringSizeOverflow:
+        // We get here if the length of the final string object cannot be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an OOM exception that the caller of this allocator understands.
+
+        // a0 holds MethodTable pointer already
+        ori  $a1, $zero, 1 // Indicate that we should throw OverflowException
+        b  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+RhNewString_Rare:
+        b  C_FUNC(RhpNewArrayRare)
+    LEAF_END RhNewString, _TEXT
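+
+// The size computation above is, roughly (STRING_BASE_SIZE and STRING_COMPONENT_SIZE come from
+// AsmOffsets.inc; this C sketch is only illustrative):
+//
+//     if (count > MAX_STRING_LENGTH)                 // guarantees the math below cannot overflow
+//         throw OverflowException;
+//     size_t size = (STRING_BASE_SIZE + count * STRING_COMPONENT_SIZE + 7) & ~(size_t)7; // align up to 8
+//
+// RhpNewArray below follows the same pattern, using MethodTable::m_usComponentSize and m_uBaseSize.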
+// Allocate one dimensional, zero based array (SZARRAY).
+//  $a0 == MethodTable
+//  $a1 == element count
+    LEAF_ENTRY RhpNewArray, _TEXT
+
+        // We want to limit the element count to the non-negative 32-bit int range.
+        // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
+        // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst
+        // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
+        lu12i.w  $a2, 0x7ffff
+        ori  $a2, $a2, 0xfff
+        bltu  $a2, $a1, ArraySizeOverflow
+
+        ld.h  $a2, $a0, OFFSETOF__MethodTable__m_usComponentSize
+        mulw.d.w  $a2, $a1, $a2
+        ld.w  $a3, $a0, OFFSETOF__MethodTable__m_uBaseSize
+        add.d  $a2, $a2, $a3
+        addi.d  $a2, $a2, 7
+        bstrins.d  $a2, $r0, 2, 0
+
+        // a0 == MethodTable
+        // a1 == element count
+        // a2 == array size
+
+        INLINE_GETTHREAD $a3
+
+        // Load potential new object address into t3.
+        ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+
+        // Determine whether the end of the object would lie outside of the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        add.d  $a2, $a2, $t3
+        ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit
+        bltu  $t3, $a2, RhpNewArray_Rare
+
+        // Reload new object address into t3.
+        ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+
+        // Update the alloc pointer to account for the allocation.
+        st.d  $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+
+        // Set the new object's MethodTable pointer and element count.
+        st.d  $a0, $t3, OFFSETOF__Object__m_pEEType
+        st.d  $a1, $t3, OFFSETOF__Array__m_Length
+
+        // Return the object allocated in a0.
+        ori  $a0, $t3, 0
+
+        jirl  $r0, $ra, 0
+
+ArraySizeOverflow:
+        // We get here if the size of the final array object cannot be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an overflow exception that the caller of this allocator understands.
+
+        // $a0 holds MethodTable pointer already
+        ori  $a1, $zero, 1 // Indicate that we should throw OverflowException
+        b  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+RhpNewArray_Rare:
+        b  C_FUNC(RhpNewArrayRare)
+    LEAF_END RhpNewArray, _TEXT
+
+// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper.
+//  a0 == MethodTable
+//  a1 == element count
+//  a2 == array size + Thread::m_alloc_context::alloc_ptr
+//  a3 == Thread
+    NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler
+
+        // Recover array size by subtracting the alloc_ptr from a2.
+        ld.d  $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr
+        sub.d  $a2, $a2, $t3
+
+        PUSH_COOP_PINVOKE_FRAME $a3
+
+        // Preserve data we will need later into the callee saved registers
+        ori  $s0, $a0, 0 // Preserve MethodTable
+
+        ori  $a2, $a1, 0 // numElements
+        ori  $a1, $zero, 0 // uFlags
+
+        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
+        bl  C_FUNC(RhpGcAlloc)
+
+        // Check for failure (null return from RhpGcAlloc).
+        beq  $a0, $zero, ArrayOutOfMemory
+
+        .cfi_remember_state
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+ArrayOutOfMemory:
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        ori  $a0, $s0, 0 // MethodTable pointer
+        ori  $a1, $zero, 0 // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        b  C_FUNC(RhExceptionHandling_FailedAllocation)
+
+    NESTED_END RhpNewArrayRare, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h
new file mode 100644
index 0000000000000..0724e0f86fcff
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h
@@ -0,0 +1,67 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+//
+// This file is used by AsmOffsets.h to validate that our
+// assembly-code offsets always match their C++ counterparts.
+//
+// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix
+
+PLAT_ASM_SIZEOF(280, ExInfo)
+PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo)
+PLAT_ASM_OFFSET(8, ExInfo, m_pExContext)
+PLAT_ASM_OFFSET(10, ExInfo, m_exception)
+PLAT_ASM_OFFSET(18, ExInfo, m_kind)
+PLAT_ASM_OFFSET(19, ExInfo, m_passNumber)
+PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause)
+PLAT_ASM_OFFSET(20, ExInfo, m_frameIter)
+PLAT_ASM_OFFSET(278, ExInfo, m_notifyDebuggerSP)
+
+PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer)
+PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP)
+PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread)
+PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags)
+PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs)
+
+PLAT_ASM_SIZEOF(258, StackFrameIterator)
+PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer)
+PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC)
+PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay)
+PLAT_ASM_OFFSET(248, StackFrameIterator, m_OriginalControlPC)
+PLAT_ASM_OFFSET(250, StackFrameIterator, m_pPreviousTransitionFrame)
+
+PLAT_ASM_SIZEOF(C0, PAL_LIMITED_CONTEXT)
+
+PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP)
+PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, RA)
+PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, R4)
+PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, R5)
+PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, R23)
+PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, R24)
+PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, R25)
+PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R26)
+PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R27)
+PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R28)
+PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, R29)
+PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, R30)
+PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, R31)
+PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, R2)
+PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP)
+PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, IP)
+
+PLAT_ASM_SIZEOF(148, REGDISPLAY)
+PLAT_ASM_OFFSET(18, REGDISPLAY, SP)
+
+PLAT_ASM_OFFSET(b8, REGDISPLAY, pR23)
+PLAT_ASM_OFFSET(c0, REGDISPLAY, pR24)
+PLAT_ASM_OFFSET(c8, REGDISPLAY, pR25)
+PLAT_ASM_OFFSET(d0, REGDISPLAY, pR26)
+PLAT_ASM_OFFSET(d8, REGDISPLAY, pR27)
+PLAT_ASM_OFFSET(e0, REGDISPLAY, pR28)
+PLAT_ASM_OFFSET(e8, REGDISPLAY, pR29)
+PLAT_ASM_OFFSET(f0, REGDISPLAY, pR30)
+PLAT_ASM_OFFSET(f8, REGDISPLAY, pR31)
+PLAT_ASM_OFFSET(10, REGDISPLAY, pR2)
+PLAT_ASM_OFFSET(b0, REGDISPLAY, pFP)
+PLAT_ASM_OFFSET(8, REGDISPLAY, pRA)
+PLAT_ASM_OFFSET(108, REGDISPLAY, F)
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S
new file mode 100644
index 0000000000000..7cd047c126348
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S
@@ -0,0 +1,804 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15))
+
+#define HARDWARE_EXCEPTION 1
+#define SOFTWARE_EXCEPTION 0
+
+.global RhpTrapThreads
+
+// -----------------------------------------------------------------------------
+// Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx)
+    .macro ALLOC_THROW_FRAME exceptionType
+
+        ori  $a3, $sp, 0
+
+        // Setup a PAL_LIMITED_CONTEXT on the stack {
+    .if \exceptionType == HARDWARE_EXCEPTION
+        addi.d  $sp, $sp, -0x50
+        .cfi_adjust_cfa_offset 0x50
+        st.d  $a3, $sp, 0 // a3 is the SP and a1 is the IP of the fault site
+        st.d  $a1, $sp, 8
+    .else
+        PROLOG_STACK_ALLOC 0x50
+        .cfi_adjust_cfa_offset 0x50
+        st.d  $a3, $sp, 0 // a3 is the SP and ra is the IP of the fault site
+        st.d  $ra, $sp, 8
+    .endif
+        fst.d  $f24, $sp, 0x10
+        fst.d  $f25, $sp, 0x18
+        fst.d  $f26, $sp, 0x20
+        fst.d  $f27, $sp, 0x28
+        fst.d  $f28, $sp, 0x30
+        fst.d  $f29, $sp, 0x38
+        fst.d  $f30, $sp, 0x40
+        fst.d  $f31, $sp, 0x48
+        PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 0x70
+        st.d  $zero, $sp, 0x10 // locations reserved for return value, not used for exception handling
+        st.d  $zero, $sp, 0x18
+        PROLOG_SAVE_REG_PAIR 23, 24, 0x20
+        PROLOG_SAVE_REG_PAIR 25, 26, 0x30
+        PROLOG_SAVE_REG_PAIR 27, 28, 0x40
+        PROLOG_SAVE_REG_PAIR 29, 30, 0x50
+        PROLOG_SAVE_REG_PAIR 31, 2, 0x60
+        // } end PAL_LIMITED_CONTEXT
+
+        PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo
+    .endm
+
+// -----------------------------------------------------------------------------
+// Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet)
+// extraStackSize - extra stack space that the user of the macro can use to
+//                  store additional registers
+    .macro ALLOC_CALL_FUNCLET_FRAME extraStackSize
+
+        // Using the prolog below instead of PROLOG_SAVE_REG_PAIR fp,ra, #-60!
+        // is intentional. That statement would also emit an instruction to save
+        // sp in fp. If sp is saved in fp in the prolog then fp is not expected to change in the body
+        // of the method. However, this method needs to be able to change fp before calling the funclet.
+        // This is required to access locals in the funclet.
+        PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, 0x60
+        PROLOG_SAVE_REG_PAIR 23, 24, 0x10
+        PROLOG_SAVE_REG_PAIR 25, 26, 0x20
+        PROLOG_SAVE_REG_PAIR 27, 28, 0x30
+        PROLOG_SAVE_REG_PAIR 29, 30, 0x40
+        PROLOG_SAVE_REG_PAIR 31, 2, 0x50
+        ori  $fp, $sp, 0
+        .cfi_def_cfa_register 22 // fp
+
+    .if \extraStackSize != 0
+        PROLOG_STACK_ALLOC \extraStackSize
+    .endif
+    .endm
+
+// -----------------------------------------------------------------------------
+// Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet)
+// extraStackSize - extra stack space that the user of the macro can use to
+//                  store additional registers.
+//                  It needs to match the value passed to the corresponding
+//                  ALLOC_CALL_FUNCLET_FRAME.
+    .macro FREE_CALL_FUNCLET_FRAME extraStackSize
+
+    .if \extraStackSize != 0
+        EPILOG_STACK_FREE \extraStackSize
+    .endif
+
+        EPILOG_RESTORE_REG_PAIR 23, 24, 0x10
+        EPILOG_RESTORE_REG_PAIR 25, 26, 0x20
+        EPILOG_RESTORE_REG_PAIR 27, 28, 0x30
+        EPILOG_RESTORE_REG_PAIR 29, 30, 0x40
+        EPILOG_RESTORE_REG_PAIR 31, 2, 0x50
+        EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x60
+    .endm
+
+
+// -----------------------------------------------------------------------------
+// Macro used to restore preserved general purpose and FP registers from REGDISPLAY
+// regdisplayReg - register pointing to the REGDISPLAY structure
+    .macro RESTORE_PRESERVED_REGISTERS regdisplayReg
+
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23
+        ld.d  $s0, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24
+        ld.d  $s1, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25
+        ld.d  $s2, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26
+        ld.d  $s3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27
+        ld.d  $s4, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28
+        ld.d  $s5, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29
+        ld.d  $s6, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30
+        ld.d  $s7, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31
+        ld.d  $s8, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP
+        ld.d  $fp, $t3, 0
+        //
+        // load FP preserved regs
+        //
+        addi.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F
+        fld.d  $f24, $t3, 0x00
+        fld.d  $f25, $t3, 0x08
+        fld.d  $f26, $t3, 0x10
+        fld.d  $f27, $t3, 0x18
+        fld.d  $f28, $t3, 0x20
+        fld.d  $f29, $t3, 0x28
+        fld.d  $f30, $t3, 0x30
+        fld.d  $f31, $t3, 0x38
+    .endm
+
+// -----------------------------------------------------------------------------
+// Macro used to save preserved general purpose and FP registers to REGDISPLAY
+// regdisplayReg - register pointing to the REGDISPLAY structure
+    .macro SAVE_PRESERVED_REGISTERS regdisplayReg
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23
+        st.d  $s0, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24
+        st.d  $s1, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25
+        st.d  $s2, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26
+        st.d  $s3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27
+        st.d  $s4, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28
+        st.d  $s5, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29
+        st.d  $s6, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30
+        st.d  $s7, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31
+        st.d  $s8, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP
+        st.d  $fp, $t3, 0
+        //
+        // store FP preserved regs
+        //
+        addi.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F
+        fst.d  $f24, $t3, 0x00
+        fst.d  $f25, $t3, 0x08
+        fst.d  $f26, $t3, 0x10
+        fst.d  $f27, $t3, 0x18
+        fst.d  $f28, $t3, 0x20
+        fst.d  $f29, $t3, 0x28
+        fst.d  $f30, $t3, 0x30
+        fst.d  $f31, $t3, 0x38
+    .endm
+
+
+// -----------------------------------------------------------------------------
+// Macro used to thrash preserved general purpose registers in REGDISPLAY
+// to make sure nobody uses them
+// regdisplayReg - register pointing to the REGDISPLAY structure
+    .macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg
+
+#ifdef _DEBUG
+        lu12i.w  $a3, 0xbaadd
+        ori  $a3, $a3, 0xeed
+        lu32i.d  $a3, 0xddeed
+        lu52i.d  $a3, $a3, 0xbaa
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31
+        st.d  $a3, $t3, 0
+        ld.d  $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP
+        st.d  $a3, $t3, 0
+#endif // _DEBUG
+    .endm
+
+.macro GetThreadA2
+        addi.d  $sp, $sp, -16
+        st.d  $a0, $sp, 0
+        st.d  $a1, $sp, 8
+        bl  C_FUNC(RhpGetThread)
+        ori  $a2, $a0, 0
+        ld.d  $a0, $sp, 0
+        ld.d  $a1, $sp, 8
+        addi.d  $sp, $sp, 16
+.endm
+
+#define rsp_offsetof_ExInfo 0
+#define rsp_offsetof_Context STACKSIZEOF_ExInfo
+
+//
+// RhpThrowHwEx
+//
+// INPUT:  a0[31:0]: exception code of fault
+//         a1: faulting IP
+//
+// OUTPUT:
+//
+    NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler
+
+        ALLOC_THROW_FRAME HARDWARE_EXCEPTION
+
+        GetThreadA2
+
+        addi.d  $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo*
+        st.d  $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null
+        ori  $a3, $zero, 1
+        st.b  $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1
+        addi.w  $a3, $zero, -1
+        st.w  $a3, $a1, OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx
+        ori  $a3, $zero, 2
+        st.b  $a3, $a1, OFFSETOF__ExInfo__m_kind // pExInfo->m_kind = ExKind.HardwareFault
+
+        // link the ExInfo into the thread's ExInfo chain
+        ld.d  $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead
+        st.d  $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead
+        st.d  $a1, $a2, OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo
+
+        // set the exception context field on the ExInfo
+        addi.d  $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT*
+        st.d  $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext
+
+        // a0[31:0]: exception code
+        // a1: ExInfo*
+        bl  C_FUNC(RhThrowHwEx)
+
+    ALTERNATE_ENTRY RhpThrowHwEx2
+
+        // no return
+        EMIT_BREAKPOINT
+
+    NESTED_END RhpThrowHwEx, _TEXT
+
+//
+// RhpThrowEx
+//
+// INPUT:  a0: exception object
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler
+
+        ALLOC_THROW_FRAME SOFTWARE_EXCEPTION
+
+        GetThreadA2
+
+        // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return
+        // address could have been hijacked when we were in that C# code and we must remove the hijack and
+        // reflect the correct return address in our exception context record. The other throw helpers don't
+        // need this because they cannot be tail-called from C#.
+
+        // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location
+        // where the tail-calling thread had saved RA, which may not match where we have saved RA.
+
+        ld.d  $a1, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress
+        beq  $a1, $zero, NotHijacked
+
+        ld.d  $a3, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
+
+        // a0: exception object
+        // a1: hijacked return address
+        // a2: pThread
+        // a3: hijacked return address location
+
+        addi.d  $t3, $sp, (STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) // re-compute SP at callsite
+        bltu  $a3, $t3, TailCallWasHijacked // if (m_ppvHijackedReturnAddressLocation < SP at callsite)
+
+        // normal case where a valid return address location is hijacked
+        st.d  $a1, $a3, 0
+        b  ClearThreadState
+
+TailCallWasHijacked:
+
+        // Abnormal case where the return address location is now invalid because we ended up here via a tail
+        // call. In this case, our hijacked return address should be the correct caller of this method.
+
+        // Stick the previous return address in RA as well as in the right spots in our PAL_LIMITED_CONTEXT.
+        ori  $ra, $a1, 0
+        st.d  $ra, $sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__RA)
+        st.d  $ra, $sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)
+
+ClearThreadState:
+
+        // clear the Thread's hijack state
+        st.d  $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
+        st.d  $zero, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress
+
+NotHijacked:
+
+        addi.d  $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo*
+        st.d  $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null
+        ori  $a3, $zero, 1
+        st.b  $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1
+        addi.w  $a3, $zero, -1
+        st.w  $a3, $a1, OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx
+        ori  $a3, $zero, 1
+        st.b  $a3, $a1, OFFSETOF__ExInfo__m_kind // pExInfo->m_kind = ExKind.Throw
+
+        // link the ExInfo into the thread's ExInfo chain
+        ld.d  $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead
+        st.d  $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead
+        st.d  $a1, $a2, OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo
+
+        // set the exception context field on the ExInfo
+        addi.d  $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT*
+        st.d  $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext
+
+        // a0: exception object
+        // a1: ExInfo*
+        bl  C_FUNC(RhThrowEx)
+
+    ALTERNATE_ENTRY RhpThrowEx2
+
+        // no return
+        EMIT_BREAKPOINT
+    NESTED_END RhpThrowEx, _TEXT
+
+
+//
+// void FASTCALL RhpRethrow()
+//
+// SUMMARY:  Similar to RhpThrowEx, except that it passes along the currently active ExInfo
+//
+// INPUT:
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpRethrow, _TEXT, NoHandler
+
+        ALLOC_THROW_FRAME SOFTWARE_EXCEPTION
+
+        GetThreadA2
+
+        addi.d  $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo*
+        st.d  $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null
+        st.b  $zero, $a1, OFFSETOF__ExInfo__m_kind // init to a deterministic value (ExKind.None)
+        ori  $a3, $zero, 1
+        st.b  $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1
+        addi.w  $a3, $zero, -1
+        st.w  $a3, $a1, OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx
+
+        // link the ExInfo into the thread's ExInfo chain
+        ld.d  $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead
+        ori  $a0, $a3, 0 // a0 <- current ExInfo
+        st.d  $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead
+        st.d  $a1, $a2, OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo
+
+        // set the exception context field on the ExInfo
+        addi.d  $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT*
+        st.d  $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext
+
+        // a0 contains the currently active ExInfo
+        // a1 contains the address of the new ExInfo
+        bl  C_FUNC(RhRethrow)
+
+    ALTERNATE_ENTRY RhpRethrow2
+
+        // no return
+        EMIT_BREAKPOINT
+    NESTED_END RhpRethrow, _TEXT
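+
+// Each of the three throw helpers above pushes a stack-allocated ExInfo in the same way; in C terms,
+// approximately (field names per AsmOffsets.inc; the kind value depends on the helper):
+//
+//     pExInfo->m_exception    = NULL;
+//     pExInfo->m_passNumber   = 1;
+//     pExInfo->m_idxCurClause = 0xFFFFFFFF;        // MaxTryRegionIdx
+//     pExInfo->m_kind         = kind;              // HardwareFault / Throw / None for rethrow
+//     pExInfo->m_pPrevExInfo  = pThread->m_pExInfoStackHead;
+//     pThread->m_pExInfoStackHead = pExInfo;
+//     pExInfo->m_pExContext   = pContext;          // the PAL_LIMITED_CONTEXT built by ALLOC_THROW_FRAME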
+
+//
+// void* FASTCALL RhpCallCatchFunclet(OBJECTREF exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay,
+//                                    ExInfo* pExInfo)
+//
+// INPUT:  a0:  exception object
+//         a1:  handler funclet address
+//         a2:  REGDISPLAY*
+//         a3:  ExInfo*
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler
+
+        ALLOC_CALL_FUNCLET_FRAME 0x70
+        fst.d  $f24, $sp, 0x00
+        fst.d  $f25, $sp, 0x08
+        fst.d  $f26, $sp, 0x10
+        fst.d  $f27, $sp, 0x18
+        fst.d  $f28, $sp, 0x20
+        fst.d  $f29, $sp, 0x28
+        fst.d  $f30, $sp, 0x30
+        fst.d  $f31, $sp, 0x38
+        st.d  $a0, $sp, 0x40 // a0 to a3 are saved so we can restore them at any time
+        st.d  $a1, $sp, 0x48
+        st.d  $a2, $sp, 0x50
+        st.d  $a3, $sp, 0x58
+        st.d  $zero, $sp, 0x60 // makes space for the local "is_not_handling_thread_abort"; the last qword will store the thread obj
+
+#define rsp_offset_is_not_handling_thread_abort 0x60
+#define rsp_offset_a0 0x40
+#define rsp_offset_a1 0x48
+#define rsp_offset_a2 0x50
+#define rsp_offset_a3 0x58
+#define rsp_CatchFunclet_offset_thread 0x68
+
+        //
+        // clear the DoNotTriggerGc flag, trashes a4-a6
+        //
+
+        bl  C_FUNC(RhpGetThread)
+        st.d  $a0, $sp, rsp_CatchFunclet_offset_thread
+        ori  $a5, $a0, 0
+        ld.d  $a0, $sp, 0x40
+        ld.d  $a1, $sp, 0x48
+        ld.d  $a2, $sp, 0x50
+        ld.d  $a3, $sp, 0x58
+
+        ld.d  $a4, $a5, OFFSETOF__Thread__m_threadAbortException
+        sub.d  $a4, $a4, $a0
+        st.d  $a4, $sp, rsp_offset_is_not_handling_thread_abort // Non-zero if the exception is not ThreadAbortException
+
+        addi.d  $t3, $a5, OFFSETOF__Thread__m_ThreadStateFlags
+
+        addi.w  $a6, $zero, -17 // $a6 = ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc = 0x10
+        amand_db.w  $a4, $a6, $t3 // atomically clear TSF_DoNotTriggerGc in m_ThreadStateFlags
+
+        //
+        // set preserved regs to the values expected by the funclet
+        //
+        RESTORE_PRESERVED_REGISTERS $a2
+        //
+        // trash the values at the old homes to make sure nobody uses them
+        //
+        TRASH_PRESERVED_REGISTERS_STORAGE $a2
+
+        //
+        // call the funclet
+        //
+        // a0 still contains the exception object
+        jirl  $ra, $a1, 0
+
+    ALTERNATE_ENTRY RhpCallCatchFunclet2
+
+        // $a0 contains resume IP
+
+        ld.d  $a2, $sp, rsp_offset_a2 // a2 <- REGDISPLAY*
+
+#ifdef _DEBUG
+        // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we
+        // have to spill all the preserved registers and then refill them after the call.
+
+        st.d  $a0, $sp, rsp_offset_a0
+
+        SAVE_PRESERVED_REGISTERS $a2
+
+        ld.d  $a0, $sp, rsp_CatchFunclet_offset_thread // a0 <- Thread*
+        ld.d  $a1, $sp, rsp_offset_a3 // a1 <- current ExInfo*
+        ld.d  $a2, $a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value
+        bl  C_FUNC(RhpValidateExInfoPop)
+
+        ld.d  $a2, $sp, rsp_offset_a2 // a2 <- REGDISPLAY*
+
+        RESTORE_PRESERVED_REGISTERS $a2
+
+        ld.d  $a0, $sp, rsp_offset_a0 // reload resume IP
+#endif
+
+        ld.d  $a1, $sp, rsp_CatchFunclet_offset_thread
+
+        // We must unhijack the thread at this point because the section of stack where the hijack is applied
+        // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack.
+        INLINE_THREAD_UNHIJACK $a1, $a3, $t3 // Thread in a1, trashes a3 and t3
+
+        ld.d  $a3, $sp, rsp_offset_a3 // a3 <- current ExInfo*
+        ld.d  $a2, $a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value
+
+PopExInfoLoop:
+        ld.d  $a3, $a3, OFFSETOF__ExInfo__m_pPrevExInfo // a3 <- next ExInfo
+        beq  $a3, $zero, DonePopping // if (pExInfo == null) { we're done }
+        blt  $a3, $a2, PopExInfoLoop // if (pExInfo < resume SP) { keep going }
+
+DonePopping:
+        st.d  $a3, $a1, OFFSETOF__Thread__m_pExInfoStackHead // store the new head on the Thread
+
+        PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a3
+
+        andi  $t7, $a3, TrapThreadsFlags_AbortInProgress_Bit
+        beq  $t7, $zero, NoAbort
+
+        ld.d  $a3, $sp, rsp_offset_is_not_handling_thread_abort
+        bne  $a3, $zero, NoAbort
+
+        // It was the ThreadAbortException, so rethrow it
+        // reset SP
+        ori  $a1, $a0, 0 // a1 <- continuation address as exception PC
+        addi.w  $a0, $zero, STATUS_REDHAWK_THREAD_ABORT
+        ori  $sp, $a2, 0
+        b  C_FUNC(RhpThrowHwEx)
+
+NoAbort:
+        // reset SP and jump to continuation address
+        ori  $sp, $a2, 0
+        jirl  $r0, $a0, 0
+
+#undef rsp_offset_is_not_handling_thread_abort
+#undef rsp_offset_a0
+#undef rsp_offset_a1
+#undef rsp_offset_a2
+#undef rsp_offset_a3
+#undef rsp_CatchFunclet_offset_thread
+
+    NESTED_END RhpCallCatchFunclet, _TEXT
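+
+// The PopExInfoLoop above unlinks every ExInfo that lives below the resume SP, i.e. approximately:
+//
+//     while (pExInfo != NULL && (uintptr_t)pExInfo < resumeSP)
+//         pExInfo = pExInfo->m_pPrevExInfo;
+//     pThread->m_pExInfoStackHead = pExInfo;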
+
+//
+// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay)
+//
+// INPUT:  a0:  handler funclet address
+//         a1:  REGDISPLAY*
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler
+
+        ALLOC_CALL_FUNCLET_FRAME 0x60
+        fst.d  $f24, $sp, 0x00
+        fst.d  $f25, $sp, 0x08
+        fst.d  $f26, $sp, 0x10
+        fst.d  $f27, $sp, 0x18
+        fst.d  $f28, $sp, 0x20
+        fst.d  $f29, $sp, 0x28
+        fst.d  $f30, $sp, 0x30
+        fst.d  $f31, $sp, 0x38
+        st.d  $a0, $sp, 0x40 // a0 and a1 are saved so we have them later
+        st.d  $a1, $sp, 0x48
+
+#define rsp_offset_a1 0x48
+#define rsp_FinallyFunclet_offset_thread 0x50
+
+
+        // We want to suppress hijacking between invocations of subsequent finallys. We do this because we
+        // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the
+        // method) and then been popped off the stack, leaving behind no trace of its effect.
+        //
+        // So we clear the state before and set it after invocation of the handler.
+        //
+
+        //
+        // clear the DoNotTriggerGc flag, trashes a2-a4
+        //
+
+        bl  C_FUNC(RhpGetThread)
+        st.d  $a0, $sp, rsp_FinallyFunclet_offset_thread
+        ori  $a2, $a0, 0
+        ld.d  $a0, $sp, 0x40
+        ld.d  $a1, $sp, 0x48
+
+        addi.d  $t3, $a2, OFFSETOF__Thread__m_ThreadStateFlags
+
+        addi.w  $a3, $zero, -17 // $a3 = ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc = 0x10
+        amand_db.w  $a4, $a3, $t3 // atomically clear TSF_DoNotTriggerGc in m_ThreadStateFlags
+
+        //
+        // set preserved regs to the values expected by the funclet
+        //
+        RESTORE_PRESERVED_REGISTERS $a1
+        //
+        // trash the values at the old homes to make sure nobody uses them
+        //
+        TRASH_PRESERVED_REGISTERS_STORAGE $a1
+
+        //
+        // call the funclet
+        //
+        jirl  $ra, $a0, 0
+
+    ALTERNATE_ENTRY RhpCallFinallyFunclet2
+
+        ld.d  $a1, $sp, rsp_offset_a1 // reload REGDISPLAY pointer
+
+        //
+        // save new values of preserved regs into REGDISPLAY
+        //
+        SAVE_PRESERVED_REGISTERS $a1
+
+        //
+        // set the DoNotTriggerGc flag, trashes a1-a3
+        //
+
+        ld.d  $a2, $sp, rsp_FinallyFunclet_offset_thread
+
+        addi.d  $t3, $a2, OFFSETOF__Thread__m_ThreadStateFlags
+        addi.w  $a3, $zero, 0x10 // $a3 = TSF_DoNotTriggerGc
+        amor_db.w  $a1, $a3, $t3 // atomically set TSF_DoNotTriggerGc in m_ThreadStateFlags
+
+        fld.d  $f24, $sp, 0x00
+        fld.d  $f25, $sp, 0x08
+        fld.d  $f26, $sp, 0x10
+        fld.d  $f27, $sp, 0x18
+        fld.d  $f28, $sp, 0x20
+        fld.d  $f29, $sp, 0x28
+        fld.d  $f30, $sp, 0x30
+        fld.d  $f31, $sp, 0x38
+
+        FREE_CALL_FUNCLET_FRAME 0x60
+        EPILOG_RETURN
+
+#undef rsp_offset_a1
+#undef rsp_FinallyFunclet_offset_thread
+
+    NESTED_END RhpCallFinallyFunclet, _TEXT
+
+
+//
+// void* FASTCALL RhpCallFilterFunclet(OBJECTREF exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay)
+//
+// INPUT:  a0:  exception object
+//         a1:  filter funclet address
+//         a2:  REGDISPLAY*
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler
+        ALLOC_CALL_FUNCLET_FRAME 0x40
+        fst.d  $f24, $sp, 0x00
+        fst.d  $f25, $sp, 0x08
+        fst.d  $f26, $sp, 0x10
+        fst.d  $f27, $sp, 0x18
+        fst.d  $f28, $sp, 0x20
+        fst.d  $f29, $sp, 0x28
+        fst.d  $f30, $sp, 0x30
+        fst.d  $f31, $sp, 0x38
+
+        ld.d  $t3, $a2, OFFSETOF__REGDISPLAY__pFP
+        ld.d  $fp, $t3, 0
+
+        //
+        // call the funclet
+        //
+        // $a0 still contains the exception object
+        jirl  $ra, $a1, 0
+
+    ALTERNATE_ENTRY RhpCallFilterFunclet2
+
+        fld.d  $f24, $sp, 0x00
+        fld.d  $f25, $sp, 0x08
+        fld.d  $f26, $sp, 0x10
+        fld.d  $f27, $sp, 0x18
+        fld.d  $f28, $sp, 0x20
+        fld.d  $f29, $sp, 0x28
+        fld.d  $f30, $sp, 0x30
+        fld.d  $f31, $sp, 0x38
+
+        FREE_CALL_FUNCLET_FRAME 0x40
+        EPILOG_RETURN
+
+    NESTED_END RhpCallFilterFunclet, _TEXT
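+
+// The amand_db.w/amor_db.w sequences used by these funclet helpers are LoongArch atomic AND/OR
+// operations with full barriers; in C11 terms the clear and set of TSF_DoNotTriggerGc above are
+// approximately:
+//
+//     atomic_fetch_and(&pThread->m_ThreadStateFlags, ~TSF_DoNotTriggerGc);  // clear (0x10)
+//     atomic_fetch_or(&pThread->m_ThreadStateFlags, TSF_DoNotTriggerGc);    // set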
+
+#ifdef FEATURE_OBJCMARSHAL
+
+//
+// void* FASTCALL RhpCallPropagateExceptionCallback(void* pCallbackContext, void* pCallback, REGDISPLAY* pRegDisplay,
+//                                                  ExInfo* pExInfo, PInvokeTransitionFrame* pPreviousTransitionFrame)
+//
+// INPUT:  a0:  callback context
+//         a1:  callback
+//         a2:  REGDISPLAY*
+//         a3:  ExInfo*
+//         a4:  pPreviousTransitionFrame
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallPropagateExceptionCallback, _TEXT, NoHandler
+
+#define rsp_offset_a0 0x10
+#define rsp_offset_a1 0x18
+#define rsp_offset_a2 0x20
+#define rsp_offset_a3 0x28
+#define rsp_offset_a4 0x30
+#define rsp_CallPropagationCallback_offset_thread 0x38
+
+        // Using the NO_FP macro so that the debugger unwinds using SP.
+        // This makes backtraces work even after using RESTORE_PRESERVED_REGISTERS.
+        PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, 0x40
+        ori  $fp, $sp, 0
+        st.d  $a0, $sp, rsp_offset_a0 // a0 to a4 are saved so we can restore them at any time
+        st.d  $a1, $sp, rsp_offset_a1
+        st.d  $a2, $sp, rsp_offset_a2
+        st.d  $a3, $sp, rsp_offset_a3
+        st.d  $a4, $sp, rsp_offset_a4
+        st.d  $zero, $sp, rsp_CallPropagationCallback_offset_thread // makes space to store the thread obj
+
+        //
+        // clear the DoNotTriggerGc flag, trashes a4-a6
+        //
+
+        bl  C_FUNC(RhpGetThread)
+        st.d  $a0, $sp, rsp_CallPropagationCallback_offset_thread
+        ori  $a5, $a0, 0
+        ld.d  $a0, $sp, rsp_offset_a0
+        ld.d  $a1, $sp, rsp_offset_a1
+        ld.d  $a2, $sp, rsp_offset_a2
+        ld.d  $a3, $sp, rsp_offset_a3
+
+        addi.d  $t3, $a5, OFFSETOF__Thread__m_ThreadStateFlags
+
+        addi.w  $a6, $zero, -17 // $a6 = ~TSF_DoNotTriggerGc, TSF_DoNotTriggerGc = 0x10
+        amand_db.w  $a4, $a6, $t3 // atomically clear TSF_DoNotTriggerGc in m_ThreadStateFlags
+
+        //
+        // set preserved regs to the values expected by the funclet
+        //
+        RESTORE_PRESERVED_REGISTERS $a2
+        //
+        // trash the values at the old homes to make sure nobody uses them
+        //
+        TRASH_PRESERVED_REGISTERS_STORAGE $a2
+
+#ifdef _DEBUG
+        // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we
+        // have to spill all the preserved registers and then refill them after the call.
+
+        SAVE_PRESERVED_REGISTERS $a2
+
+        ld.d  $a0, $sp, rsp_CallPropagationCallback_offset_thread // a0 <- Thread*
+        ld.d  $a1, $sp, rsp_offset_a3 // a1 <- current ExInfo*
+        ld.d  $a2, $a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value
+        bl  C_FUNC(RhpValidateExInfoPop)
+
+        ld.d  $a2, $sp, rsp_offset_a2 // a2 <- REGDISPLAY*
+
+        RESTORE_PRESERVED_REGISTERS $a2
+#endif
+
+        ld.d  $a1, $sp, rsp_CallPropagationCallback_offset_thread
+
+        // We must unhijack the thread at this point because the section of stack where the hijack is applied
+        // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack.
+        INLINE_THREAD_UNHIJACK $a1, $a3, $t3 // Thread in a1, trashes a3 and t3
+
+        ld.d  $a3, $sp, rsp_offset_a3 // a3 <- current ExInfo*
+        ld.d  $a2, $a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value
+
+Propagate_PopExInfoLoop:
+        ld.d  $a3, $a3, OFFSETOF__ExInfo__m_pPrevExInfo // a3 <- next ExInfo
+        beq  $a3, $zero, Propagate_DonePopping // if (pExInfo == null) { we're done }
+        blt  $a3, $a2, Propagate_PopExInfoLoop // if (pExInfo < resume SP) { keep going }
+
+Propagate_DonePopping:
+        st.d  $a3, $a1, OFFSETOF__Thread__m_pExInfoStackHead // store the new head on the Thread
+
+        // restore preemptive mode
+        ld.d  $a4, $sp, rsp_offset_a4 // pPreviousTransitionFrame
+        st.d  $a4, $a1, OFFSETOF__Thread__m_pTransitionFrame
+
+        // reset SP and RA and jump to continuation address
+        ld.d  $a0, $sp, rsp_offset_a0 // callback context
+        ld.d  $a1, $sp, rsp_offset_a1 // callback
+        ld.d  $a2, $sp, rsp_offset_a2 // REGDISPLAY*
+        ld.d  $a3, $a2, OFFSETOF__REGDISPLAY__pRA // a3 <- &resume RA value
+        ld.d  $ra, $a3, 0
+        ld.d  $a3, $a2, OFFSETOF__REGDISPLAY__SP // a3 <- resume SP value
+        ori  $sp, $a3, 0
+        jirl  $r0, $a1, 0
+
+#undef rsp_offset_a0
+#undef rsp_offset_a1
+#undef rsp_offset_a2
+#undef rsp_offset_a3
+#undef rsp_CallPropagationCallback_offset_thread
+
+    NESTED_END RhpCallPropagateExceptionCallback, _TEXT
+
+#endif // FEATURE_OBJCMARSHAL
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S
new file mode 100644
index 0000000000000..497666bff3a07
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S
@@ -0,0 +1,198 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+PROBE_FRAME_SIZE = 0xD0 // 4 * 8  for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) +
+                        // 10 * 8 for callee saved registers +
+                        // 1 * 8  for caller SP +
+                        // 2 * 8  for int returns +
+                        // 1 * 8  for alignment padding +
+                        // 4 * 16 for FP returns
+
+// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers
+// and accepts the register bitmask
+// Call this macro first in the method (no further prolog instructions can be added after this).
+//
+//  threadReg : register containing the Thread* (this will be preserved).
+//  trashReg  : register that can be trashed by this macro
+//  BITMASK   : value to initialize m_dwFlags field with (register or #constant)
+.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK
+
+        // Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving
+        // incoming register values into it.
+
+        // First create PInvokeTransitionFrame
+        PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, PROBE_FRAME_SIZE // Push down stack pointer and store FP and RA
+
+        // Slot at $sp+0x10 is reserved for Thread *
+        // Slot at $sp+0x18 is reserved for bitmask of saved registers
+
+        // Save callee saved registers
+        PROLOG_SAVE_REG_PAIR 23, 24, 0x20
+        PROLOG_SAVE_REG_PAIR 25, 26, 0x30
+        PROLOG_SAVE_REG_PAIR 27, 28, 0x40
+        PROLOG_SAVE_REG_PAIR 29, 30, 0x50
+        PROLOG_SAVE_REG_PAIR 31, 2, 0x60
+
+        // Slot at $sp+0x70 is reserved for caller sp
+
+        // Save the integer return registers
+        st.d  $a0, $sp, 0x78
+        st.d  $a1, $sp, 0x80
+
+        // Slot at $sp+0x88 is alignment padding
+
+        // Save the FP return registers
+        fst.d  $f0, $sp, 0x90
+        fst.d  $f1, $sp, 0x98
+        fst.d  $f2, $sp, 0xA0
+        fst.d  $f3, $sp, 0xA8
+
+        // Perform the rest of the PInvokeTransitionFrame initialization.
+        st.d  \threadReg, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread // Thread * (unused by stackwalker)
+        st.d  \BITMASK, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread + 8 // save the register bitmask passed in by caller
+
+        addi.d  \trashReg, $sp, PROBE_FRAME_SIZE // recover value of caller's SP
+        st.d  \trashReg, $sp, 0x70 // save caller's SP
+
+        // link the frame into the Thread
+        ori  \trashReg, $sp, 0
+        st.d  \trashReg, \threadReg, OFFSETOF__Thread__m_pDeferredTransitionFrame
+.endm
+
+//
+// Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved
+// registers and return value to their values from before the probe was called (while also updating any
+// object refs or byrefs).
+//
+.macro POP_PROBE_FRAME
+
+        // Restore the integer return registers
+        ld.d  $a0, $sp, 0x78
+        ld.d  $a1, $sp, 0x80
+
+        // Restore the FP return registers
+        fld.d  $f0, $sp, 0x90
+        fld.d  $f1, $sp, 0x98
+        fld.d  $f2, $sp, 0xA0
+        fld.d  $f3, $sp, 0xA8
+
+        // Restore callee saved registers
+        EPILOG_RESTORE_REG_PAIR 23, 24, 0x20
+        EPILOG_RESTORE_REG_PAIR 25, 26, 0x30
+        EPILOG_RESTORE_REG_PAIR 27, 28, 0x40
+        EPILOG_RESTORE_REG_PAIR 29, 30, 0x50
+        EPILOG_RESTORE_REG_PAIR 31, 2, 0x60
+
+        EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, PROBE_FRAME_SIZE
+.endm
+
+//
+// The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and
+// clears the hijack state.
+//
+// Register state on entry:
+//  All registers correct for return to the original return address.
+//
+// Register state on exit:
+//  a2: thread pointer
+//  t3: transition frame flags for the return registers a0 and a1
+//
+.macro FixupHijackedCallstack
+
+        // a2 <- GetThread()
+#ifdef FEATURE_EMULATED_TLS
+        GETTHREAD_ETLS_2
+#else
+        INLINE_GETTHREAD $a2
+#endif
+
+        //
+        // Fix the stack by restoring the original return address
+        //
+        // Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags
+        ld.d  $ra, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress
+        ld.d  $t3, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress + 8
+
+        //
+        // Clear hijack state
+        //
+        // Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress
+        st.d  $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
+        st.d  $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8
+        // Clear m_uHijackedReturnValueFlags
+        st.d  $zero, $a2, OFFSETOF__Thread__m_uHijackedReturnValueFlags
+
+.endm
+
+//
+// GC Probe Hijack target
+//
+NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
+        FixupHijackedCallstack
+
+        PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a3
+        andi  $t8, $a3, TrapThreadsFlags_TrapThreads_Bit
+        bne  $t8, $zero, WaitForGC
+        jirl  $r0, $ra, 0
+
+WaitForGC:
+        lu12i.w  $t7, ((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) >> 12) & 0xfffff
+        ori  $t7, $t7, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) & 0xfff
+        or  $t3, $t3, $t7
+        b  C_FUNC(RhpWaitForGC)
+NESTED_END RhpGcProbeHijack
+
+.global C_FUNC(RhpThrowHwEx)
+
+NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
+        PUSH_PROBE_FRAME $a2, $a3, $t3
+
+        ld.d  $a0, $a2, OFFSETOF__Thread__m_pDeferredTransitionFrame
+        bl  C_FUNC(RhpWaitForGC2)
+
+        ld.d  $a2, $sp, OFFSETOF__PInvokeTransitionFrame__m_Flags
+        andi  $t8, $a2, PTFF_THREAD_ABORT_BIT
+        bne  $t8, $zero, ThrowThreadAbort
+
+        .cfi_remember_state
+        POP_PROBE_FRAME
+        EPILOG_RETURN
+
+        .cfi_restore_state
+ThrowThreadAbort:
+        POP_PROBE_FRAME
+        addi.w  $a0, $zero, STATUS_REDHAWK_THREAD_ABORT
+        ori  $a1, $ra, 0 // return address as exception PC
+        b  RhpThrowHwEx
+NESTED_END RhpWaitForGC
+
+.global C_FUNC(RhpGcPoll2)
+
+LEAF_ENTRY RhpGcPoll
+        PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a0
+        bne  $a0, $zero, C_FUNC(RhpGcPollRare)
+        jirl  $r0, $ra, 0
+LEAF_END RhpGcPoll
+
+NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler
+        PUSH_COOP_PINVOKE_FRAME $a0
+        bl  RhpGcPoll2
+        POP_COOP_PINVOKE_FRAME
+        jirl  $r0, $ra, 0
+NESTED_END RhpGcPollRare
+
+
+#ifdef FEATURE_GC_STRESS
+
+//
+// GC Stress Hijack targets
+//
+LEAF_ENTRY RhpGcStressHijack, _TEXT
+        // NYI
+        EMIT_BREAKPOINT
+LEAF_END RhpGcStressHijack, _TEXT
+
+#endif // FEATURE_GC_STRESS
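+
+// Taken together, the poll helpers above implement, approximately:
+//
+//     void RhpGcPoll()
+//     {
+//         if (RhpTrapThreads != TrapThreadsFlags_None)
+//             RhpGcPollRare();   // builds a PInvokeTransitionFrame and calls RhpGcPoll2
+//     }
+//
+// while RhpGcProbeHijack first restores the original return address from the Thread
+// (m_pvHijackedReturnAddress) and clears the hijack fields before deciding whether to wait for the GC.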
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S
new file mode 100644
index 0000000000000..c096d77796397
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S
@@ -0,0 +1,52 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+POINTER_SIZE = 0x08
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    //
+    // RhCommonStub
+    //
+    // INPUT: t7: thunk's data block
+    //
+    // TRASHES: t0, t1, t7
+    //
+    LEAF_ENTRY RhCommonStub, _TEXT
+        // There are arbitrary callers passing arguments with arbitrary signatures.
+        // Custom calling convention:
+        //      t7 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers)
+
+#ifdef FEATURE_EMULATED_TLS
+        // This doesn't save and restore the floating point argument registers. If we encounter a
+        // target system that uses TLS emulation and modify these registers during this call we
+        // need to save and restore them, too
+        GETTHUNKDATA_ETLS_9
+#else
+        INLINE_GET_TLS_VAR $t0, C_FUNC(tls_thunkData)
+#endif
+
+        // t0 = base address of TLS data
+        // t7 = address of context cell in thunk's data
+
+        // store thunk address in thread static
+        ld.d  $t1, $t7, 0
+        st.d  $t1, $t0, 0
+
+        // Now load the target address and jump to it.
+        ld.d  $t7, $t7, POINTER_SIZE
+        jirl  $r0, $t7, 0
+
+    LEAF_END RhCommonStub, _TEXT
+
+    //
+    // IntPtr RhGetCommonStubAddress()
+    //
+    LEAF_ENTRY RhGetCommonStubAddress, _TEXT
+        PREPARE_EXTERNAL_VAR RhCommonStub, $a0
+        jirl  $r0, $ra, 0
+    LEAF_END RhGetCommonStubAddress, _TEXT
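+
+// The thunk's data block referenced above is just two pointers, i.e. conceptually:
+//
+//     struct ThunkData {
+//         void* context;   // stored into the tls_thunkData thread-static by RhCommonStub
+//         void* target;    // tail-called by RhCommonStub
+//     };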
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S
new file mode 100644
index 0000000000000..ea5d91a1a1c1f
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S
@@ -0,0 +1,5 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S
new file mode 100644
index 0000000000000..5d17a016cc989
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S
@@ -0,0 +1,58 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+.global RhpTrapThreads
+
+// Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h
+PTFF_SAVE_SP = 0x00000200
+
+// Bit position of the PTFF_THREAD_ABORT flag, to be used with andi+beq/bne instructions
+PTFF_THREAD_ABORT_BIT = 36
+
+//
+// RhpPInvoke
+//
+// IN:  a0: address of pinvoke frame
+//
+// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite.
+// The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it.
+// Also, the codegenerator must ensure that there are no live GC references in callee saved registers.
+//
+
+NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
+        st.d  $fp, $a0, OFFSETOF__PInvokeTransitionFrame__m_FramePointer
+        st.d  $ra, $a0, OFFSETOF__PInvokeTransitionFrame__m_RIP
+        st.d  $sp, $a0, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs
+        ori  $t0, $zero, PTFF_SAVE_SP
+        st.d  $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_Flags
+
+        // get TLS global variable address
+
+#ifdef FEATURE_EMULATED_TLS
+        GETTHREAD_ETLS_1
+#else
+        INLINE_GETTHREAD $a1
+#endif
+
+        st.d  $a1, $a0, OFFSETOF__PInvokeTransitionFrame__m_pThread
+        st.d  $a0, $a1, OFFSETOF__Thread__m_pTransitionFrame
+        jirl  $r0, $ra, 0
+NESTED_END RhpPInvoke, _TEXT
+
+
+LEAF_ENTRY RhpPInvokeReturn, _TEXT
+        ld.d  $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_pThread
+        st.d  $zero, $t0, OFFSETOF__Thread__m_pTransitionFrame
+
+        PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a5
+
+        bne  $a5, $zero, 0f // TrapThreadsFlags_None = 0
+        jirl  $r0, $ra, 0
+0:
+        // passing transition frame pointer in a0
+        b  C_FUNC(RhpWaitForGC2)
+LEAF_END RhpPInvokeReturn, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S
new file mode 100644
index 0000000000000..138992ef1a329
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S
@@ -0,0 +1,117 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+    .extern RhpCidResolve
+    .extern RhpUniversalTransition_DebugStepTailCall
+
+    // Macro that generates code to check a single cache entry.
+    .macro CHECK_CACHE_ENTRY entry
+        // Check a single entry in the cache.
+        //  t0 : Cache data structure. Also used for target address jump.
+        //  t1 : Instance MethodTable*
+        //  t8 : Indirection cell address, preserved
+        //  t3 : Trashed
+        ld.d  $t3, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))
+        bne  $t1, $t3, 0f
+        ld.d  $t0, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)
+        jirl  $r0, $t0, 0
+0:
+    .endm
+
+//
+// Macro that generates a stub consuming a cache with the given number of entries.
+//
+    .macro DEFINE_INTERFACE_DISPATCH_STUB entries
+
+    NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler
+
+        // t8 holds the indirection cell address. Load the cache pointer.
+        ld.d  $t0, $t8, OFFSETOF__InterfaceDispatchCell__m_pCache
+
+        // Load the MethodTable from the object instance in a0.
+    ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries
+        ld.d  $t1, $a0, 0
+
+    .global CurrentEntry
+    .set CurrentEntry, 0
+
+    .rept \entries
+        CHECK_CACHE_ENTRY CurrentEntry
+        .set CurrentEntry, CurrentEntry + 1
+    .endr
+
+        // t8 still contains the indirection cell address.
+        b  C_FUNC(RhpInterfaceDispatchSlow)
+
+    NESTED_END "RhpInterfaceDispatch\entries", _TEXT
+
+    .endm
+
+//
+// Define all the stub routines we currently need.
+//
+// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the
+// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens
+// during the interface dispatch.
+//
+    DEFINE_INTERFACE_DISPATCH_STUB 1
+    DEFINE_INTERFACE_DISPATCH_STUB 2
+    DEFINE_INTERFACE_DISPATCH_STUB 4
+    DEFINE_INTERFACE_DISPATCH_STUB 8
+    DEFINE_INTERFACE_DISPATCH_STUB 16
+    DEFINE_INTERFACE_DISPATCH_STUB 32
+    DEFINE_INTERFACE_DISPATCH_STUB 64
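+
+// Conceptually, each generated stub performs the following lookup (entry layout per
+// OFFSETOF__InterfaceDispatchCache__m_rgEntries; the struct shown here is illustrative):
+//
+//     struct InterfaceDispatchCacheEntry { MethodTable* mt; void* target; };  // 16 bytes
+//
+//     MethodTable* mt = *(MethodTable**)obj;               // may fault: null 'this'
+//     for (int i = 0; i < N; i++)                          // N = 1, 2, 4, ... 64, fully unrolled
+//         if (cache->m_rgEntries[i].mt == mt)
+//             goto cache->m_rgEntries[i].target;
+//     RhpInterfaceDispatchSlow();                          // cache miss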
+
+//
+// Initial dispatch on an interface when we don't have a cache yet.
+//
+    LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+    ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+        // Trigger an AV if we're dispatching on a null this.
+        // The exception handling infrastructure is aware of the fact that this is the first
+        // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here
+        // to a NullReferenceException at the callsite.
+        ld.d  $zero, $a0, 0
+
+        // Just tail call to the cache miss helper.
+        b  C_FUNC(RhpInterfaceDispatchSlow)
+    LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+//
+// Stub dispatch routine for dispatch to a vtable slot
+//
+    LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT
+        // t8 contains the interface dispatch cell address.
+        // load t3 to point to the vtable offset (which is stored in the m_pCache field).
+        ld.d  $t3, $t8, OFFSETOF__InterfaceDispatchCell__m_pCache
+
+        // Load the MethodTable from the object instance in a0, and add it to the vtable offset
+        // to get the address in the vtable of what we want to dereference
+        ld.d  $t4, $a0, 0
+        add.d  $t3, $t3, $t4
+
+        // Load the target address of the vtable into t3
+        ld.d  $t3, $t3, 0
+
+        jirl  $r0, $t3, 0
+    LEAF_END RhpVTableOffsetDispatch, _TEXT
+
+//
+// Cache miss case, call the runtime to resolve the target and update the cache.
+// Use universal transition helper to allow an exception to flow out of resolution.
+//
+    LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
+        // t8 contains the interface dispatch cell address.
+        // Calling convention of the universal thunk is:
+        //  t7: target address for the thunk to call
+        //  t8: parameter of the thunk's target
+        PREPARE_EXTERNAL_VAR RhpCidResolve, $t7
+        b  C_FUNC(RhpUniversalTransition_DebugStepTailCall)
+    LEAF_END RhpInterfaceDispatchSlow, _TEXT
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S
new file mode 100644
index 0000000000000..469f7b6cdaa5b
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S
@@ -0,0 +1,191 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+
+#ifdef _DEBUG
+#define TRASH_SAVED_ARGUMENT_REGISTERS
+#endif
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+    .global RhpIntegerTrashValues
+    .global RhpFpTrashValues
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
+// Padding to account for the odd number of saved integer registers
+#define ALIGNMENT_PADDING_SIZE (8)
+
+#define COUNT_ARG_REGISTERS (9)
+#define INTEGER_REGISTER_SIZE (8)
+#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE)
+
+// Largest return block is 4 doubles
+#define RETURN_BLOCK_SIZE (32)
+
+#define COUNT_FLOAT_ARG_REGISTERS (8)
+#define FLOAT_REGISTER_SIZE (16)
+#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE)
+
+#define PUSHED_RA_SIZE (8)
+#define PUSHED_FP_SIZE (8)
+
+//
+// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions:
+//
+//      ALIGNMENT_PADDING_SIZE
+//      ARGUMENT_REGISTERS_SIZE
+//      RETURN_BLOCK_SIZE
+//      FLOAT_ARG_REGISTERS_SIZE
+//      PUSHED_RA_SIZE
+//      PUSHED_FP_SIZE
+//
+
+#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE)
+
+#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE)
+
+#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE)
+#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE)
+
+//
+// RhpUniversalTransition
+//
+// At input to this function, a0-7/tp, f0-7 and the stack may contain any number of arguments.
+//
+// In addition, there are 2 extra arguments passed in the intra-procedure-call scratch registers:
+//  t7 will contain the managed function that is to be called by this transition function
+//  t8 will contain the pointer sized extra argument to the managed function
+//
+// When invoking the callee:
+//
+//  a0 shall contain a pointer to the TransitionBlock
+//  a1 shall contain the value that was in t8 at entry to this function
+//
+// Frame layout is:
+//
+//  {StackPassedArgs}                           ChildSP+100     CallerSP+000
+//  {AlignmentPad (0x8 bytes)}                  ChildSP+0F8     CallerSP-008
+//  {IntArgRegs (a0-a7/tp) (0x48 bytes)}        ChildSP+0B0     CallerSP-050
+//  {ReturnBlock (0x20 bytes)}                  ChildSP+090     CallerSP-070
+//   -- The base address of the Return block is the TransitionBlock pointer, the floating point args are
+//      in the neg space of the TransitionBlock pointer.  Note that the callee has knowledge of the exact
+//      layout of all pieces of the frame that lie at or above the pushed floating point registers.
+//  {FpArgRegs (f0-f7) (0x80 bytes)}            ChildSP+010     CallerSP-0F0
+//  {PushedRA}                                  ChildSP+008     CallerSP-0F8
+//  {PushedFP}                                  ChildSP+000     CallerSP-100
+//
+// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure
+// must be updated as well.
+//
+// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has
+// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed
+// FpArgRegs.
+//
+// NOTE: The stack walker guarantees that conservative GC reporting will be applied to
+// everything between the base of the ReturnBlock and the top of the StackPassedArgs.
+//
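+
+// As a cross-check of the offsets in the layout above: DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK
+// = PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE = 8 + 8 + 0x80 = 0x90, which matches
+// the ReturnBlock at ChildSP+090, and STACK_SIZE = 8 + 0x48 + 0x20 + 0x80 + 8 + 8 = 0x100, matching
+// CallerSP = ChildSP+100.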
+
+    .text
+
+    .macro UNIVERSAL_TRANSITION FunctionName
+
+    NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler
+
+        // FP and RA registers
+        PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, STACK_SIZE // Push down stack pointer and store FP and RA
+
+        // Floating point registers
+        fst.d  $f0, $sp, FLOAT_ARG_OFFSET
+        fst.d  $f1, $sp, FLOAT_ARG_OFFSET + 0x08
+        fst.d  $f2, $sp, FLOAT_ARG_OFFSET + 0x10
+        fst.d  $f3, $sp, FLOAT_ARG_OFFSET + 0x18
+        fst.d  $f4, $sp, FLOAT_ARG_OFFSET + 0x20
+        fst.d  $f5, $sp, FLOAT_ARG_OFFSET + 0x28
+        fst.d  $f6, $sp, FLOAT_ARG_OFFSET + 0x30
+        fst.d  $f7, $sp, FLOAT_ARG_OFFSET + 0x38
+
+        // Space for return buffer data (0x20 bytes)
+
+        // Save argument registers
+        st.d  $a0, $sp, ARGUMENT_REGISTERS_OFFSET
+        st.d  $a1, $sp, ARGUMENT_REGISTERS_OFFSET + 0x08
+        st.d  $a2, $sp, ARGUMENT_REGISTERS_OFFSET + 0x10
+        st.d  $a3, $sp, ARGUMENT_REGISTERS_OFFSET + 0x18
+        st.d  $a4, $sp, ARGUMENT_REGISTERS_OFFSET + 0x20
+        st.d  $a5, $sp, ARGUMENT_REGISTERS_OFFSET + 0x28
+        st.d  $a6, $sp, ARGUMENT_REGISTERS_OFFSET + 0x30
+        st.d  $a7, $sp, ARGUMENT_REGISTERS_OFFSET + 0x38
+        st.d  $tp, $sp, ARGUMENT_REGISTERS_OFFSET + 0x40
+        st.d  $r0, $sp, ARGUMENT_REGISTERS_OFFSET + 0x48
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+        PREPARE_EXTERNAL_VAR RhpFpTrashValues, $a1
+
+        fld.d  $f0, $a1, 0
+        fld.d  $f1, $a1, 0x08
+        fld.d  $f2, $a1, 0x10
+        fld.d  $f3, $a1, 0x18
+        fld.d  $f4, $a1, 0x20
+        fld.d  $f5, $a1, 0x28
+        fld.d  $f6, $a1, 0x30
+        fld.d  $f7, $a1, 0x38
+
+        PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, $a1
+
+        ld.d  $a2, $a1, 0x10
+        ld.d  $a3, $a1, 0x18
+        ld.d  $a4, $a1, 0x20
+        ld.d  $a5, $a1, 0x28
+        ld.d  $a6, $a1, 0x30
+        ld.d  $a7, $a1, 0x38
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
+        addi.d  $a0, $sp, DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function is a pointer to the return block
+        ori  $a1, $t8, 0 // Second parameter to target function
+        jirl  $ra, $t7, 0
+
+        // We cannot make the label public as that tricks DIA stackwalker into thinking
+        // it's the beginning of a method. For this reason we export an auxiliary variable
+        // holding the address instead.
+    ALTERNATE_ENTRY ReturnFrom\FunctionName
+
+        // Move the result (the target address) to t3 so it doesn't get overridden when we restore the
+        // argument registers.
+        ori  $t3, $a0, 0
+
+        // Restore floating point registers
+        fld.d  $f0, $sp, FLOAT_ARG_OFFSET
+        fld.d  $f1, $sp, FLOAT_ARG_OFFSET + 0x08
+        fld.d  $f2, $sp, FLOAT_ARG_OFFSET + 0x10
+        fld.d  $f3, $sp, FLOAT_ARG_OFFSET + 0x18
+        fld.d  $f4, $sp, FLOAT_ARG_OFFSET + 0x20
+        fld.d  $f5, $sp, FLOAT_ARG_OFFSET + 0x28
+        fld.d  $f6, $sp, FLOAT_ARG_OFFSET + 0x30
+        fld.d  $f7, $sp, FLOAT_ARG_OFFSET + 0x38
+
+        // Restore the argument registers
+        ld.d  $a0, $sp, ARGUMENT_REGISTERS_OFFSET
+        ld.d  $a1, $sp, ARGUMENT_REGISTERS_OFFSET + 0x08
+        ld.d  $a2, $sp, ARGUMENT_REGISTERS_OFFSET + 0x10
+        ld.d  $a3, $sp, ARGUMENT_REGISTERS_OFFSET + 0x18
+        ld.d  $a4, $sp, ARGUMENT_REGISTERS_OFFSET + 0x20
+        ld.d  $a5, $sp, ARGUMENT_REGISTERS_OFFSET + 0x28
+        ld.d  $a6, $sp, ARGUMENT_REGISTERS_OFFSET + 0x30
+        ld.d  $a7, $sp, ARGUMENT_REGISTERS_OFFSET + 0x38
+        ld.d  $tp, $sp, ARGUMENT_REGISTERS_OFFSET + 0x40
+
+        // Restore FP and RA registers, and free the allocated stack block
+        EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, STACK_SIZE
+
+        // Tailcall to the target address.
+        jirl  $r0, $t3, 0
+
+    NESTED_END Rhp\FunctionName, _TEXT
+
+    .endm
+
+    // To enable proper step-in behavior in the debugger, we need to have two instances
+    // of the thunk. For the first one, the debugger steps into the call in the function,
+    // for the other, it steps over it.
+    // To enable proper step-in behavior in the debugger, we need to have two instances
+    // of the thunk. For the first one, the debugger steps into the call in the function,
+    // for the other, it steps over it.
+    UNIVERSAL_TRANSITION UniversalTransition
+    UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S
new file mode 100644
index 0000000000000..f8d23d2356d8d
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S
@@ -0,0 +1,355 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+
+// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+// during garbage collections to verify that object references were never written to the heap without using a
+// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing
+// new references to the real heap. Since this cannot be solved perfectly without critical sections around the
+// entire update process, we instead update the shadow location and then re-check the real location (as two
+// ordered operations) and, if there is a disparity, we rewrite the shadow location with a special value
+// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+// time and these write barrier operations are atomic with respect to GCs, this is sufficient to guarantee that
+// the shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
#ifdef WRITE_BARRIER_CHECK
+
+    .global $g_GCShadow
+    .global $g_GCShadowEnd
+
+    // On entry:
+    //   $destReg: location to be updated
+    //   $refReg: objectref to be stored
+    //
+    // On exit:
+    //   t3,t4: trashed
+    //   other registers are preserved
+    //
+    .macro UPDATE_GC_SHADOW destReg, refReg
+
+    // If g_GCShadow is 0, don't perform the check.
+    PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3
+    beq  $t3, $zero, 1f
+
+    // Save destReg since we're about to modify it (and we need the original value both within the macro and
+    // once we exit the macro).
+    ori  $t4, \destReg, 0
+
+    // If the location is below the heap, skip the check.
+    PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3
+    bltu  $t4, $t3, 0f
+
+    // Transform destReg into the equivalent address in the shadow heap.
+    sub.d  \destReg, \destReg, $t3
+
+    PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3
+    add.d  \destReg, \destReg, $t3
+
+    PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, $t3
+    bgeu  \destReg, $t3, 0f
+
+    // Update the shadow heap.
+    st.d  \refReg, \destReg, 0
+
+    // The following read must be strongly ordered with respect to the write we have just performed in order to
+    // prevent race conditions.
+    dbar 0
+
+    // Now check that the real heap location still contains the value we just wrote into the shadow heap.
+    ld.d  $t3, $t4, 0
+    beq  $t3, \refReg, 0f
+
+    // Someone went and updated the real heap. We need to invalidate the shadow location with INVALIDGCVALUE
+    // since we cannot guarantee whose shadow update won.
+    lu12i.w  $t3, ((INVALIDGCVALUE >> 12) & 0xFFFFF)
+    ori  $t3, $t3, (INVALIDGCVALUE & 0xFFF)
+    st.d  $t3, \destReg, 0
+
+0:
+    // Restore original destReg value
+    ori  \destReg, $t4, 0
+
+1:
+    .endm
+
+#else // WRITE_BARRIER_CHECK
+
+    .macro UPDATE_GC_SHADOW destReg, refReg
+    .endm
+
+#endif // WRITE_BARRIER_CHECK
+
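The shadow-heap logic above corresponds roughly to the C sketch below. The globals mirror the ones used by the asm; the concrete value of INVALIDGCVALUE is an assumption here and the real one comes from the runtime headers.

    #include <stddef.h>
    #include <stdint.h>

    #define INVALIDGCVALUE 0xCCCCCCCD   /* assumed sentinel value */

    extern uint8_t *g_GCShadow, *g_GCShadowEnd, *g_lowest_address;

    static void update_gc_shadow(uintptr_t *dest, uintptr_t ref)
    {
        if (g_GCShadow == NULL)
            return;                                  /* checking disabled */
        if ((uint8_t *)dest < g_lowest_address)
            return;                                  /* below the GC heap */
        uintptr_t *shadow =
            (uintptr_t *)(g_GCShadow + ((uint8_t *)dest - g_lowest_address));
        if ((uint8_t *)shadow >= g_GCShadowEnd)
            return;                                  /* outside the shadow heap */
        *shadow = ref;                               /* mirror the real write */
        __atomic_thread_fence(__ATOMIC_SEQ_CST);     /* the dbar 0 above */
        if (*dest != ref)                            /* lost a race on the real heap */
            *shadow = INVALIDGCVALUE;                /* disable checking for this slot */
    }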
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+// name of the register that points to the location to be updated and the name of the register that holds the
+// object reference (this should be in upper case as it is used in the definition of the name of the helper).
+
+// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for
+// some interlocked helpers that need an inline barrier.
+
+    // On entry:
+    //   destReg: location to be updated (cannot be t3,t4,t5)
+    //   refReg: objectref to be stored (cannot be t3,t4,t5)
+    //
+    // On exit:
+    //   t3,t4,t5: trashed
+    //
+    .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+    // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+    // we are in a debug build and write barrier checking has been enabled).
+    UPDATE_GC_SHADOW \destReg, \refReg
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+    // Update the write watch table if necessary
+    PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, $t3
+
+    beq  $t3, $zero, 2f
+    srli.d  $t5, \destReg, 12
+    add.d  $t3, $t3, $t5    // SoftwareWriteWatch::AddressToTableByteIndexShift
+    ld.b  $t4, $t3, 0
+    bne  $t4, $zero, 2f
+    ori  $t4, $zero, 0xFF
+    st.b  $t4, $t3, 0
+#endif
+
+2:
+    // We can skip the card table write if the reference is to
+    // an object not on the ephemeral segment.
+    PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, $t3
+    bltu  \refReg, $t3, 0f
+
+    PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, $t3
+    bgeu  \refReg, $t3, 0f
+
+    // Set this object's card, if it has not already been set.
+    PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, $t3
+    srli.d  $t5, \destReg, 11
+    add.d  $t4, $t3, $t5
+
+    // Check that this card has not already been written. Avoiding useless writes is a big win on
+    // multi-proc systems since it avoids cache thrashing.
+    ld.b  $t3, $t4, 0
+    ori  $t5, $zero, 0xFF
+    beq  $t3, $t5, 0f
+
+    ori  $t3, $zero, 0xFF
+    st.b  $t3, $t4, 0
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    // Check if we need to update the card bundle table
+    PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, $t3
+    srli.d  $t5, \destReg, 21
+    add.d  $t4, $t3, $t5
+    ld.b  $t3, $t4, 0
+    ori  $t5, $zero, 0xFF
+    beq  $t3, $t5, 0f
+
+    ori  $t3, $zero, 0xFF
+    st.b  $t3, $t4, 0
+#endif
+
+0:
+    // Exit label
+    .endm
+
+    // On entry:
+    //   destReg: location to be updated
+    //   refReg: objectref to be stored
+    //
+    // On exit:
+    //   t3,t4,t5: trashed
+    //
+    .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+    // The "check" of this checked write barrier - is destReg
+    // within the heap? if no, early out.
+    PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3
+    sltu  $t4, \destReg, $t3
+
+    PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3
+
+    // Skip the barrier if \destReg < g_lowest_address or \destReg >= g_highest_address,
+    // i.e. if the destination is outside the GC heap.
+    bnez  $t4, 0f
+    bgeu  \destReg, $t3, 0f
+
+    INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg
+
+0:
+    // Exit label
+    .endm
+
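In C terms, the card-marking fast path of INSERT_UNCHECKED_WRITE_BARRIER_CORE amounts to the sketch below. The shift constants (11 and 21) and the global names are taken from the asm above; everything else is illustrative, not the runtime's actual implementation.

    #include <stdint.h>

    extern uint8_t *g_card_table;
    extern uint8_t *g_card_bundle_table;
    extern uint8_t *g_ephemeral_low, *g_ephemeral_high;

    static void mark_cards(uintptr_t *dest, uintptr_t ref)
    {
        /* Only references to ephemeral (young) objects need a card. */
        if ((uint8_t *)ref < g_ephemeral_low || (uint8_t *)ref >= g_ephemeral_high)
            return;

        /* One card byte covers 2^11 bytes of heap. */
        uint8_t *card = &g_card_table[(uintptr_t)dest >> 11];
        if (*card != 0xFF)          /* avoid dirtying the cache line if already set */
            *card = 0xFF;

        /* One card bundle byte covers 2^21 bytes of heap (when enabled). */
        uint8_t *bundle = &g_card_bundle_table[(uintptr_t)dest >> 21];
        if (*bundle != 0xFF)
            *bundle = 0xFF;
    }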
+// void JIT_ByRefWriteBarrier
+// On entry:
+//   t8: the source address (points to object reference to write)
+//   t6: the destination address (object reference written here)
+//
+// On exit:
+//   t8: incremented by 8
+//   t6: incremented by 8
+//   t7: trashed
+//   t3, t4, t5: trashed
+//
+// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
+// if you add more trashed registers.
+//
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+    LEAF_ENTRY RhpByRefAssignRef, _TEXT
+
+    ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1
+    ld.d  $t7, $t8, 0
+    addi.d  $t8, $t8, 8
+    b  C_FUNC(RhpCheckedAssignRef)
+
+    LEAF_END RhpByRefAssignRef, _TEXT
+
+// JIT_CheckedWriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that may reside
+// on the managed heap.
+//
+// On entry:
+//   t6: the destination address (LHS of the assignment).
+//       May not be a heap location (hence the checked).
+//   t7: the object reference (RHS of the assignment).
+//
+// On exit:
+//   t3, t4, t5: trashed
+//   t6: incremented by 8
+    LEAF_ENTRY RhpCheckedAssignRef, _TEXT
+
+    // Is the destination within the GC heap range [g_lowest_address, g_highest_address)?
+    // If not, this is not a heap write and no barrier is needed.
+    PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3
+    bltu  $t6, $t3, NotInHeap
+
+    PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3
+    bltu  $t6, $t3, C_FUNC(RhpAssignRefLoongArch64)
+
+NotInHeap:
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    st.d  $t7, $t6, 0
+    addi.d  $t6, $t6, 8
+    jirl  $r0, $ra, 0
+
+    LEAF_END RhpCheckedAssignRef, _TEXT
+
+// JIT_WriteBarrier(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that are known to
+// reside on the managed heap.
+//
+// On entry:
+//   t6: the destination address (LHS of the assignment).
+//   t7: the object reference (RHS of the assignment).
+//
+// On exit:
+//   t3, t4, t5: trashed
+//   t6: incremented by 8
+    LEAF_ENTRY RhpAssignRefLoongArch64, _TEXT
+
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+    st.d  $t7, $t6, 0
+
+    INSERT_UNCHECKED_WRITE_BARRIER_CORE $t6, $t7
+
+    addi.d  $t6, $t6, 8
+    jirl  $r0, $ra, 0
+
+    LEAF_END RhpAssignRefLoongArch64, _TEXT
+
+// Same as RhpAssignRefLoongArch64, but with standard ABI.
+    LEAF_ENTRY RhpAssignRef, _TEXT
+    ori  $t6, $a0, 0    // t6 = dst
+    ori  $t7, $a1, 0    // t7 = val
+    b  C_FUNC(RhpAssignRefLoongArch64)
+    LEAF_END RhpAssignRef, _TEXT
+
+
+// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
+// successful updates.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
+//
+// Interlocked compare exchange on objectref.
+//
+// On entry:
+//   a0: pointer to objectref
+//   a1: exchange value
+//   a2: comparand
+//
+// On exit:
+//   a0: original value of objectref
+//   t0, t1, t3, t4, t5: trashed
+//
+    LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
+
+CmpXchgRetry:
+    // Load the current value and compare it against the comparand. The LL/SC pair makes the
+    // compare-and-swap atomic; a plain load/branch/store sequence would not be.
+    ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
+    ll.d  $t1, $a0, 0
+    bne  $t1, $a2, CmpXchgNoUpdate
+
+    // Values match; attempt to store the exchange value.
+    ori  $t0, $a1, 0
+    sc.d  $t0, $a0, 0
+    beq  $t0, $zero, CmpXchgRetry
+    dbar 0
+
+DoCardsCmpXchg:
+    // We have successfully updated the value of the objectref so now we need a GC write barrier.
+    // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are
+    // already correctly set up.
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE $a0, $a1
+
+CmpXchgNoUpdate:
+    // t1 still contains the original value.
+    ori  $a0, $t1, 0
+
+    jirl  $r0, $ra, 0
+
+    LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
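RhpCheckedLockCmpXchg boils down to the C equivalent below; checked_write_barrier is a hypothetical stand-in for INSERT_CHECKED_WRITE_BARRIER_CORE, and the barrier runs only when the swap succeeded.

    #include <stdint.h>

    extern void checked_write_barrier(void **dest, void *ref);  /* hypothetical */

    static void *checked_lock_cmpxchg(void **dest, void *value, void *comparand)
    {
        void *observed = comparand;
        /* On failure, __atomic_compare_exchange_n stores the value it observed
           into 'observed' -- exactly what the asm leaves in $t1. */
        if (__atomic_compare_exchange_n(dest, &observed, value, 0,
                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
            checked_write_barrier(dest, value);
        return observed;   /* original value, returned in $a0 */
    }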
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedXchg(Object** destination, Object* value)
+//
+// Interlocked exchange on objectref.
+//
+// On entry:
+//   a0: pointer to objectref
+//   a1: exchange value
+//
+// On exit:
+//   a0: original value of objectref
+//   t0, t1, t3, t4, t5: trashed
+//
+    LEAF_ENTRY RhpCheckedXchg, _TEXT
+
+XchgRetry:
+    // Swap in the new value atomically via LL/SC; a plain load/store pair would not be atomic.
+    ALTERNATE_ENTRY RhpCheckedXchgAVLocation
+    ll.d  $t1, $a0, 0
+    ori  $t0, $a1, 0
+    sc.d  $t0, $a0, 0
+    beq  $t0, $zero, XchgRetry
+    dbar 0
+
+DoCardsXchg:
+    // We have successfully updated the value of the objectref so now we need a GC write barrier.
+    // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are
+    // already correctly set up.
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE $a0, $a1
+
+    // $t1 still contains the original value.
+    ori  $a0, $t1, 0
+
+    jirl  $r0, $ra, 0
+
+    LEAF_END RhpCheckedXchg, _TEXT
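RhpCheckedXchg is the analogous atomic exchange followed by the barrier (same hypothetical stand-in as above):

    #include <stdint.h>

    extern void checked_write_barrier(void **dest, void *ref);  /* hypothetical */

    static void *checked_xchg(void **dest, void *value)
    {
        void *old = __atomic_exchange_n(dest, value, __ATOMIC_SEQ_CST);
        checked_write_barrier(dest, value);
        return old;        /* original value, returned in $a0 */
    }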
diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc
index 80f633327c830..68ba993209e42 100644
--- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc
@@ -42,4 +42,6 @@
 #include "unixasmmacrosarm64.inc"
 #elif defined(HOST_X86)
 #include "unixasmmacrosx86.inc"
+#elif defined(HOST_LOONGARCH64)
+#include "unixasmmacrosloongarch64.inc"
 #endif
diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc
new file mode 100644
index 0000000000000..d7d2bedb3dd2e
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc
@@ -0,0 +1,335 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "AsmOffsets.inc"
+
+.macro NESTED_ENTRY Name, Section, Handler
+    LEAF_ENTRY \Name, \Section
+    .ifnc \Handler, NoHandler
+    .cfi_personality 0x1b, C_FUNC(\Handler) // 0x1b == DW_EH_PE_pcrel | DW_EH_PE_sdata4
+    .endif
+.endm
+
+.macro NESTED_END Name, Section
+    LEAF_END \Name, \Section
+.endm
+
+.macro PATCH_LABEL Name
+    .global C_FUNC(\Name)
+C_FUNC(\Name):
+.endm
+
+.macro ALTERNATE_ENTRY Name
+    .global C_FUNC(\Name)
+    .hidden C_FUNC(\Name)
+C_FUNC(\Name):
+.endm
+
+.macro LEAF_ENTRY Name, Section
+    .global C_FUNC(\Name)
+    .hidden C_FUNC(\Name)
+    .type \Name, %function
+C_FUNC(\Name):
+    .cfi_startproc
+.endm
+
+.macro LEAF_END Name, Section
+    .size \Name, .-\Name
+    .cfi_endproc
+.endm
+
+.macro PREPARE_EXTERNAL_VAR Name, HelperReg
+    la.local  \HelperReg, \Name
+.endm
+
+.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg
+    la.local  \HelperReg, \Name
+    ld.d  \HelperReg, \HelperReg, 0
+.endm
+
+.macro PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg
+    la.local  \HelperReg, \Name
+    ld.w  \HelperReg, \HelperReg, 0
+.endm
+
+
+.macro PROLOG_STACK_ALLOC Size
+    addi.d  $sp, $sp, -\Size
+.endm
+
+.macro EPILOG_STACK_FREE Size
+    addi.d  $sp, $sp, \Size
+    .cfi_adjust_cfa_offset -\Size
+.endm
+
+.macro EPILOG_STACK_RESTORE
+    ori  $sp, $fp, 0
+    .cfi_restore 3
+.endm
+
+.macro PROLOG_SAVE_REG reg, ofs
+    st.d  $r\reg, $sp, \ofs
+    .cfi_rel_offset \reg, \ofs
+.endm
+
+.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs
+    st.d  $r\reg1, $sp, \ofs
+    st.d  $r\reg2, $sp, \ofs + 8
+    .cfi_rel_offset \reg1, \ofs
+    .cfi_rel_offset \reg2, \ofs + 8
+    // \reg1 is a bare register number; $fp is $r22
+    .ifc \reg1, 22
+    ori  $fp, $sp, 0
+    .cfi_def_cfa_register 22
+    .endif
+.endm
+
+.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize, __def_cfa_save=1
+    addi.d  $sp, $sp, -\ssize
+    //.cfi_adjust_cfa_offset \ssize
+    .cfi_def_cfa 3, \ssize
+
+    st.d  $r\reg1, $sp, 0
+    st.d  $r\reg2, $sp, 8
+
+    .cfi_rel_offset \reg1, 0
+    .cfi_rel_offset \reg2, 8
+    .if (\__def_cfa_save == 1)
+    ori  $fp, $sp, 0
+    .cfi_def_cfa_register 22
+    .endif
+.endm
+
+.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ssize
+    addi.d  $sp, $sp, -\ssize
+    //.cfi_adjust_cfa_offset \ssize
+    .cfi_def_cfa 3, \ssize
+
+    st.d  $r\reg1, $sp, 0
+    st.d  $r\reg2, $sp, 8
+
+    .cfi_rel_offset \reg1, 0
+    .cfi_rel_offset \reg2, 8
+.endm
+
+
+.macro EPILOG_RESTORE_REG reg, ofs
+    ld.d  $r\reg, $sp, \ofs
+    .cfi_restore \reg
+.endm
+
+.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs
+    ld.d  $r\reg1, $sp, \ofs
+    ld.d  $r\reg2, $sp, \ofs + 8
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+.endm
+
+.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ofs
+    ld.d  $r\reg1, $sp, 0
+    ld.d  $r\reg2, $sp, 8
+    addi.d  $sp, $sp, \ofs
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+    .cfi_adjust_cfa_offset -\ofs
+.endm
+
+.macro EPILOG_RETURN
+    jirl  $r0, $ra, 0
+.endm
+
+.macro EMIT_BREAKPOINT
+    break 0
+.endm
+
+.macro EPILOG_BRANCH_REG reg
+    jirl  $r0, \reg, 0
+.endm
+
+// Loads the address of a thread-local variable into the target register,
+// which cannot be a0. Preserves all other registers.
+.macro INLINE_GET_TLS_VAR target, var
+    .ifc \target, $a0
+    .error "target cannot be a0"
+    .endif
+
+    // Allocate the stack slots before storing into them so the saved values
+    // cannot be clobbered by a signal arriving between the store and the SP update.
+    addi.d  $sp, $sp, -16
+    st.d  $a0, $sp, 0
+    st.d  $ra, $sp, 8
+
+    // This sequence of instructions is recognized and potentially patched
+    // by the linker (GD->IE/LE relaxation).
+    // TODO-LOONGARCH64: Fix once TLSDESC is supported by LLVM
+    //la.local  $a0, \var
+    //ld.d  \target, $a0, 0
+    //.tlsdesccall \var
+    //jirl  $ra, \target, 0
+    la.tls.ie  $a0, \var
+    // End of the sequence
+
+    // \target = $tp + TLS offset of \var
+    add.d  \target, $tp, $a0
+
+    ld.d  $a0, $sp, 0
+    ld.d  $ra, $sp, 8
+    addi.d  $sp, $sp, 16
+.endm
+
+// Inlined version of RhpGetThread. Target cannot be a0.
+.macro INLINE_GETTHREAD target
+    INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread)
+.endm
+
+// Do not use these ETLS macros in functions that already create a stack frame.
+// Creating two stack frames in one function can confuse the unwinder/debugger.
+
+.macro GETTHREAD_ETLS_1
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 32 // Push down stack pointer and store FP and RA
+    st.d  $a0, $sp, 0x10
+
+    bl  C_FUNC(RhpGetThread)
+    ori  $a1, $a0, 0
+
+    ld.d  $a0, $sp, 0x10
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+.endm
+
+.macro GETTHREAD_ETLS_2
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 32 // Push down stack pointer and store FP and RA
+    st.d  $a0, $sp, 0x10
+    st.d  $a1, $sp, 0x18
+
+    bl  C_FUNC(RhpGetThread)
+    ori  $a2, $a0, 0
+
+    ld.d  $a0, $sp, 0x10
+    ld.d  $a1, $sp, 0x18
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
+.endm
+
+.macro GETTHREAD_ETLS_3
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 48 // Push down stack pointer and store FP and RA
+    st.d  $a0, $sp, 0x10
+    st.d  $a1, $sp, 0x18
+    st.d  $a2, $sp, 0x20
+
+    bl  C_FUNC(RhpGetThread)
+    ori  $a3, $a0, 0
+
+    ld.d  $a0, $sp, 0x10
+    ld.d  $a1, $sp, 0x18
+    ld.d  $a2, $sp, 0x20
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 48
+.endm
+
+.macro GETTHUNKDATA_ETLS_9
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 96 // Push down stack pointer and store FP and RA
+    st.d  $a0, $sp, 0x10
+    st.d  $a1, $sp, 0x18
+    st.d  $a2, $sp, 0x20
+    st.d  $a3, $sp, 0x28
+    st.d  $a4, $sp, 0x30
+    st.d  $a5, $sp, 0x38
+    st.d  $a6, $sp, 0x40
+    st.d  $a7, $sp, 0x48
+    st.d  $t6, $sp, 0x50
+    st.d  $t7, $sp, 0x58
+
+    bl  C_FUNC(RhpGetThunkData)
+    ori  $t0, $a0, 0
+
+    ld.d  $a0, $sp, 0x10
+    ld.d  $a1, $sp, 0x18
+    ld.d  $a2, $sp, 0x20
+    ld.d  $a3, $sp, 0x28
+    ld.d  $a4, $sp, 0x30
+    ld.d  $a5, $sp, 0x38
+    ld.d  $a6, $sp, 0x40
+    ld.d  $a7, $sp, 0x48
+    ld.d  $t6, $sp, 0x50
+    ld.d  $t7, $sp, 0x58
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 96
+.endm
+
+.macro InterlockedOperationBarrier
+    dbar 0
+.endm
+
+.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2
+    //
+    // Thread::Unhijack()
+    //
+    ld.d  \trashReg1, \threadReg, OFFSETOF__Thread__m_pvHijackedReturnAddress
+    beq  \trashReg1, $zero, 0f
+
+    ld.d  \trashReg2, \threadReg, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
+    st.d  \trashReg1, \trashReg2, 0
+    st.d  $zero, \threadReg, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
+    st.d  $zero, \threadReg, OFFSETOF__Thread__m_pvHijackedReturnAddress
+0:
+.endm
+
+// Note: these must match the defs in PInvokeTransitionFrameFlags
+PTFF_SAVE_SP            = 0x00000200
+PTFF_SAVE_R4            = 0x00001000
+PTFF_SAVE_R5            = 0x00002000
+PTFF_SAVE_ALL_PRESERVED = 0x000001FF // NOTE: r23-r31
+
+DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP
+
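For reference, the 0x80-byte frame built by the PUSH_COOP_PINVOKE_FRAME macro below has the following shape. This is a sketch only: the field names are invented, but the offsets are the ones the macro uses.

    #include <stdint.h>

    struct CoopPinvokeFrame {
        uintptr_t fp;          /* 0x00: $fp (r22)                                 */
        uintptr_t ra;          /* 0x08: $ra (r1)                                  */
        uintptr_t pThread;     /* 0x10: Thread*, filled in later by the caller    */
        uintptr_t flags;       /* 0x18: DEFAULT_FRAME_SAVE_FLAGS (slot #3)        */
        uintptr_t sRegs[9];    /* 0x20-0x60: $s0-$s8 (r23-r31)                    */
        uintptr_t tp;          /* 0x68: $tp (r2)                                  */
        uintptr_t callerSP;    /* 0x70: SP before the 0x80-byte frame (slot #14)  */
        uintptr_t padding;     /* 0x78: unused; keeps the frame 16-byte aligned   */
    };
    /* sizeof(struct CoopPinvokeFrame) == 0x80 on LP64 */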
+.macro PUSH_COOP_PINVOKE_FRAME trashReg
+
+    PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 0x80 // Push down stack pointer and store FP and RA
+
+    // 0x10 bytes reserved for Thread* and flags
+
+    // Save callee saved registers
+    PROLOG_SAVE_REG_PAIR 23, 24, 0x20
+    PROLOG_SAVE_REG_PAIR 25, 26, 0x30
+    PROLOG_SAVE_REG_PAIR 27, 28, 0x40
+    PROLOG_SAVE_REG_PAIR 29, 30, 0x50
+    PROLOG_SAVE_REG_PAIR 31, 2, 0x60
+
+    // Save the value of SP before stack allocation to slot #14 in the frame
+    addi.d  \trashReg, $sp, 0x80
+    st.d  \trashReg, $sp, 0x70
+
+    // Record the bitmask of saved registers in the frame (slot #3)
+    ori  \trashReg, $zero, DEFAULT_FRAME_SAVE_FLAGS
+    st.d  \trashReg, $sp, 0x18
+
+    ori  \trashReg, $sp, 0
+.endm
+
+// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME
+.macro POP_COOP_PINVOKE_FRAME
+
+    // $s0-$s8, $tp
+    EPILOG_RESTORE_REG_PAIR 23, 24, 0x20
+    EPILOG_RESTORE_REG_PAIR 25, 26, 0x30
+    EPILOG_RESTORE_REG_PAIR 27, 28, 0x40
+    EPILOG_RESTORE_REG_PAIR 29, 30, 0x50
+    EPILOG_RESTORE_REG_PAIR 31, 2, 0x60
+    EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x80
+.endm
+
+// Bit position of the thread abort flag, to be used with andi+beq/bne instructions
+PTFF_THREAD_ABORT_BIT = 36
+
+//
+// CONSTANTS -- INTEGER
+//
+#define TSF_Attached                    0x01
+#define TSF_SuppressGcStress            0x08
+#define TSF_DoNotTriggerGc              0x10
+#define TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC 0x18
+
+// Bit positions for the TrapThreadsFlags values below, to be used with andi+beq/bne instructions
+TrapThreadsFlags_AbortInProgress_Bit = 0
+TrapThreadsFlags_TrapThreads_Bit = 1
+
+// These must match the TrapThreadsFlags enum
+#define TrapThreadsFlags_None 0
+#define TrapThreadsFlags_AbortInProgress 1
+#define TrapThreadsFlags_TrapThreads 2
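A quick C illustration of how the bit-position constants above are meant to be consumed; the asm would test the same bits with an andi followed by beq/bne.

    #include <stdint.h>

    /* Bit positions matching TrapThreadsFlags_*_Bit above. */
    static int abort_in_progress(uint32_t trapFlags)
    {
        return (trapFlags >> 0 /* TrapThreadsFlags_AbortInProgress_Bit */) & 1;
    }

    static int trap_threads(uint32_t trapFlags)
    {
        return (trapFlags >> 1 /* TrapThreadsFlags_TrapThreads_Bit */) & 1;
    }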