From ebeeff545779f2ddbca25a5d4099cb0dc6d132be Mon Sep 17 00:00:00 2001 From: Santiago Fernandez Madero Date: Mon, 30 Mar 2020 16:47:21 -0700 Subject: [PATCH] Revert "Switch reverse PInvoke to the NativeCallable plan (#34251)" This reverts commit 4e30ff033fbd915c5a39df8901d43c601deeeeb4. --- .../tools/Common/JitInterface/CorInfoImpl.cs | 3 +- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 4 +- src/coreclr/src/vm/amd64/UMThunkStub.asm | 241 ++++++++++++++++++ src/coreclr/src/vm/amd64/asmconstants.h | 17 ++ src/coreclr/src/vm/amd64/umthunkstub.S | 154 +++++++++++ src/coreclr/src/vm/arm/asmconstants.h | 9 + src/coreclr/src/vm/arm/asmhelpers.S | 90 +++++++ src/coreclr/src/vm/arm/asmhelpers.asm | 93 +++++++ src/coreclr/src/vm/arm64/asmconstants.h | 9 + src/coreclr/src/vm/arm64/asmhelpers.S | 108 ++++++++ src/coreclr/src/vm/arm64/asmhelpers.asm | 115 +++++++++ src/coreclr/src/vm/dllimportcallback.cpp | 63 ++--- src/coreclr/src/vm/dllimportcallback.h | 44 +++- src/coreclr/src/vm/i386/asmhelpers.S | 16 ++ src/coreclr/src/vm/ilstubcache.cpp | 3 - src/coreclr/src/vm/jithelpers.cpp | 14 +- src/coreclr/src/vm/jitinterface.cpp | 13 +- src/coreclr/src/vm/method.cpp | 5 - src/coreclr/src/vm/method.hpp | 2 - src/coreclr/src/zap/zapinfo.cpp | 8 +- 20 files changed, 936 insertions(+), 75 deletions(-) diff --git a/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs index 472b1050acb45..ebf41f59cd513 100644 --- a/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs @@ -2921,7 +2921,8 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes) if (this.MethodBeingCompiled.IsNativeCallable) { #if READYTORUN - if (targetArchitecture == TargetArchitecture.X86) + if (targetArchitecture == TargetArchitecture.X86 + && _compilation.TypeSystemContext.Target.OperatingSystem == TargetOS.Windows) { throw new RequiresRuntimeJitException("ReadyToRun: Methods with NativeCallableAttribute not implemented"); } diff --git a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 8aa4adc279930..1f477b034bb58 100644 --- a/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -1661,7 +1661,9 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO pResult->methodFlags = FilterNamedIntrinsicMethodAttribs(pResult->methodFlags, methodToCall); var targetDetails = _compilation.TypeSystemContext.Target; - if (targetDetails.Architecture == TargetArchitecture.X86 && targetMethod.IsNativeCallable) + if (targetDetails.Architecture == TargetArchitecture.X86 + && targetDetails.OperatingSystem == TargetOS.Windows + && targetMethod.IsNativeCallable) { throw new RequiresRuntimeJitException("ReadyToRun: References to methods with NativeCallableAttribute not implemented"); } diff --git a/src/coreclr/src/vm/amd64/UMThunkStub.asm b/src/coreclr/src/vm/amd64/UMThunkStub.asm index cee9866329552..58239125018a6 100644 --- a/src/coreclr/src/vm/amd64/UMThunkStub.asm +++ b/src/coreclr/src/vm/amd64/UMThunkStub.asm @@ -11,8 +11,13 @@ include include AsmConstants.inc +extern CreateThreadBlockThrow:proc extern TheUMEntryPrestubWorker:proc extern UMEntryPrestubUnwindFrameChainHandler:proc +extern 
UMThunkStubUnwindFrameChainHandler:proc +extern g_TrapReturningThreads:dword +extern UMThunkStubRareDisableWorker:proc +extern ReversePInvokeBadTransition:proc ; ; METHODDESC_REGISTER: UMEntryThunk* @@ -73,4 +78,240 @@ endif NESTED_END TheUMEntryPrestub, _TEXT + +; +; METHODDESC_REGISTER: UMEntryThunk* +; +NESTED_ENTRY UMThunkStub, _TEXT, UMThunkStubUnwindFrameChainHandler + +UMThunkStubAMD64_STACK_FRAME_SIZE = 0 + +; number of integer registers saved in prologue +UMThunkStubAMD64_NUM_REG_PUSHES = 2 +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + (UMThunkStubAMD64_NUM_REG_PUSHES * 8) + +; rare path spill area +UMThunkStubAMD64_RARE_PATH_SPILL_SIZE = 10h +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_RARE_PATH_SPILL_SIZE +UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + + + +; HOST_NOTIFY_FLAG +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 +UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + +; XMM save area +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL + +; Ensure that the offset of the XMM save area will be 16-byte aligned. +if ((UMThunkStubAMD64_STACK_FRAME_SIZE + 8) MOD 16) ne 0 ; +8 for caller-pushed return address +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 +endif + +UMThunkStubAMD64_XMM_SAVE_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + +; Add in the callee scratch area size. +UMThunkStubAMD64_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE + +; Now we have the full size of the stack frame. The offsets have been computed relative to the +; top, so negate them to make them relative to the post-prologue rsp. +UMThunkStubAMD64_FRAME_OFFSET = UMThunkStubAMD64_CALLEE_SCRATCH_SIZE +UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET +UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET +UMThunkStubAMD64_XMM_SAVE_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_XMM_SAVE_NEGOFFSET +UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 - UMThunkStubAMD64_FRAME_OFFSET ; +8 for return address +UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE - (UMThunkStubAMD64_NUM_REG_PUSHES * 8) + +.errnz UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET, update UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET + + +; +; [ callee scratch ] <-- new RSP +; [ callee scratch ] +; [ callee scratch ] +; [ callee scratch ] +; {optional stack args passed to callee} +; xmm0 <-- RBP +; xmm1 +; xmm2 +; xmm3 +; {optional padding to align xmm regs} +; HOST_NOTIFY_FLAG (needs to make ReverseLeaveRuntime call flag) +; [rare path spill area] +; [rare path spill area] +; rbp save +; r12 save +; return address <-- entry RSP +; [rcx home] +; [rdx home] +; [r8 home] +; [r9 home] +; stack arg 0 +; stack arg 1 +; ... 
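
The MASM above derives the stub's whole frame layout from accumulated constants, and the only subtle step is the conditional 8-byte pad that keeps the XMM save area 16-byte aligned once the caller-pushed return address is accounted for. The standalone C++ sketch below (not part of the patch) mirrors that arithmetic; the SIZEOF_* values are assumptions for Windows x64 (four 16-byte XMM argument saves, four 8-byte outgoing argument homes), and the last assert replays the .errnz check against UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET (0x40) defined later in asmconstants.h.

// Standalone sketch (not CoreCLR code) mirroring the frame-size arithmetic above.
// Assumed values: SIZEOF_MAX_FP_ARG_SPILL = 0x40 (xmm0-xmm3, 16 bytes each) and
// SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES = 0x20 (rcx/rdx/r8/r9 homes) on Windows x64.
#include <cassert>
#include <cstdio>

int main()
{
    const int kRegPushes          = 2;      // r12, rbp
    const int kRarePathSpillSize  = 0x10;
    const int kHostNotifyFlagSize = 8;
    const int kXmmSaveSize        = 0x40;   // assumption, see lead-in
    const int kCalleeScratchSize  = 0x20;   // assumption, see lead-in

    int frame = kRegPushes * 8;
    frame += kRarePathSpillSize;   int rareSpillNeg  = frame;
    frame += kHostNotifyFlagSize;  int hostNotifyNeg = frame;
    frame += kXmmSaveSize;
    if ((frame + 8) % 16 != 0)              // +8 for the caller-pushed return address
        frame += 8;
    int xmmSaveNeg = frame;
    frame += kCalleeScratchSize;

    const int frameOffset      = kCalleeScratchSize;            // rbp bias from post-prologue rsp
    const int hostNotifyOffset = frame - frameOffset - hostNotifyNeg;
    const int xmmSaveOffset    = frame - frameOffset - xmmSaveNeg;
    const int fixedAlloc       = frame - kRegPushes * 8;

    assert((frame + 8) % 16 == 0);          // post-prologue rsp stays 16-byte aligned
    assert((frame - xmmSaveNeg) % 16 == 0); // so the movdqa saves hit aligned addresses
    assert(hostNotifyOffset == 0x40);       // matches UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET

    printf("frame=0x%x fixedAlloc=0x%x xmmSave@rbp%+d hostNotify@rbp%+d rareSpillNeg=0x%x\n",
           frame, fixedAlloc, xmmSaveOffset, hostNotifyOffset, rareSpillNeg);
    return 0;
}
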
+ + push_nonvol_reg r12 + push_nonvol_reg rbp ; stack_args + alloc_stack UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE + set_frame rbp, UMThunkStubAMD64_FRAME_OFFSET ; stack_args + mov byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0 ; hosted + END_PROLOGUE + + ; + ; Call GetThread() + ; + INLINE_GETTHREAD r12 ; will not trash r10 + test r12, r12 + jz DoThreadSetup + +HaveThread: + + ;FailFast if a native callable method invoked via ldftn and calli. + cmp dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1 + jz InvalidTransition + + ; + ; disable preemptive GC + ; + mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1 + + ; + ; catch returning thread here if a GC is in progress + ; + cmp [g_TrapReturningThreads], 0 + jnz DoTrapReturningThreadsTHROW + +InCooperativeMode: + + mov r11, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo] + mov eax, [r11 + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize] ; stack_args + test rax, rax ; stack_args + jnz CopyStackArgs ; stack_args + +ArgumentsSetup: + + mov rax, [r11 + OFFSETOF__UMThunkMarshInfo__m_pILStub] ; rax <- Stub* + call rax + +PostCall: + ; + ; enable preemptive GC + ; + mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 0 + + ; epilog + lea rsp, [rbp - UMThunkStubAMD64_FRAME_OFFSET + UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE] + pop rbp ; stack_args + pop r12 + ret + + +DoThreadSetup: + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8 + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9 + + ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls + ; initial measurements indidcate that this could be worth about a 5% savings in reverse + ; pinvoke overhead. 
+ movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h], xmm0 + movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1 + movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2 + movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3 + + mov [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER + call CreateThreadBlockThrow + mov METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET] + + mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h] + mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h] + mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h] + mov r9, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h] + + ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls + movdqa xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h] + movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h] + movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h] + movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h] + + mov r12, rax + + jmp HaveThread + +InvalidTransition: + ; ReversePInvokeBadTransition will failfast + call ReversePInvokeBadTransition + +DoTrapReturningThreadsTHROW: + + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8 + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9 + + ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls + ; initial measurements indidcate that this could be worth about a 5% savings in reverse + ; pinvoke overhead. 
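
For readers following the assembly, the sketch below is a hand-written C++ model of the stub's overall control flow: the fast path plus the two rare paths (DoThreadSetup and DoTrapReturningThreadsTHROW) introduced above. All types and helper names are stand-ins for illustration, not CoreCLR declarations; the real stub does this in assembly with the argument registers spilled and restored around each rare-path call.

// Toy model (not CoreCLR code) of UMThunkStub's control flow.
// umEntry stands in for the UMEntryThunk*, ilStub for m_pUMThunkMarshInfo->m_pILStub.
#include <cstdlib>
#include <cstdio>

struct ToyThread { int preemptiveGCDisabled = 0; };
static thread_local ToyThread* t_thread = nullptr;
static int g_trapReturningThreads = 0;

static ToyThread* CreateThreadBlockThrow_Stub() { return t_thread = new ToyThread(); }
static void ReversePInvokeBadTransition_Stub()  { std::abort(); }          // failfast
static void RareDisableWorker_Stub(ToyThread*)  { /* GC polling happens here */ }

static void UMThunkStub_Sketch(void* umEntry, void (*ilStub)(void*))
{
    ToyThread* thread = t_thread;
    if (thread == nullptr)                       // DoThreadSetup
        thread = CreateThreadBlockThrow_Stub();

    if (thread->preemptiveGCDisabled)            // InvalidTransition: already in coop mode
        ReversePInvokeBadTransition_Stub();      // (ldftn + calli on a NativeCallable method)

    thread->preemptiveGCDisabled = 1;            // enter cooperative mode
    if (g_trapReturningThreads)                  // DoTrapReturningThreadsTHROW
        RareDisableWorker_Stub(thread);

    ilStub(umEntry);                             // ArgumentsSetup + call m_pILStub

    thread->preemptiveGCDisabled = 0;            // PostCall: back to preemptive mode
}

int main()
{
    UMThunkStub_Sketch(nullptr, [](void*) { std::puts("managed target"); });
    return 0;
}
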
+ movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h], xmm0 + movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1 + movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2 + movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3 + + mov [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER + mov rcx, r12 ; Thread* pThread + mov rdx, METHODDESC_REGISTER ; UMEntryThunk* pUMEntry + call UMThunkStubRareDisableWorker + mov METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET] + + mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h] + mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h] + mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h] + mov r9, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h] + + ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls + movdqa xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h] + movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h] + movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h] + movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h] + + jmp InCooperativeMode + +CopyStackArgs: + ; rax = cbStackArgs (with 20h for register args subtracted out already) + + sub rsp, rax + and rsp, -16 + + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8 + + ; rax = number of bytes + + lea rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES] + lea rdx, [rsp + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE] + +CopyLoop: + ; rax = number of bytes + ; rcx = src + ; rdx = dest + ; r8 = sratch + + add rax, -8 + mov r8, [rcx + rax] + mov [rdx + rax], r8 + jnz CopyLoop + + mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h] + mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h] + mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h] + + jmp ArgumentsSetup + +NESTED_END UMThunkStub, _TEXT + end + diff --git a/src/coreclr/src/vm/amd64/asmconstants.h b/src/coreclr/src/vm/amd64/asmconstants.h index 15c5663e79aec..6c361f9bcf056 100644 --- a/src/coreclr/src/vm/amd64/asmconstants.h +++ b/src/coreclr/src/vm/amd64/asmconstants.h @@ -98,6 +98,21 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__ComPrestubMethodFrame #define SIZEOF__ComMethodFrame 0x20 ASMCONSTANTS_C_ASSERT(SIZEOF__ComMethodFrame == sizeof(ComMethodFrame)); +#endif // FEATURE_COMINTEROP + +#define OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo 0x18 +ASMCONSTANTS_C_ASSERT(OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo + == offsetof(UMEntryThunk, m_pUMThunkMarshInfo)); + +#define OFFSETOF__UMThunkMarshInfo__m_pILStub 0x00 +ASMCONSTANTS_C_ASSERT(OFFSETOF__UMThunkMarshInfo__m_pILStub + == offsetof(UMThunkMarshInfo, m_pILStub)); + +#define OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize 0x08 +ASMCONSTANTS_C_ASSERT(OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize + == offsetof(UMThunkMarshInfo, m_cbActualArgSize)); + +#ifdef FEATURE_COMINTEROP #define OFFSETOF__ComPlusCallMethodDesc__m_pComPlusCallInfo DBG_FRE(0x30, 0x08) ASMCONSTANTS_C_ASSERT(OFFSETOF__ComPlusCallMethodDesc__m_pComPlusCallInfo @@ -482,6 +497,8 @@ ASMCONSTANTS_C_ASSERT(OFFSET__TEB__ThreadLocalStoragePointer == offsetof(TEB, Th #define THROWSTUB_ESTABLISHER_OFFSET_FaultingExceptionFrame 0x30 +#define 
UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET (0x40) // xmm save size + #define Thread__ObjectRefFlush ?ObjectRefFlush@Thread@@SAXPEAV1@@Z diff --git a/src/coreclr/src/vm/amd64/umthunkstub.S b/src/coreclr/src/vm/amd64/umthunkstub.S index a3bbb7f432a8e..4c2b0a32a2f00 100644 --- a/src/coreclr/src/vm/amd64/umthunkstub.S +++ b/src/coreclr/src/vm/amd64/umthunkstub.S @@ -27,3 +27,157 @@ NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix TAILJMP_RAX NESTED_END TheUMEntryPrestub, _TEXT + +// +// METHODDESC_REGISTER: UMEntryThunk* +// +NESTED_ENTRY UMThunkStub, _TEXT, UnhandledExceptionHandlerUnix +#define UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE (SIZEOF_MAX_INT_ARG_SPILL + SIZEOF_MAX_FP_ARG_SPILL + 0x8) +#define UMThunkStubAMD64_XMM_SAVE_OFFSET 0x0 +#define UMThunkStubAMD64_INT_ARG_OFFSET (SIZEOF_MAX_FP_ARG_SPILL + 0x8) +#define UMThunkStubAMD64_METHODDESC_OFFSET SIZEOF_MAX_FP_ARG_SPILL +#define UMThunkStubAMD64_RBP_OFFSET (UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE + 8) + +// {optional stack args passed to callee} <-- new RSP +// xmm0 <-- RBP +// xmm1 +// xmm2 +// xmm3 +// xmm4 +// xmm5 +// xmm6 +// xmm7 +// METHODDESC_REGISTER +// rdi +// rsi +// rcx +// rdx +// r8 +// r9 +// r12 +// rbp +// return address <-- entry RSP + push_nonvol_reg rbp + mov rbp, rsp + push_nonvol_reg r12 // stack_args + alloc_stack UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE + save_reg_postrsp rdi, (UMThunkStubAMD64_INT_ARG_OFFSET) + save_reg_postrsp rsi, (UMThunkStubAMD64_INT_ARG_OFFSET + 0x08) + save_reg_postrsp rdx, (UMThunkStubAMD64_INT_ARG_OFFSET + 0x10) + save_reg_postrsp rcx, (UMThunkStubAMD64_INT_ARG_OFFSET + 0x18) + save_reg_postrsp r8, (UMThunkStubAMD64_INT_ARG_OFFSET + 0x20) + save_reg_postrsp r9, (UMThunkStubAMD64_INT_ARG_OFFSET + 0x28) + save_reg_postrsp METHODDESC_REGISTER, UMThunkStubAMD64_METHODDESC_OFFSET + SAVE_FLOAT_ARGUMENT_REGISTERS UMThunkStubAMD64_XMM_SAVE_OFFSET + set_cfa_register rbp, (2*8) + END_PROLOGUE + + // + // Call GetThread() + // + call C_FUNC(GetThread) + test rax, rax + jz LOCAL_LABEL(DoThreadSetup) + +LOCAL_LABEL(HaveThread): + + mov r12, rax // r12 <- Thread* + + //FailFast if a native callable method is invoked via ldftn and calli. 
+ cmp dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1 + jz LOCAL_LABEL(InvalidTransition) + + // + // disable preemptive GC + // + mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1 + + // + // catch returning thread here if a GC is in progress + // + PREPARE_EXTERNAL_VAR g_TrapReturningThreads, rax + cmp dword ptr [rax], 0 + jnz LOCAL_LABEL(DoTrapReturningThreadsTHROW) + +LOCAL_LABEL(InCooperativeMode): + + mov METHODDESC_REGISTER, [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_METHODDESC_OFFSET] + + mov r11, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo] + mov eax, [r11 + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize] // stack_args + test rax, rax // stack_args + jnz LOCAL_LABEL(UMThunkStub_CopyStackArgs) // stack_args + +LOCAL_LABEL(UMThunkStub_ArgumentsSetup): + mov rdi, [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_INT_ARG_OFFSET] + mov rsi, [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_INT_ARG_OFFSET + 0x08] + mov rdx, [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_INT_ARG_OFFSET + 0x10] + mov rcx, [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_INT_ARG_OFFSET + 0x18] + mov r8, [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_INT_ARG_OFFSET + 0x20] + mov r9, [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_INT_ARG_OFFSET + 0x28] + movdqa xmm0, xmmword ptr [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_XMM_SAVE_OFFSET] + movdqa xmm1, xmmword ptr [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0x10] + movdqa xmm2, xmmword ptr [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0x20] + movdqa xmm3, xmmword ptr [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0x30] + movdqa xmm4, xmmword ptr [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0x40] + movdqa xmm5, xmmword ptr [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0x50] + movdqa xmm6, xmmword ptr [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0x60] + movdqa xmm7, xmmword ptr [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0x70] + + mov rax, [r11 + OFFSETOF__UMThunkMarshInfo__m_pILStub] // rax <- Stub* + call rax + +LOCAL_LABEL(PostCall): + // + // enable preemptive GC + // + mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 0 + + // epilog + lea rsp, [rbp - 8] // deallocate arguments + set_cfa_register rsp, (3*8) + pop_nonvol_reg r12 + pop_nonvol_reg rbp + ret + + +LOCAL_LABEL(DoThreadSetup): + call C_FUNC(CreateThreadBlockThrow) + jmp LOCAL_LABEL(HaveThread) + +LOCAL_LABEL(InvalidTransition): + //No arguments to setup , ReversePInvokeBadTransition will failfast + call C_FUNC(ReversePInvokeBadTransition) + +LOCAL_LABEL(DoTrapReturningThreadsTHROW): + mov rdi, r12 // Thread* pThread + mov rsi, [rbp - UMThunkStubAMD64_RBP_OFFSET + UMThunkStubAMD64_METHODDESC_OFFSET] // UMEntryThunk* pUMEntry + call C_FUNC(UMThunkStubRareDisableWorker) + + jmp LOCAL_LABEL(InCooperativeMode) + +LOCAL_LABEL(UMThunkStub_CopyStackArgs): + // rax = cbStackArgs + + sub rsp, rax + and rsp, -16 + + // rax = number of bytes + + lea rdi, [rbp + 0x10] // rbp + ra + lea rsi, [rsp] + +LOCAL_LABEL(CopyLoop): + // rax = number of bytes + // rdi = src + // rsi = dest + // rdx = sratch + + add rax, -8 + mov rdx, [rdi + rax] + mov [rsi + rax], rdx + jnz LOCAL_LABEL(CopyLoop) + + jmp LOCAL_LABEL(UMThunkStub_ArgumentsSetup) + +NESTED_END UMThunkStub, _TEXT diff --git 
a/src/coreclr/src/vm/arm/asmconstants.h b/src/coreclr/src/vm/arm/asmconstants.h index 58bbb8807098c..f6d782d69811d 100644 --- a/src/coreclr/src/vm/arm/asmconstants.h +++ b/src/coreclr/src/vm/arm/asmconstants.h @@ -124,6 +124,15 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegi #define ASM_ENREGISTERED_RETURNTYPE_MAXSIZE 0x20 ASMCONSTANTS_C_ASSERT(ASM_ENREGISTERED_RETURNTYPE_MAXSIZE == ENREGISTERED_RETURNTYPE_MAXSIZE) +#define UMEntryThunk__m_pUMThunkMarshInfo 0x0C +ASMCONSTANTS_C_ASSERT(UMEntryThunk__m_pUMThunkMarshInfo == offsetof(UMEntryThunk, m_pUMThunkMarshInfo)) + +#define UMThunkMarshInfo__m_pILStub 0x00 +ASMCONSTANTS_C_ASSERT(UMThunkMarshInfo__m_pILStub == offsetof(UMThunkMarshInfo, m_pILStub)) + +#define UMThunkMarshInfo__m_cbActualArgSize 0x04 +ASMCONSTANTS_C_ASSERT(UMThunkMarshInfo__m_cbActualArgSize == offsetof(UMThunkMarshInfo, m_cbActualArgSize)) + #define MethodDesc__m_wFlags DBG_FRE(0x1A, 0x06) ASMCONSTANTS_C_ASSERT(MethodDesc__m_wFlags == offsetof(MethodDesc, m_wFlags)) diff --git a/src/coreclr/src/vm/arm/asmhelpers.S b/src/coreclr/src/vm/arm/asmhelpers.S index 0bb2dbee981fc..3d7c96edc2754 100644 --- a/src/coreclr/src/vm/arm/asmhelpers.S +++ b/src/coreclr/src/vm/arm/asmhelpers.S @@ -306,6 +306,96 @@ LOCAL_LABEL(LNullThis): NESTED_END TheUMEntryPrestub,_TEXT +// +// r12 = UMEntryThunk* +// + NESTED_ENTRY UMThunkStub,_TEXT,UnhandledExceptionHandlerUnix + PROLOG_PUSH "{r4,r5,r7,r11,lr}" + PROLOG_STACK_SAVE_OFFSET r7, #8 + + alloc_stack 4 * 5 + stm sp, {r0-r3,r12} + + //GBLA UMThunkStub_HiddenArgOffest // offset of saved UMEntryThunk * + //GBLA UMThunkStub_StackArgsOffest // offset of original stack args + //GBLA UMThunkStub_StackArgsSize // total size of UMThunkStub frame +UMThunkStub_HiddenArgOffset = (-3)*4 +UMThunkStub_StackArgsOffset = 3*4 +UMThunkStub_StackArgsSize = 10*4 + + CHECK_STACK_ALIGNMENT + + bl C_FUNC(GetThread) + cbz r0, LOCAL_LABEL(UMThunkStub_DoThreadSetup) + +LOCAL_LABEL(UMThunkStub_HaveThread): + mov r5, r0 // r5 = Thread * + + ldr r2, =g_TrapReturningThreads + + mov r4, 1 + str r4, [r5, #Thread__m_fPreemptiveGCDisabled] + + ldr r3, [r2] + cbnz r3, LOCAL_LABEL(UMThunkStub_DoTrapReturningThreads) + +LOCAL_LABEL(UMThunkStub_InCooperativeMode): + ldr r12, [r7, #UMThunkStub_HiddenArgOffset] + ldr r3, [r12, #UMEntryThunk__m_pUMThunkMarshInfo] + ldr r2, [r3, #UMThunkMarshInfo__m_cbActualArgSize] + cbz r2, LOCAL_LABEL(UMThunkStub_ArgumentsSetup) + + add r0, r7, #UMThunkStub_StackArgsOffset // Source pointer + add r0, r0, r2 + lsr r1, r2, #2 // Count of stack slots to copy + + and r2, r2, #4 // Align the stack + sub sp, sp, r2 + +LOCAL_LABEL(UMThunkStub_StackLoop): + ldr r2, [r0,#-4]! + str r2, [sp,#-4]! 
+ subs r1, r1, #1 + bne LOCAL_LABEL(UMThunkStub_StackLoop) + +LOCAL_LABEL(UMThunkStub_ArgumentsSetup): + ldr r4, [r3, #UMThunkMarshInfo__m_pILStub] + + // reload argument registers + sub r0, r7, #28 + ldm r0, {r0-r3} + + CHECK_STACK_ALIGNMENT + + blx r4 + +LOCAL_LABEL(UMThunkStub_PostCall): + mov r4, 0 + str r4, [r5, #Thread__m_fPreemptiveGCDisabled] + + EPILOG_STACK_RESTORE_OFFSET r7, #8 + EPILOG_POP "{r4,r5,r7,r11,pc}" + +LOCAL_LABEL(UMThunkStub_DoThreadSetup): + sub sp, #SIZEOF__FloatArgumentRegisters + vstm sp, {d0-d7} + bl C_FUNC(CreateThreadBlockThrow) + vldm sp, {d0-d7} + add sp, #SIZEOF__FloatArgumentRegisters + b LOCAL_LABEL(UMThunkStub_HaveThread) + +LOCAL_LABEL(UMThunkStub_DoTrapReturningThreads): + sub sp, #SIZEOF__FloatArgumentRegisters + vstm sp, {d0-d7} + mov r0, r5 // Thread* pThread + ldr r1, [r7, #UMThunkStub_HiddenArgOffset] // UMEntryThunk* pUMEntry + bl C_FUNC(UMThunkStubRareDisableWorker) + vldm sp, {d0-d7} + add sp, #SIZEOF__FloatArgumentRegisters + b LOCAL_LABEL(UMThunkStub_InCooperativeMode) + + NESTED_END UMThunkStub,_TEXT + // ------------------------------------------------------------------ NESTED_ENTRY ThePreStub, _TEXT, NoHandler diff --git a/src/coreclr/src/vm/arm/asmhelpers.asm b/src/coreclr/src/vm/arm/asmhelpers.asm index a76f1103c13ac..aad5395f97694 100644 --- a/src/coreclr/src/vm/arm/asmhelpers.asm +++ b/src/coreclr/src/vm/arm/asmhelpers.asm @@ -16,6 +16,8 @@ IMPORT JIT_InternalThrow IMPORT JIT_WriteBarrier IMPORT TheUMEntryPrestubWorker + IMPORT CreateThreadBlockThrow + IMPORT UMThunkStubRareDisableWorker IMPORT PreStubWorker IMPORT PreStubGetMethodDescForCompactEntryPoint IMPORT NDirectImportWorker @@ -38,6 +40,7 @@ #endif IMPORT CallDescrWorkerUnwindFrameChainHandler IMPORT UMEntryPrestubUnwindFrameChainHandler + IMPORT UMThunkStubUnwindFrameChainHandler #ifdef FEATURE_COMINTEROP IMPORT ReverseComUnwindFrameChainHandler #endif @@ -364,6 +367,96 @@ LNullThis NESTED_END +; +; r12 = UMEntryThunk* +; + NESTED_ENTRY UMThunkStub,,UMThunkStubUnwindFrameChainHandler + PROLOG_PUSH {r4,r5,r7,r11,lr} + PROLOG_PUSH {r0-r3,r12} + PROLOG_STACK_SAVE r7 + + GBLA UMThunkStub_HiddenArg ; offset of saved UMEntryThunk * + GBLA UMThunkStub_StackArgs ; offset of original stack args (total size of UMThunkStub frame) +UMThunkStub_HiddenArg SETA 4*4 +UMThunkStub_StackArgs SETA 10*4 + + CHECK_STACK_ALIGNMENT + + ; r0 = GetThread(). Trashes r5 + INLINE_GETTHREAD r0, r5 + cbz r0, UMThunkStub_DoThreadSetup + +UMThunkStub_HaveThread + mov r5, r0 ; r5 = Thread * + + ldr r2, =g_TrapReturningThreads + + mov r4, 1 + str r4, [r5, #Thread__m_fPreemptiveGCDisabled] + + ldr r3, [r2] + cbnz r3, UMThunkStub_DoTrapReturningThreads + +UMThunkStub_InCooperativeMode + ldr r12, [r7, #UMThunkStub_HiddenArg] + + ldr r3, [r12, #UMEntryThunk__m_pUMThunkMarshInfo] + ldr r2, [r3, #UMThunkMarshInfo__m_cbActualArgSize] + cbz r2, UMThunkStub_ArgumentsSetup + + add r0, r7, #UMThunkStub_StackArgs ; Source pointer + add r0, r0, r2 + lsr r1, r2, #2 ; Count of stack slots to copy + + and r2, r2, #4 ; Align the stack + sub sp, sp, r2 + +UMThunkStub_StackLoop + ldr r2, [r0,#-4]! + str r2, [sp,#-4]! 
+ subs r1, r1, #1 + bne UMThunkStub_StackLoop + +UMThunkStub_ArgumentsSetup + ldr r4, [r3, #UMThunkMarshInfo__m_pILStub] + + ; reload argument registers + ldm r7, {r0-r3} + + CHECK_STACK_ALIGNMENT + + blx r4 + +UMThunkStub_PostCall + mov r4, 0 + str r4, [r5, #Thread__m_fPreemptiveGCDisabled] + + EPILOG_STACK_RESTORE r7 + EPILOG_STACK_FREE 4 * 5 + EPILOG_POP {r4,r5,r7,r11,pc} + +UMThunkStub_DoThreadSetup + sub sp, #SIZEOF__FloatArgumentRegisters + vstm sp, {d0-d7} + bl CreateThreadBlockThrow + vldm sp, {d0-d7} + add sp, #SIZEOF__FloatArgumentRegisters + b UMThunkStub_HaveThread + +UMThunkStub_DoTrapReturningThreads + sub sp, #SIZEOF__FloatArgumentRegisters + vstm sp, {d0-d7} + mov r0, r5 ; Thread* pThread + ldr r1, [r7, #UMThunkStub_HiddenArg] ; UMEntryThunk* pUMEntry + bl UMThunkStubRareDisableWorker + vldm sp, {d0-d7} + add sp, #SIZEOF__FloatArgumentRegisters + b UMThunkStub_InCooperativeMode + + NESTED_END + + INLINE_GETTHREAD_CONSTANT_POOL + ; ------------------------------------------------------------------ NESTED_ENTRY ThePreStub diff --git a/src/coreclr/src/vm/arm64/asmconstants.h b/src/coreclr/src/vm/arm64/asmconstants.h index bb65454a7fd2d..544d09cb5d2cd 100644 --- a/src/coreclr/src/vm/arm64/asmconstants.h +++ b/src/coreclr/src/vm/arm64/asmconstants.h @@ -157,6 +157,15 @@ ASMCONSTANTS_C_ASSERT(UnmanagedToManagedFrame__m_pvDatum == offsetof(UnmanagedTo #endif // FEATURE_COMINTEROP +#define UMEntryThunk__m_pUMThunkMarshInfo 0x18 +ASMCONSTANTS_C_ASSERT(UMEntryThunk__m_pUMThunkMarshInfo == offsetof(UMEntryThunk, m_pUMThunkMarshInfo)) + +#define UMThunkMarshInfo__m_pILStub 0x00 +ASMCONSTANTS_C_ASSERT(UMThunkMarshInfo__m_pILStub == offsetof(UMThunkMarshInfo, m_pILStub)) + +#define UMThunkMarshInfo__m_cbActualArgSize 0x08 +ASMCONSTANTS_C_ASSERT(UMThunkMarshInfo__m_cbActualArgSize == offsetof(UMThunkMarshInfo, m_cbActualArgSize)) + #define REDIRECTSTUB_SP_OFFSET_CONTEXT 0 #define CONTEXT_Pc 0x108 diff --git a/src/coreclr/src/vm/arm64/asmhelpers.S b/src/coreclr/src/vm/arm64/asmhelpers.S index 48a82f1e9765c..4706319b30a62 100644 --- a/src/coreclr/src/vm/arm64/asmhelpers.S +++ b/src/coreclr/src/vm/arm64/asmhelpers.S @@ -745,6 +745,114 @@ NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix NESTED_END TheUMEntryPrestub, _TEXT +// +// x12 = UMEntryThunk* +// +NESTED_ENTRY UMThunkStub, _TEXT, UnhandledExceptionHandlerUnix + + // Save arguments and return address + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -112 // 72 for regArgs, 8 for x19 & 8 for x12 + // save callee saved reg x19. 
x19 is used in the method to store thread* + PROLOG_SAVE_REG x19, 96 + + SAVE_ARGUMENT_REGISTERS sp, 16 + +#define UMThunkStub_HiddenArg 88 // offset of saved UMEntryThunk * +#define UMThunkStub_StackArgs 112 // offset of original stack args (total size of UMThunkStub frame) + + // save UMEntryThunk* + str x12, [sp, #UMThunkStub_HiddenArg] + + // assuming GetThread does not clobber FP Args + bl C_FUNC(GetThread) + cbz x0, LOCAL_LABEL(UMThunkStub_DoThreadSetup) + +LOCAL_LABEL(UMThunkStub_HaveThread): + mov x19, x0 // x19 = Thread * + + mov x9, 1 + // m_fPreemptiveGCDisabled is 4 byte field so using 32-bit variant + str w9, [x19, #Thread__m_fPreemptiveGCDisabled] + + PREPARE_EXTERNAL_VAR g_TrapReturningThreads, x2 + ldr x3, [x2] + // assuming x0 contains Thread* before jumping to UMThunkStub_DoTrapReturningThreads + cbnz x3, LOCAL_LABEL(UMThunkStub_DoTrapReturningThreads) + +LOCAL_LABEL(UMThunkStub_InCooperativeMode): + ldr x12, [fp, #UMThunkStub_HiddenArg] // x12 = UMEntryThunk* + ldr x3, [x12, #UMEntryThunk__m_pUMThunkMarshInfo] // x3 = m_pUMThunkMarshInfo + + // m_cbActualArgSize is UINT32 and hence occupies 4 bytes + ldr w2, [x3, #UMThunkMarshInfo__m_cbActualArgSize] // w2 = Stack arg bytes + cbz w2, LOCAL_LABEL(UMThunkStub_RegArgumentsSetup) + + // extend to 64-bits + uxtw x2, w2 + + // Source pointer + add x0, fp, #UMThunkStub_StackArgs + + // move source pointer to end of Stack Args + add x0, x0, x2 + + // Count of stack slot pairs to copy (divide by 16) + lsr x1, x2, #4 + + // Is there an extra stack slot (can happen when stack arg bytes not multiple of 16) + and x2, x2, #8 + + // If yes then start source pointer from 16 byte aligned stack slot + add x0, x0, x2 + + // increment stack slot pair count by 1 if x2 is not zero + add x1, x1, x2, LSR #3 + +LOCAL_LABEL(UMThunkStub_StackLoop): + ldp x4, x5, [x0, #-16]! // pre-Index + stp x4, x5, [sp, #-16]! 
// pre-Index + subs x1, x1, #1 + bne LOCAL_LABEL(UMThunkStub_StackLoop) + +LOCAL_LABEL(UMThunkStub_RegArgumentsSetup): + ldr x16, [x3, #UMThunkMarshInfo__m_pILStub] + + RESTORE_ARGUMENT_REGISTERS fp, 16 + + blr x16 + +LOCAL_LABEL(UMThunkStub_PostCall): + mov x4, 0 + // m_fPreemptiveGCDisabled is 4 byte field so using 32-bit variant + str w4, [x19, #Thread__m_fPreemptiveGCDisabled] + + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG x19, 96 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 112 + + EPILOG_RETURN + +LOCAL_LABEL(UMThunkStub_DoThreadSetup): + sub sp, sp, #SIZEOF__FloatArgumentRegisters + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 0 + bl C_FUNC(CreateThreadBlockThrow) + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 0 + add sp, sp, #SIZEOF__FloatArgumentRegisters + b LOCAL_LABEL(UMThunkStub_HaveThread) + +LOCAL_LABEL(UMThunkStub_DoTrapReturningThreads): + sub sp, sp, #SIZEOF__FloatArgumentRegisters + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 0 + // x0 already contains Thread* pThread + // UMEntryThunk* pUMEntry + ldr x1, [fp, #UMThunkStub_HiddenArg] + bl C_FUNC(UMThunkStubRareDisableWorker) + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 0 + add sp, sp, #SIZEOF__FloatArgumentRegisters + b LOCAL_LABEL(UMThunkStub_InCooperativeMode) + +NESTED_END UMThunkStub, _TEXT + #ifdef FEATURE_HIJACK // ------------------------------------------------------------------ // Hijack function for functions which return a scalar type or a struct (value type) diff --git a/src/coreclr/src/vm/arm64/asmhelpers.asm b/src/coreclr/src/vm/arm64/asmhelpers.asm index 2e58e43fec73b..11e7d0fef0643 100644 --- a/src/coreclr/src/vm/arm64/asmhelpers.asm +++ b/src/coreclr/src/vm/arm64/asmhelpers.asm @@ -24,7 +24,10 @@ IMPORT COMToCLRWorker IMPORT CallDescrWorkerUnwindFrameChainHandler IMPORT UMEntryPrestubUnwindFrameChainHandler + IMPORT UMThunkStubUnwindFrameChainHandler IMPORT TheUMEntryPrestubWorker + IMPORT CreateThreadBlockThrow + IMPORT UMThunkStubRareDisableWorker IMPORT GetCurrentSavedRedirectContext IMPORT LinkFrameAndThrow IMPORT FixContextHandler @@ -950,6 +953,118 @@ COMToCLRDispatchHelper_RegSetup NESTED_END +; +; x12 = UMEntryThunk* +; + NESTED_ENTRY UMThunkStub,,UMThunkStubUnwindFrameChainHandler + + ; Save arguments and return address + PROLOG_SAVE_REG_PAIR fp, lr, #-112! ; 72 for regArgs, 8 for x19 & 8 for x12 & 8 for 16-byte align + ; save callee saved reg x19. x19 is used in the method to store thread* + PROLOG_SAVE_REG x19, #96 + + SAVE_ARGUMENT_REGISTERS sp, 16 + + GBLA UMThunkStub_HiddenArg ; offset of saved UMEntryThunk * + GBLA UMThunkStub_StackArgs ; offset of original stack args (total size of UMThunkStub frame) +UMThunkStub_HiddenArg SETA 88 +UMThunkStub_StackArgs SETA 112 + + ; save UMEntryThunk* + str x12, [sp, #UMThunkStub_HiddenArg] + + ; x0 = GetThread(). 
Trashes x19 + INLINE_GETTHREAD x0, x19 + cbz x0, UMThunkStub_DoThreadSetup + +UMThunkStub_HaveThread + mov x19, x0 ; x19 = Thread * + + mov x9, 1 + ; m_fPreemptiveGCDisabled is 4 byte field so using 32-bit variant + str w9, [x19, #Thread__m_fPreemptiveGCDisabled] + + ldr x2, =g_TrapReturningThreads + ldr x3, [x2] + ; assuming x0 contains Thread* before jumping to UMThunkStub_DoTrapReturningThreads + cbnz x3, UMThunkStub_DoTrapReturningThreads + +UMThunkStub_InCooperativeMode + ldr x12, [fp, #UMThunkStub_HiddenArg] ; x12 = UMEntryThunk* + ldr x3, [x12, #UMEntryThunk__m_pUMThunkMarshInfo] ; x3 = m_pUMThunkMarshInfo + + ; m_cbActualArgSize is UINT32 and hence occupies 4 bytes + ldr w2, [x3, #UMThunkMarshInfo__m_cbActualArgSize] ; w2 = Stack arg bytes + cbz w2, UMThunkStub_RegArgumentsSetup + + ; extend to 64-bits + uxtw x2, w2 + + ; Source pointer + add x0, fp, #UMThunkStub_StackArgs + + ; move source pointer to end of Stack Args + add x0, x0, x2 + + ; Count of stack slot pairs to copy (divide by 16) + lsr x1, x2, #4 + + ; Is there an extra stack slot (can happen when stack arg bytes not multiple of 16) + and x2, x2, #8 + + ; If yes then start source pointer from 16 byte aligned stack slot + add x0, x0, x2 + + ; increment stack slot pair count by 1 if x2 is not zero + add x1, x1, x2, LSR #3 + +UMThunkStub_StackLoop + ldp x4, x5, [x0, #-16]! ; pre-Index + stp x4, x5, [sp, #-16]! ; pre-Index + subs x1, x1, #1 + bne UMThunkStub_StackLoop + +UMThunkStub_RegArgumentsSetup + ldr x16, [x3, #UMThunkMarshInfo__m_pILStub] + + RESTORE_ARGUMENT_REGISTERS fp, 16 + + blr x16 + +UMThunkStub_PostCall + mov x4, 0 + ; m_fPreemptiveGCDisabled is 4 byte field so using 32-bit variant + str w4, [x19, #Thread__m_fPreemptiveGCDisabled] + + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG x19, #96 + EPILOG_RESTORE_REG_PAIR fp, lr, #112! 
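
The ARM64 copy above moves stack arguments in 16-byte ldp/stp pairs, so the pair count and the source end pointer need a small adjustment when the native stack-arg size is only 8-byte aligned. The check below (plain C++, not part of the patch) replays that arithmetic, assuming the stack-arg size is always a multiple of 8 as the stub's code requires, and verifies that every argument byte is covered for both even and odd slot counts.

// Replays the ARM64 pair-copy arithmetic from UMThunkStub: w2 = stack arg bytes,
// copied downwards in 16-byte pairs from the end of the caller's argument area.
#include <cassert>
#include <cstdio>

int main()
{
    for (unsigned bytes = 0; bytes <= 0x80; bytes += 8)
    {
        if (bytes == 0) continue;             // cbz w2 skips the copy entirely

        unsigned pairs = bytes >> 4;          // lsr x1, x2, #4
        unsigned odd   = bytes & 8;           // and x2, x2, #8
        unsigned srcEndBump = odd;            // add x0, x0, x2 (bump the source end pointer)
        pairs += odd >> 3;                    // add x1, x1, x2, LSR #3

        // Each iteration copies 16 bytes; the extra pair for an odd slot reads 8
        // bytes past the last argument, which the bumped source pointer allows for.
        assert(pairs * 16 == bytes + srcEndBump);
        printf("bytes=0x%02x -> pairs=%u, source end bumped by %u\n", bytes, pairs, srcEndBump);
    }
    return 0;
}
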
+ + EPILOG_RETURN + +UMThunkStub_DoThreadSetup + sub sp, sp, #SIZEOF__FloatArgumentRegisters + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 0 + bl CreateThreadBlockThrow + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 0 + add sp, sp, #SIZEOF__FloatArgumentRegisters + b UMThunkStub_HaveThread + +UMThunkStub_DoTrapReturningThreads + sub sp, sp, #SIZEOF__FloatArgumentRegisters + SAVE_FLOAT_ARGUMENT_REGISTERS sp, 0 + ; x0 already contains Thread* pThread + ; UMEntryThunk* pUMEntry + ldr x1, [fp, #UMThunkStub_HiddenArg] + bl UMThunkStubRareDisableWorker + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 0 + add sp, sp, #SIZEOF__FloatArgumentRegisters + b UMThunkStub_InCooperativeMode + + NESTED_END + + INLINE_GETTHREAD_CONSTANT_POOL + #ifdef FEATURE_HIJACK ; ------------------------------------------------------------------ ; Hijack function for functions which return a scalar type or a struct (value type) diff --git a/src/coreclr/src/vm/dllimportcallback.cpp b/src/coreclr/src/vm/dllimportcallback.cpp index 659a3d9f789c8..4ea1de13b8a1b 100644 --- a/src/coreclr/src/vm/dllimportcallback.cpp +++ b/src/coreclr/src/vm/dllimportcallback.cpp @@ -105,20 +105,7 @@ class UMEntryThunkFreeList static UMEntryThunkFreeList s_thunkFreeList(DEFAULT_THUNK_FREE_LIST_THRESHOLD); -#ifdef TARGET_X86 - -#ifdef FEATURE_STUBS_AS_IL - -EXTERN_C void UMThunkStub(void); - -PCODE UMThunkMarshInfo::GetExecStubEntryPoint() -{ - LIMITED_METHOD_CONTRACT; - - return GetEEFuncEntryPoint(UMThunkStub); -} - -#else // FEATURE_STUBS_AS_IL +#if defined(TARGET_X86) && !defined(FEATURE_STUBS_AS_IL) EXTERN_C VOID __cdecl UMThunkStubRareDisable(); EXTERN_C Thread* __stdcall CreateThreadBlockThrow(); @@ -767,18 +754,16 @@ Stub *UMThunkMarshInfo::CompileNExportThunk(LoaderHeap *pLoaderHeap, PInvokeStat return pcpusl->Link(pLoaderHeap); } -#endif // FEATURE_STUBS_AS_IL - -#else // TARGET_X86 +#else // TARGET_X86 && !FEATURE_STUBS_AS_IL PCODE UMThunkMarshInfo::GetExecStubEntryPoint() { LIMITED_METHOD_CONTRACT; - return m_pILStub; + return GetEEFuncEntryPoint(UMThunkStub); } -#endif // TARGET_X86 +#endif // TARGET_X86 && !FEATURE_STUBS_AS_IL UMEntryThunkCache::UMEntryThunkCache(AppDomain *pDomain) : m_crst(CrstUMEntryThunkCache), @@ -1209,20 +1194,33 @@ VOID UMThunkMarshInfo::RunTimeInit() if (pFinalILStub == NULL) { - PInvokeStaticSigInfo sigInfo; - - if (pMD != NULL) - new (&sigInfo) PInvokeStaticSigInfo(pMD); + if (pMD != NULL && !pMD->IsEEImpl() && + !NDirect::MarshalingRequired(pMD, GetSignature().GetRawSig(), GetModule())) + { + // Call the method directly in no-delegate case if possible. This is important to avoid JITing + // for stubs created via code:ICLRRuntimeHost2::CreateDelegate during coreclr startup. + pFinalILStub = pMD->GetMultiCallableAddrOfCode(); + } else - new (&sigInfo) PInvokeStaticSigInfo(GetSignature(), GetModule()); + { + // For perf, it is important to avoid expensive initialization of + // PInvokeStaticSigInfo if we have NGened stub. 
+ PInvokeStaticSigInfo sigInfo; - DWORD dwStubFlags = 0; + if (pMD != NULL) + new (&sigInfo) PInvokeStaticSigInfo(pMD); + else + new (&sigInfo) PInvokeStaticSigInfo(GetSignature(), GetModule()); + + DWORD dwStubFlags = 0; + + if (sigInfo.IsDelegateInterop()) + dwStubFlags |= NDIRECTSTUB_FL_DELEGATE; - if (sigInfo.IsDelegateInterop()) - dwStubFlags |= NDIRECTSTUB_FL_DELEGATE; + pStubMD = GetILStubMethodDesc(pMD, &sigInfo, dwStubFlags); + pFinalILStub = JitILStub(pStubMD); - pStubMD = GetILStubMethodDesc(pMD, &sigInfo, dwStubFlags); - pFinalILStub = JitILStub(pStubMD); + } } #if defined(TARGET_X86) @@ -1279,6 +1277,13 @@ VOID UMThunkMarshInfo::RunTimeInit() // For all the other calling convention except cdecl, callee pops the stack arguments m_cbRetPop = cbRetPop + static_cast(m_cbActualArgSize); } +#else // TARGET_X86 + // + // m_cbActualArgSize gets the number of arg bytes for the NATIVE signature + // + m_cbActualArgSize = + (pStubMD != NULL) ? pStubMD->AsDynamicMethodDesc()->GetNativeStackArgSize() : pMD->SizeOfArgStack(); + #endif // TARGET_X86 #endif // TARGET_X86 && !FEATURE_STUBS_AS_IL diff --git a/src/coreclr/src/vm/dllimportcallback.h b/src/coreclr/src/vm/dllimportcallback.h index 12bc89a167fdc..0b3414ffc1696 100644 --- a/src/coreclr/src/vm/dllimportcallback.h +++ b/src/coreclr/src/vm/dllimportcallback.h @@ -170,6 +170,20 @@ class UMThunkMarshInfo PCODE GetExecStubEntryPoint(); #endif + UINT32 GetCbActualArgSize() + { + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + PRECONDITION(IsCompletelyInited()); + } + CONTRACTL_END; + + return m_cbActualArgSize; + } + BOOL IsCompletelyInited() { LIMITED_METHOD_CONTRACT; @@ -184,9 +198,13 @@ class UMThunkMarshInfo return (UINT32)offsetof(UMThunkMarshInfo, m_pILStub); } -#ifdef TARGET_X86 +#if defined(TARGET_X86) && !defined(FEATURE_STUBS_AS_IL) + // Compiles an unmanaged to managed thunk for the given signature. The thunk + // will call the stub or, if fNoStub == TRUE, directly the managed target. + Stub *CompileNExportThunk(LoaderHeap *pLoaderHeap, PInvokeStaticSigInfo* pSigInfo, MetaSig *pMetaSig, BOOL fNoStub); +#endif // TARGET_X86 && !FEATURE_STUBS_AS_IL -#ifdef FEATURE_STUBS_AS_IL +#if defined(TARGET_X86) && defined(FEATURE_STUBS_AS_IL) struct ArgumentRegisters { UINT32 Ecx; @@ -194,23 +212,17 @@ class UMThunkMarshInfo }; VOID SetupArguments(char *pSrc, ArgumentRegisters *pArgRegs, char *pDst); -#else - // Compiles an unmanaged to managed thunk for the given signature. The thunk - // will call the stub or, if fNoStub == TRUE, directly the managed target. - Stub *CompileNExportThunk(LoaderHeap *pLoaderHeap, PInvokeStaticSigInfo* pSigInfo, MetaSig *pMetaSig, BOOL fNoStub); -#endif // FEATURE_STUBS_AS_IL - -#endif // TARGET_X86 +#endif // TARGET_X86 && FEATURE_STUBS_AS_IL private: PCODE m_pILStub; // IL stub for marshaling // On x86, NULL for no-marshal signatures // On non-x86, the managed entrypoint for no-delegate no-marshal signatures -#ifdef TARGET_X86 UINT32 m_cbActualArgSize; // caches m_pSig.SizeOfFrameArgumentArray() // On x86/Linux we have to augment with numRegistersUsed * STACK_ELEM_SIZE +#if defined(TARGET_X86) UINT16 m_cbRetPop; // stack bytes popped by callee (for UpdateRegDisplay) -#ifdef FEATURE_STUBS_AS_IL +#if defined(FEATURE_STUBS_AS_IL) UINT32 m_cbStackArgSize; // stack bytes pushed for managed code #else Stub* m_pExecStub; // UMEntryThunk jumps directly here @@ -531,15 +543,21 @@ class UMEntryThunkCache // One-time creation of special prestub to initialize UMEntryThunks. 
//------------------------------------------------------------------------- Stub *GenerateUMThunkPrestub(); +#endif // TARGET_X86 && !FEATURE_STUBS_AS_IL +//------------------------------------------------------------------------- +// NExport stub +//------------------------------------------------------------------------- +#if !defined(HOST_64BIT) && !defined(DACCESS_COMPILE) && !defined(CROSS_COMPILE) EXCEPTION_HANDLER_DECL(FastNExportExceptHandler); EXCEPTION_HANDLER_DECL(UMThunkPrestubHandler); - -#endif // TARGET_X86 && !FEATURE_STUBS_AS_IL +#endif // HOST_64BIT extern "C" void TheUMEntryPrestub(void); extern "C" PCODE TheUMEntryPrestubWorker(UMEntryThunk * pUMEntryThunk); +EXTERN_C void UMThunkStub(void); + #ifdef _DEBUG void STDCALL LogUMTransition(UMEntryThunk* thunk); #endif diff --git a/src/coreclr/src/vm/i386/asmhelpers.S b/src/coreclr/src/vm/i386/asmhelpers.S index c318dc3903096..dcc210c98bb37 100644 --- a/src/coreclr/src/vm/i386/asmhelpers.S +++ b/src/coreclr/src/vm/i386/asmhelpers.S @@ -555,6 +555,22 @@ LEAF_ENTRY PrecodeFixupThunk, _TEXT jmp C_FUNC(ThePreStub) LEAF_END PrecodeFixupThunk, _TEXT +NESTED_ENTRY UMThunkStubRareDisable, _TEXT, NoHandler + push eax + push ecx + + sub esp, 12 + push eax // Push the UMEntryThunk + push ecx // Push thread + CHECK_STACK_ALIGNMENT + call C_FUNC(UMThunkStubRareDisableWorker) + add esp, 12 + + pop ecx + pop eax + ret +NESTED_END UMThunkStubRareDisable, _TEXT + // // Used to get the current instruction pointer value // diff --git a/src/coreclr/src/vm/ilstubcache.cpp b/src/coreclr/src/vm/ilstubcache.cpp index 86299dfeeef07..9e17fe18ed312 100644 --- a/src/coreclr/src/vm/ilstubcache.cpp +++ b/src/coreclr/src/vm/ilstubcache.cpp @@ -279,9 +279,6 @@ MethodDesc* ILStubCache::CreateNewMethodDesc(LoaderHeap* pCreationHeap, MethodTa if (SF_IsReverseStub(dwStubFlags)) { pMD->m_dwExtendedFlags |= DynamicMethodDesc::nomdReverseStub; -#if !defined(TARGET_X86) - pMD->m_dwExtendedFlags |= DynamicMethodDesc::nomdNativeCallableStub; -#endif pMD->GetILStubResolver()->SetStubType(ILStubResolver::NativeToCLRInteropStub); } else diff --git a/src/coreclr/src/vm/jithelpers.cpp b/src/coreclr/src/vm/jithelpers.cpp index 658e4e6cdd07e..8d64b2629fffa 100644 --- a/src/coreclr/src/vm/jithelpers.cpp +++ b/src/coreclr/src/vm/jithelpers.cpp @@ -5383,14 +5383,8 @@ NOINLINE static void JIT_ReversePInvokeEnterRare(ReversePInvokeFrame* frame) if (thread->PreemptiveGCDisabled()) ReversePInvokeBadTransition(); - frame->currentThread = thread; - thread->DisablePreemptiveGC(); -} - -NOINLINE static void JIT_ReversePInvokeEnterRare2(ReversePInvokeFrame* frame) -{ - frame->currentThread->RareDisablePreemptiveGC(); + frame->currentThread = thread; } EXTERN_C void JIT_ReversePInvokeEnter(ReversePInvokeFrame* frame) @@ -5403,17 +5397,13 @@ EXTERN_C void JIT_ReversePInvokeEnter(ReversePInvokeFrame* frame) if (thread != NULL && !thread->PreemptiveGCDisabled()) { - frame->currentThread = thread; - // Manually inline the fast path in Thread::DisablePreemptiveGC(). 
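
The jithelpers.cpp hunk below folds JIT_ReversePInvokeEnterRare2 back into a single rare helper: the fast path inlines the "disable preemptive GC" store and records the thread only when no GC trap is pending, and everything else funnels through JIT_ReversePInvokeEnterRare. The following self-contained toy model (invented types and _Sketch names, not CoreCLR declarations) shows that fast/rare split.

// Toy model of the reverse-P/Invoke enter/exit pair restored by this revert.
#include <cstdio>

struct ToyThread { volatile int preemptiveGCDisabled = 0; };
struct ToyFrame  { ToyThread* currentThread = nullptr; };

static thread_local ToyThread t_thread;
static volatile int g_trapReturningThreads = 0;

static void ReversePInvokeEnterRare_Sketch(ToyFrame* frame)
{
    // The real helper also handles thread creation, the bad-transition failfast
    // and GC polling; the model only keeps the state it leaves behind.
    ToyThread* thread = &t_thread;
    thread->preemptiveGCDisabled = 1;
    frame->currentThread = thread;
}

static void ReversePInvokeEnter_Sketch(ToyFrame* frame)
{
    ToyThread* thread = &t_thread;
    if (!thread->preemptiveGCDisabled)
    {
        thread->preemptiveGCDisabled = 1;           // inlined DisablePreemptiveGC fast path
        if (g_trapReturningThreads == 0)
        {
            frame->currentThread = thread;          // trap-free: done
            return;
        }
    }
    ReversePInvokeEnterRare_Sketch(frame);          // pending GC trap, or no thread yet
}

static void ReversePInvokeExit_Sketch(ToyFrame* frame)
{
    frame->currentThread->preemptiveGCDisabled = 0; // back to preemptive mode for the return
}

int main()
{
    ToyFrame frame;
    ReversePInvokeEnter_Sketch(&frame);
    std::printf("in cooperative mode: %d\n", frame.currentThread->preemptiveGCDisabled);
    ReversePInvokeExit_Sketch(&frame);
    return 0;
}
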
thread->m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); if (g_TrapReturningThreads.LoadWithoutBarrier() == 0) { + frame->currentThread = thread; return; } - - JIT_ReversePInvokeEnterRare2(frame); - return; } JIT_ReversePInvokeEnterRare(frame); diff --git a/src/coreclr/src/vm/jitinterface.cpp b/src/coreclr/src/vm/jitinterface.cpp index 1f71c90307aab..356ee2b4352e2 100644 --- a/src/coreclr/src/vm/jitinterface.cpp +++ b/src/coreclr/src/vm/jitinterface.cpp @@ -9209,10 +9209,10 @@ void CEEInfo::getFunctionFixedEntryPoint(CORINFO_METHOD_HANDLE ftn, pResult->accessType = IAT_VALUE; -#if defined(TARGET_X86) && !defined(CROSSGEN_COMPILE) +// Also see GetBaseCompileFlags() below for an additional check. +#if defined(TARGET_X86) && defined(TARGET_WINDOWS) && !defined(CROSSGEN_COMPILE) // Deferring X86 support until a need is observed or // time permits investigation into all the potential issues. - // https://github.com/dotnet/runtime/issues/33582 if (pMD->HasNativeCallableAttribute()) { pResult->addr = (void*)COMDelegate::ConvertToCallback(pMD); @@ -9221,9 +9221,12 @@ void CEEInfo::getFunctionFixedEntryPoint(CORINFO_METHOD_HANDLE ftn, { pResult->addr = (void*)pMD->GetMultiCallableAddrOfCode(); } + #else + pResult->addr = (void*)pMD->GetMultiCallableAddrOfCode(); -#endif + +#endif // !(TARGET_X86 && TARGET_WINDOWS) || CROSSGEN_COMPILE EE_TO_JIT_TRANSITION(); } @@ -12438,10 +12441,10 @@ CorJitResult CallCompileMethodWithSEHWrapper(EEJitManager *jitMgr, } } -#if !defined(TARGET_X86) +#if !defined(TARGET_X86) || !defined(TARGET_WINDOWS) if (ftn->HasNativeCallableAttribute()) flags.Set(CORJIT_FLAGS::CORJIT_FLAG_REVERSE_PINVOKE); -#endif // !TARGET_X86 +#endif // !TARGET_X86 || !TARGET_WINDOWS return flags; } diff --git a/src/coreclr/src/vm/method.cpp b/src/coreclr/src/vm/method.cpp index a458fa9d65cd6..ee5b6b5876f68 100644 --- a/src/coreclr/src/vm/method.cpp +++ b/src/coreclr/src/vm/method.cpp @@ -5415,11 +5415,6 @@ BOOL MethodDesc::HasNativeCallableAttribute() } CONTRACTL_END; - if (IsILStub()) - { - return AsDynamicMethodDesc()->IsNativeCallableStub(); - } - HRESULT hr = GetCustomAttribute( WellKnownAttribute::NativeCallable, nullptr, diff --git a/src/coreclr/src/vm/method.hpp b/src/coreclr/src/vm/method.hpp index 5973cd6d6053f..9a558d293084b 100644 --- a/src/coreclr/src/vm/method.hpp +++ b/src/coreclr/src/vm/method.hpp @@ -2618,7 +2618,6 @@ class DynamicMethodDesc : public StoredSigMethodDesc nomdMulticastStub = 0x1000, nomdUnboxingILStub = 0x2000, nomdWrapperDelegateStub = 0x4000, - nomdNativeCallableStub = 0x8000, nomdILStub = 0x00010000, nomdLCGMethod = 0x00020000, @@ -2711,7 +2710,6 @@ class DynamicMethodDesc : public StoredSigMethodDesc } bool IsReverseStub() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(IsILStub()); return (0 != (m_dwExtendedFlags & nomdReverseStub)); } - bool IsNativeCallableStub() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(IsILStub()); return (0 != (m_dwExtendedFlags & nomdNativeCallableStub)); } bool IsCALLIStub() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(IsILStub()); return (0 != (m_dwExtendedFlags & nomdCALLIStub)); } bool IsDelegateStub() { LIMITED_METHOD_DAC_CONTRACT; _ASSERTE(IsILStub()); return (0 != (m_dwExtendedFlags & nomdDelegateStub)); } bool IsCLRToCOMStub() { LIMITED_METHOD_CONTRACT; _ASSERTE(IsILStub()); return ((0 == (m_dwExtendedFlags & mdStatic)) && !IsReverseStub() && !IsDelegateStub() && !IsStructMarshalStub()); } diff --git a/src/coreclr/src/zap/zapinfo.cpp b/src/coreclr/src/zap/zapinfo.cpp index f65ec7c8aaf1f..d09204bddb806 100644 --- 
a/src/coreclr/src/zap/zapinfo.cpp +++ b/src/coreclr/src/zap/zapinfo.cpp @@ -482,14 +482,14 @@ void ZapInfo::CompileMethod() } #endif -#ifdef TARGET_X86 +#if defined(TARGET_X86) && defined(TARGET_WINDOWS) if (GetCompileInfo()->IsNativeCallableMethod(m_currentMethodHandle)) { if (m_zapper->m_pOpt->m_verbose) m_zapper->Warning(W("ReadyToRun: Methods with NativeCallableAttribute not implemented\n")); ThrowHR(E_NOTIMPL); } -#endif // TARGET_X86 +#endif // (TARGET_X86) && defined(TARGET_WINDOWS) if (m_pImage->m_stats) { @@ -2285,14 +2285,14 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, } #endif -#ifdef TARGET_X86 +#if defined(TARGET_X86) && defined(TARGET_WINDOWS) if (GetCompileInfo()->IsNativeCallableMethod(pResult->hMethod)) { if (m_zapper->m_pOpt->m_verbose) m_zapper->Warning(W("ReadyToRun: References to methods with NativeCallableAttribute not implemented\n")); ThrowHR(E_NOTIMPL); } -#endif // TARGET_X86 +#endif // (TARGET_X86) && defined(TARGET_WINDOWS) if (flags & CORINFO_CALLINFO_KINDONLY) return;
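
All of the machinery in this patch serves one scenario: native code calling a managed method marked NativeCallable through a plain function pointer. The minimal native-side sketch below shows that direction of the call; the callback signature, GetManagedCallback helper, and the stand-in implementation are assumptions for illustration (in reality the pointer comes from the runtime, e.g. via a delegate-to-function-pointer conversion or a hosting API), not anything defined in this patch.

// Native caller sketch: invokes a managed [NativeCallable] method through a raw
// function pointer. The runtime-side transition (UMThunkStub, or the JIT-emitted
// reverse-P/Invoke prolog/epilog) is what the rest of this patch implements.
#include <cstdio>

// Assumed signature of the exported managed callback: int Callback(int).
using ManagedCallback = int (*)(int);

// Stand-in for the managed side so this sketch links and runs on its own;
// in the real scenario this pointer is produced by the CoreCLR runtime.
static int FakeManagedCallback(int x) { return x + 1; }
static ManagedCallback GetManagedCallback() { return &FakeManagedCallback; }

int main()
{
    ManagedCallback cb = GetManagedCallback();
    // This call lands on the reverse-P/Invoke entry path: the thread is switched
    // to cooperative mode, the managed body runs, and the thread is switched back
    // to preemptive mode before control returns here.
    int result = cb(42);
    std::printf("managed callback returned %d\n", result);
    return 0;
}
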