From e93b15843376cd1580cbd34d8e0050d63483101b Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Thu, 17 Mar 2022 23:19:15 +0100 Subject: [PATCH] Reimplement stubs to improve performance (#65738) * Reimplement stubs to improve performance This change implements `FixupPrecodeStub`, `PrecodeStub` and `CallCountingStub` using a new mechanism with fixed code and separate RW data. The `LoaderHeap` was updated to support a new kind of allocation using interleaved code and data pages to support this new mechanism. The JIT now generates code that uses an indirection slot to jump to the methods using `FixupPrecode`, improving performance of the ASP.NET plaintext benchmark by 3-4% depending on the target platform (measured on x64 Windows / Linux and arm64 Linux). I have also removed the Holders, as the stubs are naturally properly aligned due to the way they are allocated. There is now only a single variant of each stub; there are no long / short ones anymore, as they are not needed - the indirect jumps we use now are not range limited. Most of the stub code is now target agnostic, and the originally split implementation is now in a single place for all targets. Only a few constants are defined as target specific in these. The code for the stubs is no longer generated as bytes by C++ code, but rather written in asm and compiled. These precompiled templates are then used as a source to copy the code from. x86 is a bit more complex than that because it doesn't support PC-relative indirect addressing, so we need to relocate all accesses to the data slots when generating the code pages. As a further improvement, we could generate just a single page of the code and then just map it many times. This is left for future work. ARM64 Unix differs from the other targets / platforms - there are various page sizes being used. So the asm templates are generated for 4k..64k page sizes and the variant is then picked at runtime based on the page size extracted from the OS. 
This also removes a lot of writeable mappings created for modifications of the stub code when W^X is enabled; in the plaintext benchmark they were reduced by 75%. That results in a significant reduction of the .NET application startup time with W^X enabled. I think the `LoaderHeap` would benefit from some refactoring, but I'd prefer leaving it for a follow up. It seems that for the sake of the review, it is better to keep it as is. The change also implements logging of the number of mappings and their exact locations. This helped me to drive the work and I am planning to use it for further changes. It can be removed in the future once we reach a final state. There are still opportunities for improvement, but these stubs allowed me to scrape off the most significant portion of the mappings. --- src/coreclr/inc/daccess.h | 1 - src/coreclr/inc/executableallocator.h | 54 +- src/coreclr/inc/holder.h | 16 +- src/coreclr/inc/loaderheap.h | 42 +- src/coreclr/minipal/Windows/doublemapping.cpp | 4 +- src/coreclr/pal/inc/unixasmmacrosamd64.inc | 2 +- src/coreclr/utilcode/executableallocator.cpp | 195 ++++++-- src/coreclr/utilcode/loaderheap.cpp | 267 +++++++--- src/coreclr/vm/CMakeLists.txt | 10 +- src/coreclr/vm/amd64/AsmHelpers.asm | 34 +- src/coreclr/vm/amd64/asmconstants.h | 24 + src/coreclr/vm/amd64/cgenamd64.cpp | 58 --- src/coreclr/vm/amd64/cgencpu.h | 331 ------------- src/coreclr/vm/amd64/theprestubamd64.S | 1 - src/coreclr/vm/amd64/thunktemplates.S | 31 ++ src/coreclr/vm/amd64/thunktemplates.asm | 34 ++ src/coreclr/vm/amd64/unixasmhelpers.S | 33 +- src/coreclr/vm/appdomain.cpp | 1 - src/coreclr/vm/arm/asmconstants.h | 24 + src/coreclr/vm/arm/asmhelpers.S | 23 - src/coreclr/vm/arm/asmhelpers.asm | 23 - src/coreclr/vm/arm/cgencpu.h | 360 +------------- src/coreclr/vm/arm/stubs.cpp | 145 +----- src/coreclr/vm/arm/thunktemplates.S | 42 ++ src/coreclr/vm/arm/thunktemplates.asm | 43 ++ src/coreclr/vm/arm64/asmconstants.h | 39 +- src/coreclr/vm/arm64/asmhelpers.S | 21 +- 
src/coreclr/vm/arm64/asmhelpers.asm | 23 +- src/coreclr/vm/arm64/cgencpu.h | 375 -------------- src/coreclr/vm/arm64/stubs.cpp | 129 +---- src/coreclr/vm/arm64/thunktemplates.S | 39 ++ src/coreclr/vm/arm64/thunktemplates.asm | 37 ++ src/coreclr/vm/callcounting.cpp | 122 +++-- src/coreclr/vm/callcounting.h | 122 +++++ src/coreclr/vm/ceeload.cpp | 2 +- src/coreclr/vm/ceemain.cpp | 30 +- src/coreclr/vm/cgensys.h | 6 - src/coreclr/vm/codeman.cpp | 10 +- src/coreclr/vm/codeman.h | 7 + src/coreclr/vm/comcallablewrapper.cpp | 4 +- src/coreclr/vm/common.h | 1 - src/coreclr/vm/corhost.cpp | 5 + src/coreclr/vm/dynamicmethod.cpp | 8 +- src/coreclr/vm/gccover.cpp | 27 +- src/coreclr/vm/i386/AsmMacros.inc | 23 + src/coreclr/vm/i386/asmconstants.h | 20 + src/coreclr/vm/i386/asmhelpers.S | 30 +- src/coreclr/vm/i386/asmhelpers.asm | 29 +- src/coreclr/vm/i386/cgencpu.h | 207 -------- src/coreclr/vm/i386/cgenx86.cpp | 57 --- src/coreclr/vm/i386/jitinterfacex86.cpp | 12 +- src/coreclr/vm/i386/stublinkerx86.cpp | 287 +---------- src/coreclr/vm/i386/stublinkerx86.h | 240 +-------- src/coreclr/vm/i386/thunktemplates.S | 57 +++ src/coreclr/vm/i386/thunktemplates.asm | 60 +++ src/coreclr/vm/jitinterface.cpp | 42 +- src/coreclr/vm/loaderallocator.cpp | 37 +- src/coreclr/vm/loaderallocator.hpp | 16 + src/coreclr/vm/method.cpp | 28 +- src/coreclr/vm/method.hpp | 6 +- src/coreclr/vm/peimage.cpp | 2 +- src/coreclr/vm/precode.cpp | 463 +++++++++++++----- src/coreclr/vm/precode.h | 397 ++++++++++++--- src/coreclr/vm/stublink.cpp | 4 +- src/coreclr/vm/stubmgr.cpp | 29 +- src/coreclr/vm/stubmgr.h | 24 +- src/coreclr/vm/util.cpp | 18 + src/coreclr/vm/util.hpp | 9 + src/coreclr/vm/virtualcallstub.cpp | 12 +- 69 files changed, 2063 insertions(+), 2851 deletions(-) create mode 100644 src/coreclr/vm/amd64/thunktemplates.S create mode 100644 src/coreclr/vm/amd64/thunktemplates.asm create mode 100644 src/coreclr/vm/arm/thunktemplates.S create mode 100644 src/coreclr/vm/arm/thunktemplates.asm create mode 
100644 src/coreclr/vm/arm64/thunktemplates.S create mode 100644 src/coreclr/vm/arm64/thunktemplates.asm create mode 100644 src/coreclr/vm/i386/thunktemplates.S create mode 100644 src/coreclr/vm/i386/thunktemplates.asm diff --git a/src/coreclr/inc/daccess.h b/src/coreclr/inc/daccess.h index a1e812276d853..5ad8b99b67b36 100644 --- a/src/coreclr/inc/daccess.h +++ b/src/coreclr/inc/daccess.h @@ -614,7 +614,6 @@ typedef struct _DacGlobals #endif // TARGET_ARM ULONG fn__ThePreStubPatchLabel; - ULONG fn__PrecodeFixupThunk; #ifdef FEATURE_COMINTEROP ULONG fn__Unknown_AddRef; ULONG fn__Unknown_AddRefSpecial; diff --git a/src/coreclr/inc/executableallocator.h b/src/coreclr/inc/executableallocator.h index 04dfdf031b41f..c229f5546aa7f 100644 --- a/src/coreclr/inc/executableallocator.h +++ b/src/coreclr/inc/executableallocator.h @@ -15,6 +15,8 @@ #ifndef DACCESS_COMPILE +//#define LOG_EXECUTABLE_ALLOCATOR_STATISTICS + // This class is responsible for allocation of all the executable memory in the runtime. 
class ExecutableAllocator { @@ -49,7 +51,17 @@ class ExecutableAllocator }; typedef void (*FatalErrorHandler)(UINT errorCode, LPCWSTR pszMessage); - +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + static int64_t g_mapTimeSum; + static int64_t g_mapTimeWithLockSum; + static int64_t g_unmapTimeSum; + static int64_t g_unmapTimeWithLockSum; + static int64_t g_mapFindRXTimeSum; + static int64_t g_mapCreateTimeSum; + + static int64_t g_releaseCount; + static int64_t g_reserveCount; +#endif // Instance of the allocator static ExecutableAllocator* g_instance; @@ -142,8 +154,28 @@ class ExecutableAllocator // Initialize the allocator instance bool Initialize(); +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + static CRITSEC_COOKIE s_LoggerCriticalSection; + + struct LogEntry + { + const char* source; + const char* function; + int line; + int count; + }; + + static LogEntry s_usageLog[256]; + static int s_logMaxIndex; +#endif + public: +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + static void LogUsage(const char* source, int line, const char* function); + static void DumpHolderUsage(); +#endif + // Return the ExecuteAllocator singleton instance static ExecutableAllocator* Instance(); @@ -201,6 +233,8 @@ class ExecutableAllocator void UnmapRW(void* pRW); }; +#define ExecutableWriterHolder ExecutableWriterHolderNoLog + // Holder class to map read-execute memory as read-write so that it can be modified without using read-write-execute mapping. // At the moment the implementation is dummy, returning the same addresses for both cases and expecting them to be read-write-execute. // The class uses the move semantics to ensure proper unmapping in case of re-assigning of the holder value. 
@@ -274,6 +308,24 @@ class ExecutableWriterHolder { return m_addressRW; } + + void AssignExecutableWriterHolder(T* addressRX, size_t size) + { + *this = ExecutableWriterHolder(addressRX, size); + } }; +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS +#undef ExecutableWriterHolder +#ifdef TARGET_UNIX +#define ExecutableWriterHolder ExecutableAllocator::LogUsage(__FILE__, __LINE__, __PRETTY_FUNCTION__); ExecutableWriterHolderNoLog +#define AssignExecutableWriterHolder(addressRX, size) AssignExecutableWriterHolder(addressRX, size); ExecutableAllocator::LogUsage(__FILE__, __LINE__, __PRETTY_FUNCTION__); +#else +#define ExecutableWriterHolder ExecutableAllocator::LogUsage(__FILE__, __LINE__, __FUNCTION__); ExecutableWriterHolderNoLog +#define AssignExecutableWriterHolder(addressRX, size) AssignExecutableWriterHolder(addressRX, size); ExecutableAllocator::LogUsage(__FILE__, __LINE__, __FUNCTION__); +#endif +#else +#define ExecutableWriterHolder ExecutableWriterHolderNoLog +#endif + #endif // !DACCESS_COMPILE diff --git a/src/coreclr/inc/holder.h b/src/coreclr/inc/holder.h index 4ec7b106cc0e8..88b7993a5cf92 100644 --- a/src/coreclr/inc/holder.h +++ b/src/coreclr/inc/holder.h @@ -934,15 +934,25 @@ using NonVMComHolder = SpecializedWrapper<_TYPE, DoTheRelease<_TYPE>>; // } // foo->DecRef() on out of scope // //----------------------------------------------------------------------------- + template -class ExecutableWriterHolder; +class ExecutableWriterHolderNoLog; -template +class ExecutableAllocator; + +template FORCEINLINE void StubRelease(TYPE* value) { if (value) { - ExecutableWriterHolder stubWriterHolder(value, sizeof(TYPE)); +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS +#ifdef TARGET_UNIX + LOGGER::LogUsage(__FILE__, __LINE__, __PRETTY_FUNCTION__); +#else + LOGGER::LogUsage(__FILE__, __LINE__, __FUNCTION__); +#endif +#endif // LOG_EXECUTABLE_ALLOCATOR_STATISTICS + ExecutableWriterHolderNoLog stubWriterHolder(value, sizeof(TYPE)); stubWriterHolder.GetRW()->DecRef(); } } diff 
--git a/src/coreclr/inc/loaderheap.h b/src/coreclr/inc/loaderheap.h index 42b9caa6330f3..324cf2f161c50 100644 --- a/src/coreclr/inc/loaderheap.h +++ b/src/coreclr/inc/loaderheap.h @@ -191,6 +191,15 @@ class UnlockedLoaderHeap friend class ClrDataAccess; #endif +public: + + enum class HeapKind + { + Data, + Executable, + Interleaved + }; + private: // Linked list of ClrVirtualAlloc'd pages PTR_LoaderHeapBlock m_pFirstBlock; @@ -208,12 +217,16 @@ class UnlockedLoaderHeap // When we need to commit pages from our reserved list, number of bytes to commit at a time DWORD m_dwCommitBlockSize; + // For interleaved heap (RX pages interleaved with RW ones), this specifies the allocation granularity, + // which is the individual code block size + DWORD m_dwGranularity; + // Range list to record memory ranges in RangeList * m_pRangeList; size_t m_dwTotalAlloc; - DWORD m_Options; + HeapKind m_kind; LoaderHeapFreeBlock *m_pFirstFreeBlock; @@ -263,6 +276,7 @@ class UnlockedLoaderHeap public: BOOL m_fExplicitControl; // Am I a LoaderHeap or an ExplicitControlLoaderHeap? 
+ void (*m_codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX); #ifdef DACCESS_COMPILE public: @@ -283,7 +297,9 @@ class UnlockedLoaderHeap const BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, RangeList *pRangeList = NULL, - BOOL fMakeExecutable = FALSE); + HeapKind kind = HeapKind::Data, + void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX) = NULL, + DWORD dwGranularity = 1); ~UnlockedLoaderHeap(); #endif @@ -400,6 +416,7 @@ class UnlockedLoaderHeap } BOOL IsExecutable(); + BOOL IsInterleaved(); public: #ifdef _DEBUG @@ -443,14 +460,18 @@ class LoaderHeap : public UnlockedLoaderHeap, public ILoaderHeapBackout LoaderHeap(DWORD dwReserveBlockSize, DWORD dwCommitBlockSize, RangeList *pRangeList = NULL, - BOOL fMakeExecutable = FALSE, - BOOL fUnlocked = FALSE + UnlockedLoaderHeap::HeapKind kind = UnlockedLoaderHeap::HeapKind::Data, + BOOL fUnlocked = FALSE, + void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX) = NULL, + DWORD dwGranularity = 1 ) : UnlockedLoaderHeap(dwReserveBlockSize, dwCommitBlockSize, NULL, 0, pRangeList, - fMakeExecutable), + kind, + codePageGenerator, + dwGranularity), m_CriticalSection(fUnlocked ? NULL : CreateLoaderHeapLock()) { WRAPPER_NO_CONTRACT; @@ -463,15 +484,18 @@ class LoaderHeap : public UnlockedLoaderHeap, public ILoaderHeapBackout const BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, RangeList *pRangeList = NULL, - BOOL fMakeExecutable = FALSE, - BOOL fUnlocked = FALSE + UnlockedLoaderHeap::HeapKind kind = UnlockedLoaderHeap::HeapKind::Data, + BOOL fUnlocked = FALSE, + void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX) = NULL, + DWORD dwGranularity = 1 ) : UnlockedLoaderHeap(dwReserveBlockSize, dwCommitBlockSize, dwReservedRegionAddress, dwReservedRegionSize, pRangeList, - fMakeExecutable), + kind, + codePageGenerator, dwGranularity), m_CriticalSection(fUnlocked ? 
NULL : CreateLoaderHeapLock()) { WRAPPER_NO_CONTRACT; @@ -776,7 +800,7 @@ class ExplicitControlLoaderHeap : public UnlockedLoaderHeap ) : UnlockedLoaderHeap(0, 0, NULL, 0, pRangeList, - fMakeExecutable) + fMakeExecutable ? UnlockedLoaderHeap::HeapKind::Executable : UnlockedLoaderHeap::HeapKind::Data) { WRAPPER_NO_CONTRACT; m_fExplicitControl = TRUE; diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp index e265f1d139ad0..0d7033b567056 100644 --- a/src/coreclr/minipal/Windows/doublemapping.cpp +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -184,8 +184,8 @@ void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size) { - // Zero the memory before the unmapping - VirtualAlloc(pStart, size, MEM_COMMIT, PAGE_READWRITE); + LPVOID result = VirtualAlloc(pStart, size, MEM_COMMIT, PAGE_READWRITE); + assert(result != NULL); memset(pStart, 0, size); return UnmapViewOfFile(pStart); } diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index 9a656ddf1bec2..2aca375faa838 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -27,7 +27,7 @@ .macro PATCH_LABEL Name .global C_FUNC(\Name) -C_FUNC(\Name): + C_FUNC(\Name) = . 
.endm .macro LEAF_ENTRY Name, Section diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp index 49431b6ecce74..197ce6e8e6929 100644 --- a/src/coreclr/utilcode/executableallocator.cpp +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -17,9 +17,90 @@ BYTE * ExecutableAllocator::g_preferredRangeMax; bool ExecutableAllocator::g_isWXorXEnabled = false; ExecutableAllocator::FatalErrorHandler ExecutableAllocator::g_fatalErrorHandler = NULL; - ExecutableAllocator* ExecutableAllocator::g_instance = NULL; +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS +int64_t ExecutableAllocator::g_mapTimeSum = 0; +int64_t ExecutableAllocator::g_mapTimeWithLockSum = 0; +int64_t ExecutableAllocator::g_unmapTimeSum = 0; +int64_t ExecutableAllocator::g_unmapTimeWithLockSum = 0; +int64_t ExecutableAllocator::g_mapFindRXTimeSum = 0; +int64_t ExecutableAllocator::g_mapCreateTimeSum = 0; +int64_t ExecutableAllocator::g_releaseCount = 0; +int64_t ExecutableAllocator::g_reserveCount = 0; + +ExecutableAllocator::LogEntry ExecutableAllocator::s_usageLog[256]; +int ExecutableAllocator::s_logMaxIndex = 0; +CRITSEC_COOKIE ExecutableAllocator::s_LoggerCriticalSection; + +class StopWatch +{ + LARGE_INTEGER m_start; + int64_t* m_accumulator; + +public: + StopWatch(int64_t* accumulator) : m_accumulator(accumulator) + { + QueryPerformanceCounter(&m_start); + } + + ~StopWatch() + { + LARGE_INTEGER end; + QueryPerformanceCounter(&end); + + InterlockedExchangeAdd64(m_accumulator, end.QuadPart - m_start.QuadPart); + } +}; + +void ExecutableAllocator::LogUsage(const char* source, int line, const char* function) +{ + CRITSEC_Holder csh(s_LoggerCriticalSection); + + for (int i = 0; i < s_logMaxIndex; i++) + { + if (s_usageLog[i].source == source && s_usageLog[i].line == line) + { + s_usageLog[i].count++; + return; + } + } + + int i = s_logMaxIndex; + s_logMaxIndex++; + s_usageLog[i].source = source; + s_usageLog[i].function = function; + s_usageLog[i].line = 
line; + s_usageLog[i].count = 1; +} + +void ExecutableAllocator::DumpHolderUsage() +{ + CRITSEC_Holder csh(s_LoggerCriticalSection); + + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + + fprintf(stderr, "Map time with lock sum: %I64dms\n", g_mapTimeWithLockSum / (freq.QuadPart / 1000)); + fprintf(stderr, "Map time sum: %I64dms\n", g_mapTimeSum / (freq.QuadPart / 1000)); + fprintf(stderr, "Map find RX time sum: %I64dms\n", g_mapFindRXTimeSum / (freq.QuadPart / 1000)); + fprintf(stderr, "Map create time sum: %I64dms\n", g_mapCreateTimeSum / (freq.QuadPart / 1000)); + fprintf(stderr, "Unmap time with lock sum: %I64dms\n", g_unmapTimeWithLockSum / (freq.QuadPart / 1000)); + fprintf(stderr, "Unmap time sum: %I64dms\n", g_unmapTimeSum / (freq.QuadPart / 1000)); + + fprintf(stderr, "Reserve count: %I64d\n", g_reserveCount); + fprintf(stderr, "Release count: %I64d\n", g_releaseCount); + + fprintf(stderr, "ExecutableWriterHolder usage:\n"); + + for (int i = 0; i < s_logMaxIndex; i++) + { + fprintf(stderr, "Count: %d at %s:%d in %s\n", s_usageLog[i].count, s_usageLog[i].source, s_usageLog[i].line, s_usageLog[i].function); + } +} + +#endif // LOG_EXECUTABLE_ALLOCATOR_STATISTICS + bool ExecutableAllocator::IsDoubleMappingEnabled() { LIMITED_METHOD_CONTRACT; @@ -154,6 +235,9 @@ HRESULT ExecutableAllocator::StaticInitialize(FatalErrorHandler fatalErrorHandle return E_FAIL; } +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + s_LoggerCriticalSection = ClrCreateCriticalSection(CrstExecutableAllocatorLock, CrstFlags(CRST_UNSAFE_ANYMODE | CRST_DEBUGGER_THREAD)); +#endif return S_OK; } @@ -212,7 +296,11 @@ void* ExecutableAllocator::FindRWBlock(void* baseRX, size_t size) { if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) { - pBlock->refCount++; +#ifdef TARGET_64BIT + InterlockedIncrement64((LONG64*)& pBlock->refCount); +#else + InterlockedIncrement((LONG*)&pBlock->refCount); +#endif UpdateCachedMapping(pBlock); return 
(BYTE*)pBlock->baseRW + ((size_t)baseRX - (size_t)pBlock->baseRX); @@ -226,14 +314,6 @@ bool ExecutableAllocator::AddRWBlock(void* baseRW, void* baseRX, size_t size) { LIMITED_METHOD_CONTRACT; - for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) - { - if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) - { - break; - } - } - // The new "nothrow" below failure is handled as fail fast since it is not recoverable PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); @@ -340,6 +420,10 @@ void ExecutableAllocator::Release(void* pRX) { LIMITED_METHOD_CONTRACT; +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + InterlockedIncrement64(&g_releaseCount); +#endif + if (IsDoubleMappingEnabled()) { CRITSEC_Holder csh(m_CriticalSection); @@ -386,54 +470,40 @@ void ExecutableAllocator::Release(void* pRX) } } -// Find a free block with the closest size >= the requested size. +// Find a free block with the size == the requested size. // Returns NULL if no such block exists. 
ExecutableAllocator::BlockRX* ExecutableAllocator::FindBestFreeBlock(size_t size) { LIMITED_METHOD_CONTRACT; BlockRX* pPrevBlock = NULL; - BlockRX* pPrevBestBlock = NULL; - BlockRX* pBestBlock = NULL; BlockRX* pBlock = m_pFirstFreeBlockRX; while (pBlock != NULL) { - if (pBlock->size >= size) + if (pBlock->size == size) { - if (pBestBlock != NULL) - { - if (pBlock->size < pBestBlock->size) - { - pPrevBestBlock = pPrevBlock; - pBestBlock = pBlock; - } - } - else - { - pPrevBestBlock = pPrevBlock; - pBestBlock = pBlock; - } + break; } pPrevBlock = pBlock; pBlock = pBlock->next; } - if (pBestBlock != NULL) + if (pBlock != NULL) { - if (pPrevBestBlock != NULL) + if (pPrevBlock != NULL) { - pPrevBestBlock->next = pBestBlock->next; + pPrevBlock->next = pBlock->next; } else { - m_pFirstFreeBlockRX = pBestBlock->next; + m_pFirstFreeBlockRX = pBlock->next; } - pBestBlock->next = NULL; + pBlock->next = NULL; } - return pBestBlock; + return pBlock; } // Allocate a new block of executable memory and the related descriptor structure. 
@@ -491,6 +561,10 @@ void* ExecutableAllocator::ReserveWithinRange(size_t size, const void* loAddress { LIMITED_METHOD_CONTRACT; +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + InterlockedIncrement64(&g_reserveCount); +#endif + _ASSERTE((size & (Granularity() - 1)) == 0); if (IsDoubleMappingEnabled()) { @@ -537,6 +611,10 @@ void* ExecutableAllocator::Reserve(size_t size) { LIMITED_METHOD_CONTRACT; +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + InterlockedIncrement64(&g_reserveCount); +#endif + _ASSERTE((size & (Granularity() - 1)) == 0); BYTE *result = NULL; @@ -625,6 +703,10 @@ void* ExecutableAllocator::ReserveAt(void* baseAddressRX, size_t size) { LIMITED_METHOD_CONTRACT; +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + InterlockedIncrement64(&g_reserveCount); +#endif + _ASSERTE((size & (Granularity() - 1)) == 0); if (IsDoubleMappingEnabled()) @@ -670,30 +752,45 @@ void* ExecutableAllocator::MapRW(void* pRX, size_t size) return pRX; } +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + StopWatch swAll(&g_mapTimeWithLockSum); +#endif + CRITSEC_Holder csh(m_CriticalSection); +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + StopWatch sw(&g_mapTimeSum); +#endif + void* result = FindRWBlock(pRX, size); if (result != NULL) { return result; } +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + StopWatch sw2(&g_mapFindRXTimeSum); +#endif for (BlockRX* pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) { if (pRX >= pBlock->baseRX && ((size_t)pRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) { - // Offset of the RX address in the originally allocated block - size_t offset = (size_t)pRX - (size_t)pBlock->baseRX; - // Offset of the RX address that will start the newly mapped block - size_t mapOffset = ALIGN_DOWN(offset, Granularity()); - // Size of the block we will map - size_t mapSize = ALIGN_UP(offset - mapOffset + size, Granularity()); - void* pRW = VMToOSInterface::GetRWMapping(m_doubleMemoryMapperHandle, (BYTE*)pBlock->baseRX + mapOffset, pBlock->offset + mapOffset, 
mapSize); - - if (pRW == NULL) - { - g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Failed to create RW mapping for RX memory")); - } + // Offset of the RX address in the originally allocated block + size_t offset = (size_t)pRX - (size_t)pBlock->baseRX; + // Offset of the RX address that will start the newly mapped block + size_t mapOffset = ALIGN_DOWN(offset, Granularity()); + // Size of the block we will map + size_t mapSize = ALIGN_UP(offset - mapOffset + size, Granularity()); + +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + StopWatch sw2(&g_mapCreateTimeSum); +#endif + void* pRW = VMToOSInterface::GetRWMapping(m_doubleMemoryMapperHandle, (BYTE*)pBlock->baseRX + mapOffset, pBlock->offset + mapOffset, mapSize); + + if (pRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Failed to create RW mapping for RX memory")); + } AddRWBlock(pRW, (BYTE*)pBlock->baseRX + mapOffset, mapSize); @@ -720,6 +817,10 @@ void ExecutableAllocator::UnmapRW(void* pRW) { LIMITED_METHOD_CONTRACT; +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + StopWatch swAll(&g_unmapTimeWithLockSum); +#endif + if (!IsDoubleMappingEnabled()) { return; @@ -728,6 +829,10 @@ void ExecutableAllocator::UnmapRW(void* pRW) CRITSEC_Holder csh(m_CriticalSection); _ASSERTE(pRW != NULL); +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + StopWatch swNoLock(&g_unmapTimeSum); +#endif + void* unmapAddress = NULL; size_t unmapSize; diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp index 51e39de70ecf9..7638031add7db 100644 --- a/src/coreclr/utilcode/loaderheap.cpp +++ b/src/coreclr/utilcode/loaderheap.cpp @@ -8,8 +8,6 @@ #define DONOT_DEFINE_ETW_CALLBACK #include "eventtracebase.h" -#define LHF_EXECUTABLE 0x1 - #ifndef DACCESS_COMPILE INDEBUG(DWORD UnlockedLoaderHeap::s_dwNumInstancesOfLoaderHeaps = 0;) @@ -728,15 +726,25 @@ struct LoaderHeapFreeBlock } #endif - void* pMemRW = pMem; - ExecutableWriterHolder memWriterHolder; - if (pHeap->IsExecutable()) +#ifdef DEBUG + if 
(!pHeap->IsInterleaved()) { - memWriterHolder = ExecutableWriterHolder(pMem, dwTotalSize); - pMemRW = memWriterHolder.GetRW(); + void* pMemRW = pMem; + ExecutableWriterHolderNoLog memWriterHolder; + if (pHeap->IsExecutable()) + { + memWriterHolder.AssignExecutableWriterHolder(pMem, dwTotalSize); + pMemRW = memWriterHolder.GetRW(); + } + + memset(pMemRW, 0xcc, dwTotalSize); } + else + { + memset((BYTE*)pMem + GetOsPageSize(), 0xcc, dwTotalSize); + } +#endif // DEBUG - INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);) LoaderHeapFreeBlock *pNewBlock = new (nothrow) LoaderHeapFreeBlock; // If we fail allocating the LoaderHeapFreeBlock, ignore the failure and don't insert the free block at all. if (pNewBlock != NULL) @@ -793,10 +801,10 @@ struct LoaderHeapFreeBlock if (pResult) { void *pResultRW = pResult; - ExecutableWriterHolder resultWriterHolder; + ExecutableWriterHolderNoLog resultWriterHolder; if (pHeap->IsExecutable()) { - resultWriterHolder = ExecutableWriterHolder(pResult, dwSize); + resultWriterHolder.AssignExecutableWriterHolder(pResult, dwSize); pResultRW = resultWriterHolder.GetRW(); } // Callers of loaderheap assume allocated memory is zero-inited so we must preserve this invariant! 
@@ -828,10 +836,10 @@ struct LoaderHeapFreeBlock size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize; LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext; void *pMemRW = pFreeBlock->m_pBlockAddress; - ExecutableWriterHolder memWriterHolder; + ExecutableWriterHolderNoLog memWriterHolder; if (pHeap->IsExecutable()) { - memWriterHolder = ExecutableWriterHolder(pFreeBlock->m_pBlockAddress, dwCombinedSize); + memWriterHolder.AssignExecutableWriterHolder(pFreeBlock->m_pBlockAddress, dwCombinedSize); pMemRW = memWriterHolder.GetRW(); } INDEBUG(memset(pMemRW, 0xcc, dwCombinedSize);) @@ -875,18 +883,23 @@ inline size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHe LIMITED_METHOD_CONTRACT; size_t dwSize = dwRequestedSize; + + // Interleaved heap cannot ad any extra to the requested size + if (!pHeap->IsInterleaved()) + { #ifdef _DEBUG - dwSize += LOADER_HEAP_DEBUG_BOUNDARY; - dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); + dwSize += LOADER_HEAP_DEBUG_BOUNDARY; + dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); #endif - if (!pHeap->m_fExplicitControl) - { + if (!pHeap->m_fExplicitControl) + { #ifdef _DEBUG - dwSize += sizeof(LoaderHeapValidationTag); + dwSize += sizeof(LoaderHeapValidationTag); #endif + } + dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); } - dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); return dwSize; } @@ -919,7 +932,9 @@ UnlockedLoaderHeap::UnlockedLoaderHeap(DWORD dwReserveBlockSize, const BYTE* dwReservedRegionAddress, SIZE_T dwReservedRegionSize, RangeList *pRangeList, - BOOL fMakeExecutable) + HeapKind kind, + void (*codePageGenerator)(BYTE* pageBase, BYTE* pageBaseRX), + DWORD dwGranularity) { CONTRACTL { @@ -943,6 +958,8 @@ UnlockedLoaderHeap::UnlockedLoaderHeap(DWORD dwReserveBlockSize, // Round to VIRTUAL_ALLOC_RESERVE_GRANULARITY m_dwTotalAlloc = 0; + m_dwGranularity = dwGranularity; + #ifdef _DEBUG m_dwDebugWastedBytes = 0; 
s_dwNumInstancesOfLoaderHeaps++; @@ -952,10 +969,10 @@ UnlockedLoaderHeap::UnlockedLoaderHeap(DWORD dwReserveBlockSize, m_fStubUnwindInfoUnregistered= FALSE; #endif - m_Options = 0; + m_kind = kind; - if (fMakeExecutable) - m_Options |= LHF_EXECUTABLE; + _ASSERTE((kind != HeapKind::Interleaved) || (codePageGenerator != NULL)); + m_codePageGenerator = codePageGenerator; m_pFirstFreeBlock = NULL; @@ -1059,12 +1076,6 @@ size_t UnlockedLoaderHeap::GetBytesAvailReservedRegion() return 0; } -#define SETUP_NEW_BLOCK(pData, dwSizeToCommit, dwSizeToReserve) \ - m_pPtrToEndOfCommittedRegion = (BYTE *) (pData) + (dwSizeToCommit); \ - m_pAllocPtr = (BYTE *) (pData); \ - m_pEndReservedRegion = (BYTE *) (pData) + (dwSizeToReserve); - - #ifndef DACCESS_COMPILE void ReleaseReservedMemory(BYTE* value) @@ -1132,6 +1143,7 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) pData = (BYTE *)ExecutableAllocator::Instance()->Reserve(dwSizeToReserve); if (pData == NULL) { + _ASSERTE(!"Unable to reserve memory range for a loaderheap"); return FALSE; } } @@ -1143,26 +1155,44 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) // and notify the user to provide more reserved mem. 
_ASSERTE((dwSizeToCommit <= dwSizeToReserve) && "Loaderheap tried to commit more memory than reserved by user"); - if (pData == NULL) + if (!fReleaseMemory) { - //_ASSERTE(!"Unable to ClrVirtualAlloc reserve in a loaderheap"); - return FALSE; + pData.SuppressRelease(); } - if (!fReleaseMemory) + size_t dwSizeToCommitPart = dwSizeToCommit; + if (IsInterleaved()) { - pData.SuppressRelease(); + // For interleaved heaps, we perform two commits, each being half of the requested size + dwSizeToCommitPart /= 2; } // Commit first set of pages, since it will contain the LoaderHeapBlock - void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); + void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommitPart, IsExecutable()); if (pTemp == NULL) { - //_ASSERTE(!"Unable to ClrVirtualAlloc commit in a loaderheap"); + _ASSERTE(!"Unable to commit a loaderheap code page"); return FALSE; } + if (IsInterleaved()) + { + _ASSERTE(dwSizeToCommitPart == GetOsPageSize()); + + void *pTemp = ExecutableAllocator::Instance()->Commit((BYTE*)pData + dwSizeToCommitPart, dwSizeToCommitPart, FALSE); + if (pTemp == NULL) + { + _ASSERTE(!"Unable to commit a loaderheap data page"); + + return FALSE; + } + + ExecutableWriterHolder codePageWriterHolder(pData, GetOsPageSize()); + m_codePageGenerator(codePageWriterHolder.GetRW(), pData); + FlushInstructionCache(GetCurrentProcess(), pData, GetOsPageSize()); + } + // Record reserved range in range list, if one is specified // Do this AFTER the commit - otherwise we'll have bogus ranges included. 
if (m_pRangeList != NULL) @@ -1193,7 +1223,14 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) // Add to the linked list m_pFirstBlock = pNewBlock; - SETUP_NEW_BLOCK(pData, dwSizeToCommit, dwSizeToReserve); + if (IsInterleaved()) + { + dwSizeToCommit /= 2; + } + + m_pPtrToEndOfCommittedRegion = (BYTE *) (pData) + (dwSizeToCommit); \ + m_pAllocPtr = (BYTE *) (pData); \ + m_pEndReservedRegion = (BYTE *) (pData) + (dwSizeToReserve); return TRUE; } @@ -1216,30 +1253,108 @@ BOOL UnlockedLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) // If we have memory we can use, what are you doing here! _ASSERTE(dwMinSize > (SIZE_T)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr)); + if (IsInterleaved()) + { + // This mode interleaves data and code pages 1:1. So the code size is required to be smaller than + // or equal to the page size to ensure that the code range is consecutive. + _ASSERTE(dwMinSize <= GetOsPageSize()); + // For interleaved heap, we always get two memory pages - one for code and one for data + dwMinSize = 2 * GetOsPageSize(); + } + // Does this fit in the reserved region? if (dwMinSize <= (size_t)(m_pEndReservedRegion - m_pAllocPtr)) { - SIZE_T dwSizeToCommit = (m_pAllocPtr + dwMinSize) - m_pPtrToEndOfCommittedRegion; + SIZE_T dwSizeToCommit; - if (dwSizeToCommit < m_dwCommitBlockSize) - dwSizeToCommit = min((SIZE_T)(m_pEndReservedRegion - m_pPtrToEndOfCommittedRegion), (SIZE_T)m_dwCommitBlockSize); + if (IsInterleaved()) + { + // For interleaved heaps, the allocation cannot cross page boundary since there are data and executable + // pages interleaved in a 1:1 fashion. 
+ dwSizeToCommit = dwMinSize; + } + else + { + dwSizeToCommit = (m_pAllocPtr + dwMinSize) - m_pPtrToEndOfCommittedRegion; + } + + size_t unusedRemainder = (size_t)((BYTE*)m_pPtrToEndOfCommittedRegion - m_pAllocPtr); - // Round to page size - dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); + if (IsInterleaved()) + { + // The end of committed region for interleaved heaps points to the end of the executable + // page and the data page goes right after that. So we skip the data page here. + m_pPtrToEndOfCommittedRegion += GetOsPageSize(); + } + else + { + if (dwSizeToCommit < m_dwCommitBlockSize) + dwSizeToCommit = min((SIZE_T)(m_pEndReservedRegion - m_pPtrToEndOfCommittedRegion), (SIZE_T)m_dwCommitBlockSize); + + // Round to page size + dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); + } + + size_t dwSizeToCommitPart = dwSizeToCommit; + if (IsInterleaved()) + { + // For interleaved heaps, we perform two commits, each being half of the requested size + dwSizeToCommitPart /= 2; + } // Yes, so commit the desired number of reserved pages - void *pData = ExecutableAllocator::Instance()->Commit(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); + void *pData = ExecutableAllocator::Instance()->Commit(m_pPtrToEndOfCommittedRegion, dwSizeToCommitPart, IsExecutable()); if (pData == NULL) + { + _ASSERTE(!"Unable to commit a loaderheap page"); return FALSE; + } + + if (IsInterleaved()) + { + // Commit a data page after the code page + ExecutableAllocator::Instance()->Commit(m_pPtrToEndOfCommittedRegion + dwSizeToCommitPart, dwSizeToCommitPart, FALSE); + + ExecutableWriterHolder codePageWriterHolder((BYTE*)pData, GetOsPageSize()); + m_codePageGenerator(codePageWriterHolder.GetRW(), (BYTE*)pData); + FlushInstructionCache(GetCurrentProcess(), pData, GetOsPageSize()); + // If the remaining bytes are large enough to allocate data of the allocation granularity, add them to the free + // block list.
+ // Otherwise the remaining bytes that are available will be wasted. + if (unusedRemainder >= m_dwGranularity) + { + LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, m_pAllocPtr, unusedRemainder, this); + } + else + { + INDEBUG(m_dwDebugWastedBytes += unusedRemainder;) + } + + // For interleaved heaps, further allocations will start from the newly committed page as they cannot + // cross page boundary. + m_pAllocPtr = (BYTE*)pData; + } + + m_pPtrToEndOfCommittedRegion += dwSizeToCommitPart; m_dwTotalAlloc += dwSizeToCommit; - m_pPtrToEndOfCommittedRegion += dwSizeToCommit; return TRUE; } - // Need to allocate a new set of reserved pages - INDEBUG(m_dwDebugWastedBytes += (size_t)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr);) + // Need to allocate a new set of reserved pages that will be located likely at a nonconsecutive virtual address. + // If the remaining bytes are large enough to allocate data of the allocation granularity, add them to the free + // block list.
+ size_t unusedRemainder = (size_t)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr); + if (unusedRemainder >= AllocMem_TotalSize(m_dwGranularity, this)) + { + LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, m_pAllocPtr, unusedRemainder, this); + } + else + { + INDEBUG(m_dwDebugWastedBytes += (size_t)(m_pPtrToEndOfCommittedRegion - m_pAllocPtr);) + } // Note, there are unused reserved pages at end of current region -can't do much about that // Provide dwMinSize here since UnlockedReservePages will round up the commit size again @@ -1321,7 +1436,7 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize INCONTRACT(_ASSERTE(!ARE_FAULTS_FORBIDDEN())); #ifdef RANDOMIZE_ALLOC - if (!m_fExplicitControl) + if (!m_fExplicitControl && !IsInterleaved()) dwSize += s_random.Next() % 256; #endif @@ -1346,10 +1461,10 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize { #ifdef _DEBUG BYTE *pAllocatedBytes = (BYTE*)pData; - ExecutableWriterHolder dataWriterHolder; - if (m_Options & LHF_EXECUTABLE) + ExecutableWriterHolderNoLog dataWriterHolder; + if (IsExecutable()) { - dataWriterHolder = ExecutableWriterHolder(pData, dwSize); + dataWriterHolder.AssignExecutableWriterHolder(pData, dwSize); pAllocatedBytes = (BYTE *)dataWriterHolder.GetRW(); } @@ -1363,7 +1478,7 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize "LoaderHeap must return zero-initialized memory"); } - if (!m_fExplicitControl) + if (!m_fExplicitControl && !IsInterleaved()) { LoaderHeapValidationTag *pTag = AllocMem_GetTag(pAllocatedBytes, dwRequestedSize); pTag->m_allocationType = kAllocMem; @@ -1425,6 +1540,7 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, } #ifdef _DEBUG + if (!IsInterleaved()) { DEBUG_ONLY_REGION(); @@ -1511,7 +1627,7 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, size_t dwSize = AllocMem_TotalSize(dwRequestedSize, this); #ifdef _DEBUG - if (m_dwDebugFlags & kCallTracing) + if ((m_dwDebugFlags & kCallTracing) && 
!IsInterleaved()) { DEBUG_ONLY_REGION(); @@ -1533,17 +1649,25 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize )) { - void *pMemRW = pMem; - ExecutableWriterHolder memWriterHolder; - if (m_Options & LHF_EXECUTABLE) + if (IsInterleaved()) { - memWriterHolder = ExecutableWriterHolder(pMem, dwSize); - pMemRW = memWriterHolder.GetRW(); + // Clear the RW page + memset((BYTE*)pMem + GetOsPageSize(), 0x00, dwSize); // Fill freed region with 0 } + else + { + void *pMemRW = pMem; + ExecutableWriterHolderNoLog memWriterHolder; + if (IsExecutable()) + { + memWriterHolder.AssignExecutableWriterHolder(pMem, dwSize); + pMemRW = memWriterHolder.GetRW(); + } - // Cool. This was the last block allocated. We can just undo the allocation instead - // of going to the freelist. - memset(pMemRW, 0x00, dwSize); // Fill freed region with 0 + // Cool. This was the last block allocated. We can just undo the allocation instead + // of going to the freelist. + memset(pMemRW, 0x00, dwSize); // Fill freed region with 0 + } m_pAllocPtr = (BYTE*)pMem; } else @@ -1588,6 +1712,7 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz PRECONDITION( alignment != 0 ); PRECONDITION(0 == (alignment & (alignment - 1))); // require power of 2 + PRECONDITION((dwRequestedSize % m_dwGranularity) == 0); POSTCONDITION( (RETVAL) ? 
(0 == ( ((UINT_PTR)(RETVAL)) & (alignment - 1))) : // If non-null, pointer must be aligned (pdwExtra == NULL || 0 == *pdwExtra) // or else *pdwExtra must be set to 0 @@ -1632,6 +1757,11 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz pResult = m_pAllocPtr; size_t extra = alignment - ((size_t)pResult & ((size_t)alignment - 1)); + if ((IsInterleaved())) + { + _ASSERTE(alignment == 1); + extra = 0; + } // On DEBUG, we force a non-zero extra so people don't forget to adjust for it on backout #ifndef _DEBUG @@ -1655,10 +1785,10 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz #ifdef _DEBUG BYTE *pAllocatedBytes = (BYTE *)pResult; - ExecutableWriterHolder resultWriterHolder; - if (m_Options & LHF_EXECUTABLE) + ExecutableWriterHolderNoLog resultWriterHolder; + if (IsExecutable()) { - resultWriterHolder = ExecutableWriterHolder(pResult, dwSize - extra); + resultWriterHolder.AssignExecutableWriterHolder(pResult, dwSize - extra); pAllocatedBytes = (BYTE *)resultWriterHolder.GetRW(); } @@ -1667,7 +1797,7 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz memset(pAllocatedBytes + dwRequestedSize, 0xee, LOADER_HEAP_DEBUG_BOUNDARY); #endif - if (dwRequestedSize != 0) + if (dwRequestedSize != 0 && !IsInterleaved()) { _ASSERTE_MSG(pAllocatedBytes[0] == 0 && memcmp(pAllocatedBytes, pAllocatedBytes + 1, dwRequestedSize - 1) == 0, "LoaderHeap must return zero-initialized memory"); @@ -1689,7 +1819,7 @@ void *UnlockedLoaderHeap::UnlockedAllocAlignedMem_NoThrow(size_t dwRequestedSiz EtwAllocRequest(this, pResult, dwSize); - if (!m_fExplicitControl) + if (!m_fExplicitControl && !IsInterleaved()) { LoaderHeapValidationTag *pTag = AllocMem_GetTag(pAllocatedBytes - extra, dwRequestedSize + extra); pTag->m_allocationType = kAllocMem; @@ -1789,7 +1919,12 @@ void *UnlockedLoaderHeap::UnlockedAllocMemForCode_NoThrow(size_t dwHeaderSize, s BOOL UnlockedLoaderHeap::IsExecutable() { - return 
(m_Options & LHF_EXECUTABLE); + return (m_kind == HeapKind::Executable) || IsInterleaved(); +} + +BOOL UnlockedLoaderHeap::IsInterleaved() +{ + return m_kind == HeapKind::Interleaved; } #ifdef DACCESS_COMPILE @@ -2081,7 +2216,7 @@ void LoaderHeapSniffer::ValidateFreeList(UnlockedLoaderHeap *pHeap) ( ((UINT_PTR)pProbeThis) - ((UINT_PTR)(pPrevEvent->m_pMem)) + pPrevEvent->m_dwSize ) < 1024) { message.AppendASCII("\nThis block is located close to the corruption point. "); - if (pPrevEvent->QuietValidate()) + if (!pHeap->IsInterleaved() && pPrevEvent->QuietValidate()) { message.AppendASCII("If it was overrun, it might have caused this."); } diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 08a5bb92a66ed..b9147ba7ca93f 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -665,6 +665,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm ${ARCH_SOURCES_DIR}/ThePreStubAMD64.asm + ${ARCH_SOURCES_DIR}/thunktemplates.asm ${ARCH_SOURCES_DIR}/Context.asm ${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm ${ARCH_SOURCES_DIR}/UMThunkStub.asm @@ -681,6 +682,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_I386) ${ARCH_SOURCES_DIR}/gmsasm.asm ${ARCH_SOURCES_DIR}/jithelp.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm + ${ARCH_SOURCES_DIR}/thunktemplates.asm ) set(VM_HEADERS_WKS_ARCH_ASM @@ -693,6 +695,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) ${ARCH_SOURCES_DIR}/ehhelpers.asm ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm + ${ARCH_SOURCES_DIR}/thunktemplates.asm ) set(VM_HEADERS_WKS_ARCH_ASM @@ -704,6 +707,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm ${ARCH_SOURCES_DIR}/CrtHelpers.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm + ${ARCH_SOURCES_DIR}/thunktemplates.asm ) set(VM_HEADERS_WKS_ARCH_ASM @@ -726,6 +730,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/jithelpers_slow.S ${ARCH_SOURCES_DIR}/pinvokestubs.S 
${ARCH_SOURCES_DIR}/theprestubamd64.S + ${ARCH_SOURCES_DIR}/thunktemplates.S ${ARCH_SOURCES_DIR}/unixasmhelpers.S ${ARCH_SOURCES_DIR}/umthunkstub.S ${ARCH_SOURCES_DIR}/virtualcallstubamd64.S @@ -738,7 +743,8 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/gmsasm.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/umthunkstub.S - ) + ${ARCH_SOURCES_DIR}/thunktemplates.S + ) elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S @@ -746,6 +752,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/ehhelpers.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S + ${ARCH_SOURCES_DIR}/thunktemplates.S ) elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH_ASM @@ -753,6 +760,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/pinvokestubs.S + ${ARCH_SOURCES_DIR}/thunktemplates.S ) endif() diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 0fd77a277f58b..90b3dc62faefa 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -239,30 +239,6 @@ NESTED_ENTRY JIT_RareDisableHelper, _TEXT NESTED_END JIT_RareDisableHelper, _TEXT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; PrecodeFixupThunk -;; -;; The call in fixup precode initally points to this function. -;; The pupose of this function is to load the MethodDesc and forward the call the prestub. -;; -; EXTERN_C VOID __stdcall PrecodeFixupThunk(); -LEAF_ENTRY PrecodeFixupThunk, _TEXT - - pop rax ; Pop the return address. It points right after the call instruction in the precode. 
- - ; Inline computation done by FixupPrecode::GetMethodDesc() - movzx r10,byte ptr [rax+2] ; m_PrecodeChunkIndex - movzx r11,byte ptr [rax+1] ; m_MethodDescChunkIndex - mov rax,qword ptr [rax+r10*8+3] - lea METHODDESC_REGISTER,[rax+r11*8] - - ; Tail call to prestub - jmp ThePreStub - -LEAF_END PrecodeFixupThunk, _TEXT - - ; extern "C" void setFPReturn(int fpSize, INT64 retVal); LEAF_ENTRY setFPReturn, _TEXT cmp ecx, 4 @@ -721,13 +697,7 @@ ifdef FEATURE_TIERED_COMPILATION extern OnCallCountThresholdReached:proc -LEAF_ENTRY OnCallCountThresholdReachedStub, _TEXT - ; Pop the return address (the stub-identifying token) into a non-argument volatile register that can be trashed - pop rax - jmp OnCallCountThresholdReachedStub2 -LEAF_END OnCallCountThresholdReachedStub, _TEXT - -NESTED_ENTRY OnCallCountThresholdReachedStub2, _TEXT +NESTED_ENTRY OnCallCountThresholdReachedStub, _TEXT PROLOG_WITH_TRANSITION_BLOCK lea rcx, [rsp + __PWTB_TransitionBlock] ; TransitionBlock * @@ -736,7 +706,7 @@ NESTED_ENTRY OnCallCountThresholdReachedStub2, _TEXT EPILOG_WITH_TRANSITION_BLOCK_TAILCALL TAILJMP_RAX -NESTED_END OnCallCountThresholdReachedStub2, _TEXT +NESTED_END OnCallCountThresholdReachedStub, _TEXT endif ; FEATURE_TIERED_COMPILATION diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 9d7d3159842b1..2afddae98a4d3 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -566,6 +566,30 @@ ASMCONSTANTS_C_ASSERT(CallDescrData__returnValue == offsetof(CallDescrD ASMCONSTANTS_C_ASSERT(OFFSETOF__TransitionBlock__m_argumentRegisters == offsetof(TransitionBlock, m_argumentRegisters)) #endif // UNIX_AMD64_ABI +#define FixupPrecodeData__Target 0x00 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__Target == offsetof(FixupPrecodeData, Target)) + +#define FixupPrecodeData__MethodDesc 0x08 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__MethodDesc == offsetof(FixupPrecodeData, MethodDesc)) + +#define 
FixupPrecodeData__PrecodeFixupThunk 0x10 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__PrecodeFixupThunk == offsetof(FixupPrecodeData, PrecodeFixupThunk)) + +#define StubPrecodeData__Target 0x08 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__Target == offsetof(StubPrecodeData, Target)) + +#define StubPrecodeData__MethodDesc 0x00 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__MethodDesc == offsetof(StubPrecodeData, MethodDesc)) + +#define CallCountingStubData__RemainingCallCountCell 0x00 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__RemainingCallCountCell == offsetof(CallCountingStubData, RemainingCallCountCell)) + +#define CallCountingStubData__TargetForMethod 0x08 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCountingStubData, TargetForMethod)) + +#define CallCountingStubData__TargetForThresholdReached 0x10 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) + #undef ASMCONSTANTS_RUNTIME_ASSERT #undef ASMCONSTANTS_C_ASSERT #ifndef UNIX_AMD64_ABI diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index 86c74f6e7d344..1e2e3f64b460a 100644 --- a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -690,64 +690,6 @@ INT32 rel32UsingPreallocatedJumpStub(INT32 UNALIGNED * pRel32, PCODE target, PCO _ASSERTE(FitsInI4(offset)); return static_cast(offset); } - -BOOL DoesSlotCallPrestub(PCODE pCode) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - PRECONDITION(pCode != GetPreStubEntryPoint()); - } CONTRACTL_END; - - // AMD64 has the following possible sequences for prestub logic: - // 1. slot -> temporary entrypoint -> prestub - // 2. slot -> precode -> prestub - // 3. slot -> precode -> jumprel64 (jump stub) -> prestub - // 4. 
slot -> precode -> jumprel64 (NGEN case) -> prestub - -#ifdef HAS_COMPACT_ENTRYPOINTS - if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL) - { - return TRUE; - } -#endif - - if (!IS_ALIGNED(pCode, PRECODE_ALIGNMENT)) - { - return FALSE; - } - -#ifdef HAS_FIXUP_PRECODE - if (*PTR_BYTE(pCode) == X86_INSTR_CALL_REL32) - { - // Note that call could have been patched to jmp in the meantime - pCode = rel32Decode(pCode+1); - - // JumpStub - if (isJumpRel64(pCode)) { - pCode = decodeJump64(pCode); - } - - return pCode == (TADDR)PrecodeFixupThunk; - } -#endif - - if (*PTR_USHORT(pCode) != X86_INSTR_MOV_R10_IMM64 || // mov rax,XXXX - *PTR_BYTE(pCode+10) != X86_INSTR_NOP || // nop - *PTR_BYTE(pCode+11) != X86_INSTR_JMP_REL32) // jmp rel32 - { - return FALSE; - } - pCode = rel32Decode(pCode+12); - - // JumpStub - if (isJumpRel64(pCode)) { - pCode = decodeJump64(pCode); - } - - return pCode == GetPreStubEntryPoint(); -} - // // Some AMD64 assembly functions have one or more DWORDS at the end of the function // that specify the offsets where significant instructions are diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index 33589c27bae39..d562627856295 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -53,8 +53,6 @@ EXTERN_C void FastCallFinalizeWorker(Object *obj, PCODE funcPtr); #define HAS_NDIRECT_IMPORT_PRECODE 1 #define HAS_FIXUP_PRECODE 1 -#define HAS_FIXUP_PRECODE_CHUNKS 1 -#define FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS 1 // ThisPtrRetBufPrecode one is necessary for closed delegates over static methods with return buffer #define HAS_THISPTR_RETBUF_PRECODE 1 @@ -513,334 +511,5 @@ inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) #define JIT_GetSharedNonGCStaticBaseNoCtor JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-// Call counting - -#ifdef FEATURE_TIERED_COMPILATION - -#define DISABLE_COPY(T) \ - T(const T &) = delete; \ - T &operator =(const T &) = delete - -typedef UINT16 CallCount; -typedef DPTR(CallCount) PTR_CallCount; - -//////////////////////////////////////////////////////////////// -// CallCountingStub - -class CallCountingStub; -typedef DPTR(const CallCountingStub) PTR_CallCountingStub; - -class CallCountingStub -{ -public: - static const SIZE_T Alignment = sizeof(void *); - -#ifndef DACCESS_COMPILE -protected: - static const PCODE TargetForThresholdReached; - - CallCountingStub() = default; - -public: - static const CallCountingStub *From(TADDR stubIdentifyingToken); - - PCODE GetEntryPoint() const - { - WRAPPER_NO_CONTRACT; - return PINSTRToPCODE((TADDR)this); - } -#endif // !DACCESS_COMPILE - -public: - PTR_CallCount GetRemainingCallCountCell() const; - PCODE GetTargetForMethod() const; - -#ifndef DACCESS_COMPILE -protected: - template static INT_PTR GetRelativeOffset(const T *relRef, PCODE target) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(T) != 0); - static_assert_no_msg(sizeof(T) <= sizeof(void *)); - static_assert_no_msg((sizeof(T) & (sizeof(T) - 1)) == 0); // is a power of 2 - _ASSERTE(relRef != nullptr); - - TADDR targetAddress = PCODEToPINSTR(target); - _ASSERTE(targetAddress != NULL); - return (INT_PTR)targetAddress - (INT_PTR)(relRef + 1); - } -#endif - -protected: - template static PCODE GetTarget(const T *relRef) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8); - _ASSERTE(relRef != nullptr); - - return PINSTRToPCODE((INT_PTR)(relRef + 1) + *relRef); - } - - DISABLE_COPY(CallCountingStub); -}; - -//////////////////////////////////////////////////////////////// -// CallCountingStubShort - -class CallCountingStubShort; -typedef DPTR(const CallCountingStubShort) PTR_CallCountingStubShort; -class CallCountingStubLong; -typedef DPTR(const CallCountingStubLong) 
PTR_CallCountingStubLong; - -#pragma pack(push, 1) -class CallCountingStubShort : public CallCountingStub -{ -private: - const UINT8 m_part0[2]; - CallCount *const m_remainingCallCountCell; - const UINT8 m_part1[5]; - const INT32 m_rel32TargetForMethod; - const UINT8 m_part2[1]; - const INT32 m_rel32TargetForThresholdReached; - const UINT8 m_alignmentPadding[0]; - -#ifndef DACCESS_COMPILE -public: - CallCountingStubShort(CallCountingStubShort* stubRX, CallCount *remainingCallCountCell, PCODE targetForMethod) - : m_part0{ 0x48, 0xb8}, // mov rax, - m_remainingCallCountCell(remainingCallCountCell), // - m_part1{ 0x66, 0xff, 0x08, // dec word ptr [rax] - 0x0f, 0x85}, // jnz - m_rel32TargetForMethod( // - GetRelative32BitOffset( - &stubRX->m_rel32TargetForMethod, - targetForMethod)), - m_part2{ 0xe8}, // call - m_rel32TargetForThresholdReached( // - GetRelative32BitOffset( - &stubRX->m_rel32TargetForThresholdReached, - TargetForThresholdReached)), - // (rip == stub-identifying token) - m_alignmentPadding{} - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(CallCountingStubShort) % Alignment == 0); - _ASSERTE(remainingCallCountCell != nullptr); - _ASSERTE(PCODEToPINSTR(targetForMethod) != NULL); - } - - static bool Is(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg((offsetof(CallCountingStubShort, m_alignmentPadding[0]) & 1) == 0); - - return (stubIdentifyingToken & 1) == 0; - } - - static const CallCountingStubShort *From(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(stubIdentifyingToken)); - _ASSERTE(stubIdentifyingToken % Alignment == offsetof(CallCountingStubShort, m_alignmentPadding[0]) % Alignment); - - const CallCountingStubShort *stub = - (const CallCountingStubShort *)(stubIdentifyingToken - offsetof(CallCountingStubShort, m_alignmentPadding[0])); - _ASSERTE(IS_ALIGNED(stub, Alignment)); - return stub; - } -#endif // !DACCESS_COMPILE - -public: - static bool Is(PTR_CallCountingStub callCountingStub) - { 
- WRAPPER_NO_CONTRACT; - return dac_cast(callCountingStub)->m_part1[4] == 0x85; - } - - static PTR_CallCountingStubShort From(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(callCountingStub)); - - return dac_cast(callCountingStub); - } - - PCODE GetTargetForMethod() const - { - WRAPPER_NO_CONTRACT; - return GetTarget(&m_rel32TargetForMethod); - } - -#ifndef DACCESS_COMPILE -private: - static bool CanUseRelative32BitOffset(const INT32 *rel32Ref, PCODE target) - { - WRAPPER_NO_CONTRACT; - - INT_PTR relativeOffset = GetRelativeOffset(rel32Ref, target); - return (INT32)relativeOffset == relativeOffset; - } - -public: - static bool CanUseFor(const void *allocationAddress, PCODE targetForMethod) - { - WRAPPER_NO_CONTRACT; - - const CallCountingStubShort *fakeStub = (const CallCountingStubShort *)allocationAddress; - return - CanUseRelative32BitOffset(&fakeStub->m_rel32TargetForMethod, targetForMethod) && - CanUseRelative32BitOffset(&fakeStub->m_rel32TargetForThresholdReached, TargetForThresholdReached); - } - -private: - static INT32 GetRelative32BitOffset(const INT32 *rel32Ref, PCODE target) - { - WRAPPER_NO_CONTRACT; - - INT_PTR relativeOffset = GetRelativeOffset(rel32Ref, target); - _ASSERTE((INT32)relativeOffset == relativeOffset); - return (INT32)relativeOffset; - } -#endif // !DACCESS_COMPILE - - friend CallCountingStub; - friend CallCountingStubLong; - DISABLE_COPY(CallCountingStubShort); -}; -#pragma pack(pop) - -//////////////////////////////////////////////////////////////// -// CallCountingStubLong - -#pragma pack(push, 1) -class CallCountingStubLong : public CallCountingStub -{ -private: - const UINT8 m_part0[2]; - CallCount *const m_remainingCallCountCell; - const UINT8 m_part1[7]; - const PCODE m_targetForMethod; - const UINT8 m_part2[4]; - const PCODE m_targetForThresholdReached; - const UINT8 m_part3[2]; - const UINT8 m_alignmentPadding[1]; - -#ifndef DACCESS_COMPILE -public: - CallCountingStubLong(CallCount 
*remainingCallCountCell, PCODE targetForMethod) - : m_part0{ 0x48, 0xb8}, // mov rax, - m_remainingCallCountCell(remainingCallCountCell), // - m_part1{ 0x66, 0xff, 0x08, // dec word ptr [rax] - 0x74, 0x0c, // jz L0 - 0x48, 0xb8}, // mov rax, - m_targetForMethod(targetForMethod), // - m_part2{ 0xff, 0xe0, // jmp rax - 0x48, 0xb8}, // L0: mov rax, - m_targetForThresholdReached(TargetForThresholdReached), // - m_part3{ 0xff, 0xd0}, // call rax - // (rip == stub-identifying token) - m_alignmentPadding{ 0xcc} // int 3 - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(CallCountingStubLong) % Alignment == 0); - static_assert_no_msg(sizeof(CallCountingStubLong) > sizeof(CallCountingStubShort)); - _ASSERTE(remainingCallCountCell != nullptr); - _ASSERTE(PCODEToPINSTR(targetForMethod) != NULL); - } - - static bool Is(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg((offsetof(CallCountingStubLong, m_alignmentPadding[0]) & 1) != 0); - - return (stubIdentifyingToken & 1) != 0; - } - - static const CallCountingStubLong *From(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(stubIdentifyingToken)); - _ASSERTE(stubIdentifyingToken % Alignment == offsetof(CallCountingStubLong, m_alignmentPadding[0]) % Alignment); - - const CallCountingStubLong *stub = - (const CallCountingStubLong *)(stubIdentifyingToken - offsetof(CallCountingStubLong, m_alignmentPadding[0])); - _ASSERTE(IS_ALIGNED(stub, Alignment)); - return stub; - } -#endif // !DACCESS_COMPILE - -public: - static bool Is(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(offsetof(CallCountingStubShort, m_part1[4]) == offsetof(CallCountingStubLong, m_part1[4])); - static_assert_no_msg(sizeof(CallCountingStubShort::m_part1[4]) == sizeof(CallCountingStubLong::m_part1[4])); - - return dac_cast(callCountingStub)->m_part1[4] == 0x0c; - } - - static PTR_CallCountingStubLong From(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; 
- _ASSERTE(Is(callCountingStub)); - - return dac_cast(callCountingStub); - } - - PCODE GetTargetForMethod() const - { - WRAPPER_NO_CONTRACT; - return m_targetForMethod; - } - - friend CallCountingStub; - DISABLE_COPY(CallCountingStubLong); -}; -#pragma pack(pop) - -//////////////////////////////////////////////////////////////// -// CallCountingStub definitions - -#ifndef DACCESS_COMPILE -inline const CallCountingStub *CallCountingStub::From(TADDR stubIdentifyingToken) -{ - WRAPPER_NO_CONTRACT; - _ASSERTE(stubIdentifyingToken != NULL); - - return - CallCountingStubShort::Is(stubIdentifyingToken) - ? (const CallCountingStub *)CallCountingStubShort::From(stubIdentifyingToken) - : (const CallCountingStub *)CallCountingStubLong::From(stubIdentifyingToken); -} -#endif - -inline PTR_CallCount CallCountingStub::GetRemainingCallCountCell() const -{ - WRAPPER_NO_CONTRACT; - static_assert_no_msg( - offsetof(CallCountingStubShort, m_remainingCallCountCell) == - offsetof(CallCountingStubLong, m_remainingCallCountCell)); - - return PTR_CallCount(dac_cast(this)->m_remainingCallCountCell); -} - -inline PCODE CallCountingStub::GetTargetForMethod() const -{ - WRAPPER_NO_CONTRACT; - - return - CallCountingStubShort::Is(PTR_CallCountingStub(this)) - ? 
CallCountingStubShort::From(PTR_CallCountingStub(this))->GetTargetForMethod() - : CallCountingStubLong::From(PTR_CallCountingStub(this))->GetTargetForMethod(); -} - -//////////////////////////////////////////////////////////////// - -#undef DISABLE_COPY - -#endif // FEATURE_TIERED_COMPILATION - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #endif // __cgencpu_h__ diff --git a/src/coreclr/vm/amd64/theprestubamd64.S b/src/coreclr/vm/amd64/theprestubamd64.S index 82ddc075de6fe..dd02f70780e2f 100644 --- a/src/coreclr/vm/amd64/theprestubamd64.S +++ b/src/coreclr/vm/amd64/theprestubamd64.S @@ -26,4 +26,3 @@ LEAF_ENTRY ThePreStubPatch, _TEXT PATCH_LABEL ThePreStubPatchLabel ret LEAF_END ThePreStubPatch, _TEXT - diff --git a/src/coreclr/vm/amd64/thunktemplates.S b/src/coreclr/vm/amd64/thunktemplates.S new file mode 100644 index 0000000000000..11d417cb3b971 --- /dev/null +++ b/src/coreclr/vm/amd64/thunktemplates.S @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +PAGE_SIZE = 4096 + +#define DATA_SLOT(stub, field) C_FUNC(stub##Code) + PAGE_SIZE + stub##Data__##field + +LEAF_ENTRY StubPrecodeCode, _TEXT + mov r10, [rip + DATA_SLOT(StubPrecode, MethodDesc)] + jmp [rip + DATA_SLOT(StubPrecode, Target)] +LEAF_END_MARKED StubPrecodeCode, _TEXT + +LEAF_ENTRY FixupPrecodeCode, _TEXT + jmp [rip + DATA_SLOT(FixupPrecode, Target)] +PATCH_LABEL FixupPrecodeCode_Fixup + mov r10, [rip + DATA_SLOT(FixupPrecode, MethodDesc)] + jmp [rip + DATA_SLOT(FixupPrecode, PrecodeFixupThunk)] +LEAF_END_MARKED FixupPrecodeCode, _TEXT + +LEAF_ENTRY CallCountingStubCode, _TEXT + mov rax,QWORD PTR [rip + DATA_SLOT(CallCountingStub, RemainingCallCountCell)] + dec WORD PTR [rax] + je LOCAL_LABEL(CountReachedZero) + jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForMethod)] + LOCAL_LABEL(CountReachedZero): + jmp QWORD PTR [rip + DATA_SLOT(CallCountingStub, TargetForThresholdReached)] +LEAF_END_MARKED CallCountingStubCode, _TEXT diff --git a/src/coreclr/vm/amd64/thunktemplates.asm b/src/coreclr/vm/amd64/thunktemplates.asm new file mode 100644 index 0000000000000..af3d03135619e --- /dev/null +++ b/src/coreclr/vm/amd64/thunktemplates.asm @@ -0,0 +1,34 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. 
+ +include <AsmMacros.inc> +include AsmConstants.inc + +PAGE_SIZE = 4096 + +DATA_SLOT macro stub, field + exitm @CatStr(stub, <Code + PAGE_SIZE + >, stub, <Data__>, field) +endm + +LEAF_ENTRY StubPrecodeCode, _TEXT + mov r10, QWORD PTR [DATA_SLOT(StubPrecode, MethodDesc)] + jmp QWORD PTR [DATA_SLOT(StubPrecode, Target)] +LEAF_END_MARKED StubPrecodeCode, _TEXT + +LEAF_ENTRY FixupPrecodeCode, _TEXT + jmp QWORD PTR [DATA_SLOT(FixupPrecode, Target)] +PATCH_LABEL FixupPrecodeCode_Fixup + mov r10, QWORD PTR [DATA_SLOT(FixupPrecode, MethodDesc)] + jmp QWORD PTR [DATA_SLOT(FixupPrecode, PrecodeFixupThunk)] +LEAF_END_MARKED FixupPrecodeCode, _TEXT + +LEAF_ENTRY CallCountingStubCode, _TEXT + mov rax,QWORD PTR [DATA_SLOT(CallCountingStub, RemainingCallCountCell)] + dec WORD PTR [rax] + je CountReachedZero + jmp QWORD PTR [DATA_SLOT(CallCountingStub, TargetForMethod)] + CountReachedZero: + jmp QWORD PTR [DATA_SLOT(CallCountingStub, TargetForThresholdReached)] +LEAF_END_MARKED CallCountingStubCode, _TEXT + + end diff --git a/src/coreclr/vm/amd64/unixasmhelpers.S b/src/coreclr/vm/amd64/unixasmhelpers.S index 5d9cd711df7d6..4711ee9857f2c 100644 --- a/src/coreclr/vm/amd64/unixasmhelpers.S +++ b/src/coreclr/vm/amd64/unixasmhelpers.S @@ -5,29 +5,6 @@ #include "unixasmmacros.inc" #include "asmconstants.h" -////////////////////////////////////////////////////////////////////////// -// -// PrecodeFixupThunk -// -// The call in fixup precode initally points to this function. -// The pupose of this function is to load the MethodDesc and forward the call the prestub. -// -// EXTERN_C VOID __stdcall PrecodeFixupThunk() -LEAF_ENTRY PrecodeFixupThunk, _TEXT - - pop rax // Pop the return address. It points right after the call instruction in the precode.
- - // Inline computation done by FixupPrecode::GetMethodDesc() - movzx r10,byte ptr [rax+2] // m_PrecodeChunkIndex - movzx r11,byte ptr [rax+1] // m_MethodDescChunkIndex - mov rax,qword ptr [rax+r10*8+3] - lea METHODDESC_REGISTER,[rax+r11*8] - - // Tail call to prestub - jmp C_FUNC(ThePreStub) - -LEAF_END PrecodeFixupThunk, _TEXT - // EXTERN_C int __fastcall HelperMethodFrameRestoreState( // INDEBUG_COMMA(HelperMethodFrame *pFrame) // MachState *pState @@ -230,13 +207,7 @@ LEAF_END SinglecastDelegateInvokeStub, _TEXT #ifdef FEATURE_TIERED_COMPILATION -LEAF_ENTRY OnCallCountThresholdReachedStub, _TEXT - // Pop the return address (the stub-identifying token) into a non-argument volatile register that can be trashed - pop rax - jmp C_FUNC(OnCallCountThresholdReachedStub2) -LEAF_END OnCallCountThresholdReachedStub, _TEXT - -NESTED_ENTRY OnCallCountThresholdReachedStub2, _TEXT, NoHandler +NESTED_ENTRY OnCallCountThresholdReachedStub, _TEXT, NoHandler PROLOG_WITH_TRANSITION_BLOCK lea rdi, [rsp + __PWTB_TransitionBlock] // TransitionBlock * @@ -245,6 +216,6 @@ NESTED_ENTRY OnCallCountThresholdReachedStub2, _TEXT, NoHandler EPILOG_WITH_TRANSITION_BLOCK_TAILCALL TAILJMP_RAX -NESTED_END OnCallCountThresholdReachedStub2, _TEXT +NESTED_END OnCallCountThresholdReachedStub, _TEXT #endif // FEATURE_TIERED_COMPILATION diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index 5171b57c565da..55076560a46a4 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -3176,7 +3176,6 @@ DomainAssembly * AppDomain::FindAssembly(PEAssembly * pPEAssembly, FindAssemblyO if (pManifestFile && pManifestFile->Equals(pPEAssembly)) { - // Caller already has PEAssembly, so we can give DomainAssembly away freely without added reference return pDomainAssembly.GetValue(); } } diff --git a/src/coreclr/vm/arm/asmconstants.h b/src/coreclr/vm/arm/asmconstants.h index 7f2ffa77923d7..8d8a1f4f0ea0e 100644 --- a/src/coreclr/vm/arm/asmconstants.h +++ 
b/src/coreclr/vm/arm/asmconstants.h @@ -223,5 +223,29 @@ ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pThread == offsetof(InlinedCallFrame, #define InlinedCallFrame__m_pSPAfterProlog 0x1C ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pSPAfterProlog == offsetof(InlinedCallFrame, m_pSPAfterProlog)) +#define FixupPrecodeData__Target 0x00 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__Target == offsetof(FixupPrecodeData, Target)) + +#define FixupPrecodeData__MethodDesc 0x04 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__MethodDesc == offsetof(FixupPrecodeData, MethodDesc)) + +#define FixupPrecodeData__PrecodeFixupThunk 0x08 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__PrecodeFixupThunk == offsetof(FixupPrecodeData, PrecodeFixupThunk)) + +#define StubPrecodeData__MethodDesc 0x00 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__MethodDesc == offsetof(StubPrecodeData, MethodDesc)) + +#define StubPrecodeData__Target 0x04 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__Target == offsetof(StubPrecodeData, Target)) + +#define CallCountingStubData__RemainingCallCountCell 0x00 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__RemainingCallCountCell == offsetof(CallCountingStubData, RemainingCallCountCell)) + +#define CallCountingStubData__TargetForMethod 0x04 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCountingStubData, TargetForMethod)) + +#define CallCountingStubData__TargetForThresholdReached 0x08 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) + #undef ASMCONSTANTS_RUNTIME_ASSERT #undef ASMCONSTANTS_C_ASSERT diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index f49ed946bfec7..84dc9783630e6 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -261,29 +261,6 @@ ThePreStubPatchLabel: NESTED_END NDirectImportThunk, _TEXT -// ------------------------------------------------------------------ -// The call in fixup precode initally points to this 
function. -// The pupose of this function is to load the MethodDesc and forward the call the prestub. - NESTED_ENTRY PrecodeFixupThunk, _TEXT, NoHandler - - // r12 = FixupPrecode * - - PROLOG_PUSH "{r0-r1}" - - // Inline computation done by FixupPrecode::GetMethodDesc() - ldrb r0, [r12, #3] // m_PrecodeChunkIndex - ldrb r1, [r12, #2] // m_MethodDescChunkIndex - - add r12,r12,r0,lsl #3 - add r0,r12,r0,lsl #2 - ldr r0, [r0,#8] - add r12,r0,r1,lsl #2 - - EPILOG_POP "{r0-r1}" - b C_FUNC(ThePreStub) - - NESTED_END PrecodeFixupThunk, _TEXT - // ------------------------------------------------------------------ // void ResolveWorkerAsmStub(r0, r1, r2, r3, r4:IndirectionCellAndFlags, r12:DispatchToken) // diff --git a/src/coreclr/vm/arm/asmhelpers.asm b/src/coreclr/vm/arm/asmhelpers.asm index 0afdbf444f2a1..d550137316b69 100644 --- a/src/coreclr/vm/arm/asmhelpers.asm +++ b/src/coreclr/vm/arm/asmhelpers.asm @@ -311,29 +311,6 @@ ThePreStubPatchLabel NESTED_END -; ------------------------------------------------------------------ -; The call in fixup precode initally points to this function. -; The pupose of this function is to load the MethodDesc and forward the call the prestub. 
- NESTED_ENTRY PrecodeFixupThunk - - ; r12 = FixupPrecode * - - PROLOG_PUSH {r0-r1} - - ; Inline computation done by FixupPrecode::GetMethodDesc() - ldrb r0, [r12, #3] ; m_PrecodeChunkIndex - ldrb r1, [r12, #2] ; m_MethodDescChunkIndex - - add r12,r12,r0,lsl #3 - add r0,r12,r0,lsl #2 - ldr r0, [r0,#8] - add r12,r0,r1,lsl #2 - - EPILOG_POP {r0-r1} - EPILOG_BRANCH ThePreStub - - NESTED_END - ; ------------------------------------------------------------------ ; void ResolveWorkerAsmStub(r0, r1, r2, r3, r4:IndirectionCellAndFlags, r12:DispatchToken) ; diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index feafd7335cc90..598e11c9f4112 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -16,7 +16,10 @@ #define DATA_ALIGNMENT 4 #define DISPATCH_STUB_FIRST_WORD 0xf8d0 +#define DISPATCH_STUB_THIRD_WORD 0xb420 #define RESOLVE_STUB_FIRST_WORD 0xf8d0 +#define RESOLVE_STUB_THIRD_WORD 0xb460 +#define LOOKUP_STUB_FIRST_WORD 0xf8df class MethodDesc; class FramedMethodFrame; @@ -66,7 +69,6 @@ EXTERN_C void getFPReturn(int fpSize, INT64 *pRetVal); EXTERN_C void setFPReturn(int fpSize, INT64 retVal); #define HAS_FIXUP_PRECODE 1 -#define HAS_FIXUP_PRECODE_CHUNKS 1 // ThisPtrRetBufPrecode one is necessary for closed delegates over static methods with return buffer #define HAS_THISPTR_RETBUF_PRECODE 1 @@ -1021,202 +1023,10 @@ inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) // Note: If you introduce new precode implementation below, then please // update PrecodeStubManager::CheckIsStub_Internal to account for it. 
-EXTERN_C VOID STDCALL PrecodeFixupThunk(); - -#define PRECODE_ALIGNMENT sizeof(void*) -#define SIZEOF_PRECODE_BASE CODE_SIZE_ALIGN -#define OFFSETOF_PRECODE_TYPE 0 - -// Invalid precode type -struct InvalidPrecode { - static const int Type = 0; -}; - -struct StubPrecode { - - static const int Type = 0xdf; - - // ldr r12, [pc, #8] ; =m_pMethodDesc - // ldr pc, [pc, #0] ; =m_pTarget - // dcd pTarget - // dcd pMethodDesc - WORD m_rgCode[4]; - TADDR m_pTarget; - TADDR m_pMethodDesc; - - void Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); - - TADDR GetMethodDesc() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pMethodDesc; - } - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pTarget; - } - -#ifndef DACCESS_COMPILE - void ResetTargetInterlocked() - { - CONTRACTL - { - THROWS; - GC_NOTRIGGER; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(StubPrecode)); - InterlockedExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)GetPreStubEntryPoint()); - } - - BOOL SetTargetInterlocked(TADDR target, TADDR expected) - { - CONTRACTL - { - THROWS; - GC_NOTRIGGER; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(StubPrecode)); - return (TADDR)InterlockedCompareExchange( - (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; - } -#endif // !DACCESS_COMPILE - -}; -typedef DPTR(StubPrecode) PTR_StubPrecode; - - -struct NDirectImportPrecode { - - static const int Type = 0xe0; - - // ldr r12, [pc, #4] ; =m_pMethodDesc - // ldr pc, [pc, #4] ; =m_pTarget - // dcd pMethodDesc - // dcd pTarget - WORD m_rgCode[4]; - TADDR m_pMethodDesc; // Notice that the fields are reversed compared to StubPrecode. Precode::GetType - // takes advantage of this to detect NDirectImportPrecode. 
- TADDR m_pTarget; - - void Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); - - TADDR GetMethodDesc() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pMethodDesc; - } - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pTarget; - } - - LPVOID GetEntrypoint() - { - LIMITED_METHOD_CONTRACT; - return (LPVOID)(dac_cast(this) + THUMB_CODE); - } - -}; -typedef DPTR(NDirectImportPrecode) PTR_NDirectImportPrecode; - - -struct FixupPrecode { - - static const int Type = 0xfc; - - // mov r12, pc - // ldr pc, [pc, #4] ; =m_pTarget - // dcb m_MethodDescChunkIndex - // dcb m_PrecodeChunkIndex - // dcd m_pTarget - WORD m_rgCode[3]; - BYTE m_MethodDescChunkIndex; - BYTE m_PrecodeChunkIndex; - TADDR m_pTarget; - - void Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0); - - TADDR GetBase() - { - LIMITED_METHOD_CONTRACT; - SUPPORTS_DAC; - - return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); - } - - size_t GetSizeRW() - { - LIMITED_METHOD_CONTRACT; - - return GetBase() + sizeof(void*) - dac_cast(this); - } - - TADDR GetMethodDesc(); - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pTarget; - } - -#ifndef DACCESS_COMPILE - void ResetTargetInterlocked() - { - CONTRACTL - { - THROWS; - GC_TRIGGERS; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(FixupPrecode)); - InterlockedExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)GetEEFuncEntryPoint(PrecodeFixupThunk)); - } - - BOOL SetTargetInterlocked(TADDR target, TADDR expected) - { - CONTRACTL - { - THROWS; - GC_TRIGGERS; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(FixupPrecode)); - return (TADDR)InterlockedCompareExchange( - (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; - } -#endif // !DACCESS_COMPILE - - static BOOL 
IsFixupPrecodeByASM(PCODE addr) - { - PTR_WORD pInstr = dac_cast(PCODEToPINSTR(addr)); - - return - (pInstr[0] == 0x46fc) && - (pInstr[1] == 0xf8df) && - (pInstr[2] == 0xf004); - } - -#ifdef DACCESS_COMPILE - void EnumMemoryRegions(CLRDataEnumMemoryFlags flags); -#endif -}; -typedef DPTR(FixupPrecode) PTR_FixupPrecode; - - // Precode to shuffle this and retbuf for closed delegates over static methods with return buffer struct ThisPtrRetBufPrecode { - static const int Type = 0x84; + static const int Type = 0x46; // mov r12, r0 // mov r0, r1 @@ -1294,166 +1104,4 @@ inline size_t GetARMInstructionLength(PBYTE pInstr) return GetARMInstructionLength(*(WORD*)pInstr); } - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Call counting - -#ifdef FEATURE_TIERED_COMPILATION - -#define DISABLE_COPY(T) \ - T(const T &) = delete; \ - T &operator =(const T &) = delete - -typedef UINT16 CallCount; -typedef DPTR(CallCount) PTR_CallCount; - -//////////////////////////////////////////////////////////////// -// CallCountingStub - -class CallCountingStub; -typedef DPTR(const CallCountingStub) PTR_CallCountingStub; - -class CallCountingStub -{ -public: - static const SIZE_T Alignment = sizeof(void *); - -#ifndef DACCESS_COMPILE -protected: - static const PCODE TargetForThresholdReached; - - CallCountingStub() = default; - -public: - static const CallCountingStub *From(TADDR stubIdentifyingToken); - - PCODE GetEntryPoint() const - { - WRAPPER_NO_CONTRACT; - return PINSTRToPCODE((TADDR)this); - } -#endif - -public: - PTR_CallCount GetRemainingCallCountCell() const; - PCODE GetTargetForMethod() const; - - DISABLE_COPY(CallCountingStub); -}; - -//////////////////////////////////////////////////////////////// -// CallCountingStubShort - -class CallCountingStubShort; -typedef DPTR(const CallCountingStubShort) PTR_CallCountingStubShort; - -#pragma pack(push, 1) -class CallCountingStubShort : public 
CallCountingStub -{ -private: - const UINT16 m_part0[16]; - CallCount *const m_remainingCallCountCell; - const PCODE m_targetForMethod; - const PCODE m_targetForThresholdReached; - -#ifndef DACCESS_COMPILE -public: - CallCountingStubShort(CallCountingStubShort* stubRX, CallCount *remainingCallCountCell, PCODE targetForMethod) - : m_part0{ 0xb401, // push {r0} - 0xf8df, 0xc01c, // ldr r12, [pc, #(m_remainingCallCountCell)] - 0xf8bc, 0x0000, // ldrh r0, [r12] - 0x1e40, // subs r0, r0, #1 - 0xf8ac, 0x0000, // strh r0, [r12] - 0xbc01, // pop {r0} - 0xd001, // beq L0 - 0xf8df, 0xf00c, // ldr pc, [pc, #(m_targetForMethod)] - 0xf2af, 0x0c1c, // L0: adr r12, #(this) - // (r12 == stub-identifying token == this) - 0xf8df, 0xf008}, // ldr pc, [pc, #(m_targetForThresholdReached)] - m_remainingCallCountCell(remainingCallCountCell), - m_targetForMethod(targetForMethod), - m_targetForThresholdReached(TargetForThresholdReached) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(CallCountingStubShort) % Alignment == 0); - _ASSERTE(remainingCallCountCell != nullptr); - _ASSERTE(PCODEToPINSTR(targetForMethod) != NULL); - } - - static bool Is(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - return true; - } - - static const CallCountingStubShort *From(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(stubIdentifyingToken)); - - const CallCountingStubShort *stub = (const CallCountingStubShort *)stubIdentifyingToken; - _ASSERTE(IS_ALIGNED(stub, Alignment)); - return stub; - } -#endif - -public: - static bool Is(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; - return true; - } - - static PTR_CallCountingStubShort From(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(callCountingStub)); - - return dac_cast(callCountingStub); - } - - PCODE GetTargetForMethod() const - { - WRAPPER_NO_CONTRACT; - return m_targetForMethod; - } - - friend CallCountingStub; - DISABLE_COPY(CallCountingStubShort); -}; -#pragma 
pack(pop) - -//////////////////////////////////////////////////////////////// -// CallCountingStub definitions - -#ifndef DACCESS_COMPILE -inline const CallCountingStub *CallCountingStub::From(TADDR stubIdentifyingToken) -{ - WRAPPER_NO_CONTRACT; - _ASSERTE(stubIdentifyingToken != NULL); - - return CallCountingStubShort::From(stubIdentifyingToken); -} -#endif - -inline PTR_CallCount CallCountingStub::GetRemainingCallCountCell() const -{ - WRAPPER_NO_CONTRACT; - return PTR_CallCount(dac_cast(this)->m_remainingCallCountCell); -} - -inline PCODE CallCountingStub::GetTargetForMethod() const -{ - WRAPPER_NO_CONTRACT; - return CallCountingStubShort::From(PTR_CallCountingStub(this))->GetTargetForMethod(); -} - -//////////////////////////////////////////////////////////////// - -#undef DISABLE_COPY - -#endif // FEATURE_TIERED_COMPILATION - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - #endif // __cgencpu_h__ diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index 810d6ec98f162..4c0d7cc456a3d 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -348,10 +348,10 @@ void CopyWriteBarrier(PCODE dstCode, PCODE srcCode, PCODE endCode) size_t size = (PBYTE)end - (PBYTE)src; - ExecutableWriterHolder writeBarrierWriterHolder; + ExecutableWriterHolderNoLog writeBarrierWriterHolder; if (IsWriteBarrierCopyEnabled()) { - writeBarrierWriterHolder = ExecutableWriterHolder((void*)dst, size); + writeBarrierWriterHolder.AssignExecutableWriterHolder((void*)dst, size); dst = (TADDR)writeBarrierWriterHolder.GetRW(); } @@ -458,10 +458,10 @@ void UpdateGCWriteBarriers(bool postGrow = false) if(to) { to = (PBYTE)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation(to)); - ExecutableWriterHolder barrierWriterHolder; + ExecutableWriterHolderNoLog barrierWriterHolder; if (IsWriteBarrierCopyEnabled()) { - barrierWriterHolder = ExecutableWriterHolder(to, barrierSize); + 
barrierWriterHolder.AssignExecutableWriterHolder(to, barrierSize); to = barrierWriterHolder.GetRW(); } GWB_PATCH_OFFSET(g_lowest_address); @@ -721,98 +721,8 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD) pRD->pCurrentContextPointers->Lr = NULL; } -TADDR FixupPrecode::GetMethodDesc() -{ - LIMITED_METHOD_DAC_CONTRACT; - - // This lookup is also manually inlined in PrecodeFixupThunk assembly code - TADDR base = *PTR_TADDR(GetBase()); - if (base == NULL) - return NULL; - return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT); -} - -#ifdef DACCESS_COMPILE -void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) -{ - SUPPORTS_DAC; - DacEnumMemoryRegion(dac_cast(this), sizeof(FixupPrecode)); - - DacEnumMemoryRegion(GetBase(), sizeof(TADDR)); -} -#endif // DACCESS_COMPILE - #ifndef DACCESS_COMPILE -void StubPrecode::Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) -{ - WRAPPER_NO_CONTRACT; - - int n = 0; - - m_rgCode[n++] = 0xf8df; // ldr r12, [pc, #8] - m_rgCode[n++] = 0xc008; - m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #0] - m_rgCode[n++] = 0xf000; - - _ASSERTE(n == ARRAY_SIZE(m_rgCode)); - - m_pTarget = GetPreStubEntryPoint(); - m_pMethodDesc = (TADDR)pMD; -} - -void NDirectImportPrecode::Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) -{ - WRAPPER_NO_CONTRACT; - - int n = 0; - - m_rgCode[n++] = 0xf8df; // ldr r12, [pc, #4] - m_rgCode[n++] = 0xc004; - m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #4] - m_rgCode[n++] = 0xf004; - - _ASSERTE(n == ARRAY_SIZE(m_rgCode)); - - m_pMethodDesc = (TADDR)pMD; - m_pTarget = GetEEFuncEntryPoint(NDirectImportThunk); -} - -void FixupPrecode::Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/) -{ - WRAPPER_NO_CONTRACT; - - m_rgCode[0] = 0x46fc; // mov r12, pc - m_rgCode[1] = 0xf8df; // ldr pc, [pc, #4] - m_rgCode[2] = 0xf004; - - // 
Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work. - if (m_PrecodeChunkIndex == 0) - { - _ASSERTE(FitsInU1(iPrecodeChunkIndex)); - m_PrecodeChunkIndex = static_cast(iPrecodeChunkIndex); - } - - if (iMethodDescChunkIndex != -1) - { - if (m_MethodDescChunkIndex == 0) - { - _ASSERTE(FitsInU1(iMethodDescChunkIndex)); - m_MethodDescChunkIndex = static_cast(iMethodDescChunkIndex); - } - - if (*(void**)GetBase() == NULL) - *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT); - } - - _ASSERTE(GetMethodDesc() == (TADDR)pMD); - - if (pLoaderAllocator != NULL) - { - m_pTarget = GetEEFuncEntryPoint(PrecodeFixupThunk); - } -} - void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) { WRAPPER_NO_CONTRACT; @@ -1205,53 +1115,6 @@ void ResolveHolder::Initialize(ResolveHolder* pResolveHolderRX, _ASSERTE(patcherTarget == NULL); } -BOOL DoesSlotCallPrestub(PCODE pCode) -{ - PTR_WORD pInstr = dac_cast(PCODEToPINSTR(pCode)); - -#ifdef HAS_COMPACT_ENTRYPOINTS - if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL) - { - return TRUE; - } -#endif // HAS_COMPACT_ENTRYPOINTS - - // FixupPrecode - if (pInstr[0] == 0x46fc && // // mov r12, pc - pInstr[1] == 0xf8df && - pInstr[2] == 0xf004) - { - PCODE pTarget = dac_cast(pInstr)->m_pTarget; - - // Check for jump stub (NGen case) - if (isJump(pTarget)) - { - pTarget = decodeJump(pTarget); - } - - return pTarget == (TADDR)PrecodeFixupThunk; - } - - // StubPrecode - if (pInstr[0] == 0xf8df && // ldr r12, [pc + 8] - pInstr[1] == 0xc008 && - pInstr[2] == 0xf8df && // ldr pc, [pc] - pInstr[3] == 0xf000) - { - PCODE pTarget = dac_cast(pInstr)->m_pTarget; - - // Check for jump stub (NGen case) - if (isJump(pTarget)) - { - pTarget = decodeJump(pTarget); - } - - return pTarget == GetPreStubEntryPoint(); - } - - return FALSE; -} - Stub *GenerateInitPInvokeFrameHelper() { CONTRACT(Stub*) diff --git 
a/src/coreclr/vm/arm/thunktemplates.S b/src/coreclr/vm/arm/thunktemplates.S new file mode 100644 index 0000000000000..0686bb2ed4b73 --- /dev/null +++ b/src/coreclr/vm/arm/thunktemplates.S @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "unixasmmacros.inc" +#include "asmconstants.h" + +.syntax unified +.thumb + + .align 4 + +PAGE_SIZE = 4096 + +#define DATA_SLOT(stub, field) stub##Code + PAGE_SIZE + stub##Data__##field + + LEAF_ENTRY StubPrecodeCode + ldr r12, DATA_SLOT(StubPrecode, MethodDesc) + ldr pc, DATA_SLOT(StubPrecode, Target) + LEAF_END_MARKED StubPrecodeCode + + .align 4 + + LEAF_ENTRY FixupPrecodeCode + ldr pc, DATA_SLOT(FixupPrecode, Target) + ldr r12, DATA_SLOT(FixupPrecode, MethodDesc) + ldr pc, DATA_SLOT(FixupPrecode, PrecodeFixupThunk) + LEAF_END_MARKED FixupPrecodeCode + + .align 4 + + LEAF_ENTRY CallCountingStubCode + push {r0} + ldr r12, DATA_SLOT(CallCountingStub, RemainingCallCountCell) + ldrh r0, [r12] + subs r0, r0, #1 + strh r0, [r12] + pop {r0} + beq LOCAL_LABEL(CountReachedZero) + ldr pc, DATA_SLOT(CallCountingStub, TargetForMethod) +LOCAL_LABEL(CountReachedZero): + ldr pc, DATA_SLOT(CallCountingStub, TargetForThresholdReached) + LEAF_END_MARKED CallCountingStubCode diff --git a/src/coreclr/vm/arm/thunktemplates.asm b/src/coreclr/vm/arm/thunktemplates.asm new file mode 100644 index 0000000000000..6562be72146c2 --- /dev/null +++ b/src/coreclr/vm/arm/thunktemplates.asm @@ -0,0 +1,43 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "ksarm.h" +#include "asmconstants.h" +#include "asmmacros.h" + + + TEXTAREA + + ALIGN 4 + + #define DATA_SLOT(stub, field) stub##Code + PAGE_SIZE + stub##Data__##field + + LEAF_ENTRY StubPrecodeCode + ldr r12, DATA_SLOT(StubPrecode, MethodDesc) + ldr pc, DATA_SLOT(StubPrecode, Target) + LEAF_END_MARKED StubPrecodeCode + + ALIGN 4 + + LEAF_ENTRY FixupPrecodeCode + ldr pc, DATA_SLOT(FixupPrecode, Target) + ldr r12, DATA_SLOT(FixupPrecode, MethodDesc) + ldr pc, DATA_SLOT(FixupPrecode, PrecodeFixupThunk) + LEAF_END_MARKED FixupPrecodeCode + + ALIGN 4 + + LEAF_ENTRY CallCountingStubCode + push {r0} + ldr r12, DATA_SLOT(CallCountingStub, RemainingCallCountCell) + ldrh r0, [r12] + subs r0, r0, #1 + strh r0, [r12] + pop {r0} + beq CountReachedZero + ldr pc, DATA_SLOT(CallCountingStub, TargetForMethod) +CountReachedZero + ldr pc, DATA_SLOT(CallCountingStub, TargetForThresholdReached) + LEAF_END_MARKED CallCountingStubCode + + END diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 5f5560fbc25f3..fadd6be2ded14 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -165,18 +165,20 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FaultingExceptionFrame == sizeof(FaultingEx ASMCONSTANTS_C_ASSERT(FaultingExceptionFrame__m_fFilterExecuted == offsetof(FaultingExceptionFrame, m_fFilterExecuted)); #define SIZEOF__FixupPrecode 24 -#define Offset_PrecodeChunkIndex 15 -#define Offset_MethodDescChunkIndex 14 +//#define Offset_PrecodeChunkIndex 15 +//#define Offset_MethodDescChunkIndex 14 #define MethodDesc_ALIGNMENT_SHIFT 3 -#define FixupPrecode_ALIGNMENT_SHIFT_1 3 -#define FixupPrecode_ALIGNMENT_SHIFT_2 4 +//#define FixupPrecode_ALIGNMENT_SHIFT_1 3 +//#define FixupPrecode_ALIGNMENT_SHIFT_2 4 ASMCONSTANTS_C_ASSERT(SIZEOF__FixupPrecode == sizeof(FixupPrecode)); -ASMCONSTANTS_C_ASSERT(Offset_PrecodeChunkIndex == offsetof(FixupPrecode, m_PrecodeChunkIndex)); -ASMCONSTANTS_C_ASSERT(Offset_MethodDescChunkIndex == 
offsetof(FixupPrecode, m_MethodDescChunkIndex)); +//ASMCONSTANTS_C_ASSERT(Offset_PrecodeChunkIndex == offsetof(FixupPrecode, m_PrecodeChunkIndex)); +//ASMCONSTANTS_C_ASSERT(Offset_MethodDescChunkIndex == offsetof(FixupPrecode, m_MethodDescChunkIndex)); ASMCONSTANTS_C_ASSERT(MethodDesc_ALIGNMENT_SHIFT == MethodDesc::ALIGNMENT_SHIFT); -ASMCONSTANTS_C_ASSERT((1< precodeWriterHolder(this, sizeof(StubPrecode)); - InterlockedExchange64((LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)GetPreStubEntryPoint()); - } - - BOOL SetTargetInterlocked(TADDR target, TADDR expected) - { - CONTRACTL - { - THROWS; - GC_NOTRIGGER; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(StubPrecode)); - return (TADDR)InterlockedCompareExchange64( - (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; - } -#endif // !DACCESS_COMPILE - -}; -typedef DPTR(StubPrecode) PTR_StubPrecode; - - -struct NDirectImportPrecode { - - static const int Type = 0x8B; - - // adr x11, #16 ; Notice that x11 register is used to differentiate the stub from StubPrecode which uses x9 - // ldp x10,x12,[x11] ; =m_pTarget,m_pMethodDesc - // br x10 - // 4 byte padding for 8 byte allignement - // dcd pTarget - // dcd pMethodDesc - DWORD m_rgCode[4]; - TADDR m_pTarget; - TADDR m_pMethodDesc; - - void Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); - - TADDR GetMethodDesc() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pMethodDesc; - } - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pTarget; - } - - LPVOID GetEntrypoint() - { - LIMITED_METHOD_CONTRACT; - return this; - } - -}; -typedef DPTR(NDirectImportPrecode) PTR_NDirectImportPrecode; - - -struct FixupPrecode { - - static const int Type = 0x0C; - - // adr x12, #0 - // ldr x11, [pc, #12] ; =m_pTarget - // br x11 - // dcb m_MethodDescChunkIndex - // dcb m_PrecodeChunkIndex - // 2 byte padding - // dcd m_pTarget - - - UINT32 
m_rgCode[3]; - BYTE padding[2]; - BYTE m_MethodDescChunkIndex; - BYTE m_PrecodeChunkIndex; - TADDR m_pTarget; - - void Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0); - void InitCommon() - { - WRAPPER_NO_CONTRACT; - int n = 0; - - m_rgCode[n++] = 0x1000000C; // adr x12, #0 - m_rgCode[n++] = 0x5800006B; // ldr x11, [pc, #12] ; =m_pTarget - - _ASSERTE((UINT32*)&m_pTarget == &m_rgCode[n + 2]); - - m_rgCode[n++] = 0xD61F0160; // br x11 - - _ASSERTE(n == ARRAY_SIZE(m_rgCode)); - } - - TADDR GetBase() - { - LIMITED_METHOD_CONTRACT; - SUPPORTS_DAC; - - return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); - } - - size_t GetSizeRW() - { - LIMITED_METHOD_CONTRACT; - - return GetBase() + sizeof(void*) - dac_cast(this); - } - - TADDR GetMethodDesc(); - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - return m_pTarget; - } - -#ifndef DACCESS_COMPILE - void ResetTargetInterlocked() - { - CONTRACTL - { - THROWS; - GC_NOTRIGGER; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(FixupPrecode)); - InterlockedExchange64((LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)GetEEFuncEntryPoint(PrecodeFixupThunk)); - } - - BOOL SetTargetInterlocked(TADDR target, TADDR expected) - { - CONTRACTL - { - THROWS; - GC_NOTRIGGER; - } - CONTRACTL_END; - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(FixupPrecode)); - return (TADDR)InterlockedCompareExchange64( - (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; - } -#endif // !DACCESS_COMPILE - - static BOOL IsFixupPrecodeByASM(PCODE addr) - { - PTR_DWORD pInstr = dac_cast(PCODEToPINSTR(addr)); - return - (pInstr[0] == 0x1000000C) && - (pInstr[1] == 0x5800006B) && - (pInstr[2] == 0xD61F0160); - } - -#ifdef DACCESS_COMPILE - void EnumMemoryRegions(CLRDataEnumMemoryFlags flags); -#endif -}; -typedef DPTR(FixupPrecode) 
PTR_FixupPrecode; - - // Precode to shuffle this and retbuf for closed delegates over static methods with return buffer struct ThisPtrRetBufPrecode { @@ -805,165 +591,4 @@ struct ThisPtrRetBufPrecode { }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Call counting - -#ifdef FEATURE_TIERED_COMPILATION - -#define DISABLE_COPY(T) \ - T(const T &) = delete; \ - T &operator =(const T &) = delete - -typedef UINT16 CallCount; -typedef DPTR(CallCount) PTR_CallCount; - -//////////////////////////////////////////////////////////////// -// CallCountingStub - -class CallCountingStub; -typedef DPTR(const CallCountingStub) PTR_CallCountingStub; - -class CallCountingStub -{ -public: - static const SIZE_T Alignment = sizeof(void *); - -#ifndef DACCESS_COMPILE -protected: - static const PCODE TargetForThresholdReached; - - CallCountingStub() = default; - -public: - static const CallCountingStub *From(TADDR stubIdentifyingToken); - - PCODE GetEntryPoint() const - { - WRAPPER_NO_CONTRACT; - return PINSTRToPCODE((TADDR)this); - } -#endif // !DACCESS_COMPILE - -public: - PTR_CallCount GetRemainingCallCountCell() const; - PCODE GetTargetForMethod() const; - - DISABLE_COPY(CallCountingStub); -}; - -//////////////////////////////////////////////////////////////// -// CallCountingStubShort - -class CallCountingStubShort; -typedef DPTR(const CallCountingStubShort) PTR_CallCountingStubShort; - -#pragma pack(push, 1) -class CallCountingStubShort : public CallCountingStub -{ -private: - const UINT32 m_part0[10]; - CallCount *const m_remainingCallCountCell; - const PCODE m_targetForMethod; - const PCODE m_targetForThresholdReached; - -#ifndef DACCESS_COMPILE -public: - CallCountingStubShort(CallCountingStubShort* stubRX, CallCount *remainingCallCountCell, PCODE targetForMethod) - : m_part0{ 0x58000149, // ldr x9, [pc, #(m_remainingCallCountCell)] - 
0x7940012a, // ldrh w10, [x9] - 0x7100054a, // subs w10, w10, #1 - 0x7900012a, // strh w10, [x9] - 0x54000060, // beq L0 - 0x580000e9, // ldr x9, [pc, #(m_targetForMethod)] - 0xd61f0120, // br x9 - 0x10ffff2a, // L0: adr x10, #(this) - // (x10 == stub-identifying token == this) - 0x580000c9, // ldr x9, [pc, #(m_targetForThresholdReached)] - 0xd61f0120}, // br x9 - m_remainingCallCountCell(remainingCallCountCell), - m_targetForMethod(targetForMethod), - m_targetForThresholdReached(TargetForThresholdReached) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(CallCountingStubShort) % Alignment == 0); - _ASSERTE(remainingCallCountCell != nullptr); - _ASSERTE(PCODEToPINSTR(targetForMethod) != NULL); - } - - static bool Is(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - return true; - } - - static const CallCountingStubShort *From(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(stubIdentifyingToken)); - - const CallCountingStubShort *stub = (const CallCountingStubShort *)stubIdentifyingToken; - _ASSERTE(IS_ALIGNED(stub, Alignment)); - return stub; - } -#endif // !DACCESS_COMPILE - -public: - static bool Is(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; - return true; - } - - static PTR_CallCountingStubShort From(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(callCountingStub)); - - return dac_cast(callCountingStub); - } - - PCODE GetTargetForMethod() const - { - WRAPPER_NO_CONTRACT; - return m_targetForMethod; - } - - friend CallCountingStub; - DISABLE_COPY(CallCountingStubShort); -}; -#pragma pack(pop) - -//////////////////////////////////////////////////////////////// -// CallCountingStub definitions - -#ifndef DACCESS_COMPILE -inline const CallCountingStub *CallCountingStub::From(TADDR stubIdentifyingToken) -{ - WRAPPER_NO_CONTRACT; - _ASSERTE(stubIdentifyingToken != NULL); - - return CallCountingStubShort::From(stubIdentifyingToken); -} -#endif - -inline PTR_CallCount 
CallCountingStub::GetRemainingCallCountCell() const -{ - WRAPPER_NO_CONTRACT; - return PTR_CallCount(dac_cast(this)->m_remainingCallCountCell); -} - -inline PCODE CallCountingStub::GetTargetForMethod() const -{ - WRAPPER_NO_CONTRACT; - return CallCountingStubShort::From(PTR_CallCountingStub(this))->GetTargetForMethod(); -} - -//////////////////////////////////////////////////////////////// - -#undef DISABLE_COPY - -#endif // FEATURE_TIERED_COMPILATION - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - #endif // __cgencpu_h__ diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 5e44995868483..bd1aaaa632f42 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -543,93 +543,7 @@ void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD) ClearRegDisplayArgumentAndScratchRegisters(pRD); } -TADDR FixupPrecode::GetMethodDesc() -{ - LIMITED_METHOD_DAC_CONTRACT; - - // This lookup is also manually inlined in PrecodeFixupThunk assembly code - TADDR base = *PTR_TADDR(GetBase()); - if (base == NULL) - return NULL; - return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT); -} - -#ifdef DACCESS_COMPILE -void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) -{ - SUPPORTS_DAC; - DacEnumMemoryRegion(dac_cast(this), sizeof(FixupPrecode)); - - DacEnumMemoryRegion(GetBase(), sizeof(TADDR)); -} -#endif // DACCESS_COMPILE - #ifndef DACCESS_COMPILE -void StubPrecode::Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) -{ - WRAPPER_NO_CONTRACT; - - int n = 0; - - m_rgCode[n++] = 0x10000089; // adr x9, #16 - m_rgCode[n++] = 0xA940312A; // ldp x10,x12,[x9] - m_rgCode[n++] = 0xD61F0140; // br x10 - - _ASSERTE(n+1 == ARRAY_SIZE(m_rgCode)); - - m_pTarget = GetPreStubEntryPoint(); - m_pMethodDesc = (TADDR)pMD; -} - -void NDirectImportPrecode::Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, 
LoaderAllocator *pLoaderAllocator) -{ - WRAPPER_NO_CONTRACT; - - int n = 0; - - m_rgCode[n++] = 0x1000008B; // adr x11, #16 - m_rgCode[n++] = 0xA940316A; // ldp x10,x12,[x11] - m_rgCode[n++] = 0xD61F0140; // br x10 - - _ASSERTE(n+1 == ARRAY_SIZE(m_rgCode)); - - m_pTarget = GetEEFuncEntryPoint(NDirectImportThunk); - m_pMethodDesc = (TADDR)pMD; -} - -void FixupPrecode::Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/) -{ - WRAPPER_NO_CONTRACT; - - InitCommon(); - - // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work. - if (m_PrecodeChunkIndex == 0) - { - _ASSERTE(FitsInU1(iPrecodeChunkIndex)); - m_PrecodeChunkIndex = static_cast(iPrecodeChunkIndex); - } - - if (iMethodDescChunkIndex != -1) - { - if (m_MethodDescChunkIndex == 0) - { - _ASSERTE(FitsInU1(iMethodDescChunkIndex)); - m_MethodDescChunkIndex = static_cast(iMethodDescChunkIndex); - } - - if (*(void**)GetBase() == NULL) - *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT); - } - - _ASSERTE(pPrecodeRX->GetMethodDesc() == (TADDR)pMD); - - if (pLoaderAllocator != NULL) - { - m_pTarget = GetEEFuncEntryPoint(PrecodeFixupThunk); - } -} - void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) { WRAPPER_NO_CONTRACT; @@ -652,45 +566,6 @@ void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocat m_pMethodDesc = (TADDR)pMD; } -BOOL DoesSlotCallPrestub(PCODE pCode) -{ - PTR_DWORD pInstr = dac_cast(PCODEToPINSTR(pCode)); - - //FixupPrecode -#if defined(HAS_FIXUP_PRECODE) - if (FixupPrecode::IsFixupPrecodeByASM(pCode)) - { - PCODE pTarget = dac_cast(pInstr)->m_pTarget; - - if (isJump(pTarget)) - { - pTarget = decodeJump(pTarget); - } - - return pTarget == (TADDR)PrecodeFixupThunk; - } -#endif - - // StubPrecode - if (pInstr[0] == 0x10000089 && // adr x9, #16 - pInstr[1] == 0xA940312A 
&& // ldp x10,x12,[x9] - pInstr[2] == 0xD61F0140) // br x10 - { - PCODE pTarget = dac_cast(pInstr)->m_pTarget; - - if (isJump(pTarget)) - { - pTarget = decodeJump(pTarget); - } - - return pTarget == GetPreStubEntryPoint(); - } - - return FALSE; - -} - - #endif // !DACCESS_COMPILE void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegisters * pCalleeSaved) @@ -983,10 +858,10 @@ static void UpdateWriteBarrierState(bool skipEphemeralCheck) { BYTE *writeBarrierCodeStart = GetWriteBarrierCodeLocation((void*)JIT_PatchedCodeStart); BYTE *writeBarrierCodeStartRW = writeBarrierCodeStart; - ExecutableWriterHolder writeBarrierWriterHolder; + ExecutableWriterHolderNoLog writeBarrierWriterHolder; if (IsWriteBarrierCopyEnabled()) { - writeBarrierWriterHolder = ExecutableWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); + writeBarrierWriterHolder.AssignExecutableWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); writeBarrierCodeStartRW = writeBarrierWriterHolder.GetRW(); } JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierCodeStartRW - writeBarrierCodeStart); diff --git a/src/coreclr/vm/arm64/thunktemplates.S b/src/coreclr/vm/arm64/thunktemplates.S new file mode 100644 index 0000000000000..4645ba17be59c --- /dev/null +++ b/src/coreclr/vm/arm64/thunktemplates.S @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "unixasmmacros.inc" +#include "asmconstants.h" + +#define DATA_SLOT(stub, field) (C_FUNC(stub##Code\PAGE_SIZE) + \PAGE_SIZE + stub##Data__##field) + + .irp PAGE_SIZE, 4096, 8192, 16384, 32768, 65536 + + LEAF_ENTRY StubPrecodeCode\PAGE_SIZE + ldr x10, DATA_SLOT(StubPrecode, Target) + ldr x12, DATA_SLOT(StubPrecode, MethodDesc) + br x10 + LEAF_END_MARKED StubPrecodeCode\PAGE_SIZE + + LEAF_ENTRY FixupPrecodeCode\PAGE_SIZE + ldr x11, DATA_SLOT(FixupPrecode, Target) + br x11 + ldr x12, DATA_SLOT(FixupPrecode, MethodDesc) + ldr x11, DATA_SLOT(FixupPrecode, PrecodeFixupThunk) + br x11 + LEAF_END_MARKED FixupPrecodeCode\PAGE_SIZE + + LEAF_ENTRY CallCountingStubCode\PAGE_SIZE +LOCAL_LABEL(StubStart\PAGE_SIZE): + ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell) + ldrh w10, [x9] + subs w10, w10, #1 + strh w10, [x9] + beq LOCAL_LABEL(CountReachedZero\PAGE_SIZE) + ldr x9, DATA_SLOT(CallCountingStub, TargetForMethod) + br x9 +LOCAL_LABEL(CountReachedZero\PAGE_SIZE): + ldr x10, DATA_SLOT(CallCountingStub, TargetForThresholdReached) + br x10 + LEAF_END_MARKED CallCountingStubCode\PAGE_SIZE + + .endr diff --git a/src/coreclr/vm/arm64/thunktemplates.asm b/src/coreclr/vm/arm64/thunktemplates.asm new file mode 100644 index 0000000000000..958ddb029a6ee --- /dev/null +++ b/src/coreclr/vm/arm64/thunktemplates.asm @@ -0,0 +1,37 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" + +#define DATA_SLOT(stub, field) (stub##Code + PAGE_SIZE + stub##Data__##field) + + LEAF_ENTRY StubPrecodeCode + ldr x10, DATA_SLOT(StubPrecode, Target) + ldr x12, DATA_SLOT(StubPrecode, MethodDesc) + br x10 + LEAF_END_MARKED StubPrecodeCode + + LEAF_ENTRY FixupPrecodeCode + ldr x11, DATA_SLOT(FixupPrecode, Target) + br x11 + ldr x12, DATA_SLOT(FixupPrecode, MethodDesc) + ldr x11, DATA_SLOT(FixupPrecode, PrecodeFixupThunk) + br x11 + LEAF_END_MARKED FixupPrecodeCode + + LEAF_ENTRY CallCountingStubCode + ldr x9, DATA_SLOT(CallCountingStub, RemainingCallCountCell) + ldrh w10, [x9] + subs w10, w10, #1 + strh w10, [x9] + beq CountReachedZero + ldr x9, DATA_SLOT(CallCountingStub, TargetForMethod) + br x9 +CountReachedZero + ldr x10, DATA_SLOT(CallCountingStub, TargetForThresholdReached) + br x10 + LEAF_END_MARKED CallCountingStubCode + + END diff --git a/src/coreclr/vm/callcounting.cpp b/src/coreclr/vm/callcounting.cpp index 0f431f0f9e542..2ff450334494b 100644 --- a/src/coreclr/vm/callcounting.cpp +++ b/src/coreclr/vm/callcounting.cpp @@ -118,6 +118,7 @@ PTR_CallCount CallCountingManager::CallCountingInfo::GetRemainingCallCountCell() { WRAPPER_NO_CONTRACT; _ASSERTE(m_stage != Stage::Disabled); + //_ASSERTE(m_callCountingStub != nullptr); return &m_remainingCallCount; } @@ -257,49 +258,93 @@ const CallCountingStub *CallCountingManager::CallCountingStubAllocator::Allocate heap = AllocateHeap(); } - SIZE_T sizeInBytes; - const CallCountingStub *stub; - do + SIZE_T sizeInBytes = sizeof(CallCountingStub); + AllocMemHolder allocationAddressHolder(heap->AllocAlignedMem(sizeInBytes, 1)); + CallCountingStub *stub = (CallCountingStub*)(void*)allocationAddressHolder; + allocationAddressHolder.SuppressRelease(); + stub->Initialize(targetForMethod, remainingCallCountCell); + + return stub; +} + +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + #define ENUM_PAGE_SIZE(size) \ + extern "C" void 
CallCountingStubCode##size(); \ + extern "C" void CallCountingStubCode##size##_End(); + + ENUM_PAGE_SIZES + #undef ENUM_PAGE_SIZE +#else +extern "C" void CallCountingStubCode(); +extern "C" void CallCountingStubCode_End(); +#endif + +#ifdef TARGET_X86 +extern "C" size_t CallCountingStubCode_RemainingCallCountCell_Offset; +extern "C" size_t CallCountingStubCode_TargetForMethod_Offset; +extern "C" size_t CallCountingStubCode_TargetForThresholdReached_Offset; + +#define SYMBOL_VALUE(name) ((size_t)&name) + +#endif + +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) +void (*CallCountingStub::CallCountingStubCode)(); +#endif + +#ifndef DACCESS_COMPILE + +void CallCountingStub::StaticInitialize() +{ +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + int pageSize = GetOsPageSize(); + #define ENUM_PAGE_SIZE(size) \ + case size: \ + CallCountingStubCode = CallCountingStubCode##size; \ + _ASSERTE(((BYTE*)CallCountingStubCode##size##_End - (BYTE*)CallCountingStubCode##size) <= CallCountingStub::CodeSize); \ + break; + + switch (pageSize) { - bool forceLongStub = false; - #if defined(_DEBUG) && defined(TARGET_AMD64) - if (s_callCountingStubCount % 2 == 0) - { - forceLongStub = true; - } - #endif + ENUM_PAGE_SIZES + default: + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Unsupported OS page size")); + } + #undef ENUM_PAGE_SIZE +#else + _ASSERTE(((BYTE*)CallCountingStubCode_End - (BYTE*)CallCountingStubCode) <= CallCountingStub::CodeSize); +#endif +} - if (!forceLongStub) - { - sizeInBytes = sizeof(CallCountingStubShort); - AllocMemHolder allocationAddressHolder(heap->AllocAlignedMem(sizeInBytes, CallCountingStub::Alignment)); - #ifdef TARGET_AMD64 - if (CallCountingStubShort::CanUseFor(allocationAddressHolder, targetForMethod)) - #endif - { - ExecutableWriterHolder writerHolder(allocationAddressHolder, sizeInBytes); - new(writerHolder.GetRW()) CallCountingStubShort((CallCountingStubShort*)(void*)allocationAddressHolder, remainingCallCountCell, 
targetForMethod); - stub = (CallCountingStub*)(void*)allocationAddressHolder; - allocationAddressHolder.SuppressRelease(); - break; - } - } +#endif // DACCESS_COMPILE - #ifdef TARGET_AMD64 - sizeInBytes = sizeof(CallCountingStubLong); - void *allocationAddress = (void *)heap->AllocAlignedMem(sizeInBytes, CallCountingStub::Alignment); - ExecutableWriterHolder writerHolder(allocationAddress, sizeInBytes); - new(writerHolder.GetRW()) CallCountingStubLong(remainingCallCountCell, targetForMethod); - stub = (CallCountingStub*)allocationAddress; - #else - UNREACHABLE(); - #endif - } while (false); +void CallCountingStub::GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX) +{ + int pageSize = GetOsPageSize(); - ClrFlushInstructionCache(stub, sizeInBytes); - return stub; +#ifdef TARGET_X86 + int totalCodeSize = (pageSize / CallCountingStub::CodeSize) * CallCountingStub::CodeSize; + + for (int i = 0; i < totalCodeSize; i += CallCountingStub::CodeSize) + { + memcpy(pageBase + i, (const void*)CallCountingStubCode, CallCountingStub::CodeSize); + + // Set absolute addresses of the slots in the stub + BYTE* pCounterSlot = pageBaseRX + i + pageSize + offsetof(CallCountingStubData, RemainingCallCountCell); + *(BYTE**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_RemainingCallCountCell_Offset)) = pCounterSlot; + + BYTE* pTargetSlot = pageBaseRX + i + pageSize + offsetof(CallCountingStubData, TargetForMethod); + *(BYTE**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_TargetForMethod_Offset)) = pTargetSlot; + + BYTE* pCountReachedZeroSlot = pageBaseRX + i + pageSize + offsetof(CallCountingStubData, TargetForThresholdReached); + *(BYTE**)(pageBase + i + SYMBOL_VALUE(CallCountingStubCode_TargetForThresholdReached_Offset)) = pCountReachedZeroSlot; + } +#else // TARGET_X86 + FillStubCodePage(pageBase, (const void*)PCODEToPINSTR((PCODE)CallCountingStubCode), CallCountingStub::CodeSize, pageSize); +#endif } + NOINLINE LoaderHeap 
*CallCountingManager::CallCountingStubAllocator::AllocateHeap() { CONTRACTL @@ -312,7 +357,7 @@ NOINLINE LoaderHeap *CallCountingManager::CallCountingStubAllocator::AllocateHea _ASSERTE(m_heap == nullptr); - LoaderHeap *heap = new LoaderHeap(0, 0, &m_heapRangeList, true /* fMakeExecutable */, true /* fUnlocked */); + LoaderHeap *heap = new LoaderHeap(0, 0, &m_heapRangeList, UnlockedLoaderHeap::HeapKind::Interleaved, true /* fUnlocked */, CallCountingStub::GenerateCodePage, CallCountingStub::CodeSize); m_heap = heap; return heap; } @@ -437,6 +482,7 @@ void CallCountingManager::StaticInitialize() { WRAPPER_NO_CONTRACT; s_callCountingManagers = PTR_CallCountingManagerHash(new CallCountingManagerHash()); + CallCountingStub::StaticInitialize(); } #endif diff --git a/src/coreclr/vm/callcounting.h b/src/coreclr/vm/callcounting.h index fa0345238c870..089702e066cb1 100644 --- a/src/coreclr/vm/callcounting.h +++ b/src/coreclr/vm/callcounting.h @@ -65,6 +65,95 @@ Miscellaneous T(const T &) = delete; \ T &operator =(const T &) = delete +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Call counting + +typedef UINT16 CallCount; +typedef DPTR(CallCount) PTR_CallCount; + +//////////////////////////////////////////////////////////////// +// CallCountingStub + +class CallCountingStub; +typedef DPTR(const CallCountingStub) PTR_CallCountingStub; + +struct CallCountingStubData +{ + PTR_CallCount RemainingCallCountCell; + PCODE TargetForMethod; + PCODE TargetForThresholdReached; +}; + +typedef DPTR(CallCountingStubData) PTR_CallCountingStubData; + +class CallCountingStub +{ +public: +#if defined(TARGET_AMD64) + static const int CodeSize = 24; +#elif defined(TARGET_X86) + static const int CodeSize = 24; +#elif defined(TARGET_ARM64) + static const int CodeSize = 40; +#elif defined(TARGET_ARM) + static const int CodeSize = 32; +#endif + +private: + UINT8 m_code[CodeSize]; + +#if defined(TARGET_ARM64) && 
defined(TARGET_UNIX) + static void (*CallCountingStubCode)(); +#endif + +public: + static const SIZE_T Alignment = sizeof(void *); + +protected: + PTR_CallCountingStubData GetData() const + { + return dac_cast(dac_cast(this) + GetOsPageSize()); + } + +#ifndef DACCESS_COMPILE + static const PCODE TargetForThresholdReached; + + CallCountingStub() = default; + +public: + static const CallCountingStub *From(TADDR stubIdentifyingToken); + + PCODE GetEntryPoint() const + { + WRAPPER_NO_CONTRACT; + return PINSTRToPCODE((TADDR)this); + } +#endif // !DACCESS_COMPILE + +public: + +#ifndef DACCESS_COMPILE + void Initialize(PCODE targetForMethod, CallCount* remainingCallCountCell) + { + PTR_CallCountingStubData pStubData = GetData(); + pStubData->RemainingCallCountCell = remainingCallCountCell; + pStubData->TargetForMethod = targetForMethod; + pStubData->TargetForThresholdReached = CallCountingStub::TargetForThresholdReached; + } + + static void StaticInitialize(); +#endif // !DACCESS_COMPILE + + static void GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX); + + PTR_CallCount GetRemainingCallCountCell() const; + PCODE GetTargetForMethod() const; + +protected: + + DISABLE_COPY(CallCountingStub); +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // CallCountingManager @@ -277,6 +366,10 @@ class CallCountingManager public: static void StopAndDeleteAllCallCountingStubs(); + static const CallCountingStub* GetCallCountingStub(CallCount *pCallCount) + { + return CallCountingInfo::From(pCallCount)->GetCallCountingStub(); + } private: static void StopAllCallCounting(TieredCompilationManager *tieredCompilationManager); static void DeleteAllCallCountingStubs(); @@ -293,6 +386,35 @@ class CallCountingManager DISABLE_COPY(CallCountingManager); }; +//////////////////////////////////////////////////////////////// +// CallCountingStub definitions + +#ifndef DACCESS_COMPILE +inline const CallCountingStub 
*CallCountingStub::From(TADDR stubIdentifyingToken) +{ + WRAPPER_NO_CONTRACT; + _ASSERTE(stubIdentifyingToken != NULL); + + // The stubIdentifyingToken is the pointer to the CallCount + const CallCountingStub *stub = CallCountingManager::GetCallCountingStub((CallCount*)stubIdentifyingToken); + + _ASSERTE(IS_ALIGNED(stub, Alignment)); + return stub; +} +#endif + +inline PTR_CallCount CallCountingStub::GetRemainingCallCountCell() const +{ + WRAPPER_NO_CONTRACT; + return GetData()->RemainingCallCountCell; +} + +inline PCODE CallCountingStub::GetTargetForMethod() const +{ + WRAPPER_NO_CONTRACT; + return GetData()->TargetForMethod; +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // CallCountingManager::CallCountingStubManager diff --git a/src/coreclr/vm/ceeload.cpp b/src/coreclr/vm/ceeload.cpp index 8def0646a26e6..56616d3db9c4a 100644 --- a/src/coreclr/vm/ceeload.cpp +++ b/src/coreclr/vm/ceeload.cpp @@ -4469,7 +4469,7 @@ LoaderHeap *Module::GetThunkHeap() LoaderHeap *pNewHeap = new LoaderHeap(VIRTUAL_ALLOC_RESERVE_GRANULARITY, // DWORD dwReserveBlockSize 0, // DWORD dwCommitBlockSize ThunkHeapStubManager::g_pManager->GetRangeList(), - TRUE); // BOOL fMakeExecutable + UnlockedLoaderHeap::HeapKind::Executable); if (FastInterlockCompareExchangePointer(&m_pThunkHeap, pNewHeap, 0) != 0) { diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index 23e5a77eaa422..8404413beb345 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -672,6 +672,19 @@ void EEStartupHelper() CallCountingManager::StaticInitialize(); OnStackReplacementManager::StaticInitialize(); +#ifdef TARGET_UNIX + ExecutableAllocator::InitPreferredRange(); +#else + { + // Record coreclr.dll geometry + PEDecoder pe(GetClrModuleBase()); + + g_runtimeLoadedBaseAddress = (SIZE_T)pe.GetBase(); + g_runtimeVirtualSize = (SIZE_T)pe.GetVirtualSize(); + 
ExecutableAllocator::InitLazyPreferredRange(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); + } +#endif // !TARGET_UNIX + InitThreadManager(); STRESS_LOG0(LF_STARTUP, LL_ALWAYS, "Returned successfully from InitThreadManager"); @@ -807,20 +820,6 @@ void EEStartupHelper() StubManager::InitializeStubManagers(); -#ifdef TARGET_UNIX - ExecutableAllocator::InitPreferredRange(); -#else - { - // Record coreclr.dll geometry - PEDecoder pe(GetClrModuleBase()); - - g_runtimeLoadedBaseAddress = (SIZE_T)pe.GetBase(); - g_runtimeVirtualSize = (SIZE_T)pe.GetVirtualSize(); - ExecutableAllocator::InitLazyPreferredRange(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); - } -#endif // !TARGET_UNIX - - // Set up the cor handle map. This map is used to load assemblies in // memory instead of using the normal system load PEImage::Startup(); @@ -831,7 +830,8 @@ void EEStartupHelper() Stub::Init(); StubLinkerCPU::Init(); - + StubPrecode::StaticInitialize(); + FixupPrecode::StaticInitialize(); InitializeGarbageCollector(); diff --git a/src/coreclr/vm/cgensys.h b/src/coreclr/vm/cgensys.h index ad02efe70d13d..f66614a63d25f 100644 --- a/src/coreclr/vm/cgensys.h +++ b/src/coreclr/vm/cgensys.h @@ -119,12 +119,6 @@ inline bool TargetHasAVXSupport() return false; } - -#ifndef DACCESS_COMPILE -// Given an address in a slot, figure out if the prestub will be called -BOOL DoesSlotCallPrestub(PCODE pCode); -#endif - #ifdef DACCESS_COMPILE // Used by dac/strike to make sense of non-jit/non-jit-helper call targets diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 71b7d55133df4..aece10d072ac2 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -3155,7 +3155,7 @@ JumpStubBlockHeader * EEJitManager::allocJumpStubBlock(MethodDesc* pMD, DWORD n requestInfo.setThrowOnOutOfMemoryWithinRange(throwOnOutOfMemoryWithinRange); TADDR mem; - ExecutableWriterHolder blockWriterHolder; + ExecutableWriterHolderNoLog 
blockWriterHolder; // Scope the lock { @@ -3175,7 +3175,7 @@ JumpStubBlockHeader * EEJitManager::allocJumpStubBlock(MethodDesc* pMD, DWORD n NibbleMapSetUnlocked(pCodeHeap, mem, TRUE); - blockWriterHolder = ExecutableWriterHolder((JumpStubBlockHeader *)mem, sizeof(JumpStubBlockHeader)); + blockWriterHolder.AssignExecutableWriterHolder((JumpStubBlockHeader *)mem, sizeof(JumpStubBlockHeader)); _ASSERTE(IS_ALIGNED(blockWriterHolder.GetRW(), CODE_SIZE_ALIGN)); } @@ -5241,7 +5241,7 @@ PCODE ExecutionManager::getNextJumpStub(MethodDesc* pMD, PCODE target, JumpStubBlockHeader ** ppHead = &(pJumpStubCache->m_pBlocks); JumpStubBlockHeader * curBlock = *ppHead; - ExecutableWriterHolder curBlockWriterHolder; + ExecutableWriterHolderNoLog curBlockWriterHolder; // allocate a new jumpstub from 'curBlock' if it is not fully allocated // @@ -5257,7 +5257,7 @@ PCODE ExecutionManager::getNextJumpStub(MethodDesc* pMD, PCODE target, { // We will update curBlock->m_used at "DONE" size_t blockSize = sizeof(JumpStubBlockHeader) + (size_t) numJumpStubs * BACK_TO_BACK_JUMP_ALLOCATE_SIZE; - curBlockWriterHolder = ExecutableWriterHolder(curBlock, blockSize); + curBlockWriterHolder.AssignExecutableWriterHolder(curBlock, blockSize); jumpStubRW = (BYTE *)((TADDR)jumpStub + (TADDR)curBlockWriterHolder.GetRW() - (TADDR)curBlock); goto DONE; } @@ -5297,7 +5297,7 @@ PCODE ExecutionManager::getNextJumpStub(MethodDesc* pMD, PCODE target, RETURN(NULL); } - curBlockWriterHolder = ExecutableWriterHolder(curBlock, sizeof(JumpStubBlockHeader) + ((size_t) (curBlock->m_used + 1) * BACK_TO_BACK_JUMP_ALLOCATE_SIZE)); + curBlockWriterHolder.AssignExecutableWriterHolder(curBlock, sizeof(JumpStubBlockHeader) + ((size_t) (curBlock->m_used + 1) * BACK_TO_BACK_JUMP_ALLOCATE_SIZE)); jumpStubRW = (BYTE *) curBlockWriterHolder.GetRW() + sizeof(JumpStubBlockHeader) + ((size_t) curBlock->m_used * BACK_TO_BACK_JUMP_ALLOCATE_SIZE); jumpStub = (BYTE *) curBlock + sizeof(JumpStubBlockHeader) + ((size_t) curBlock->m_used * 
BACK_TO_BACK_JUMP_ALLOCATE_SIZE); diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index e2afd8ce64540..6cb9edf8d429b 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -1420,6 +1420,13 @@ class ExecutionManager static unsigned m_LCG_JumpStubBlockFullCount; public: + + static void DumpExecutionManagerUsage() + { + fprintf(stderr, "JumpStub usage count:\n"); + fprintf(stderr, "Normal: %u, LCG: %u\n", m_normal_JumpStubLookup, m_LCG_JumpStubLookup); + } + struct JumpStubCache { JumpStubCache() diff --git a/src/coreclr/vm/comcallablewrapper.cpp b/src/coreclr/vm/comcallablewrapper.cpp index d9fc2fe0ac4d5..938a80d123b17 100644 --- a/src/coreclr/vm/comcallablewrapper.cpp +++ b/src/coreclr/vm/comcallablewrapper.cpp @@ -3299,7 +3299,7 @@ void ComMethodTable::LayOutClassMethodTable() if (!m_pMT->HasGenericClassInstantiationInHierarchy()) { - ExecutableWriterHolder methodDescMemoryWriteableHolder; + ExecutableWriterHolderNoLog methodDescMemoryWriteableHolder; // // Allocate method desc's for the rest of the slots. // @@ -3310,7 +3310,7 @@ void ComMethodTable::LayOutClassMethodTable() pMDMemoryPtr = m_pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbAlloc + sizeof(UINT_PTR))); pMethodDescMemory = pMDMemoryPtr; - methodDescMemoryWriteableHolder = ExecutableWriterHolder(pMethodDescMemory, cbAlloc + sizeof(UINT_PTR)); + methodDescMemoryWriteableHolder.AssignExecutableWriterHolder(pMethodDescMemory, cbAlloc + sizeof(UINT_PTR)); writeableOffset = methodDescMemoryWriteableHolder.GetRW() - pMethodDescMemory; // initialize the method desc memory to zero diff --git a/src/coreclr/vm/common.h b/src/coreclr/vm/common.h index 07edf43e24810..dd750e91002af 100644 --- a/src/coreclr/vm/common.h +++ b/src/coreclr/vm/common.h @@ -408,7 +408,6 @@ extern DummyGlobalContract ___contract; #endif // defined(_DEBUG) - // All files get to see all of these .inl files to make sure all files // get the benefit of inlining. 
#include "ceeload.inl" diff --git a/src/coreclr/vm/corhost.cpp b/src/coreclr/vm/corhost.cpp index 861ec15be77a8..efb35656b5259 100644 --- a/src/coreclr/vm/corhost.cpp +++ b/src/coreclr/vm/corhost.cpp @@ -394,6 +394,11 @@ HRESULT CorHost2::ExecuteAssembly(DWORD dwAppDomainId, UNINSTALL_UNWIND_AND_CONTINUE_HANDLER; UNINSTALL_UNHANDLED_MANAGED_EXCEPTION_TRAP; +#ifdef LOG_EXECUTABLE_ALLOCATOR_STATISTICS + ExecutableAllocator::DumpHolderUsage(); + ExecutionManager::DumpExecutionManagerUsage(); +#endif + ErrExit: return hr; diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index 8ff5b8822ba8b..4ee05faaafd8a 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -505,10 +505,10 @@ HostCodeHeap::TrackAllocation* HostCodeHeap::AllocFromFreeList(size_t header, si // found a block LOG((LF_BCL, LL_INFO100, "Level2 - CodeHeap [0x%p] - Block found, size 0x%X\n", this, pCurrent->size)); - ExecutableWriterHolder previousWriterHolder; + ExecutableWriterHolderNoLog previousWriterHolder; if (pPrevious) { - previousWriterHolder = ExecutableWriterHolder(pPrevious, sizeof(TrackAllocation)); + previousWriterHolder.AssignExecutableWriterHolder(pPrevious, sizeof(TrackAllocation)); } ExecutableWriterHolder currentWriterHolder(pCurrent, sizeof(TrackAllocation)); @@ -587,11 +587,11 @@ void HostCodeHeap::AddToFreeList(TrackAllocation *pBlockToInsert, TrackAllocatio { // found the point of insertion pBlockToInsertRW->pNext = pCurrent; - ExecutableWriterHolder previousWriterHolder; + ExecutableWriterHolderNoLog previousWriterHolder; if (pPrevious) { - previousWriterHolder = ExecutableWriterHolder(pPrevious, sizeof(TrackAllocation)); + previousWriterHolder.AssignExecutableWriterHolder(pPrevious, sizeof(TrackAllocation)); previousWriterHolder.GetRW()->pNext = pBlockToInsert; LOG((LF_BCL, LL_INFO100, "Level2 - CodeHeap [0x%p] - Insert block [%p, 0x%X] -> [%p, 0x%X] -> [%p, 0x%X]\n", this, pPrevious, pPrevious->size, diff --git 
a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp index 8c5a050130685..b0e6aa953b4e4 100644 --- a/src/coreclr/vm/gccover.cpp +++ b/src/coreclr/vm/gccover.cpp @@ -67,6 +67,29 @@ static MethodDesc* getTargetMethodDesc(PCODE target) return MethodDesc::GetMethodDescFromStubAddr(target, TRUE); } + if (PrecodeStubManager::g_pManager->GetStubPrecodeRangeList()->IsInRange(target)) + { + return (MethodDesc*)((StubPrecode*)PCODEToPINSTR(target))->GetMethodDesc(); + } + + if (PrecodeStubManager::g_pManager->GetFixupPrecodeRangeList()->IsInRange(target)) + { + if (!FixupPrecode::IsFixupPrecodeByASM(target)) + { + // If the target slot points to the fixup part of the stub, the actual + // stub starts FixupPrecode::FixupCodeOffset bytes below the target, + // so we need to compensate for it. + target -= FixupPrecode::FixupCodeOffset; + if (!FixupPrecode::IsFixupPrecodeByASM(target)) + { + _ASSERTE(!"Invalid FixupPrecode address"); // We should never get other precode type here + return nullptr; + } + } + + return (MethodDesc*)((FixupPrecode*)PCODEToPINSTR(target))->GetMethodDesc(); + } + return nullptr; } @@ -418,7 +441,7 @@ void GCCoverageInfo::SprinkleBreakpoints( #if (defined(TARGET_X86) || defined(TARGET_AMD64)) && USE_DISASSEMBLER BYTE * codeStart = (BYTE *)pCode; - ExecutableWriterHolder codeWriterHolder; + ExecutableWriterHolderNoLog codeWriterHolder; size_t writeableOffset; memcpy(saveAddr, codeStart, codeSize); @@ -432,7 +455,7 @@ void GCCoverageInfo::SprinkleBreakpoints( } else { - codeWriterHolder = ExecutableWriterHolder(codeStart, codeSize); + codeWriterHolder.AssignExecutableWriterHolder(codeStart, codeSize); writeableOffset = codeWriterHolder.GetRW() - codeStart; } diff --git a/src/coreclr/vm/i386/AsmMacros.inc b/src/coreclr/vm/i386/AsmMacros.inc index 6b9eb6eb3fae5..ac77064f0da5c 100644 --- a/src/coreclr/vm/i386/AsmMacros.inc +++ b/src/coreclr/vm/i386/AsmMacros.inc @@ -21,3 +21,26 @@ INLINE_GETTHREAD macro destReg, trashReg add trashReg, SECTIONREL 
gCurrentThreadInfo mov destReg, [trashReg] endm + +LEAF_ENTRY macro functionName + functionName PROC PUBLIC +endm + +LEAF_END macro functionName + functionName ENDP +endm + +LEAF_END_MARKED macro functionName + LOCAL stackArgsSize, bareFunctionName, endMarkerName + stackArgsSize TEXTEQU @SubStr(functionName, @InStr(,functionName, <@>)) + bareFunctionName TEXTEQU @SubStr(functionName, 1, @SizeStr(functionName)-@SizeStr(%stackArgsSize)) + endMarkerName TEXTEQU @CatStr(%bareFunctionName, <_End@0>) + %endMarkerName: + PUBLIC endMarkerName + functionName ENDP +endm + +PATCH_LABEL macro labelName + labelName: + PUBLIC labelName +endm diff --git a/src/coreclr/vm/i386/asmconstants.h b/src/coreclr/vm/i386/asmconstants.h index b24d70302076f..fa9f2b79657a7 100644 --- a/src/coreclr/vm/i386/asmconstants.h +++ b/src/coreclr/vm/i386/asmconstants.h @@ -330,6 +330,26 @@ ASMCONSTANTS_C_ASSERT(ResolveCacheElem__token == offsetof(ResolveCacheElem, to ASMCONSTANTS_C_ASSERT(ResolveCacheElem__target == offsetof(ResolveCacheElem, target)); ASMCONSTANTS_C_ASSERT(ResolveCacheElem__pNext == offsetof(ResolveCacheElem, pNext)); +#define FixupPrecodeData__Target 0x00 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__Target == offsetof(FixupPrecodeData, Target)) +#define FixupPrecodeData__MethodDesc 0x04 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__MethodDesc == offsetof(FixupPrecodeData, MethodDesc)) +#define FixupPrecodeData__PrecodeFixupThunk 0x08 +ASMCONSTANTS_C_ASSERT(FixupPrecodeData__PrecodeFixupThunk == offsetof(FixupPrecodeData, PrecodeFixupThunk)) + +#define StubPrecodeData__Target 0x04 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__Target == offsetof(StubPrecodeData, Target)) +#define StubPrecodeData__MethodDesc 0x00 +ASMCONSTANTS_C_ASSERT(StubPrecodeData__MethodDesc == offsetof(StubPrecodeData, MethodDesc)) + +#define CallCountingStubData__RemainingCallCountCell 0x00 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__RemainingCallCountCell == offsetof(CallCountingStubData, RemainingCallCountCell)) + +#define 
CallCountingStubData__TargetForMethod 0x04 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCountingStubData, TargetForMethod)) + +#define CallCountingStubData__TargetForThresholdReached 0x08 +ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) #undef ASMCONSTANTS_C_ASSERT #undef ASMCONSTANTS_RUNTIME_ASSERT diff --git a/src/coreclr/vm/i386/asmhelpers.S b/src/coreclr/vm/i386/asmhelpers.S index ee675c838bc56..7a620dd0c1f7a 100644 --- a/src/coreclr/vm/i386/asmhelpers.S +++ b/src/coreclr/vm/i386/asmhelpers.S @@ -534,26 +534,6 @@ LEAF_ENTRY NDirectImportThunk, _TEXT jmp eax // Jump to DLL target LEAF_END NDirectImportThunk, _TEXT -// ========================================================================== -// The call in fixup precode initally points to this function. -// The pupose of this function is to load the MethodDesc and forward the call the prestub. -LEAF_ENTRY PrecodeFixupThunk, _TEXT - // Pop the return address. It points right after the call instruction in the precode. 
- pop eax - push esi - push edi - - // Inline computation done by FixupPrecode::GetMethodDesc() - movzx esi, BYTE PTR [eax + 2] // m_PrecodeChunkIndex - movzx edi, BYTE PTR [eax + 1] // m_MethodDescChunkIndex - mov eax, DWORD PTR [eax + esi*8 +3] - lea eax, [eax + edi*4] - - pop edi - pop esi - jmp C_FUNC(ThePreStub) -LEAF_END PrecodeFixupThunk, _TEXT - // // Used to get the current instruction pointer value // @@ -1217,13 +1197,7 @@ NESTED_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT #ifdef FEATURE_TIERED_COMPILATION -LEAF_ENTRY OnCallCountThresholdReachedStub, _TEXT - // Pop the return address (the stub-identifying token) into a non-argument volatile register that can be trashed - pop eax - jmp C_FUNC(OnCallCountThresholdReachedStub2) -LEAF_END OnCallCountThresholdReachedStub, _TEXT - -NESTED_ENTRY OnCallCountThresholdReachedStub2, _TEXT, NoHandler +NESTED_ENTRY OnCallCountThresholdReachedStub, _TEXT, NoHandler STUB_PROLOG mov esi, esp @@ -1246,6 +1220,6 @@ NESTED_ENTRY OnCallCountThresholdReachedStub2, _TEXT, NoHandler // This will never be executed. It is just to help out stack-walking logic // which disassembles the epilog to unwind the stack. ret -NESTED_END OnCallCountThresholdReachedStub2, _TEXT +NESTED_END OnCallCountThresholdReachedStub, _TEXT #endif // FEATURE_TIERED_COMPILATION diff --git a/src/coreclr/vm/i386/asmhelpers.asm b/src/coreclr/vm/i386/asmhelpers.asm index 9258b7848f39f..20cfa31a7556f 100644 --- a/src/coreclr/vm/i386/asmhelpers.asm +++ b/src/coreclr/vm/i386/asmhelpers.asm @@ -804,27 +804,6 @@ _NDirectImportThunk@0 proc public jmp eax ; Jump to DLL target _NDirectImportThunk@0 endp -;========================================================================== -; The call in fixup precode initally points to this function. -; The pupose of this function is to load the MethodDesc and forward the call the prestub. -_PrecodeFixupThunk@0 proc public - - pop eax ; Pop the return address. It points right after the call instruction in the precode. 
- push esi - push edi - - ; Inline computation done by FixupPrecode::GetMethodDesc() - movzx esi,byte ptr [eax+2] ; m_PrecodeChunkIndex - movzx edi,byte ptr [eax+1] ; m_MethodDescChunkIndex - mov eax,dword ptr [eax+esi*8+3] - lea eax,[eax+edi*4] - - pop edi - pop esi - jmp _ThePreStub@0 - -_PrecodeFixupThunk@0 endp - ; void __stdcall setFPReturn(int fpSize, INT64 retVal) _setFPReturn@12 proc public mov ecx, [esp+4] @@ -1525,12 +1504,6 @@ ifdef FEATURE_TIERED_COMPILATION EXTERN _OnCallCountThresholdReached@8:proc _OnCallCountThresholdReachedStub@0 proc public - ; Pop the return address (the stub-identifying token) into a non-argument volatile register that can be trashed - pop eax - jmp _OnCallCountThresholdReachedStub2@0 -_OnCallCountThresholdReachedStub@0 endp - -_OnCallCountThresholdReachedStub2@0 proc public STUB_PROLOG mov esi, esp @@ -1545,7 +1518,7 @@ _OnCallCountThresholdReachedStub2@0 proc public ; This will never be executed. It is just to help out stack-walking logic ; which disassembles the epilog to unwind the stack. 
ret -_OnCallCountThresholdReachedStub2@0 endp +_OnCallCountThresholdReachedStub@0 endp endif ; FEATURE_TIERED_COMPILATION diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h index ab4fc5b120bda..1cc63b10d8b9d 100644 --- a/src/coreclr/vm/i386/cgencpu.h +++ b/src/coreclr/vm/i386/cgencpu.h @@ -81,7 +81,6 @@ EXTERN_C void SinglecastDelegateInvokeStub(); #define HAS_NDIRECT_IMPORT_PRECODE 1 #define HAS_FIXUP_PRECODE 1 -#define HAS_FIXUP_PRECODE_CHUNKS 1 // ThisPtrRetBufPrecode one is necessary for closed delegates over static methods with return buffer #define HAS_THISPTR_RETBUF_PRECODE 1 @@ -525,210 +524,4 @@ inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) #define JIT_NewCrossContext JIT_NewCrossContext #endif // TARGET_UNIX -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Call counting - -#ifdef FEATURE_TIERED_COMPILATION - -#define DISABLE_COPY(T) \ - T(const T &) = delete; \ - T &operator =(const T &) = delete - -typedef UINT16 CallCount; -typedef DPTR(CallCount) PTR_CallCount; - -//////////////////////////////////////////////////////////////// -// CallCountingStub - -class CallCountingStub; -typedef DPTR(const CallCountingStub) PTR_CallCountingStub; - -class CallCountingStub -{ -public: - static const SIZE_T Alignment = sizeof(void *); - -#ifndef DACCESS_COMPILE -protected: - static const PCODE TargetForThresholdReached; - - CallCountingStub() = default; - -public: - static const CallCountingStub *From(TADDR stubIdentifyingToken); - - PCODE GetEntryPoint() const - { - WRAPPER_NO_CONTRACT; - return PINSTRToPCODE((TADDR)this); - } -#endif // !DACCESS_COMPILE - -public: - PTR_CallCount GetRemainingCallCountCell() const; - PCODE GetTargetForMethod() const; - -#ifndef DACCESS_COMPILE -protected: - template static INT_PTR GetRelativeOffset(const T *relRef, PCODE target) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(T) != 0); - 
static_assert_no_msg(sizeof(T) <= sizeof(void *)); - static_assert_no_msg((sizeof(T) & (sizeof(T) - 1)) == 0); // is a power of 2 - _ASSERTE(relRef != nullptr); - - TADDR targetAddress = PCODEToPINSTR(target); - _ASSERTE(targetAddress != NULL); - return (INT_PTR)targetAddress - (INT_PTR)(relRef + 1); - } -#endif - -protected: - template static PCODE GetTarget(const T *relRef) - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8); - _ASSERTE(relRef != nullptr); - - return PINSTRToPCODE((INT_PTR)(relRef + 1) + *relRef); - } - - DISABLE_COPY(CallCountingStub); -}; - -//////////////////////////////////////////////////////////////// -// CallCountingStubShort - -class CallCountingStubShort; -typedef DPTR(const CallCountingStubShort) PTR_CallCountingStubShort; - -#pragma pack(push, 1) -class CallCountingStubShort : public CallCountingStub -{ -private: - const UINT8 m_part0[1]; - CallCount *const m_remainingCallCountCell; - const UINT8 m_part1[5]; - const INT32 m_rel32TargetForMethod; - const UINT8 m_part2[1]; - const INT32 m_rel32TargetForThresholdReached; - const UINT8 m_alignmentPadding[1]; - -#ifndef DACCESS_COMPILE -public: - CallCountingStubShort(CallCountingStubShort* stubRX, CallCount *remainingCallCountCell, PCODE targetForMethod) - : m_part0{ 0xb8}, // mov eax, - m_remainingCallCountCell(remainingCallCountCell), // - m_part1{ 0x66, 0xff, 0x08, // dec word ptr [eax] - 0x0f, 0x85}, // jnz - m_rel32TargetForMethod( // - GetRelative32BitOffset( - &stubRX->m_rel32TargetForMethod, - targetForMethod)), - m_part2{ 0xe8}, // call - m_rel32TargetForThresholdReached( // - GetRelative32BitOffset( - &stubRX->m_rel32TargetForThresholdReached, - TargetForThresholdReached)), - // (eip == stub-identifying token) - m_alignmentPadding{ 0xcc} // int 3 - { - WRAPPER_NO_CONTRACT; - static_assert_no_msg(sizeof(CallCountingStubShort) % Alignment == 0); - _ASSERTE(remainingCallCountCell != nullptr); - 
_ASSERTE(PCODEToPINSTR(targetForMethod) != NULL); - } - -public: - static bool Is(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - return true; - } - - static const CallCountingStubShort *From(TADDR stubIdentifyingToken) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(stubIdentifyingToken)); - _ASSERTE(stubIdentifyingToken % Alignment == offsetof(CallCountingStubShort, m_alignmentPadding[0]) % Alignment); - - const CallCountingStubShort *stub = - (const CallCountingStubShort *)(stubIdentifyingToken - offsetof(CallCountingStubShort, m_alignmentPadding[0])); - _ASSERTE(IS_ALIGNED(stub, Alignment)); - return stub; - } -#endif // !DACCESS_COMPILE - -public: - static bool Is(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; - return true; - } - - static PTR_CallCountingStubShort From(PTR_CallCountingStub callCountingStub) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(Is(callCountingStub)); - - return dac_cast(callCountingStub); - } - - PCODE GetTargetForMethod() const - { - WRAPPER_NO_CONTRACT; - return GetTarget(&m_rel32TargetForMethod); - } - -#ifndef DACCESS_COMPILE -private: - static INT32 GetRelative32BitOffset(const INT32 *rel32Ref, PCODE target) - { - WRAPPER_NO_CONTRACT; - - INT_PTR relativeOffset = GetRelativeOffset(rel32Ref, target); - _ASSERTE((INT32)relativeOffset == relativeOffset); - return (INT32)relativeOffset; - } -#endif - - friend CallCountingStub; - DISABLE_COPY(CallCountingStubShort); -}; -#pragma pack(pop) - -//////////////////////////////////////////////////////////////// -// CallCountingStub definitions - -#ifndef DACCESS_COMPILE -inline const CallCountingStub *CallCountingStub::From(TADDR stubIdentifyingToken) -{ - WRAPPER_NO_CONTRACT; - _ASSERTE(stubIdentifyingToken != NULL); - - return CallCountingStubShort::From(stubIdentifyingToken); -} -#endif - -inline PTR_CallCount CallCountingStub::GetRemainingCallCountCell() const -{ - WRAPPER_NO_CONTRACT; - return PTR_CallCount(dac_cast(this)->m_remainingCallCountCell); -} - -inline PCODE 
CallCountingStub::GetTargetForMethod() const -{ - WRAPPER_NO_CONTRACT; - return CallCountingStubShort::From(PTR_CallCountingStub(this))->GetTargetForMethod(); -} - -//////////////////////////////////////////////////////////////// - -#undef DISABLE_COPY - -#endif // FEATURE_TIERED_COMPILATION - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - #endif // __cgenx86_h__ diff --git a/src/coreclr/vm/i386/cgenx86.cpp b/src/coreclr/vm/i386/cgenx86.cpp index 83bffa716e0f2..8ced6a6c58a50 100644 --- a/src/coreclr/vm/i386/cgenx86.cpp +++ b/src/coreclr/vm/i386/cgenx86.cpp @@ -1186,63 +1186,6 @@ UMEntryThunk* UMEntryThunk::Decode(LPVOID pCallback) return *(UMEntryThunk**)( 1 + (BYTE*)pCallback ); } -BOOL DoesSlotCallPrestub(PCODE pCode) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - PRECONDITION(pCode != NULL); - PRECONDITION(pCode != GetPreStubEntryPoint()); - } CONTRACTL_END; - - // x86 has the following possible sequences for prestub logic: - // 1. slot -> temporary entrypoint -> prestub - // 2. slot -> precode -> prestub - // 3. 
slot -> precode -> jumprel32 (NGEN case) -> prestub - -#ifdef HAS_COMPACT_ENTRYPOINTS - if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL) - { - return TRUE; - } -#endif // HAS_COMPACT_ENTRYPOINTS - - if (!IS_ALIGNED(pCode, PRECODE_ALIGNMENT)) - { - return FALSE; - } - -#ifdef HAS_FIXUP_PRECODE - if (*PTR_BYTE(pCode) == X86_INSTR_CALL_REL32) - { - // Note that call could have been patched to jmp in the meantime - pCode = rel32Decode(pCode+1); - - // NGEN case - if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) { - pCode = rel32Decode(pCode+1); - } - - return pCode == (TADDR)PrecodeFixupThunk; - } -#endif - - if (*PTR_BYTE(pCode) != X86_INSTR_MOV_EAX_IMM32 || - *PTR_BYTE(pCode+5) != X86_INSTR_MOV_RM_R || - *PTR_BYTE(pCode+7) != X86_INSTR_JMP_REL32) - { - return FALSE; - } - pCode = rel32Decode(pCode+8); - - // NGEN case - if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) { - pCode = rel32Decode(pCode+1); - } - - return pCode == GetPreStubEntryPoint(); -} - #ifdef FEATURE_READYTORUN // diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index 0467f347aaacb..641925821ac67 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -1054,10 +1054,10 @@ void InitJITHelpers1() int reg = c_rgWriteBarrierRegs[iBarrier]; BYTE * pBufRW = pBuf; - ExecutableWriterHolder barrierWriterHolder; + ExecutableWriterHolderNoLog barrierWriterHolder; if (IsWriteBarrierCopyEnabled()) { - barrierWriterHolder = ExecutableWriterHolder(pBuf, 34); + barrierWriterHolder.AssignExecutableWriterHolder(pBuf, 34); pBufRW = barrierWriterHolder.GetRW(); } @@ -1206,10 +1206,10 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); BYTE * pBufRW = pBuf; - ExecutableWriterHolder barrierWriterHolder; + ExecutableWriterHolderNoLog barrierWriterHolder; if (IsWriteBarrierCopyEnabled()) { - barrierWriterHolder = 
ExecutableWriterHolder(pBuf, 42); + barrierWriterHolder.AssignExecutableWriterHolder(pBuf, 42); pBufRW = barrierWriterHolder.GetRW(); } @@ -1275,10 +1275,10 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) size_t *pfunc; BYTE * pBufRW = pBuf; - ExecutableWriterHolder barrierWriterHolder; + ExecutableWriterHolderNoLog barrierWriterHolder; if (IsWriteBarrierCopyEnabled()) { - barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + barrierWriterHolder.AssignExecutableWriterHolder(pBuf, 42); pBufRW = barrierWriterHolder.GetRW(); } diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 33d6a4acd531b..35750c87f18d2 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -4963,289 +4963,6 @@ Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame) #endif // !DACCESS_COMPILE -#ifdef HAS_FIXUP_PRECODE - -#ifdef HAS_FIXUP_PRECODE_CHUNKS -TADDR FixupPrecode::GetMethodDesc() -{ - LIMITED_METHOD_CONTRACT; - SUPPORTS_DAC; - - // This lookup is also manually inlined in PrecodeFixupThunk assembly code - TADDR base = *PTR_TADDR(GetBase()); - if (base == NULL) - return NULL; - return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT); -} -#endif - -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS -PCODE FixupPrecode::GetDynamicMethodPrecodeFixupJumpStub() -{ - WRAPPER_NO_CONTRACT; - _ASSERTE(((PTR_MethodDesc)GetMethodDesc())->IsLCGMethod()); - - // The precode fixup jump stub is shared by all fixup precodes in a chunk, and immediately follows the MethodDesc. 
Jump - // stubs cannot be reused currently for the same method: - // - The jump stub's target would change separately from the precode being updated from "call Func" to "jmp Func", both - // changes would have to be done atomically with runtime suspension, which is not done currently - // - When changing the entry point from one version of jitted code to another, the jump stub's target pointer is not - // aligned to 8 bytes in order to be able to do an interlocked update of the target address - // So, when initially the precode intends to be of the form "call PrecodeFixupThunk", if the target address happens to be - // too far for a relative 32-bit jump, it will use the shared precode fixup jump stub. When changing the entry point to - // jitted code, the jump stub associated with the precode is patched, and the precode is updated to use that jump stub. - // - // Notes: - // - Dynamic method descs, and hence their precodes and preallocated jump stubs, may be reused for a different method - // (along with reinitializing the precode), but only with a transition where the original method is no longer accessible - // to user code - // - Concurrent calls to a dynamic method that has not yet been jitted may trigger multiple writes to the jump stub - // associated with the precode, but only to the same target address (and while the precode is still pointing to - // PrecodeFixupThunk) - return GetBase() + sizeof(PTR_MethodDesc); -} - -PCODE FixupPrecode::GetDynamicMethodEntryJumpStub() -{ - WRAPPER_NO_CONTRACT; - _ASSERTE(((PTR_MethodDesc)GetMethodDesc())->IsLCGMethod()); - - // m_PrecodeChunkIndex has a value inverted to the order of precodes in memory (the precode at the lowest address has the - // highest index, and the precode at the highest address has the lowest index). To map a precode to its jump stub by memory - // order, invert the precode index to get the jump stub index. Also skip the precode fixup jump stub (see - // GetDynamicMethodPrecodeFixupJumpStub()). 
- UINT32 count = ((PTR_MethodDesc)GetMethodDesc())->GetMethodDescChunk()->GetCount(); - _ASSERTE(m_PrecodeChunkIndex < count); - SIZE_T jumpStubIndex = count - m_PrecodeChunkIndex; - - return GetBase() + sizeof(PTR_MethodDesc) + jumpStubIndex * BACK_TO_BACK_JUMP_ALLOCATE_SIZE; -} -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - -#ifdef DACCESS_COMPILE -void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) -{ - SUPPORTS_DAC; - DacEnumMemoryRegion(dac_cast(this), sizeof(FixupPrecode)); - - DacEnumMemoryRegion(GetBase(), sizeof(TADDR)); -} -#endif // DACCESS_COMPILE - -#endif // HAS_FIXUP_PRECODE - -#ifndef DACCESS_COMPILE - -void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, /*PINT32*/ PVOID pRel32RW, TADDR target, MethodDesc* pMD) -{ - CONTRACTL - { - THROWS; // Creating a JumpStub could throw OutOfMemory - GC_NOTRIGGER; - } - CONTRACTL_END; - - INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD); - - _ASSERTE(IS_ALIGNED(pRel32RW, sizeof(INT32))); - FastInterlockExchange((LONG*)pRel32RW, (LONG)targetRel32); -} - -BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, /*PINT32*/ PVOID pRel32RW, TADDR target, TADDR expected, MethodDesc* pMD) -{ - CONTRACTL - { - THROWS; // Creating a JumpStub could throw OutOfMemory - GC_NOTRIGGER; - } - CONTRACTL_END; - - BYTE* callAddrAdj = (BYTE*)pRel32 + 4; - INT32 expectedRel32 = static_cast((BYTE*)expected - callAddrAdj); - - INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD); - - _ASSERTE(IS_ALIGNED(pRel32RW, sizeof(INT32))); - return FastInterlockCompareExchange((LONG*)pRel32RW, (LONG)targetRel32, (LONG)expectedRel32) == (LONG)expectedRel32; -} - -void StubPrecode::Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */, - BYTE type /* = StubPrecode::Type */, TADDR target /* = NULL */) -{ - WRAPPER_NO_CONTRACT; - - IN_TARGET_64BIT(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc - IN_TARGET_32BIT(m_movEAX = 
X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc - m_pMethodDesc = (TADDR)pMD; - IN_TARGET_32BIT(m_mov_rm_r = X86_INSTR_MOV_RM_R); // mov reg,reg - m_type = type; - m_jmp = X86_INSTR_JMP_REL32; // jmp rel32 - - if (pLoaderAllocator != NULL) - { - // Use pMD == NULL in all precode initialization methods to allocate the initial jump stub in non-dynamic heap - // that has the same lifetime like as the precode itself - if (target == NULL) - target = GetPreStubEntryPoint(); - m_rel32 = rel32UsingJumpStub(&pPrecodeRX->m_rel32, target, NULL /* pMD */, pLoaderAllocator); - } -} - -#ifdef HAS_NDIRECT_IMPORT_PRECODE - -void NDirectImportPrecode::Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) -{ - WRAPPER_NO_CONTRACT; - StubPrecode::Init(pPrecodeRX, pMD, pLoaderAllocator, NDirectImportPrecode::Type, GetEEFuncEntryPoint(NDirectImportThunk)); -} - -#endif // HAS_NDIRECT_IMPORT_PRECODE - - -#ifdef HAS_FIXUP_PRECODE -void FixupPrecode::Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/) -{ - WRAPPER_NO_CONTRACT; - - m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk - m_type = FixupPrecode::TypePrestub; - - // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work. 
- if (m_PrecodeChunkIndex == 0) - { - _ASSERTE(FitsInU1(iPrecodeChunkIndex)); - m_PrecodeChunkIndex = static_cast(iPrecodeChunkIndex); - } - - if (iMethodDescChunkIndex != -1) - { - if (m_MethodDescChunkIndex == 0) - { - _ASSERTE(FitsInU1(iMethodDescChunkIndex)); - m_MethodDescChunkIndex = static_cast(iMethodDescChunkIndex); - } - - if (*(void**)GetBase() == NULL) - *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT); - } - - _ASSERTE(GetMethodDesc() == (TADDR)pMD); - - PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk); -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - if (pMD->IsLCGMethod()) - { - m_rel32 = rel32UsingPreallocatedJumpStub(&pPrecodeRX->m_rel32, target, pPrecodeRX->GetDynamicMethodPrecodeFixupJumpStub(), GetDynamicMethodPrecodeFixupJumpStub(), false /* emitJump */); - return; - } -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - if (pLoaderAllocator != NULL) - { - m_rel32 = rel32UsingJumpStub(&pPrecodeRX->m_rel32, target, NULL /* pMD */, pLoaderAllocator); - } -} - -void FixupPrecode::ResetTargetInterlocked() -{ - CONTRACTL - { - THROWS; // Creating a JumpStub could throw OutOfMemory - GC_NOTRIGGER; - } - CONTRACTL_END; - - FixupPrecode newValue = *this; - newValue.m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk - newValue.m_type = FixupPrecode::TypePrestub; - - PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk); - MethodDesc* pMD = (MethodDesc*)GetMethodDesc(); -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - // The entry point of LCG methods cannot revert back to the original entry point, as their jump stubs would have to be - // reused, which is currently not supported. This method is intended for resetting the entry point while the method is - // callable, which implies that the entry point may later be changed again to something else. Currently, this is not done - // for LCG methods. See GetDynamicMethodPrecodeFixupJumpStub() for more. 
- _ASSERTE(!pMD->IsLCGMethod()); -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - - newValue.m_rel32 = rel32UsingJumpStub(&m_rel32, target, pMD); - - _ASSERTE(IS_ALIGNED(this, sizeof(INT64))); - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(FixupPrecode)); - FastInterlockExchangeLong((INT64*)precodeWriterHolder.GetRW(), *(INT64*)&newValue); -} - -BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected) -{ - CONTRACTL - { - THROWS; // Creating a JumpStub could throw OutOfMemory - GC_NOTRIGGER; - } - CONTRACTL_END; - - INT64 oldValue = *(INT64*)this; - BYTE* pOldValue = (BYTE*)&oldValue; - - MethodDesc * pMD = (MethodDesc*)GetMethodDesc(); - g_IBCLogger.LogMethodPrecodeWriteAccess(pMD); - -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - // A different jump stub is used for this case, see Init(). This call is unexpected for resetting the entry point. - _ASSERTE(!pMD->IsLCGMethod() || target != (TADDR)GetEEFuncEntryPoint(PrecodeFixupThunk)); -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - - INT64 newValue = oldValue; - BYTE* pNewValue = (BYTE*)&newValue; - - if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] == FixupPrecode::TypePrestub) - { - pNewValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] = FixupPrecode::Type; - - pOldValue[offsetof(FixupPrecode, m_op)] = X86_INSTR_CALL_REL32; - pNewValue[offsetof(FixupPrecode, m_op)] = X86_INSTR_JMP_REL32; - } - else if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] == FixupPrecode::Type) - { -#ifdef FEATURE_CODE_VERSIONING - // No change needed, jmp is already in place -#else - // Setting the target more than once is unexpected - return FALSE; -#endif - } - else - { - // Pre-existing code doesn't conform to the expectations for a FixupPrecode - return FALSE; - } - -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - ExecutableWriterHolder dynamicMethodEntryJumpStubWriterHolder; - if (pMD->IsLCGMethod()) - { - dynamicMethodEntryJumpStubWriterHolder = 
ExecutableWriterHolder((void*)GetDynamicMethodEntryJumpStub(), 12); - } -#endif - *(INT32*)(&pNewValue[offsetof(FixupPrecode, m_rel32)]) = -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - pMD->IsLCGMethod() ? - rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodEntryJumpStub(), (PCODE)dynamicMethodEntryJumpStubWriterHolder.GetRW(), true /* emitJump */) : -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - rel32UsingJumpStub(&m_rel32, target, pMD); - - _ASSERTE(IS_ALIGNED(this, sizeof(INT64))); - - ExecutableWriterHolder precodeWriterHolder(this, sizeof(FixupPrecode)); - return FastInterlockCompareExchangeLong((INT64*)precodeWriterHolder.GetRW(), newValue, oldValue) == oldValue; -} - -#endif // HAS_FIXUP_PRECODE - -#endif // !DACCESS_COMPILE - - #ifdef HAS_THISPTR_RETBUF_PRECODE // rel32 jmp target that points back to the jump (infinite loop). @@ -5279,8 +4996,12 @@ void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocat // This precode is never patched lazily - avoid unnecessary jump stub allocation m_rel32 = REL32_JMP_SELF; + + _ASSERTE(*((BYTE*)this + OFFSETOF_PRECODE_TYPE) == ThisPtrRetBufPrecode::Type); } +IN_TARGET_32BIT(static_assert_no_msg(offsetof(ThisPtrRetBufPrecode, m_movScratchArg0) == OFFSETOF_PRECODE_TYPE);) + BOOL ThisPtrRetBufPrecode::SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL diff --git a/src/coreclr/vm/i386/stublinkerx86.h b/src/coreclr/vm/i386/stublinkerx86.h index c719057e97ea3..c41441314d982 100644 --- a/src/coreclr/vm/i386/stublinkerx86.h +++ b/src/coreclr/vm/i386/stublinkerx86.h @@ -16,15 +16,12 @@ extern PCODE GetPreStubEntryPoint(); #define X86_INSTR_CALL_REL32 0xE8 // call rel32 #define X86_INSTR_CALL_IND 0x15FF // call dword ptr[addr32] #define X86_INSTR_CALL_IND_EAX 0x10FF // call dword ptr[eax] -#define X86_INSTR_CALL_IND_EAX_OFFSET 0x50FF // call dword ptr[eax + offset] ; where offset follows these 2 bytes -#define X86_INSTR_CALL_EAX 0xD0FF // call eax 
#define X86_INSTR_JMP_REL32 0xE9 // jmp rel32 #define X86_INSTR_JMP_IND 0x25FF // jmp dword ptr[addr32] #define X86_INSTR_JMP_EAX 0xE0FF // jmp eax #define X86_INSTR_MOV_EAX_IMM32 0xB8 // mov eax, imm32 #define X86_INSTR_MOV_EAX_ECX_IND 0x018b // mov eax, [ecx] #define X86_INSTR_CMP_IND_ECX_IMM32 0x3981 // cmp [ecx], imm32 -#define X86_INSTR_MOV_RM_R 0x89 // mov r/m,reg #define X86_INSTR_MOV_AL 0xB0 // mov al, imm8 #define X86_INSTR_JMP_REL8 0xEB // jmp short rel8 @@ -43,10 +40,6 @@ extern PCODE GetPreStubEntryPoint(); #define X86_INSTR_MOVUPS_RM_R 0x110F // movups xmm1/mem128, xmm2 #define X86_INSTR_XORPS 0x570F // xorps xmm1, xmm2/mem128 -#ifdef TARGET_AMD64 -#define X86_INSTR_MOV_R10_IMM64 0xBA49 // mov r10, imm64 -#endif - //---------------------------------------------------------------------- // Encodes X86 registers. The numbers are chosen to match Intel's opcode // encoding. @@ -463,237 +456,8 @@ BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, /*PINT32*/ PVOID pRel32RW, TAD // //------------------------------------------------------------------------ -EXTERN_C VOID STDCALL PrecodeFixupThunk(); - -#ifdef HOST_64BIT - -#define OFFSETOF_PRECODE_TYPE 0 -#define OFFSETOF_PRECODE_TYPE_CALL_OR_JMP 5 -#define OFFSETOF_PRECODE_TYPE_MOV_R10 10 - -#define SIZEOF_PRECODE_BASE 16 - -#else - -EXTERN_C VOID STDCALL PrecodeRemotingThunk(); - -#define OFFSETOF_PRECODE_TYPE 5 -#define OFFSETOF_PRECODE_TYPE_CALL_OR_JMP 5 -#define OFFSETOF_PRECODE_TYPE_MOV_RM_R 6 - -#define SIZEOF_PRECODE_BASE 8 - -#endif // HOST_64BIT - - #include <pshpack1.h> -// Invalid precode type -struct InvalidPrecode { - // int3 - static const int Type = 0xCC; -}; - - -// Regular precode -struct StubPrecode { - -#ifdef HOST_64BIT - static const BYTE Type = 0xF8; - // mov r10,pMethodDesc - // clc - // jmp Stub -#else - static const BYTE Type = 0xED; - // mov eax,pMethodDesc - // mov ebp,ebp - // jmp Stub -#endif // HOST_64BIT - - IN_TARGET_64BIT(USHORT m_movR10;) - IN_TARGET_32BIT(BYTE m_movEAX;) - TADDR
m_pMethodDesc; - IN_TARGET_32BIT(BYTE m_mov_rm_r;) - BYTE m_type; - BYTE m_jmp; - INT32 m_rel32; - - void Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator = NULL, BYTE type = StubPrecode::Type, TADDR target = NULL); - - TADDR GetMethodDesc() - { - LIMITED_METHOD_DAC_CONTRACT; - - return m_pMethodDesc; - } - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - - return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32)); - } -#ifndef DACCESS_COMPILE - void ResetTargetInterlocked() - { - CONTRACTL - { - THROWS; - GC_NOTRIGGER; - } - CONTRACTL_END; - - ExecutableWriterHolder rel32WriterHolder(&m_rel32, sizeof(INT32)); - rel32SetInterlocked(&m_rel32, rel32WriterHolder.GetRW(), GetPreStubEntryPoint(), (MethodDesc*)GetMethodDesc()); - } - - BOOL SetTargetInterlocked(TADDR target, TADDR expected) - { - CONTRACTL - { - THROWS; - GC_NOTRIGGER; - } - CONTRACTL_END; - - ExecutableWriterHolder rel32Holder(&m_rel32, 4); - return rel32SetInterlocked(&m_rel32, rel32Holder.GetRW(), target, expected, (MethodDesc*)GetMethodDesc()); - } -#endif // !DACCESS_COMPILE -}; -IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);) -IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);) -IN_TARGET_32BIT(static_assert_no_msg(offsetof(StubPrecode, m_mov_rm_r) == OFFSETOF_PRECODE_TYPE);) -IN_TARGET_32BIT(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_RM_R);) -typedef DPTR(StubPrecode) PTR_StubPrecode; - - -#ifdef HAS_NDIRECT_IMPORT_PRECODE - -// NDirect import precode -// (This is fake precode. VTable slot does not point to it.) 
-struct NDirectImportPrecode : StubPrecode { - -#ifdef HOST_64BIT - static const int Type = 0xF9; - // mov r10,pMethodDesc - // stc - // jmp NDirectImportThunk -#else - static const int Type = 0xC0; - // mov eax,pMethodDesc - // mov eax,eax - // jmp NDirectImportThunk -#endif // HOST_64BIT - - void Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); - - LPVOID GetEntrypoint() - { - LIMITED_METHOD_CONTRACT; - return this; - } -}; -typedef DPTR(NDirectImportPrecode) PTR_NDirectImportPrecode; - -#endif // HAS_NDIRECT_IMPORT_PRECODE - - -#ifdef HAS_FIXUP_PRECODE - -// Fixup precode is used in ngen images when the prestub does just one time fixup. -// The fixup precode is simple jump once patched. It does not have the two instruction overhead of regular precode. -struct FixupPrecode { - - static const int TypePrestub = 0x5E; - // The entrypoint has to be 8-byte aligned so that the "call PrecodeFixupThunk" can be patched to "jmp NativeCode" atomically. - // call PrecodeFixupThunk - // db TypePrestub (pop esi) - // db MethodDescChunkIndex - // db PrecodeChunkIndex - - static const int Type = 0x5F; - // After it has been patched to point to native code - // jmp NativeCode - // db Type (pop edi) - - BYTE m_op; - INT32 m_rel32; - BYTE m_type; - BYTE m_MethodDescChunkIndex; - BYTE m_PrecodeChunkIndex; -#ifdef HAS_FIXUP_PRECODE_CHUNKS - // Fixup precode chunk is associated with MethodDescChunk. 
The layout of the fixup precode chunk is: - // - // FixupPrecode Entrypoint PrecodeChunkIndex = 2 - // FixupPrecode Entrypoint PrecodeChunkIndex = 1 - // FixupPrecode Entrypoint PrecodeChunkIndex = 0 - // TADDR Base of MethodDescChunk -#else - TADDR m_pMethodDesc; -#endif - - void Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0); - -#ifdef HAS_FIXUP_PRECODE_CHUNKS - TADDR GetBase() - { - LIMITED_METHOD_CONTRACT; - SUPPORTS_DAC; - - return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); - } - - size_t GetSizeRW() - { - LIMITED_METHOD_CONTRACT; - - return GetBase() + sizeof(void*) - dac_cast(this); - } - - TADDR GetMethodDesc(); -#else // HAS_FIXUP_PRECODE_CHUNKS - TADDR GetMethodDesc() - { - LIMITED_METHOD_CONTRACT; - return m_pMethodDesc; - } -#endif // HAS_FIXUP_PRECODE_CHUNKS - -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - PCODE GetDynamicMethodPrecodeFixupJumpStub(); - PCODE GetDynamicMethodEntryJumpStub(); -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - - PCODE GetTarget() - { - LIMITED_METHOD_DAC_CONTRACT; - - return rel32Decode(PTR_HOST_MEMBER_TADDR(FixupPrecode, this, m_rel32)); - } - - void ResetTargetInterlocked(); - BOOL SetTargetInterlocked(TADDR target, TADDR expected); - - static BOOL IsFixupPrecodeByASM(TADDR addr) - { - LIMITED_METHOD_CONTRACT; - - return *dac_cast(addr) == X86_INSTR_JMP_REL32; - } - -#ifdef DACCESS_COMPILE - void EnumMemoryRegions(CLRDataEnumMemoryFlags flags); -#endif -}; -IN_TARGET_32BIT(static_assert_no_msg(offsetof(FixupPrecode, m_type) == OFFSETOF_PRECODE_TYPE)); -IN_TARGET_64BIT(static_assert_no_msg(offsetof(FixupPrecode, m_op) == OFFSETOF_PRECODE_TYPE);) -IN_TARGET_64BIT(static_assert_no_msg(offsetof(FixupPrecode, m_type) == OFFSETOF_PRECODE_TYPE_CALL_OR_JMP);) - -typedef DPTR(FixupPrecode) PTR_FixupPrecode; - -#endif // HAS_FIXUP_PRECODE - #ifdef HAS_THISPTR_RETBUF_PRECODE // 
Precode to stuffle this and retbuf for closed delegates over static methods with return buffer @@ -702,7 +466,7 @@ struct ThisPtrRetBufPrecode { #ifdef HOST_64BIT static const int Type = 0x90; #else - static const int Type = 0xC2; + static const int Type = 0x89; #endif // HOST_64BIT // mov regScratch,regArg0 @@ -738,7 +502,7 @@ struct ThisPtrRetBufPrecode { BOOL SetTargetInterlocked(TADDR target, TADDR expected); }; -IN_TARGET_32BIT(static_assert_no_msg(offsetof(ThisPtrRetBufPrecode, m_movArg1Scratch) + 1 == OFFSETOF_PRECODE_TYPE);) + typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; #endif // HAS_THISPTR_RETBUF_PRECODE diff --git a/src/coreclr/vm/i386/thunktemplates.S b/src/coreclr/vm/i386/thunktemplates.S new file mode 100644 index 0000000000000..eedd6ac1dbe2e --- /dev/null +++ b/src/coreclr/vm/i386/thunktemplates.S @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +PAGE_SIZE = 4096 + +//#define DATA_SLOT(stub, field) stub##Code + PAGE_SIZE + stub##Data__##field +#define DATA_SLOT(stub, field) PAGE_SIZE + stub##Data__##field + +.macro INDJMP target + .att_syntax + jmp *\target + .intel_syntax noprefix +.endm + +.macro INDCALL target + .att_syntax + call *\target + .intel_syntax noprefix +.endm + +.macro SLOT_ADDRESS_PATCH_LABEL stub, field, offset=-4, index="" + C_FUNC(\stub\()Code_\field\()_Offset\index) = .\offset-\stub\()Code + .global C_FUNC(\stub\()Code_\field\()_Offset\index) +.endm + +LEAF_ENTRY StubPrecodeCode + mov eax, dword ptr [DATA_SLOT(StubPrecode, MethodDesc)] +SLOT_ADDRESS_PATCH_LABEL StubPrecode, MethodDesc + INDJMP DATA_SLOT(StubPrecode, Target) +SLOT_ADDRESS_PATCH_LABEL StubPrecode, Target + nop +LEAF_END_MARKED StubPrecodeCode + +LEAF_ENTRY FixupPrecodeCode + INDJMP DATA_SLOT(FixupPrecode, Target) +SLOT_ADDRESS_PATCH_LABEL FixupPrecode, Target + 
mov eax, dword ptr [DATA_SLOT(FixupPrecode, MethodDesc)] +SLOT_ADDRESS_PATCH_LABEL FixupPrecode, MethodDesc + INDJMP DATA_SLOT(FixupPrecode, PrecodeFixupThunk) +SLOT_ADDRESS_PATCH_LABEL FixupPrecode, PrecodeFixupThunk +LEAF_END_MARKED FixupPrecodeCode + +LEAF_ENTRY CallCountingStubCode + mov eax, dword ptr [DATA_SLOT(CallCountingStub, RemainingCallCountCell)] +SLOT_ADDRESS_PATCH_LABEL CallCountingStub, RemainingCallCountCell + dec WORD PTR [eax] + je LOCAL_LABEL(CountReachedZero) + INDJMP DATA_SLOT(CallCountingStub, TargetForMethod) +SLOT_ADDRESS_PATCH_LABEL CallCountingStub, TargetForMethod +LOCAL_LABEL(CountReachedZero): + INDJMP DATA_SLOT(CallCountingStub, TargetForThresholdReached) +SLOT_ADDRESS_PATCH_LABEL CallCountingStub, TargetForThresholdReached +LEAF_END_MARKED CallCountingStubCode diff --git a/src/coreclr/vm/i386/thunktemplates.asm b/src/coreclr/vm/i386/thunktemplates.asm new file mode 100644 index 0000000000000..dfb1a4285c022 --- /dev/null +++ b/src/coreclr/vm/i386/thunktemplates.asm @@ -0,0 +1,60 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. 
+ + .586 + .model flat + +include <AsmMacros.inc> +include AsmConstants.inc + + option casemap:none + .code + +.686P +.XMM + +PAGE_SIZE EQU 4096 + +DATA_SLOT macro stub, field + exitm @CatStr(<_>, stub, <Code@0 + PAGE_SIZE + >, stub, <Data__>, field) +endm + +SLOT_ADDRESS_PATCH_LABEL macro stub, field, offset:=<-4>, index:=<> + LOCAL labelName, labelValue +labelName TEXTEQU @CatStr(<_>, stub, <Code_>, field, <_Offset>, index) +labelValue TEXTEQU @CatStr(<$>, offset, <-_>, stub, <Code@0>) + %labelName EQU labelValue + PUBLIC labelName +endm + +LEAF_ENTRY _StubPrecodeCode@0 + mov eax, dword ptr DATA_SLOT(StubPrecode, MethodDesc) +SLOT_ADDRESS_PATCH_LABEL StubPrecode, MethodDesc + jmp dword ptr DATA_SLOT(StubPrecode, Target) +SLOT_ADDRESS_PATCH_LABEL StubPrecode, Target +LEAF_END_MARKED _StubPrecodeCode@0 + +EXTERN _ThePreStub@0:PROC + +LEAF_ENTRY _FixupPrecodeCode@0 + jmp dword ptr DATA_SLOT(FixupPrecode, Target) +SLOT_ADDRESS_PATCH_LABEL FixupPrecode, Target + mov eax, dword ptr DATA_SLOT(FixupPrecode, MethodDesc) +SLOT_ADDRESS_PATCH_LABEL FixupPrecode, MethodDesc + jmp dword ptr DATA_SLOT(FixupPrecode, PrecodeFixupThunk) +SLOT_ADDRESS_PATCH_LABEL FixupPrecode, PrecodeFixupThunk +LEAF_END_MARKED _FixupPrecodeCode@0 + +LEAF_ENTRY _CallCountingStubCode@0 + mov eax, dword ptr DATA_SLOT(CallCountingStub, RemainingCallCountCell) +SLOT_ADDRESS_PATCH_LABEL CallCountingStub, RemainingCallCountCell + dec WORD PTR [eax] + je CountReachedZero + jmp dword ptr DATA_SLOT(CallCountingStub, TargetForMethod) +SLOT_ADDRESS_PATCH_LABEL CallCountingStub, TargetForMethod +CountReachedZero: + jmp dword ptr DATA_SLOT(CallCountingStub, TargetForThresholdReached) +SLOT_ADDRESS_PATCH_LABEL CallCountingStub, TargetForThresholdReached +LEAF_END_MARKED _CallCountingStubCode@0 + + end \ No newline at end of file diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 3aad7d7d63334..15fea181eccf4 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -9006,19 +9006,26 @@ void 
CEEInfo::getFunctionEntryPoint(CORINFO_METHOD_HANDLE ftnHnd, // Resolve methodImpl. ftn = ftn->GetMethodTable()->MapMethodDeclToMethodImpl(ftn); - ret = (void *)ftn->TryGetMultiCallableAddrOfCode(accessFlags); - - // TryGetMultiCallableAddrOfCode returns NULL if indirect access is desired - if (ret == NULL) + if (!ftn->IsFCall() && ftn->IsVersionableWithPrecode() && (ftn->GetPrecodeType() == PRECODE_FIXUP) && !ftn->IsPointingToStableNativeCode()) { - // should never get here for EnC methods or if interception via remoting stub is required - _ASSERTE(!ftn->IsEnCMethod()); - - ret = (void *)ftn->GetAddrOfSlot(); - + ret = ((FixupPrecode*)ftn->GetOrCreatePrecode())->GetTargetSlot(); accessType = IAT_PVALUE; } + else + { + ret = (void *)ftn->TryGetMultiCallableAddrOfCode(accessFlags); + + // TryGetMultiCallableAddrOfCode returns NULL if indirect access is desired + if (ret == NULL) + { + // should never get here for EnC methods or if interception via remoting stub is required + _ASSERTE(!ftn->IsEnCMethod()); + ret = (void *)ftn->GetAddrOfSlot(); + + accessType = IAT_PVALUE; + } + } #if defined(FEATURE_GDBJIT) CalledMethod * pCM = new CalledMethod(orig_ftn, ret, m_pCalledMethods); @@ -11126,6 +11133,23 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ } #endif + if (dynamicFtnNum == DYNAMIC_CORINFO_HELP_ISINSTANCEOFINTERFACE || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_ISINSTANCEOFANY || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_ISINSTANCEOFARRAY || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_ISINSTANCEOFCLASS || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTANY || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTARRAY || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTINTERFACE || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTCLASS || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTCLASS_SPECIAL || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_UNBOX) + { + Precode* pPrecode = Precode::GetPrecodeFromEntryPoint((PCODE)hlpDynamicFuncTable[dynamicFtnNum].pfnHelper); + 
_ASSERTE(pPrecode->GetType() == PRECODE_FIXUP); + *ppIndirection = ((FixupPrecode*)pPrecode)->GetTargetSlot(); + return NULL; + } + pfnHelper = hlpDynamicFuncTable[dynamicFtnNum].pfnHelper; #ifdef _PREFAST_ diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 657ff7b2b40ce..039bb3825837a 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -1116,11 +1116,6 @@ void LoaderAllocator::Init(BaseDomain *pDomain, BYTE *pExecutableHeapMemory) dwTotalReserveMemSize = (DWORD) ALIGN_UP(dwTotalReserveMemSize, VIRTUAL_ALLOC_RESERVE_GRANULARITY); -#if !defined(HOST_64BIT) - // Make sure that we reserve as little as possible on 32-bit to save address space - _ASSERTE(dwTotalReserveMemSize <= VIRTUAL_ALLOC_RESERVE_GRANULARITY); -#endif - BYTE * initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_InitialReservedMemForLoaderHeaps = initReservedMem; @@ -1161,7 +1156,7 @@ void LoaderAllocator::Init(BaseDomain *pDomain, BYTE *pExecutableHeapMemory) initReservedMem, dwExecutableHeapReserveSize, NULL, - TRUE /* Make heap executable */ + UnlockedLoaderHeap::HeapKind::Executable ); initReservedMem += dwExecutableHeapReserveSize; } @@ -1184,7 +1179,7 @@ void LoaderAllocator::Init(BaseDomain *pDomain, BYTE *pExecutableHeapMemory) initReservedMem, dwStubHeapReserveSize, STUBMANAGER_RANGELIST(StubLinkStubManager), - TRUE /* Make heap executable */); + UnlockedLoaderHeap::HeapKind::Executable); initReservedMem += dwStubHeapReserveSize; @@ -1194,6 +1189,22 @@ void LoaderAllocator::Init(BaseDomain *pDomain, BYTE *pExecutableHeapMemory) m_pPrecodeHeap = new (&m_PrecodeHeapInstance) CodeFragmentHeap(this, STUB_CODE_BLOCK_PRECODE); + m_pNewStubPrecodeHeap = new (&m_NewStubPrecodeHeapInstance) LoaderHeap(2 * GetOsPageSize(), + 2 * GetOsPageSize(), + PrecodeStubManager::g_pManager->GetStubPrecodeRangeList(), + UnlockedLoaderHeap::HeapKind::Interleaved, + false /* fUnlocked */, + 
StubPrecode::GenerateCodePage, + StubPrecode::CodeSize); + + m_pFixupPrecodeHeap = new (&m_FixupPrecodeHeapInstance) LoaderHeap(2 * GetOsPageSize(), + 2 * GetOsPageSize(), + PrecodeStubManager::g_pManager->GetFixupPrecodeRangeList(), + UnlockedLoaderHeap::HeapKind::Interleaved, + false /* fUnlocked */, + FixupPrecode::GenerateCodePage, + FixupPrecode::CodeSize); + // Initialize the EE marshaling data to NULL. m_pMarshalingData = NULL; @@ -1376,6 +1387,18 @@ void LoaderAllocator::Terminate() m_pPrecodeHeap = NULL; } + if (m_pFixupPrecodeHeap != NULL) + { + m_pFixupPrecodeHeap->~LoaderHeap(); + m_pFixupPrecodeHeap = NULL; + } + + if (m_pNewStubPrecodeHeap != NULL) + { + m_pNewStubPrecodeHeap->~LoaderHeap(); + m_pNewStubPrecodeHeap = NULL; + } + #ifdef FEATURE_READYTORUN if (m_pDynamicHelpersHeap != NULL) { diff --git a/src/coreclr/vm/loaderallocator.hpp b/src/coreclr/vm/loaderallocator.hpp index 0907d98e266d5..846ec6346d418 100644 --- a/src/coreclr/vm/loaderallocator.hpp +++ b/src/coreclr/vm/loaderallocator.hpp @@ -160,6 +160,8 @@ class LoaderAllocator BYTE m_HighFreqHeapInstance[sizeof(LoaderHeap)]; BYTE m_StubHeapInstance[sizeof(LoaderHeap)]; BYTE m_PrecodeHeapInstance[sizeof(CodeFragmentHeap)]; + BYTE m_FixupPrecodeHeapInstance[sizeof(LoaderHeap)]; + BYTE m_NewStubPrecodeHeapInstance[sizeof(LoaderHeap)]; PTR_LoaderHeap m_pLowFrequencyHeap; PTR_LoaderHeap m_pHighFrequencyHeap; PTR_LoaderHeap m_pStubHeap; // stubs for PInvoke, remoting, etc @@ -168,6 +170,8 @@ class LoaderAllocator #ifdef FEATURE_READYTORUN PTR_CodeFragmentHeap m_pDynamicHelpersHeap; #endif + PTR_LoaderHeap m_pFixupPrecodeHeap; + PTR_LoaderHeap m_pNewStubPrecodeHeap; //**************************************************************************************** OBJECTHANDLE m_hLoaderAllocatorObjectHandle; FuncPtrStubs * m_pFuncPtrStubs; // for GetMultiCallableAddrOfCode() @@ -443,6 +447,12 @@ class LoaderAllocator return m_pPrecodeHeap; } + PTR_LoaderHeap GetNewStubPrecodeHeap() + { + 
LIMITED_METHOD_CONTRACT; + return m_pNewStubPrecodeHeap; + } + // The executable heap is intended to only be used by the global loader allocator. // It refers to executable memory that is not associated with a rangelist. PTR_LoaderHeap GetExecutableHeap() @@ -451,6 +461,12 @@ class LoaderAllocator return m_pExecutableHeap; } + PTR_LoaderHeap GetFixupPrecodeHeap() + { + LIMITED_METHOD_CONTRACT; + return m_pFixupPrecodeHeap; + } + PTR_CodeFragmentHeap GetDynamicHelpersHeap(); FuncPtrStubs * GetFuncPtrStubs(); diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index 0f11c51766083..6449386918767 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -529,7 +529,6 @@ PTR_PCODE MethodDesc::GetAddrOfSlot() CONTRACTL_END; // Keep implementations of MethodDesc::GetMethodEntryPoint and MethodDesc::GetAddrOfSlot in sync! - if (HasNonVtableSlot()) { SIZE_T size = GetBaseSize(); @@ -1727,6 +1726,13 @@ MethodDescChunk *MethodDescChunk::CreateChunk(LoaderHeap *pHeap, DWORD methodDes DWORD maxMethodDescsPerChunk = (DWORD)(MethodDescChunk::MaxSizeOfMethodDescs / oneSize); + // Limit the maximum MethodDescs per chunk by the number of precodes that can fit to a single memory page, + // since we allocate consecutive temporary entry points for all MethodDescs in the whole chunk. 
+ DWORD maxPrecodesPerPage = Precode::GetMaxTemporaryEntryPointsCount(); + + if (maxPrecodesPerPage < maxMethodDescsPerChunk) + maxMethodDescsPerChunk = maxPrecodesPerPage; + if (methodDescCount == 0) methodDescCount = maxMethodDescsPerChunk; @@ -2119,7 +2125,20 @@ MethodDesc* NonVirtualEntry2MethodDesc(PCODE entryPoint) RangeSection* pRS = ExecutionManager::FindCodeRange(entryPoint, ExecutionManager::GetScanFlags()); if (pRS == NULL) + { + TADDR pInstr = PCODEToPINSTR(entryPoint); + if (PrecodeStubManager::g_pManager->GetStubPrecodeRangeList()->IsInRange(entryPoint)) + { + return (MethodDesc*)((StubPrecode*)pInstr)->GetMethodDesc(); + } + + if (PrecodeStubManager::g_pManager->GetFixupPrecodeRangeList()->IsInRange(entryPoint)) + { + return (MethodDesc*)((FixupPrecode*)pInstr)->GetMethodDesc(); + } + return NULL; + } MethodDesc* pMD; if (pRS->pjit->JitCodeToMethodInfo(pRS, entryPoint, &pMD, NULL)) @@ -2328,8 +2347,8 @@ BOOL MethodDesc::MayHaveNativeCode() { CONTRACTL { - THROWS; - GC_TRIGGERS; + NOTHROW; + GC_NOTRIGGER; MODE_ANY; } CONTRACTL_END @@ -2445,7 +2464,7 @@ MethodDesc* MethodDesc::GetMethodDescFromStubAddr(PCODE addr, BOOL fSpeculative // Otherwise this must be some kind of precode // - Precode* pPrecode = Precode::GetPrecodeFromEntryPoint(addr, fSpeculative); + PTR_Precode pPrecode = Precode::GetPrecodeFromEntryPoint(addr, fSpeculative); PREFIX_ASSUME(fSpeculative || (pPrecode != NULL)); if (pPrecode != NULL) { @@ -3010,7 +3029,6 @@ Precode* MethodDesc::GetOrCreatePrecode() AllocMemTracker amt; Precode* pPrecode = Precode::Allocate(requiredType, this, GetLoaderAllocator(), &amt); - if (FastInterlockCompareExchangePointer(pSlot, pPrecode->GetEntryPoint(), tempEntry) == tempEntry) amt.SuppressRelease(); } diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index 187356e74e096..f7ad94802824e 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -283,8 +283,8 @@ class MethodDesc { CONTRACTL { - THROWS; - GC_TRIGGERS; + 
NOTHROW; + GC_NOTRIGGER; MODE_ANY; } CONTRACTL_END @@ -2722,7 +2722,7 @@ class NDirectImportThunkGlue PVOID m_dummy; // Dummy field to make the alignment right public: - LPVOID GetEntrypoint() + LPVOID GetEntryPoint() { LIMITED_METHOD_CONTRACT; return NULL; diff --git a/src/coreclr/vm/peimage.cpp b/src/coreclr/vm/peimage.cpp index 946943cc3cd61..f9476846130ec 100644 --- a/src/coreclr/vm/peimage.cpp +++ b/src/coreclr/vm/peimage.cpp @@ -505,7 +505,7 @@ LoaderHeap *PEImage::IJWFixupData::GetThunkHeap() LoaderHeap *pNewHeap = new LoaderHeap(VIRTUAL_ALLOC_RESERVE_GRANULARITY, // DWORD dwReserveBlockSize 0, // DWORD dwCommitBlockSize ThunkHeapStubManager::g_pManager->GetRangeList(), - TRUE); // BOOL fMakeExecutable + UnlockedLoaderHeap::HeapKind::Executable); if (FastInterlockCompareExchangePointer((PVOID*)&m_DllThunkHeap, (VOID*)pNewHeap, (VOID*)0) != 0) { diff --git a/src/coreclr/vm/precode.cpp b/src/coreclr/vm/precode.cpp index ebccb7be43d47..ad95a538e9528 100644 --- a/src/coreclr/vm/precode.cpp +++ b/src/coreclr/vm/precode.cpp @@ -169,19 +169,6 @@ BOOL Precode::IsCorrectMethodDesc(MethodDesc * pMD) if (pMDfromPrecode == pMD) return TRUE; -#ifdef HAS_FIXUP_PRECODE_CHUNKS - if (pMDfromPrecode == NULL) - { - PrecodeType precodeType = GetType(); - -#ifdef HAS_FIXUP_PRECODE_CHUNKS - // We do not keep track of the MethodDesc in every kind of fixup precode - if (precodeType == PRECODE_FIXUP) - return TRUE; -#endif - } -#endif // HAS_FIXUP_PRECODE_CHUNKS - return FALSE; } @@ -199,7 +186,7 @@ BOOL Precode::IsPointingToPrestub(PCODE target) return TRUE; #ifdef HAS_FIXUP_PRECODE - if (IsPointingTo(target, GetEEFuncEntryPoint(PrecodeFixupThunk))) + if (IsPointingTo(target, ((PCODE)this + FixupPrecode::FixupCodeOffset))) return TRUE; #endif @@ -223,46 +210,15 @@ Precode* Precode::GetPrecodeForTemporaryEntryPoint(TADDR temporaryEntryPoints, i { WRAPPER_NO_CONTRACT; PrecodeType t = PTR_Precode(temporaryEntryPoints)->GetType(); -#ifdef HAS_FIXUP_PRECODE_CHUNKS - if (t == 
PRECODE_FIXUP) - { - return PTR_Precode(temporaryEntryPoints + index * sizeof(FixupPrecode)); - } -#endif SIZE_T oneSize = SizeOfTemporaryEntryPoint(t); return PTR_Precode(temporaryEntryPoints + index * oneSize); } -SIZE_T Precode::SizeOfTemporaryEntryPoints(PrecodeType t, bool preallocateJumpStubs, int count) +SIZE_T Precode::SizeOfTemporaryEntryPoints(PrecodeType t, int count) { WRAPPER_NO_CONTRACT; SUPPORTS_DAC; -#ifdef HAS_FIXUP_PRECODE_CHUNKS - if (t == PRECODE_FIXUP) - { - SIZE_T size = count * sizeof(FixupPrecode) + sizeof(PTR_MethodDesc); - -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - if (preallocateJumpStubs) - { - // For dynamic methods, space for jump stubs is allocated along with the precodes as part of the temporary entry - // points block. The first jump stub begins immediately after the PTR_MethodDesc. Aside from a jump stub per - // precode, an additional shared precode fixup jump stub is also allocated (see - // GetDynamicMethodPrecodeFixupJumpStub()). - size += ((SIZE_T)count + 1) * BACK_TO_BACK_JUMP_ALLOCATE_SIZE; - } -#else // !FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - _ASSERTE(!preallocateJumpStubs); -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - - return size; - } - else - { - _ASSERTE(!preallocateJumpStubs); - } -#endif SIZE_T oneSize = SizeOfTemporaryEntryPoint(t); return count * oneSize; } @@ -273,14 +229,7 @@ SIZE_T Precode::SizeOfTemporaryEntryPoints(TADDR temporaryEntryPoints, int count SUPPORTS_DAC; PrecodeType precodeType = PTR_Precode(temporaryEntryPoints)->GetType(); -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - bool preallocateJumpStubs = - precodeType == PRECODE_FIXUP && - ((PTR_MethodDesc)((PTR_FixupPrecode)temporaryEntryPoints)->GetMethodDesc())->IsLCGMethod(); -#else // !FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - bool preallocateJumpStubs = false; -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - return 
SizeOfTemporaryEntryPoints(precodeType, preallocateJumpStubs, count); + return SizeOfTemporaryEntryPoints(precodeType, count); } #ifndef DACCESS_COMPILE @@ -297,24 +246,27 @@ Precode* Precode::Allocate(PrecodeType t, MethodDesc* pMD, } CONTRACTL_END; - SIZE_T size; + SIZE_T size = Precode::SizeOf(t); + Precode* pPrecode; -#ifdef HAS_FIXUP_PRECODE_CHUNKS if (t == PRECODE_FIXUP) { - size = sizeof(FixupPrecode) + sizeof(PTR_MethodDesc); + pPrecode = (Precode*)pamTracker->Track(pLoaderAllocator->GetFixupPrecodeHeap()->AllocAlignedMem(size, 1)); + pPrecode->Init(pPrecode, t, pMD, pLoaderAllocator); } - else -#endif + else if (t == PRECODE_STUB || t == PRECODE_NDIRECT_IMPORT) { - size = Precode::SizeOf(t); + pPrecode = (Precode*)pamTracker->Track(pLoaderAllocator->GetNewStubPrecodeHeap()->AllocAlignedMem(size, 1)); + pPrecode->Init(pPrecode, t, pMD, pLoaderAllocator); } + else + { + pPrecode = (Precode*)pamTracker->Track(pLoaderAllocator->GetPrecodeHeap()->AllocAlignedMem(size, AlignOf(t))); + ExecutableWriterHolder precodeWriterHolder(pPrecode, size); + precodeWriterHolder.GetRW()->Init(pPrecode, t, pMD, pLoaderAllocator); + ClrFlushInstructionCache(pPrecode, size); - Precode* pPrecode = (Precode*)pamTracker->Track(pLoaderAllocator->GetPrecodeHeap()->AllocAlignedMem(size, AlignOf(t))); - ExecutableWriterHolder precodeWriterHolder(pPrecode, size); - precodeWriterHolder.GetRW()->Init(pPrecode, t, pMD, pLoaderAllocator); - - ClrFlushInstructionCache(pPrecode, size); + } return pPrecode; } @@ -424,24 +376,20 @@ void Precode::Reset() WRAPPER_NO_CONTRACT; MethodDesc* pMD = GetMethodDesc(); - SIZE_T size; + PrecodeType t = GetType(); -#ifdef HAS_FIXUP_PRECODE_CHUNKS + SIZE_T size = Precode::SizeOf(t); + if (t == PRECODE_FIXUP) { - // The writeable size the Init method accesses is dynamic depending on - // the FixupPrecode members. 
- size = ((FixupPrecode*)this)->GetSizeRW(); + Init(this, t, pMD, pMD->GetLoaderAllocator()); } else -#endif { - size = Precode::SizeOf(t); + ExecutableWriterHolder precodeWriterHolder(this, size); + precodeWriterHolder.GetRW()->Init(this, t, pMD, pMD->GetLoaderAllocator()); + ClrFlushInstructionCache(this, SizeOf()); } - - ExecutableWriterHolder precodeWriterHolder(this, size); - precodeWriterHolder.GetRW()->Init(this, GetType(), pMD, pMD->GetLoaderAllocator()); - ClrFlushInstructionCache(this, SizeOf()); } /* static */ @@ -490,12 +438,6 @@ TADDR Precode::AllocateTemporaryEntryPoints(MethodDescChunk * pChunk, { t = PRECODE_FIXUP; -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - if (pFirstMD->IsLCGMethod()) - { - preallocateJumpStubs = true; - } -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS } else { @@ -503,7 +445,7 @@ TADDR Precode::AllocateTemporaryEntryPoints(MethodDescChunk * pChunk, } #endif // HAS_FIXUP_PRECODE - SIZE_T totalSize = SizeOfTemporaryEntryPoints(t, preallocateJumpStubs, count); + SIZE_T totalSize = SizeOfTemporaryEntryPoints(t, count); #ifdef HAS_COMPACT_ENTRYPOINTS // Note that these are just best guesses to save memory. 
If we guessed wrong, @@ -523,70 +465,52 @@ TADDR Precode::AllocateTemporaryEntryPoints(MethodDescChunk * pChunk, return NULL; #endif - TADDR temporaryEntryPoints = (TADDR)pamTracker->Track(pLoaderAllocator->GetPrecodeHeap()->AllocAlignedMem(totalSize, AlignOf(t))); - ExecutableWriterHolder entryPointsWriterHolder((void*)temporaryEntryPoints, totalSize); - -#ifdef HAS_FIXUP_PRECODE_CHUNKS - if (t == PRECODE_FIXUP) + TADDR temporaryEntryPoints; + SIZE_T oneSize = SizeOfTemporaryEntryPoint(t); + MethodDesc * pMD = pChunk->GetFirstMethodDesc(); + + if (t == PRECODE_FIXUP || t == PRECODE_STUB) { -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - PCODE precodeFixupJumpStubRW = NULL; - PCODE precodeFixupJumpStub = NULL; - if (preallocateJumpStubs) + LoaderHeap *pStubHeap; + if (t == PRECODE_FIXUP) + { + pStubHeap = pLoaderAllocator->GetFixupPrecodeHeap(); + } + else { - // Emit the jump for the precode fixup jump stub now. This jump stub immediately follows the MethodDesc (see - // GetDynamicMethodPrecodeFixupJumpStub()). - precodeFixupJumpStub = temporaryEntryPoints + count * sizeof(FixupPrecode) + sizeof(PTR_MethodDesc); - // TODO: how to get the size? 
- precodeFixupJumpStubRW = (TADDR)entryPointsWriterHolder.GetRW() + count * sizeof(FixupPrecode) + sizeof(PTR_MethodDesc); - emitBackToBackJump((BYTE*)precodeFixupJumpStub, (BYTE*)precodeFixupJumpStubRW, (LPVOID)GetEEFuncEntryPoint(PrecodeFixupThunk)); + pStubHeap = pLoaderAllocator->GetNewStubPrecodeHeap(); } -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS + temporaryEntryPoints = (TADDR)pamTracker->Track(pStubHeap->AllocAlignedMem(totalSize, 1)); TADDR entryPoint = temporaryEntryPoints; - TADDR entryPointRW = (TADDR)entryPointsWriterHolder.GetRW(); - - MethodDesc * pMD = pChunk->GetFirstMethodDesc(); for (int i = 0; i < count; i++) { - ((FixupPrecode *)entryPointRW)->Init((FixupPrecode*)entryPoint, pMD, pLoaderAllocator, pMD->GetMethodDescIndex(), (count - 1) - i); - -#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS - _ASSERTE( - !preallocateJumpStubs || - !pMD->IsLCGMethod() || - ((FixupPrecode *)entryPoint)->GetDynamicMethodPrecodeFixupJumpStub() == precodeFixupJumpStub); -#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS + ((Precode *)entryPoint)->Init((Precode *)entryPoint, t, pMD, pLoaderAllocator); _ASSERTE((Precode *)entryPoint == GetPrecodeForTemporaryEntryPoint(temporaryEntryPoints, i)); - entryPoint += sizeof(FixupPrecode); - entryPointRW += sizeof(FixupPrecode); + entryPoint += oneSize; pMD = (MethodDesc *)(dac_cast(pMD) + pMD->SizeOf()); } - -#ifdef FEATURE_PERFMAP - PerfMap::LogStubs(__FUNCTION__, "PRECODE_FIXUP", (PCODE)temporaryEntryPoints, count * sizeof(FixupPrecode)); -#endif - ClrFlushInstructionCache((LPVOID)temporaryEntryPoints, count * sizeof(FixupPrecode)); - - return temporaryEntryPoints; } -#endif - - SIZE_T oneSize = SizeOfTemporaryEntryPoint(t); - TADDR entryPoint = temporaryEntryPoints; - TADDR entryPointRW = (TADDR)entryPointsWriterHolder.GetRW(); - MethodDesc * pMD = pChunk->GetFirstMethodDesc(); - for (int i = 0; i < count; i++) + else { - ((Precode *)entryPointRW)->Init((Precode *)entryPoint, t, 
pMD, pLoaderAllocator); + _ASSERTE(FALSE); + temporaryEntryPoints = (TADDR)pamTracker->Track(pLoaderAllocator->GetPrecodeHeap()->AllocAlignedMem(totalSize, AlignOf(t))); + ExecutableWriterHolder entryPointsWriterHolder((void*)temporaryEntryPoints, totalSize); - _ASSERTE((Precode *)entryPoint == GetPrecodeForTemporaryEntryPoint(temporaryEntryPoints, i)); - entryPoint += oneSize; - entryPointRW += oneSize; + TADDR entryPoint = temporaryEntryPoints; + TADDR entryPointRW = (TADDR)entryPointsWriterHolder.GetRW(); + for (int i = 0; i < count; i++) + { + ((Precode *)entryPointRW)->Init((Precode *)entryPoint, t, pMD, pLoaderAllocator); + + _ASSERTE((Precode *)entryPoint == GetPrecodeForTemporaryEntryPoint(temporaryEntryPoints, i)); + entryPoint += oneSize; + entryPointRW += oneSize; - pMD = (MethodDesc *)(dac_cast(pMD) + pMD->SizeOf()); + pMD = (MethodDesc *)(dac_cast(pMD) + pMD->SizeOf()); + } } #ifdef FEATURE_PERFMAP @@ -606,15 +530,282 @@ void Precode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) SUPPORTS_DAC; PrecodeType t = GetType(); -#ifdef HAS_FIXUP_PRECODE_CHUNKS - if (t == PRECODE_FIXUP) + DacEnumMemoryRegion(GetStart(), SizeOf(t)); +} +#endif + +#ifdef HAS_FIXUP_PRECODE + +#ifdef DACCESS_COMPILE +void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags) +{ + SUPPORTS_DAC; + DacEnumMemoryRegion(dac_cast(this), sizeof(FixupPrecode)); + DacEnumMemoryRegion(dac_cast(GetData()), sizeof(FixupPrecodeData)); +} +#endif // DACCESS_COMPILE + +#endif // HAS_FIXUP_PRECODE + +#ifndef DACCESS_COMPILE + +void StubPrecode::Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */, + BYTE type /* = StubPrecode::Type */, TADDR target /* = NULL */) +{ + WRAPPER_NO_CONTRACT; + + StubPrecodeData *pStubData = GetData(); + + if (pLoaderAllocator != NULL) { - AsFixupPrecode()->EnumMemoryRegions(flags); - return; + // Use pMD == NULL in all precode initialization methods to allocate the initial jump stub in non-dynamic heap + // that has 
the same lifetime as the precode itself + if (target == NULL) + target = GetPreStubEntryPoint(); + pStubData->Target = target; + } + + pStubData->MethodDesc = pMD; + pStubData->Type = type; +} + +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + #define ENUM_PAGE_SIZE(size) \ + extern "C" void StubPrecodeCode##size(); \ + extern "C" void StubPrecodeCode##size##_End(); + ENUM_PAGE_SIZES + #undef ENUM_PAGE_SIZE +#else +extern "C" void StubPrecodeCode(); +extern "C" void StubPrecodeCode_End(); #endif - DacEnumMemoryRegion(GetStart(), SizeOf(t)); +#ifdef TARGET_X86 +extern "C" size_t StubPrecodeCode_MethodDesc_Offset; +extern "C" size_t StubPrecodeCode_Target_Offset; + +#define SYMBOL_VALUE(name) ((size_t)&name) + +#endif + +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) +void (*StubPrecode::StubPrecodeCode)(); +void (*StubPrecode::StubPrecodeCode_End)(); +#endif + +void StubPrecode::StaticInitialize() +{ +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + #define ENUM_PAGE_SIZE(size) \ + case size: \ + StubPrecodeCode = StubPrecodeCode##size; \ + StubPrecodeCode_End = StubPrecodeCode##size##_End; \ + _ASSERTE(((BYTE*)StubPrecodeCode##size##_End - (BYTE*)StubPrecodeCode##size) <= StubPrecode::CodeSize); \ + break; + + int pageSize = GetOsPageSize(); + switch (pageSize) + { + ENUM_PAGE_SIZES + default: + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Unsupported OS page size")); + } + #undef ENUM_PAGE_SIZE +#else + _ASSERTE(((BYTE*)StubPrecodeCode_End - (BYTE*)StubPrecodeCode) <= StubPrecode::CodeSize); +#endif + _ASSERTE((*((BYTE*)PCODEToPINSTR((PCODE)StubPrecodeCode) + OFFSETOF_PRECODE_TYPE)) == StubPrecode::Type); +} + +void StubPrecode::GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX) +{ + int pageSize = GetOsPageSize(); + +#ifdef TARGET_X86 + int totalCodeSize = (pageSize / StubPrecode::CodeSize) * StubPrecode::CodeSize; + for (int i = 0; i < totalCodeSize; i += StubPrecode::CodeSize) + { + memcpy(pageBase + i, (const 
void*)StubPrecodeCode, (BYTE*)StubPrecodeCode_End - (BYTE*)StubPrecodeCode); + + BYTE* pTargetSlot = pageBaseRX + i + pageSize + offsetof(StubPrecodeData, Target); + *(BYTE**)(pageBase + i + SYMBOL_VALUE(StubPrecodeCode_Target_Offset)) = pTargetSlot; + + BYTE* pMethodDescSlot = pageBaseRX + i + pageSize + offsetof(StubPrecodeData, MethodDesc); + *(BYTE**)(pageBase + i + SYMBOL_VALUE(StubPrecodeCode_MethodDesc_Offset)) = pMethodDescSlot; + } +#else // TARGET_X86 + FillStubCodePage(pageBase, (const void*)PCODEToPINSTR((PCODE)StubPrecodeCode), StubPrecode::CodeSize, pageSize); +#endif // TARGET_X86 +} + +BOOL StubPrecode::IsStubPrecodeByASM(PCODE addr) +{ + BYTE *pInstr = (BYTE*)PCODEToPINSTR(addr); +#ifdef TARGET_X86 + return *pInstr == *(BYTE*)(StubPrecodeCode) && + *(DWORD*)(pInstr + SYMBOL_VALUE(StubPrecodeCode_MethodDesc_Offset)) == (DWORD)(pInstr + GetOsPageSize() + offsetof(StubPrecodeData, MethodDesc)) && + *(WORD*)(pInstr + 5) == *(WORD*)((BYTE*)StubPrecodeCode + 5) && + *(DWORD*)(pInstr + SYMBOL_VALUE(StubPrecodeCode_Target_Offset)) == (DWORD)(pInstr + GetOsPageSize() + offsetof(StubPrecodeData, Target)); +#else // TARGET_X86 + return memcmp(pInstr, (void*)PCODEToPINSTR((PCODE)StubPrecodeCode), (BYTE*)StubPrecodeCode_End - (BYTE*)StubPrecodeCode) == 0; +#endif // TARGET_X86 +} + +#ifdef HAS_NDIRECT_IMPORT_PRECODE + +void NDirectImportPrecode::Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) +{ + WRAPPER_NO_CONTRACT; + StubPrecode::Init(pPrecodeRX, pMD, pLoaderAllocator, NDirectImportPrecode::Type, GetEEFuncEntryPoint(NDirectImportThunk)); +} + +#endif // HAS_NDIRECT_IMPORT_PRECODE + +#ifdef HAS_FIXUP_PRECODE +void FixupPrecode::Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator) +{ + WRAPPER_NO_CONTRACT; + + _ASSERTE(pPrecodeRX == this); + + FixupPrecodeData *pData = GetData(); + pData->MethodDesc = pMD; + + _ASSERTE(GetMethodDesc() == (TADDR)pMD); + + pData->Target = (PCODE)pPrecodeRX 
+ FixupPrecode::FixupCodeOffset; + pData->PrecodeFixupThunk = GetPreStubEntryPoint(); } + +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + #define ENUM_PAGE_SIZE(size) \ + extern "C" void FixupPrecodeCode##size(); \ + extern "C" void FixupPrecodeCode##size##_End(); + ENUM_PAGE_SIZES + #undef ENUM_PAGE_SIZE +#else +extern "C" void FixupPrecodeCode(); +extern "C" void FixupPrecodeCode_End(); #endif +#ifdef TARGET_X86 +extern "C" size_t FixupPrecodeCode_MethodDesc_Offset; +extern "C" size_t FixupPrecodeCode_Target_Offset; +extern "C" size_t FixupPrecodeCode_PrecodeFixupThunk_Offset; +#endif + +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) +void (*FixupPrecode::FixupPrecodeCode)(); +void (*FixupPrecode::FixupPrecodeCode_End)(); +#endif + +void FixupPrecode::StaticInitialize() +{ +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + #define ENUM_PAGE_SIZE(size) \ + case size: \ + FixupPrecodeCode = FixupPrecodeCode##size; \ + FixupPrecodeCode_End = FixupPrecodeCode##size##_End; \ + _ASSERTE(((BYTE*)FixupPrecodeCode##size##_End - (BYTE*)FixupPrecodeCode##size) <= FixupPrecode::CodeSize); \ + break; + + int pageSize = GetOsPageSize(); + + switch (pageSize) + { + ENUM_PAGE_SIZES + default: + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Unsupported OS page size")); + } + #undef ENUM_PAGE_SIZE +#else + _ASSERTE((BYTE*)FixupPrecodeCode_End - (BYTE*)FixupPrecodeCode <= FixupPrecode::CodeSize); +#endif + _ASSERTE(*((BYTE*)PCODEToPINSTR((PCODE)FixupPrecodeCode) + OFFSETOF_PRECODE_TYPE) == FixupPrecode::Type); +} + +void FixupPrecode::GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX) +{ + int pageSize = GetOsPageSize(); + +#ifdef TARGET_X86 + int totalCodeSize = (pageSize / FixupPrecode::CodeSize) * FixupPrecode::CodeSize; + + for (int i = 0; i < totalCodeSize; i += FixupPrecode::CodeSize) + { + memcpy(pageBase + i, (const void*)FixupPrecodeCode, FixupPrecode::CodeSize); + BYTE* pTargetSlot = pageBaseRX + i + pageSize + offsetof(FixupPrecodeData, 
Target); + *(BYTE**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_Target_Offset)) = pTargetSlot; + + BYTE* pMethodDescSlot = pageBaseRX + i + pageSize + offsetof(FixupPrecodeData, MethodDesc); + *(BYTE**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_MethodDesc_Offset)) = pMethodDescSlot; + + BYTE* pPrecodeFixupThunkSlot = pageBaseRX + i + pageSize + offsetof(FixupPrecodeData, PrecodeFixupThunk); + *(BYTE**)(pageBase + i + SYMBOL_VALUE(FixupPrecodeCode_PrecodeFixupThunk_Offset)) = pPrecodeFixupThunkSlot; + } +#else // TARGET_X86 + FillStubCodePage(pageBase, (const void*)PCODEToPINSTR((PCODE)FixupPrecodeCode), FixupPrecode::CodeSize, pageSize); +#endif // TARGET_X86 +} + +BOOL FixupPrecode::IsFixupPrecodeByASM(PCODE addr) +{ + BYTE *pInstr = (BYTE*)PCODEToPINSTR(addr); +#ifdef TARGET_X86 + return + *(WORD*)(pInstr) == *(WORD*)(FixupPrecodeCode) && + *(DWORD*)(pInstr + SYMBOL_VALUE(FixupPrecodeCode_Target_Offset)) == (DWORD)(pInstr + GetOsPageSize() + offsetof(FixupPrecodeData, Target)) && + *(pInstr + 6) == *((BYTE*)FixupPrecodeCode + 6) && + *(DWORD*)(pInstr + SYMBOL_VALUE(FixupPrecodeCode_MethodDesc_Offset)) == (DWORD)(pInstr + GetOsPageSize() + offsetof(FixupPrecodeData, MethodDesc)) && + *(WORD*)(pInstr + 11) == *(WORD*)((BYTE*)FixupPrecodeCode + 11) && + *(DWORD*)(pInstr + SYMBOL_VALUE(FixupPrecodeCode_PrecodeFixupThunk_Offset)) == (DWORD)(pInstr + GetOsPageSize() + offsetof(FixupPrecodeData, PrecodeFixupThunk)); +#else // TARGET_X86 + return memcmp(pInstr, (void*)PCODEToPINSTR((PCODE)FixupPrecodeCode), (BYTE*)FixupPrecodeCode_End - (BYTE*)FixupPrecodeCode) == 0; +#endif // TARGET_X86 +} + +#endif // HAS_FIXUP_PRECODE + +BOOL DoesSlotCallPrestub(PCODE pCode) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + PRECONDITION(pCode != GetPreStubEntryPoint()); + } CONTRACTL_END; + + TADDR pInstr = dac_cast<TADDR>(PCODEToPINSTR(pCode)); + +#ifdef HAS_COMPACT_ENTRYPOINTS + if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL) + { + return TRUE; + } +#endif + 
+ + if (!IS_ALIGNED(pInstr, PRECODE_ALIGNMENT)) + { + return FALSE; + } + + //FixupPrecode +#if defined(HAS_FIXUP_PRECODE) + if (FixupPrecode::IsFixupPrecodeByASM(pCode)) + { + PCODE pTarget = dac_cast<PTR_FixupPrecode>(pInstr)->GetTarget(); + + return pTarget == PCODEToPINSTR(pCode) + FixupPrecode::FixupCodeOffset; + } +#endif + + // StubPrecode + if (StubPrecode::IsStubPrecodeByASM(pCode)) + { + pCode = dac_cast<PTR_StubPrecode>(pInstr)->GetTarget(); + return pCode == GetPreStubEntryPoint(); + } + + return FALSE; +} + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/vm/precode.h b/src/coreclr/vm/precode.h index 494747175a942..da1dfd593dad7 100644 --- a/src/coreclr/vm/precode.h +++ b/src/coreclr/vm/precode.h @@ -9,12 +9,315 @@ #ifndef __PRECODE_H__ #define __PRECODE_H__ -typedef DPTR(class Precode) PTR_Precode; - -#ifndef PRECODE_ALIGNMENT #define PRECODE_ALIGNMENT sizeof(void*) + +#if defined(HOST_AMD64) + +#define OFFSETOF_PRECODE_TYPE 0 +#define OFFSETOF_PRECODE_TYPE_CALL_OR_JMP 5 +#define OFFSETOF_PRECODE_TYPE_MOV_R10 10 + +#define SIZEOF_PRECODE_BASE 16 + +#elif defined(HOST_X86) + +EXTERN_C VOID STDCALL PrecodeRemotingThunk(); + +#define OFFSETOF_PRECODE_TYPE 0 +#define OFFSETOF_PRECODE_TYPE_CALL_OR_JMP 5 +#define OFFSETOF_PRECODE_TYPE_MOV_RM_R 6 + +#define SIZEOF_PRECODE_BASE 8 + +#elif defined(HOST_ARM64) + +#define SIZEOF_PRECODE_BASE CODE_SIZE_ALIGN +#define OFFSETOF_PRECODE_TYPE 0 + +#elif defined(HOST_ARM) + +#define SIZEOF_PRECODE_BASE CODE_SIZE_ALIGN +#define OFFSETOF_PRECODE_TYPE 3 + +#endif // HOST_AMD64 + +#ifndef DACCESS_COMPILE +// Given an address in a slot, figure out if the prestub will be called +BOOL DoesSlotCallPrestub(PCODE pCode); +#endif + +#include <pshpack1.h> + +// Invalid precode type +struct InvalidPrecode +{ +#if defined(HOST_AMD64) || defined(HOST_X86) + // int3 + static const int Type = 0xCC; +#elif defined(HOST_ARM64) || defined(HOST_ARM) + static const int Type = 0; +#endif +}; + +struct StubPrecodeData +{ + PTR_MethodDesc MethodDesc; + PCODE Target; + BYTE Type; +}; + 
+typedef DPTR(StubPrecodeData) PTR_StubPrecodeData; + +#if !(defined(TARGET_ARM64) && defined(TARGET_UNIX)) +extern "C" void StubPrecodeCode(); +extern "C" void StubPrecodeCode_End(); +#endif + +// Regular precode +struct StubPrecode +{ +#if defined(HOST_AMD64) + static const BYTE Type = 0x4C; + static const int CodeSize = 24; +#elif defined(HOST_X86) + static const BYTE Type = 0xA1; + static const int CodeSize = 24; +#elif defined(HOST_ARM64) + static const int Type = 0x4A; + static const int CodeSize = 24; +#elif defined(HOST_ARM) + static const int Type = 0xCF; + static const int CodeSize = 12; +#endif // HOST_AMD64 + + BYTE m_code[CodeSize]; + +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + static void (*StubPrecodeCode)(); + static void (*StubPrecodeCode_End)(); +#endif + + void Init(StubPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator = NULL, BYTE type = StubPrecode::Type, TADDR target = NULL); + + static void StaticInitialize(); + + PTR_StubPrecodeData GetData() const + { + LIMITED_METHOD_CONTRACT; + return dac_cast(dac_cast(this) + GetOsPageSize()); + } + + TADDR GetMethodDesc() + { + LIMITED_METHOD_DAC_CONTRACT; + + return dac_cast(GetData()->MethodDesc); + } + + PCODE GetTarget() + { + LIMITED_METHOD_DAC_CONTRACT; + + return GetData()->Target; + } + + BYTE GetType() + { + return GetData()->Type; + } + +#ifndef DACCESS_COMPILE + static BOOL IsStubPrecodeByASM(PCODE addr); + + void ResetTargetInterlocked() + { + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + } + CONTRACTL_END; + + StubPrecodeData *pData = GetData(); + InterlockedExchangeT(&pData->Target, GetPreStubEntryPoint()); + } + + BOOL SetTargetInterlocked(TADDR target, TADDR expected) + { + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + } + CONTRACTL_END; + + StubPrecodeData *pData = GetData(); + return InterlockedCompareExchangeT(&pData->Target, (PCODE)target, (PCODE)expected) == expected; + } + + static void GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX); + +#endif // 
!DACCESS_COMPILE +}; + +typedef DPTR(StubPrecode) PTR_StubPrecode; + + +#ifdef HAS_NDIRECT_IMPORT_PRECODE + +// NDirect import precode +// (This is fake precode. VTable slot does not point to it.) +struct NDirectImportPrecode : StubPrecode +{ + static const int Type = 0x01; + + void Init(NDirectImportPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); + + LPVOID GetEntrypoint() + { + LIMITED_METHOD_CONTRACT; + return (LPVOID)PINSTRToPCODE(dac_cast(this)); + } +}; +typedef DPTR(NDirectImportPrecode) PTR_NDirectImportPrecode; + +#endif // HAS_NDIRECT_IMPORT_PRECODE + + +#ifdef HAS_FIXUP_PRECODE + +struct FixupPrecodeData +{ + PCODE Target; + MethodDesc *MethodDesc; + PCODE PrecodeFixupThunk; +}; + +typedef DPTR(FixupPrecodeData) PTR_FixupPrecodeData; + +#if !(defined(TARGET_ARM64) && defined(TARGET_UNIX)) +extern "C" void FixupPrecodeCode(); +extern "C" void FixupPrecodeCode_End(); +#endif + +// Fixup precode is used in ngen images when the prestub does just one time fixup. +// The fixup precode is simple jump once patched. It does not have the two instruction overhead of regular precode. 
+struct FixupPrecode +{ +#if defined(HOST_AMD64) + static const int Type = 0xFF; + static const int CodeSize = 24; + static const int FixupCodeOffset = 6; +#elif defined(HOST_X86) + static const int Type = 0xFF; + static const int CodeSize = 24; + static const int FixupCodeOffset = 6; +#elif defined(HOST_ARM64) + static const int Type = 0x0B; + static const int CodeSize = 24; + static const int FixupCodeOffset = 8; +#elif defined(HOST_ARM) + static const int Type = 0xFF; + static const int CodeSize = 12; + static const int FixupCodeOffset = 4 + THUMB_CODE; +#endif // HOST_AMD64 + + BYTE m_code[CodeSize]; + +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + static void (*FixupPrecodeCode)(); + static void (*FixupPrecodeCode_End)(); #endif + void Init(FixupPrecode* pPrecodeRX, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator); + + static void StaticInitialize(); + + static void GenerateCodePage(BYTE* pageBase, BYTE* pageBaseRX); + + PTR_FixupPrecodeData GetData() const + { + LIMITED_METHOD_CONTRACT; + return dac_cast(dac_cast(this) + GetOsPageSize()); + } + + TADDR GetMethodDesc() + { + LIMITED_METHOD_CONTRACT; + return (TADDR)GetData()->MethodDesc; + } + + PCODE GetTarget() + { + LIMITED_METHOD_DAC_CONTRACT; + return GetData()->Target; + } + + PCODE *GetTargetSlot() + { + LIMITED_METHOD_CONTRACT; + return &GetData()->Target; + } + +#ifndef DACCESS_COMPILE + static BOOL IsFixupPrecodeByASM(PCODE addr); + + void ResetTargetInterlocked() + { + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + PCODE target = (PCODE)this + FixupCodeOffset; + + _ASSERTE(IS_ALIGNED(&GetData()->Target, sizeof(SIZE_T))); + InterlockedExchangeT(&GetData()->Target, target); + } + + BOOL SetTargetInterlocked(TADDR target, TADDR expected) + { + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + } + CONTRACTL_END; + + MethodDesc * pMD = (MethodDesc*)GetMethodDesc(); + g_IBCLogger.LogMethodPrecodeWriteAccess(pMD); + + PCODE oldTarget = (PCODE)GetData()->Target; + if (oldTarget != 
((PCODE)this + FixupCodeOffset)) + { +#ifdef FEATURE_CODE_VERSIONING + // No change needed, jmp is already in place +#else + // Setting the target more than once is unexpected + return FALSE; +#endif + } + + _ASSERTE(IS_ALIGNED(&GetData()->Target, sizeof(SIZE_T))); + return InterlockedCompareExchangeT(&GetData()->Target, (PCODE)target, (PCODE)oldTarget) == (PCODE)oldTarget; + } +#endif + +#ifdef DACCESS_COMPILE + void EnumMemoryRegions(CLRDataEnumMemoryFlags flags); +#endif +}; + +typedef DPTR(FixupPrecode) PTR_FixupPrecode; + +#endif // HAS_FIXUP_PRECODE + +#include + +typedef DPTR(class Precode) PTR_Precode; + enum PrecodeType { PRECODE_INVALID = InvalidPrecode::Type, PRECODE_STUB = StubPrecode::Type, @@ -57,7 +360,7 @@ class Precode { #endif // HAS_NDIRECT_IMPORT_PRECODE #ifdef HAS_FIXUP_PRECODE - FixupPrecode* AsFixupPrecode() + PTR_FixupPrecode AsFixupPrecode() { LIMITED_METHOD_CONTRACT; SUPPORTS_DAC; @@ -109,29 +412,12 @@ class Precode { #ifdef OFFSETOF_PRECODE_TYPE BYTE type = m_data[OFFSETOF_PRECODE_TYPE]; -#ifdef TARGET_X86 - if (type == X86_INSTR_MOV_RM_R) - type = m_data[OFFSETOF_PRECODE_TYPE_MOV_RM_R]; -#endif // TARGET_X86 - -#ifdef TARGET_AMD64 - if (type == (X86_INSTR_MOV_R10_IMM64 & 0xFF)) - type = m_data[OFFSETOF_PRECODE_TYPE_MOV_R10]; - else if ((type == (X86_INSTR_CALL_REL32 & 0xFF)) || (type == (X86_INSTR_JMP_REL32 & 0xFF))) - type = m_data[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP]; -#endif // _AMD64 - -#if defined(HAS_FIXUP_PRECODE) && (defined(TARGET_X86) || defined(TARGET_AMD64)) - if (type == FixupPrecode::TypePrestub) - type = FixupPrecode::Type; -#endif - -#ifdef TARGET_ARM - static_assert_no_msg(offsetof(StubPrecode, m_pTarget) == offsetof(NDirectImportPrecode, m_pMethodDesc)); - // If the precode does not have thumb bit on target, it must be NDirectImportPrecode. 
- if (type == StubPrecode::Type && ((AsStubPrecode()->m_pTarget & THUMB_CODE) == 0)) - type = NDirectImportPrecode::Type; -#endif + if (type == StubPrecode::Type) + { + // StubPrecode code is used for both StubPrecode and NDirectImportPrecode, + // so we need to get the real type + type = AsStubPrecode()->GetType(); + } return (PrecodeType)type; @@ -147,12 +433,6 @@ class Precode { SUPPORTS_DAC; unsigned int align = PRECODE_ALIGNMENT; -#if defined(TARGET_X86) && defined(HAS_FIXUP_PRECODE) - // Fixup precodes has to be aligned to allow atomic patching - if (t == PRECODE_FIXUP) - align = 8; -#endif // TARGET_X86 && HAS_FIXUP_PRECODE - #if defined(TARGET_ARM) && defined(HAS_COMPACT_ENTRYPOINTS) // Precodes have to be aligned to allow fast compact entry points check _ASSERTE (align >= sizeof(void*)); @@ -211,19 +491,11 @@ class Precode { PCODE GetEntryPoint() { LIMITED_METHOD_CONTRACT; - return dac_cast(this) + GetEntryPointOffset(); - } - - static SIZE_T GetEntryPointOffset() - { - LIMITED_METHOD_CONTRACT; -#ifdef TARGET_ARM - return THUMB_CODE; -#else - return 0; -#endif + return PINSTRToPCODE(dac_cast(this)); } + PTR_PCODE GetTargetSlot(); + MethodDesc * GetMethodDesc(BOOL fSpeculative = FALSE); BOOL IsCorrectMethodDesc(MethodDesc * pMD); @@ -239,7 +511,7 @@ class Precode { void Reset(); #endif // DACCESS_COMPILE - static Precode* GetPrecodeFromEntryPoint(PCODE addr, BOOL fSpeculative = FALSE) + static PTR_Precode GetPrecodeFromEntryPoint(PCODE addr, BOOL fSpeculative = FALSE) { LIMITED_METHOD_DAC_CONTRACT; @@ -260,7 +532,7 @@ class Precode { } } - Precode* pPrecode = PTR_Precode(pInstr); + PTR_Precode pPrecode = PTR_Precode(pInstr); if (!fSpeculative) { @@ -280,38 +552,35 @@ class Precode { static SIZE_T SizeOfTemporaryEntryPoint(PrecodeType t) { LIMITED_METHOD_DAC_CONTRACT; -#ifdef HAS_FIXUP_PRECODE_CHUNKS - _ASSERTE(t != PRECODE_FIXUP); -#endif + return ALIGN_UP(SizeOf(t), AlignOf(t)); } static Precode * GetPrecodeForTemporaryEntryPoint(TADDR 
temporaryEntryPoints, int index); - static SIZE_T SizeOfTemporaryEntryPoints(PrecodeType t, bool preallocateJumpStubs, int count); + static SIZE_T SizeOfTemporaryEntryPoints(PrecodeType t, int count); static SIZE_T SizeOfTemporaryEntryPoints(TADDR temporaryEntryPoints, int count); static TADDR AllocateTemporaryEntryPoints(MethodDescChunk* pChunk, LoaderAllocator *pLoaderAllocator, AllocMemTracker *pamTracker); -#ifdef DACCESS_COMPILE - void EnumMemoryRegions(CLRDataEnumMemoryFlags flags); -#endif - -#ifdef HAS_FIXUP_PRECODE_CHUNKS - static DWORD GetOffsetOfBase(PrecodeType t, DWORD count) + static SIZE_T GetMaxTemporaryEntryPointsCount() { - assert(t == PRECODE_FIXUP); - return (DWORD)(count * sizeof(FixupPrecode)); + SIZE_T maxPrecodeCodeSize = Max(FixupPrecode::CodeSize, StubPrecode::CodeSize); + return GetOsPageSize() / maxPrecodeCodeSize; } - static DWORD GetOffset(PrecodeType t, DWORD index, DWORD count) - { - assert(t == PRECODE_FIXUP); - assert(index < count); - return (DWORD)((count - index - 1)* sizeof(FixupPrecode)); - } +#ifdef DACCESS_COMPILE + void EnumMemoryRegions(CLRDataEnumMemoryFlags flags); #endif }; +// Verify that the type for each precode is different +static_assert_no_msg(StubPrecode::Type != NDirectImportPrecode::Type); +static_assert_no_msg(StubPrecode::Type != FixupPrecode::Type); +static_assert_no_msg(StubPrecode::Type != ThisPtrRetBufPrecode::Type); +static_assert_no_msg(FixupPrecode::Type != NDirectImportPrecode::Type); +static_assert_no_msg(FixupPrecode::Type != ThisPtrRetBufPrecode::Type); +static_assert_no_msg(NDirectImportPrecode::Type != ThisPtrRetBufPrecode::Type); + #endif // __PRECODE_H__ diff --git a/src/coreclr/vm/stublink.cpp b/src/coreclr/vm/stublink.cpp index 29c046eb4a012..5b29d6e31c1f1 100644 --- a/src/coreclr/vm/stublink.cpp +++ b/src/coreclr/vm/stublink.cpp @@ -2168,7 +2168,7 @@ Stub* Stub::NewStub(PTR_VOID pCode, DWORD flags) _ASSERTE((stubPayloadOffset % CODE_SIZE_ALIGN) == 0); Stub* pStubRX = (Stub*)(pBlock + 
stubPayloadOffset); Stub* pStubRW; - ExecutableWriterHolder stubWriterHolder; + ExecutableWriterHolderNoLog stubWriterHolder; if (pHeap == NULL) { @@ -2176,7 +2176,7 @@ Stub* Stub::NewStub(PTR_VOID pCode, DWORD flags) } else { - stubWriterHolder = ExecutableWriterHolder(pStubRX, sizeof(Stub)); + stubWriterHolder.AssignExecutableWriterHolder(pStubRX, sizeof(Stub)); pStubRW = stubWriterHolder.GetRW(); } pStubRW->SetupStub( diff --git a/src/coreclr/vm/stubmgr.cpp b/src/coreclr/vm/stubmgr.cpp index 7a1823f804ead..60fffe5425e8d 100644 --- a/src/coreclr/vm/stubmgr.cpp +++ b/src/coreclr/vm/stubmgr.cpp @@ -1004,8 +1004,7 @@ BOOL PrecodeStubManager::CheckIsStub_Internal(PCODE stubStartAddress) } CONTRACTL_END; - // Forwarded to from RangeSectionStubManager - return FALSE; + return GetStubPrecodeRangeList()->IsInRange(stubStartAddress) || GetFixupPrecodeRangeList()->IsInRange(stubStartAddress); } BOOL PrecodeStubManager::DoTraceStub(PCODE stubStartAddress, @@ -1033,7 +1032,14 @@ BOOL PrecodeStubManager::DoTraceStub(PCODE stubStartAddress, else #endif // HAS_COMPACT_ENTRYPOINTS { - Precode* pPrecode = Precode::GetPrecodeFromEntryPoint(stubStartAddress); + // When the target slot points to the fixup part of the fixup precode, we need to compensate + // for that to get the actual stub address + Precode* pPrecode = Precode::GetPrecodeFromEntryPoint(stubStartAddress - FixupPrecode::FixupCodeOffset, TRUE /* speculative */); + if ((pPrecode == NULL) || (pPrecode->GetType() != PRECODE_FIXUP)) + { + pPrecode = Precode::GetPrecodeFromEntryPoint(stubStartAddress); + } + PREFIX_ASSUME(pPrecode != NULL); switch (pPrecode->GetType()) @@ -1498,21 +1504,6 @@ BOOL RangeSectionStubManager::TraceManager(Thread *thread, } #endif -PCODE RangeSectionStubManager::GetMethodThunkTarget(PCODE stubStartAddress) -{ - WRAPPER_NO_CONTRACT; - -#if defined(TARGET_X86) || defined(TARGET_AMD64) - return rel32Decode(stubStartAddress+1); -#elif defined(TARGET_ARM) - TADDR pInstr = 
PCODEToPINSTR(stubStartAddress); - return *dac_cast(pInstr + 2 * sizeof(DWORD)); -#else - PORTABILITY_ASSERT("RangeSectionStubManager::GetMethodThunkTarget"); - return NULL; -#endif -} - #ifdef DACCESS_COMPILE LPCWSTR RangeSectionStubManager::GetStubManagerName(PCODE addr) { @@ -2390,6 +2381,8 @@ PrecodeStubManager::DoEnumMemoryRegions(CLRDataEnumMemoryFlags flags) WRAPPER_NO_CONTRACT; DAC_ENUM_VTHIS(); EMEM_OUT(("MEM: %p PrecodeStubManager\n", dac_cast(this))); + GetStubPrecodeRangeList()->EnumMemoryRegions(flags); + GetFixupPrecodeRangeList()->EnumMemoryRegions(flags); } void diff --git a/src/coreclr/vm/stubmgr.h b/src/coreclr/vm/stubmgr.h index acb89f5af2a22..719db33844362 100644 --- a/src/coreclr/vm/stubmgr.h +++ b/src/coreclr/vm/stubmgr.h @@ -399,6 +399,28 @@ class PrecodeStubManager : public StubManager ~PrecodeStubManager() {WRAPPER_NO_CONTRACT;} #endif + protected: + LockedRangeList m_stubPrecodeRangeList; + LockedRangeList m_fixupPrecodeRangeList; + + public: + // Get dac-ized pointer to rangelist. 
+ PTR_RangeList GetStubPrecodeRangeList() + { + SUPPORTS_DAC; + + TADDR addr = PTR_HOST_MEMBER_TADDR(PrecodeStubManager, this, m_stubPrecodeRangeList); + return PTR_RangeList(addr); + } + + PTR_RangeList GetFixupPrecodeRangeList() + { + SUPPORTS_DAC; + + TADDR addr = PTR_HOST_MEMBER_TADDR(PrecodeStubManager, this, m_fixupPrecodeRangeList); + return PTR_RangeList(addr); + } + public: virtual BOOL CheckIsStub_Internal(PCODE stubStartAddress); @@ -591,8 +613,6 @@ class RangeSectionStubManager : public StubManager static StubCodeBlockKind GetStubKind(PCODE stubStartAddress); - static PCODE GetMethodThunkTarget(PCODE stubStartAddress); - public: #ifdef _DEBUG virtual const char * DbgGetName() { LIMITED_METHOD_CONTRACT; return "RangeSectionStubManager"; } diff --git a/src/coreclr/vm/util.cpp b/src/coreclr/vm/util.cpp index 12a4ed3225739..a14950993b4b1 100644 --- a/src/coreclr/vm/util.cpp +++ b/src/coreclr/vm/util.cpp @@ -2244,4 +2244,22 @@ HRESULT GetFileVersion( // S_OK or error Volatile NormalizedTimer::s_frequency = -1.0; +void FillStubCodePage(BYTE* pageBase, const void* code, int codeSize, int pageSize) +{ + int totalCodeSize = (pageSize / codeSize) * codeSize; + + memcpy(pageBase, code, codeSize); + + int i; + for (i = codeSize; i < pageSize / 2; i *= 2) + { + memcpy(pageBase + i, pageBase, i); + } + + if (i != totalCodeSize) + { + memcpy(pageBase + i, pageBase, totalCodeSize - i); + } +} + #endif // !DACCESS_COMPILE diff --git a/src/coreclr/vm/util.hpp b/src/coreclr/vm/util.hpp index 9c08c52b9becf..c8dd86d3f5298 100644 --- a/src/coreclr/vm/util.hpp +++ b/src/coreclr/vm/util.hpp @@ -1006,6 +1006,15 @@ class NormalizedTimer HRESULT GetFileVersion(LPCWSTR wszFilePath, ULARGE_INTEGER* pFileVersion); #endif // !TARGET_UNIX +#define ENUM_PAGE_SIZES \ + ENUM_PAGE_SIZE(4096) \ + ENUM_PAGE_SIZE(8192) \ + ENUM_PAGE_SIZE(16384) \ + ENUM_PAGE_SIZE(32768) \ + ENUM_PAGE_SIZE(65536) + +void FillStubCodePage(BYTE* pageBase, const void* code, int codeSize, int pageSize); + #ifdef 
TARGET_64BIT // We use modified Daniel Lemire's fastmod algorithm (https://github.com/dotnet/runtime/pull/406), // which allows to avoid the long multiplication if the divisor is less than 2**31. diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index fd85071676e94..5ae47059bc082 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -693,7 +693,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA NewHolder indcell_heap_holder( new LoaderHeap(indcell_heap_reserve_size, indcell_heap_commit_size, initReservedMem, indcell_heap_reserve_size, - NULL, FALSE)); + NULL, UnlockedLoaderHeap::HeapKind::Data)); initReservedMem += indcell_heap_reserve_size; @@ -701,7 +701,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA NewHolder cache_entry_heap_holder( new LoaderHeap(cache_entry_heap_reserve_size, cache_entry_heap_commit_size, initReservedMem, cache_entry_heap_reserve_size, - &cache_entry_rangeList, FALSE)); + &cache_entry_rangeList, UnlockedLoaderHeap::HeapKind::Data)); initReservedMem += cache_entry_heap_reserve_size; @@ -709,7 +709,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA NewHolder lookup_heap_holder( new LoaderHeap(lookup_heap_reserve_size, lookup_heap_commit_size, initReservedMem, lookup_heap_reserve_size, - &lookup_rangeList, TRUE)); + &lookup_rangeList, UnlockedLoaderHeap::HeapKind::Executable)); initReservedMem += lookup_heap_reserve_size; @@ -717,7 +717,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA NewHolder dispatch_heap_holder( new LoaderHeap(dispatch_heap_reserve_size, dispatch_heap_commit_size, initReservedMem, dispatch_heap_reserve_size, - &dispatch_rangeList, TRUE)); + &dispatch_rangeList, UnlockedLoaderHeap::HeapKind::Executable)); initReservedMem += dispatch_heap_reserve_size; @@ -725,7 +725,7 @@ void 
VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA NewHolder resolve_heap_holder( new LoaderHeap(resolve_heap_reserve_size, resolve_heap_commit_size, initReservedMem, resolve_heap_reserve_size, - &resolve_rangeList, TRUE)); + &resolve_rangeList, UnlockedLoaderHeap::HeapKind::Executable)); initReservedMem += resolve_heap_reserve_size; @@ -733,7 +733,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA NewHolder vtable_heap_holder( new LoaderHeap(vtable_heap_reserve_size, vtable_heap_commit_size, initReservedMem, vtable_heap_reserve_size, - &vtable_rangeList, TRUE)); + &vtable_rangeList, UnlockedLoaderHeap::HeapKind::Executable)); initReservedMem += vtable_heap_reserve_size;