diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp index 4fa386c8fed04..034e309634ebd 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp @@ -335,3 +335,46 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24) p[0] = Opcode0; p[1] = Opcode1; } + +// GetArm64MovConstant / GetArm64MovkConstant: Decode arm64 mov / movk instructions, e.g.: +// d29ff600 mov x0, #65456 +// f2ab8640 movk x0, #23602, lsl #16 +// f2c04bc0 movk x0, #606, lsl #32 +// +// This is used in the NearDiffer to determine if a sequence of mov/movk is actually an address. +// +// Return `true` if the instruction pointed to by `p` is a mov/movk, `false` otherwise. +// If true, fill out the target register in `*pReg`, constant in `*pCon`, and (for movk) shift value in `*pShift`. + +bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon) +{ + UINT32 instr = *p; + if ((instr & 0xffe00000) == 0xd2800000) + { + *pReg = instr & 0x1f; + *pCon = (instr >> 5) & 0xffff; + return true; + } + + return false; +} + +bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift) +{ + UINT32 instr = *p; + if ((instr & 0xff800000) == 0xf2800000) + { + *pReg = instr & 0x1f; + *pCon = (instr >> 5) & 0xffff; + *pShift = ((instr >> 21) & 0x3) * 16; + return true; + } + + return false; +} + +// PutArm64MovkConstant: set the constant field in an Arm64 `movk` instruction +void PutArm64MovkConstant(UINT32* p, unsigned con) +{ + *p = (*p & ~(0xffff << 5)) | ((con & 0xffff) << 5); +} diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h index aa5605576625c..f414e9be3623c 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h @@ -82,6 +82,11 @@ void PutArm64Rel12(UINT32* pCode, INT32 imm12); void PutThumb2Mov32(UINT16* p, UINT32 imm32); void PutThumb2BlRel24(UINT16* p, INT32 imm24); +bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon); +bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift); + +void PutArm64MovkConstant(UINT32* p, unsigned con); + template inline constexpr unsigned ArrLen(T (&)[size]) { diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp index 2d480ee776dc5..8287970d7bebb 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp @@ -298,22 +298,24 @@ struct DiffData CompileResult* cr2; // Details of the first block - size_t blocksize1; - size_t datablock1; - size_t datablockSize1; - size_t originalBlock1; - size_t originalDataBlock1; - size_t otherCodeBlock1; - size_t otherCodeBlockSize1; + unsigned char* block1; + size_t blocksize1; + unsigned char* datablock1; + size_t datablockSize1; + size_t originalBlock1; + size_t originalDataBlock1; + size_t otherCodeBlock1; + size_t otherCodeBlockSize1; // Details of the second block - size_t blocksize2; - size_t datablock2; - size_t datablockSize2; - size_t originalBlock2; - size_t originalDataBlock2; - size_t otherCodeBlock2; - size_t otherCodeBlockSize2; + unsigned char* block2; + size_t blocksize2; + unsigned char* datablock2; + size_t datablockSize2; + size_t originalBlock2; + size_t originalDataBlock2; + size_t otherCodeBlock2; + size_t otherCodeBlockSize2; }; // @@ -330,6 +332,7 @@ bool NearDiffer::compareOffsets( return true; } + const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture(); const DiffData* data = (const DiffData*)payload; size_t ip1 = data->originalBlock1 + blockOffset; size_t ip2 = data->originalBlock2 + blockOffset; @@ -435,6 +438,99 @@ bool NearDiffer::compareOffsets( if ((mapped1 == mapped2) && (mapped1 != (size_t)-1)) return true; + // There are some cases on arm64 where we generate multiple instruction register construction of addresses + // but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is + // allocPgoInstrumentationBySchema(), which returns an address into which the JIT writes PGO probe data. + // The instruction sequence is something like this: + // mov x0, #63408 + // movk x0, #23602, lsl #16 + // movk x0, #606, lsl #32 + // + // Here, we try to match this sequence and look it up in the address map. + // + // Some version of this logic might apply to ARM as well. + // + if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64) + { + bool movk2 = false, movk3 = false; + unsigned reg1_1, reg1_2, reg2_1, reg2_2, reg3_1, reg3_2, reg4_1, reg4_2; + unsigned con1_1, con1_2, con2_1, con2_2, con3_1, con3_2, con4_1, con4_2; + unsigned shift2_1, shift2_2, shift3_1, shift3_2, shift4_1, shift4_2; + UINT32* iaddr1 = (UINT32*)(data->block1 + blockOffset); + UINT32* iaddr2 = (UINT32*)(data->block2 + blockOffset); + UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1); + UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2); + + // We're assuming that a mov/movk isn't the last instruction in the instruction buffer. + if ((iaddr1 < iaddr1end) && + (iaddr2 < iaddr2end) && + GetArm64MovConstant(iaddr1, ®1_1, &con1_1) && + GetArm64MovConstant(iaddr2, ®1_2, &con1_2) && + (reg1_1 == reg1_2)) + { + if ((iaddr1 + 1 < iaddr1end) && + (iaddr2 + 1 < iaddr2end) && + GetArm64MovkConstant(iaddr1 + 1, ®2_1, &con2_1, &shift2_1) && + GetArm64MovkConstant(iaddr2 + 1, ®2_2, &con2_2, &shift2_2) && + (reg2_1 == reg2_2) && + (shift2_1 == shift2_2) && + (shift2_1 == 16)) + { + // We currently assume the address requires at least 2 'movk' instructions, thus, is >4GB. + // The 3rd 'movk' is optional. + if ((iaddr1 + 2 < iaddr1end) && + (iaddr2 + 2 < iaddr2end) && + GetArm64MovkConstant(iaddr1 + 2, ®3_1, &con3_1, &shift3_1) && + GetArm64MovkConstant(iaddr2 + 2, ®3_2, &con3_2, &shift3_2) && + (reg3_1 == reg3_2) && + (shift3_1 == shift3_2) && + (shift3_1 == 32)) + { + movk2 = true; + // Note: this only works if size_t is 64-bit. + size_t addr1 = (size_t)con1_1 + ((size_t)con2_1 << 16) + ((size_t)con3_1 << 32); + size_t addr2 = (size_t)con1_2 + ((size_t)con2_2 << 16) + ((size_t)con3_2 << 32); + if ((iaddr1 + 3 < iaddr1end) && + (iaddr2 + 3 < iaddr2end) && + GetArm64MovkConstant(iaddr1 + 3, ®4_1, &con4_1, &shift4_1) && + GetArm64MovkConstant(iaddr2 + 3, ®4_2, &con4_2, &shift4_2) && + (reg4_1 == reg4_2) && + (shift4_1 == shift4_2) && + (shift4_1 == 48)) + { + movk3 = true; + addr1 += (size_t)con4_1 << 48; + addr2 += (size_t)con4_2 << 48; + } + + // Check the constants! We don't need to check 'addr1 == addr2' because if that were + // true we wouldn't have gotten here. + + size_t mapped1 = (size_t)data->cr1->searchAddressMap((void*)addr1); + size_t mapped2 = (size_t)data->cr2->searchAddressMap((void*)addr2); + if ((mapped1 == mapped2) && (mapped1 != (size_t)-1)) + { + // Now, zero out the constants in the `movk` instructions so when the disassembler + // gets to them, they compare equal. + PutArm64MovkConstant(iaddr1 + 1, 0); + PutArm64MovkConstant(iaddr2 + 1, 0); + if (movk2) + { + PutArm64MovkConstant(iaddr1 + 2, 0); + PutArm64MovkConstant(iaddr2 + 2, 0); + } + if (movk3) + { + PutArm64MovkConstant(iaddr1 + 3, 0); + PutArm64MovkConstant(iaddr2 + 3, 0); + } + return true; + } + } + } + } + } + return false; } @@ -513,11 +609,11 @@ bool NearDiffer::compareCodeSection(MethodContext* mc, cr2, // Details of the first block - (size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1, + block1, (size_t)blocksize1, datablock1, (size_t)datablockSize1, (size_t)originalBlock1, (size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1, // Details of the second block - (size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2, + block2, (size_t)blocksize2, datablock2, (size_t)datablockSize2, (size_t)originalBlock2, (size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2}; #ifdef USE_COREDISTOOLS