Skip to content

Commit

Permalink
Support Arm64 "constructed" constants in SuperPMI asm diffs (#76616)
Browse files Browse the repository at this point in the history
* Support Arm64 "constructed" constants in SuperPMI asm diffs

SuperPMI asm diffs tries to ignore constants that can change between
multiple replays, such as addresses that the replay engine must generate
and not simply hand back from the collected data.

Often, addresses have associated relocations generated during replay.
SuperPMI can use these relocations to adjust the constants to allow
two replays to match. However, there are cases on Arm64 where an address
both doesn't report a relocation and is "constructed" using multiple
`mov`/`movk` instructions.

One case is the `allocPgoInstrumentationBySchema()`
API which returns a pointer to a PGO data buffer. An address within this
buffer is constructed via a sequence such as:
```
mov     x0, #63408
movk    x0, #23602, lsl #16
movk    x0, #606, lsl #32
```

When SuperPMI replays this API, it constructs a new buffer and returns that
pointer, which is used to construct various actual addresses that are
generated as "constructed" constants, shown above.

This change "de-constructs" the constants and looks them up in the replay
address map. If base and diff match the mapped constants, there is no asm diff.

* Fix 32-bit build

I don't think we fully support 64-bit replay on 32-bit host, but this
fix at least makes it possible for this case.

* Support more general mov/movk sequence

Allow JIT1 and JIT2 to have a different sequence of
mov/movk[/movk[/movk]] that map to the same address in the
address map. That is, the replay constant might require a different
set of instructions (e.g., if a `movk` is missing because its constant
is zero).
  • Loading branch information
BruceForstall authored Oct 5, 2022
1 parent dd96637 commit b303ffe
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 16 deletions.
43 changes: 43 additions & 0 deletions src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,3 +335,46 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24)
p[0] = Opcode0;
p[1] = Opcode1;
}

// GetArm64MovConstant / GetArm64MovkConstant: decoders for the arm64 `mov` / `movk` (move wide immediate)
// instructions. Example encodings:
//   d29ff600    mov     x0, #65456
//   f2ab8640    movk    x0, #23602, lsl #16
//   f2c04bc0    movk    x0, #606, lsl #32
//
// The NearDiffer uses these to decide whether a mov/movk sequence is really building an address.
//
// Both functions read the 32-bit instruction word at `p` and return `true` when it has the expected
// encoding, `false` otherwise. On `true` the destination register number is stored to `*pReg`, the
// 16-bit immediate to `*pCon`, and (movk only) the left-shift amount in bits to `*pShift`.
// On `false` none of the output arguments are written.

bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon)
{
    const UINT32 encoding = *p;

    // Compare bits [31:21] against the `mov xN, #imm16` pattern. The shift field (bits [22:21]) is
    // part of the mask, so only the unshifted form is accepted.
    const bool isMov = (encoding & 0xffe00000) == 0xd2800000;
    if (!isMov)
    {
        return false;
    }

    *pReg = encoding & 0x1f;          // destination register: bits [4:0]
    *pCon = (encoding >> 5) & 0xffff; // immediate: bits [20:5]
    return true;
}

// Decode the arm64 `movk xN, #imm16, lsl #shift` instruction at `p`.
// Returns `false` (leaving the outputs untouched) if the word at `p` is not such a `movk`; otherwise
// stores the register number, 16-bit immediate and shift amount (0, 16, 32 or 48) and returns `true`.
bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift)
{
    const UINT32 encoding = *p;

    // Only bits [31:23] identify the instruction; the shift selector below them is free to vary.
    if ((encoding & 0xff800000) != 0xf2800000)
    {
        return false;
    }

    // Bits [22:21] select which 16-bit slice of the register is replaced.
    const unsigned sliceIndex = (encoding >> 21) & 0x3;

    *pReg   = encoding & 0x1f;          // destination register: bits [4:0]
    *pCon   = (encoding >> 5) & 0xffff; // immediate: bits [20:5]
    *pShift = sliceIndex * 16;
    return true;
}

// PutArm64MovkConstant: overwrite the 16-bit constant field (bits [20:5]) of the Arm64 `movk`
// instruction at `p` with the low 16 bits of `con`. All other bits of the instruction are preserved.
// The instruction is not checked to actually be a `movk`; that is up to the caller.
void PutArm64MovkConstant(UINT32* p, unsigned con)
{
    const UINT32 withoutConstant = *p & ~(0xffff << 5);
    const UINT32 constantBits    = (con & 0xffff) << 5;
    *p = withoutConstant | constantBits;
}
5 changes: 5 additions & 0 deletions src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ void PutArm64Rel12(UINT32* pCode, INT32 imm12);
void PutThumb2Mov32(UINT16* p, UINT32 imm32);
void PutThumb2BlRel24(UINT16* p, INT32 imm24);

bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon);
bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift);

void PutArm64MovkConstant(UINT32* p, unsigned con);

template <typename T, int size>
inline constexpr unsigned ArrLen(T (&)[size])
{
Expand Down
169 changes: 153 additions & 16 deletions src/coreclr/tools/superpmi/superpmi/neardiffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,22 +298,24 @@ struct DiffData
CompileResult* cr2;

// Details of the first block
size_t blocksize1;
size_t datablock1;
size_t datablockSize1;
size_t originalBlock1;
size_t originalDataBlock1;
size_t otherCodeBlock1;
size_t otherCodeBlockSize1;
unsigned char* block1;
size_t blocksize1;
unsigned char* datablock1;
size_t datablockSize1;
size_t originalBlock1;
size_t originalDataBlock1;
size_t otherCodeBlock1;
size_t otherCodeBlockSize1;

// Details of the second block
size_t blocksize2;
size_t datablock2;
size_t datablockSize2;
size_t originalBlock2;
size_t originalDataBlock2;
size_t otherCodeBlock2;
size_t otherCodeBlockSize2;
unsigned char* block2;
size_t blocksize2;
unsigned char* datablock2;
size_t datablockSize2;
size_t originalBlock2;
size_t originalDataBlock2;
size_t otherCodeBlock2;
size_t otherCodeBlockSize2;
};

//
Expand All @@ -330,6 +332,7 @@ bool NearDiffer::compareOffsets(
return true;
}

const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture();
const DiffData* data = (const DiffData*)payload;
size_t ip1 = data->originalBlock1 + blockOffset;
size_t ip2 = data->originalBlock2 + blockOffset;
Expand Down Expand Up @@ -435,6 +438,140 @@ bool NearDiffer::compareOffsets(
if ((mapped1 == mapped2) && (mapped1 != (size_t)-1))
return true;

// There are some cases on arm64 where we generate multiple instruction register construction of addresses
// but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is
// allocPgoInstrumentationBySchema(), which returns an address the JIT writes into the code stream
// (used to store dynamic PGO probe data).
//
// The instruction sequence is something like this:
// mov x0, #63408
// movk x0, #23602, lsl #16
// movk x0, #606, lsl #32
//
// Here, we try to match this sequence and look it up in the address map.
//
// Since the mov/movk sequence is specific to the replay address constant, we don't assume the baseline
// and diff have the same number of instructions (e.g., it's possible to skip a `movk` if it is zero).
//
// Some version of this logic might apply to ARM as well.
//
if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64)
{
bool movk2_1 = false, movk3_1 = false;
bool movk2_2 = false, movk3_2 = false;

unsigned reg1_1 = 0, reg2_1, reg3_1, reg4_1;
unsigned reg1_2 = 0, reg2_2, reg3_2, reg4_2;
unsigned con1_1, con2_1, con3_1, con4_1;
unsigned con1_2, con2_2, con3_2, con4_2;
unsigned shift2_1, shift3_1, shift4_1;
unsigned shift2_2, shift3_2, shift4_2;

UINT32* iaddr1 = (UINT32*)(data->block1 + blockOffset);
UINT32* iaddr2 = (UINT32*)(data->block2 + blockOffset);
UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1);
UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2);

DWORDLONG addr1 = 0;
DWORDLONG addr2 = 0;

// Look for a mov/movk address pattern in code stream 1.

if ((iaddr1 < iaddr1end) &&
GetArm64MovConstant(iaddr1, &reg1_1, &con1_1))
{
// We assume the address requires at least 1 'movk' instruction.
if ((iaddr1 + 1 < iaddr1end) &&
GetArm64MovkConstant(iaddr1 + 1, &reg2_1, &con2_1, &shift2_1) &&
(reg1_1 == reg2_1))
{
addr1 = (DWORDLONG)con1_1 + ((DWORDLONG)con2_1 << shift2_1);

if ((iaddr1 + 2 < iaddr1end) &&
GetArm64MovkConstant(iaddr1 + 2, &reg3_1, &con3_1, &shift3_1) &&
(reg1_1 == reg3_1))
{
movk2_1 = true;
addr1 += (DWORDLONG)con3_1 << shift3_1;

if ((iaddr1 + 3 < iaddr1end) &&
GetArm64MovkConstant(iaddr1 + 3, &reg4_1, &con4_1, &shift4_1) &&
(reg1_1 == reg4_1))
{
movk3_1 = true;
addr1 += (DWORDLONG)con4_1 << shift4_1;
}
}
}
}

// Look for a mov/movk address pattern in code stream 2.

if ((iaddr2 < iaddr2end) &&
GetArm64MovConstant(iaddr2, &reg1_2, &con1_2))
{
// We assume the address requires at least 1 'movk' instruction.
if ((iaddr2 + 1 < iaddr2end) &&
GetArm64MovkConstant(iaddr2 + 1, &reg2_2, &con2_2, &shift2_2) &&
(reg1_2 == reg2_2))
{
addr2 = (DWORDLONG)con1_2 + ((DWORDLONG)con2_2 << shift2_2);

if ((iaddr2 + 2 < iaddr2end) &&
GetArm64MovkConstant(iaddr2 + 2, &reg3_2, &con3_2, &shift3_2) &&
(reg1_2 == reg3_2))
{
movk2_2 = true;
addr2 += (DWORDLONG)con3_2 << shift3_2;

if ((iaddr2 + 3 < iaddr2end) &&
GetArm64MovkConstant(iaddr2 + 3, &reg4_2, &con4_2, &shift4_2) &&
(reg1_2 == reg4_2))
{
movk3_2 = true;
addr2 += (DWORDLONG)con4_2 << shift4_2;
}
}
}
}

// Check the constants. We don't need to check 'addr1 == addr2' because if that were
// true we wouldn't have gotten here.
//
// Note: when replaying on a 32-bit platform, we must have
// movk2_1 == movk2_2 == movk3_1 == movk3_2 == false

if ((addr1 != 0) && (addr2 != 0) && (reg1_1 == reg1_2))
{
DWORDLONG mapped1 = (DWORDLONG)data->cr1->searchAddressMap((void*)addr1);
DWORDLONG mapped2 = (DWORDLONG)data->cr2->searchAddressMap((void*)addr2);
if ((mapped1 == mapped2) && (mapped1 != (DWORDLONG)-1))
{
// Now, zero out the constants in the `movk` instructions so when the disassembler
// gets to them, they compare equal.
PutArm64MovkConstant(iaddr1 + 1, 0);
PutArm64MovkConstant(iaddr2 + 1, 0);
if (movk2_1)
{
PutArm64MovkConstant(iaddr1 + 2, 0);
}
if (movk2_2)
{
PutArm64MovkConstant(iaddr2 + 2, 0);
}
if (movk3_1)
{
PutArm64MovkConstant(iaddr1 + 3, 0);
}
if (movk3_2)
{
PutArm64MovkConstant(iaddr2 + 3, 0);
}
return true;
}
}
}

return false;
}

Expand Down Expand Up @@ -513,11 +650,11 @@ bool NearDiffer::compareCodeSection(MethodContext* mc,
cr2,

// Details of the first block
(size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
block1, (size_t)blocksize1, datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
(size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1,

// Details of the second block
(size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
block2, (size_t)blocksize2, datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
(size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2};

#ifdef USE_COREDISTOOLS
Expand Down

0 comments on commit b303ffe

Please sign in to comment.