Skip to content

Commit

Permalink
Fix stress apple arm64 assertion '(thisFieldOffset + EA_SIZE_IN_BYTES…
Browse files Browse the repository at this point in the history
…(attr)) <= areaSize' (#48936)

* Fix a stress Arm64 apple issue.

* fix issue.
  • Loading branch information
Sergey Andreenko authored Mar 5, 2021
1 parent 7e658d7 commit 04ff9bd
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 20 deletions.
9 changes: 1 addition & 8 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4821,14 +4821,7 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
// Need an additional integer register to extract upper 4 bytes from data.
regNumber tmpReg = lclVar->GetSingleTempReg();

// store lower 8 bytes
GetEmitter()->emitIns_S_R(INS_str, EA_8BYTE, operandReg, varNum, offs);

// Extract upper 4-bytes from data
GetEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, operandReg, 2);

// 4-byte write
GetEmitter()->emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offs + 8);
GetEmitter()->emitStoreSIMD12ToLclOffset(varNum, offs, operandReg, tmpReg);
}

#endif // FEATURE_SIMD
Expand Down
31 changes: 22 additions & 9 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,20 +1841,33 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg

// Evaluate each of the GT_FIELD_LIST items into their register
// and store their register into the outgoing argument area.
unsigned argOffset = putArgStk->getArgOffset();
const unsigned argOffset = putArgStk->getArgOffset();
for (GenTreeFieldList::Use& use : putArgStk->gtOp1->AsFieldList()->Uses())
{
GenTree* nextArgNode = use.GetNode();
genConsumeReg(nextArgNode);

regNumber reg = nextArgNode->GetRegNum();
var_types type = nextArgNode->TypeGet();
emitAttr attr = emitTypeSize(type);
regNumber reg = nextArgNode->GetRegNum();
var_types type = use.GetType();
unsigned thisFieldOffset = argOffset + use.GetOffset();

// Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
// argument area.
unsigned thisFieldOffset = argOffset + use.GetOffset();
GetEmitter()->emitIns_S_R(ins_Store(type), attr, reg, outArgVarNum, thisFieldOffset);
// Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
// argument area.

#if defined(FEATURE_SIMD) && defined(OSX_ARM64_ABI)
// storing of TYP_SIMD12 (i.e. Vector3) argument.
if (type == TYP_SIMD12)
{
// Need an additional integer register to extract upper 4 bytes from data.
regNumber tmpReg = nextArgNode->GetSingleTempReg();
GetEmitter()->emitStoreSIMD12ToLclOffset(outArgVarNum, thisFieldOffset, reg, tmpReg);
}
else
#endif // FEATURE_SIMD && OSX_ARM64_ABI
{
emitAttr attr = emitTypeSize(type);
GetEmitter()->emitIns_S_R(ins_Store(type), attr, reg, outArgVarNum, thisFieldOffset);
}

// We can't write beyond the arg area unless this is a tail call, in which case we use
// the first stack arg as the base of the incoming arg area.
Expand All @@ -1867,7 +1880,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg
}
#endif

assert((thisFieldOffset + EA_SIZE_IN_BYTES(attr)) <= areaSize);
assert((thisFieldOffset + genTypeSize(type)) <= areaSize);
#endif
}
}
Expand Down
26 changes: 26 additions & 0 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -935,4 +935,30 @@ inline bool emitIsLoadConstant(instrDesc* jmp)
(jmp->idInsFmt() == IF_LARGELDC));
}

#if defined(FEATURE_SIMD)
//-----------------------------------------------------------------------------------
// emitStoreSIMD12ToLclOffset: store SIMD12 value from opReg to varNum+offset.
//
// Arguments:
// varNum - the variable on the stack to use as a base;
// offset - the offset from the varNum;
// opReg - the src reg with SIMD12 value;
// tmpReg - a tmp reg to use for the write, can be general or float.
//
void emitStoreSIMD12ToLclOffset(unsigned varNum, unsigned offset, regNumber opReg, regNumber tmpReg)
{
assert(varNum != BAD_VAR_NUM);
assert(isVectorRegister(opReg));

// store lower 8 bytes
emitIns_S_R(INS_str, EA_8BYTE, opReg, varNum, offset);

// Extract upper 4-bytes from data
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, opReg, 2);

// 4-byte write
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
}
#endif // FEATURE_SIMD

#endif // TARGET_ARM64
14 changes: 12 additions & 2 deletions src/coreclr/jit/lsraarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,18 +398,28 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
int srcCount = 0;

// Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
if (putArgChild->TypeIs(TYP_STRUCT) || putArgChild->OperIs(GT_FIELD_LIST))
{
// We will use store instructions that each write a register sized value

if (putArgChild->OperGet() == GT_FIELD_LIST)
if (putArgChild->OperIs(GT_FIELD_LIST))
{
assert(putArgChild->isContained());
// We consume all of the items in the GT_FIELD_LIST
for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses())
{
BuildUse(use.GetNode());
srcCount++;

#if defined(FEATURE_SIMD) && defined(OSX_ARM64_ABI)
if (use.GetType() == TYP_SIMD12)
{
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
// To assemble the vector properly we would need an additional int register.
// The other platforms can write it as 16-byte using 1 write.
buildInternalIntRegisterDefForNode(use.GetNode());
}
#endif // FEATURE_SIMD && OSX_ARM64_ABI
}
}
else
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3342,7 +3342,8 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
RefPosition* internalFloatDef = nullptr;
if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12))
{
// Need an additional register to extract upper 4 bytes of Vector3.
// Need an additional register to extract upper 4 bytes of Vector3,
// it has to be float for x86.
internalFloatDef = buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());
}
#endif // FEATURE_SIMD
Expand Down

0 comments on commit 04ff9bd

Please sign in to comment.