-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support for allowing direct VEXTRACT to 20-bit registers #233
base: aie-public
Are you sure you want to change the base?
Changes from all commits
16445fb
02d7e59
6138a60
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,6 +80,11 @@ class AIE2PreLegalizerCombinerImpl : public Combiner { | |
|
||
bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const; | ||
|
||
bool tryToCombineTruncExt(Register DstReg, bool SignVal, | ||
unsigned SrcEltSize) const; | ||
|
||
bool tryToCombineVExtractElt(MachineInstr &MI) const; | ||
|
||
bool tryToCombineIntrinsic(MachineInstr &MI) const; | ||
|
||
private: | ||
|
@@ -288,6 +293,89 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts( | |
return true; | ||
} | ||
|
||
/// \returns true if it is possible to combine the below sequence of MIRs | ||
/// From : %10:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), | ||
/// %2(<32 x s16>), %0(s32), %1(s32) | ||
/// %20:_(s16) = G_TRUNC %10(s32) | ||
/// %30:_(s20) = G_SEXT %20(s16) | ||
/// To : %10:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), | ||
/// %2(<32 x s16>), %0(s32), %1(s32) | ||
/// %30:_(s20) = G_TRUNC %10(s32) | ||
/// Or even: | ||
/// From : %10:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), | ||
/// %2(<64 x s8>), %0(s32), %1(s32) | ||
/// %20:_(s8) = G_TRUNC %10(s32) | ||
/// %30:_(s20) = G_SEXT %20(s8) | ||
/// To : %10:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), | ||
/// %2(<64 x s8>), %0(s32), %1(s32) | ||
/// %30:_(s20) = G_TRUNC %10(s32) | ||
/// This also enables S20Narrowing for vextract | ||
/// The same rewrite is applied with G_ZEXT in place of G_SEXT when \p SignVal | ||
/// is false. | ||
/// \param DstReg register whose non-debug users are searched for the G_TRUNC. | ||
/// \param SignVal selects the extension opcode to match: G_SEXT when true, | ||
/// G_ZEXT when false. | ||
/// \param SrcEltSize required bit width of the G_TRUNC result. | ||
/// When the pattern is found the rewrite is performed here (the extension and | ||
/// the G_TRUNC are erased) and true is returned; on every path that returns | ||
/// false no instruction has been created or erased. | ||
bool AIE2PreLegalizerCombinerImpl::tryToCombineTruncExt( | ||
Register DstReg, bool SignVal, unsigned SrcEltSize) const { | ||
// Checks if a given register has non-debug user with a specific opcode and | ||
// destination size, and return that user. | ||
// Only the first matching user in iteration order is returned. | ||
auto GetUseWithOpCode = | ||
abhinay-anubola marked this conversation as resolved.
Show resolved
Hide resolved
|
||
[&](const Register Reg, const unsigned OpcodeToCheck, | ||
const unsigned DstSize) -> std::optional<MachineInstr *> { | ||
for (auto &Use : MRI.use_nodbg_instructions(Reg)) { | ||
if (Use.getOpcode() == OpcodeToCheck) { | ||
const LLT DstRegTy = MRI.getType(Use.getOperand(0).getReg()); | ||
if (DstRegTy.getSizeInBits() == DstSize) | ||
return &Use; | ||
} | ||
} | ||
return std::nullopt; | ||
}; | ||
|
||
// Step 1: find a G_TRUNC of DstReg producing a SrcEltSize-bit value. | ||
if (auto Trunc = | ||
GetUseWithOpCode(DstReg, TargetOpcode::G_TRUNC, SrcEltSize)) { | ||
MachineInstr *TruncMI = Trunc.value(); | ||
const unsigned ExtOpcode = | ||
SignVal ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; | ||
const Register UseDstReg = TruncMI->getOperand(0).getReg(); | ||
// Ensure G_TRUNC has a single non-debug user before safely eliminating it. | ||
// NOTE: only the first matching G_TRUNC is examined; if it has several | ||
// users, other G_TRUNC users of DstReg are not tried. | ||
if (!MRI.hasOneNonDBGUser(UseDstReg)) | ||
return false; | ||
// Step 2: find the matching 20-bit extension of the truncated value. | ||
if (auto Ext = GetUseWithOpCode(UseDstReg, ExtOpcode, 20)) { | ||
MachineInstr *ExtMI = Ext.value(); | ||
// Emit a single G_TRUNC from DstReg into the destination operand of the | ||
// extension, using a builder constructed from ExtMI, then erase the | ||
// extension and the intermediate G_TRUNC. | ||
MachineIRBuilder MIRBuilder(*ExtMI); | ||
MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {ExtMI->getOperand(0)}, | ||
{DstReg}); | ||
ExtMI->eraseFromParent(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest to not erase 2 instructions here, because we may corrupt the iterator. Let There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Combiner engine may access freed memory:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question is, should we refactor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We cant erase just the TRUNC as it is input for EXT. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You will also erase MI (intrinsic) in the caller function. |
||
TruncMI->eraseFromParent(); | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
/// Rewrites the intrinsic \p MI into the generic extract-vector-element opcode | ||
/// reported by AIE2InstrInfo::getGenericExtractVectorEltOpcode. | ||
/// Operand 0 of \p MI is the destination; operands 2, 3 and 4 are read as the | ||
/// source vector, the index and the sign selector. | ||
/// \returns false, without modifying anything, when getIConstantVRegSExtVal | ||
/// yields no value for the sign operand; otherwise the replacement is built, | ||
/// \p MI is erased and true is returned. | ||
bool AIE2PreLegalizerCombinerImpl::tryToCombineVExtractElt( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can have a description of the combiner here. |
||
MachineInstr &MI) const { | ||
abhinay-anubola marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const Register DstReg = MI.getOperand(0).getReg(); | ||
// In this case of G_INTRINSIC operand 1 is target intrinsic | ||
const Register SrcReg = MI.getOperand(2).getReg(); | ||
const Register IdxReg = MI.getOperand(3).getReg(); | ||
const Register SignReg = MI.getOperand(4).getReg(); | ||
|
||
// The sign selector must resolve to a constant; bail out otherwise. | ||
const auto SignVal = getIConstantVRegSExtVal(SignReg, MRI); | ||
if (!SignVal) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks like an assert. We always need constant signal to be able to handle this intrinsic. |
||
return false; | ||
const LLT SrcVecTy = MRI.getType(SrcReg); | ||
const unsigned SrcEltSize = SrcVecTy.getScalarSizeInBits(); | ||
// For 8- and 16-bit elements, first try to fold a following G_TRUNC plus | ||
// extension to 20 bits. The result of that attempt is not checked, and the | ||
// sign value is implicitly converted to the bool parameter. | ||
if (SrcEltSize == 8 || SrcEltSize == 16) { | ||
tryToCombineTruncExt(DstReg, SignVal.value(), SrcEltSize); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can trigger the same multiple erased instructions problem here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understood that this |
||
} | ||
|
||
// Select the generic extract opcode from the sign value and build it with a | ||
// builder constructed from MI, reusing DstReg as the destination. | ||
auto *TII = static_cast<const AIE2InstrInfo *>(STI.getInstrInfo()); | ||
const unsigned Opcode = | ||
TII->getGenericExtractVectorEltOpcode(SignVal.value()); | ||
MachineIRBuilder MIRBuilder(MI); | ||
MIRBuilder.buildInstr(Opcode, {DstReg}, {SrcReg, IdxReg}); | ||
|
||
// The original intrinsic is no longer needed once the replacement exists. | ||
MI.eraseFromParent(); | ||
return true; | ||
} | ||
|
||
bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic( | ||
MachineInstr &MI) const { | ||
const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID(); | ||
|
@@ -306,6 +394,11 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic( | |
case Intrinsic::aie2_vinsert32_I512: { | ||
return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID)); | ||
} | ||
case Intrinsic::aie2_vextract_elem8_I512: | ||
case Intrinsic::aie2_vextract_elem16_I512: | ||
case Intrinsic::aie2_vextract_elem32_I512: { | ||
return tryToCombineVExtractElt(MI); | ||
} | ||
default: | ||
break; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of doing this combine here, why don't we just use G_ASSERT_ZEXT/G_ASSERT_SEXT? In your previous commit 16445fb,
we should pre-select
into
Then you should get the G_TRUNC + G_ZEXT combine for free.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should have something like:
However, for the SEXT case, there is no combine pattern AFAIK.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can try to match this pattern explicitly by looking for the ASSERT, something like:
And then apply replaceSingleDefInstWithReg.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of hardcoding the G_ASSERT_SEXT, I'd suggest using the KnownBits analysis to check if we have known sign bits:
KB->computeNumSignBits(Reg)