Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

G_BUILD_VECTOR & G_UNMERGE_VALUES legalization for 32-bit vectors #38

Merged
merged 2 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 105 additions & 3 deletions llvm/lib/Target/AIE/AIELegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,100 @@ bool AIELegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
llvm_unreachable("Un-expected custom legalization");
}

/// Pack the scalar element operands of \p MI (a G_BUILD_VECTOR producing a
/// 32-bit vector) into a single s32 value and bitcast it to the destination
/// vector type. Elements narrower than 32 bits are zero-extended, shifted to
/// their lane offset, and OR-ed together. \p SourceReg is the 32-bit vector
/// register being built (used only to validate the size). Always erases \p MI
/// and returns true.
bool AIELegalizerInfo::pack32BitVector(LegalizerHelper &Helper,
                                       MachineInstr &MI,
                                       Register SourceReg) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Inline the type query into the assert so no variable is left unused in
  // NDEBUG builds.
  assert(MRI.getType(SourceReg).getSizeInBits() == 32 &&
         "cannot pack vectors larger or smaller than 32-bit");

  const Register DstReg = MI.getOperand(0).getReg();
  const LLT S32 = LLT::scalar(32);
  // Element width of the destination vector; loop-invariant.
  const unsigned EltSize = MRI.getType(DstReg).getScalarSizeInBits();

  // Running s32 accumulator that collects all lanes; starts at zero.
  Register Accumulator = MRI.createGenericVirtualRegister(S32);
  MIRBuilder.buildConstant(Accumulator, 0);

  unsigned Offset = 0;
  // Operand 0 is the vector being defined; the element sources follow it.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register EltReg = MI.getOperand(I).getReg();

    // Widen sub-32-bit elements to s32 before shifting them into place.
    if (EltSize != 32)
      EltReg = MIRBuilder.buildZExt(S32, EltReg).getReg(0);

    const Register NextAccumulator = MRI.createGenericVirtualRegister(S32);
    if (Offset != 0) {
      const MachineInstrBuilder ShiftAmount =
          MIRBuilder.buildConstant(S32, Offset);
      const MachineInstrBuilder Shifted =
          MIRBuilder.buildShl(S32, EltReg, ShiftAmount);
      MIRBuilder.buildOr(NextAccumulator, Accumulator, Shifted);
    } else {
      // Avoid a useless shift for the first element, since it doesn't get
      // optimized out in O0.
      MIRBuilder.buildOr(NextAccumulator, Accumulator, EltReg);
    }

    Accumulator = NextAccumulator;
    Offset += EltSize;
  }

  MIRBuilder.buildBitcast(DstReg, Accumulator);
  MI.eraseFromParent();
  return true;
}

/// Unpack a 32-bit vector \p SourceReg (the last operand of \p MI, a
/// G_UNMERGE_VALUES) into its scalar destination operands. The vector is
/// bitcast to s32, then each lane is right-shifted to bit 0 and truncated
/// into the corresponding destination register. Always erases \p MI and
/// returns true.
bool AIELegalizerInfo::unpack32BitVector(LegalizerHelper &Helper,
                                         MachineInstr &MI,
                                         Register SourceReg) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Inline the type query into the assert so no variable is left unused in
  // NDEBUG builds.
  assert(MRI.getType(SourceReg).getSizeInBits() == 32 &&
         "cannot unpack vectors larger or smaller than 32-bit");

  const LLT S32 = LLT::scalar(32);
  // View the packed vector as a plain 32-bit integer.
  Register PackedReg = MRI.createGenericVirtualRegister(S32);
  MIRBuilder.buildBitcast(PackedReg, SourceReg);

  // All operands except the last are definitions; the last is the source
  // vector. Element width is taken from the first destination (loop-invariant).
  const unsigned NumDefs = MI.getNumOperands() - 1;
  const unsigned EltSize =
      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();

  unsigned Offset = 0;
  for (unsigned I = 0; I != NumDefs; ++I) {
    const Register DefReg = MI.getOperand(I).getReg();
    if (Offset != 0) {
      const MachineInstrBuilder ShiftAmount =
          MIRBuilder.buildConstant(S32, Offset);
      const MachineInstrBuilder Shifted =
          MIRBuilder.buildLShr(S32, PackedReg, ShiftAmount);
      MIRBuilder.buildTrunc(DefReg, Shifted);
    } else {
      // Avoid a useless shift for the first element, since it doesn't get
      // optimized out in O0.
      MIRBuilder.buildTrunc(DefReg, PackedReg);
    }

    Offset += EltSize;
  }

  MI.eraseFromParent();
  return true;
}

bool AIELegalizerInfo::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
Expand All @@ -504,9 +598,14 @@ bool AIELegalizerInfo::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper,
const unsigned EltSize = DstVecEltTy.getScalarSizeInBits();
assert((EltSize == 8 || EltSize == 16 || EltSize == 32) &&
"non-existent integer size");
assert(DstVecSize > 64 && DstVecSize <= 1024 &&
"non-native vectors are not supported");
assert(DstVecSize == 32 || (DstVecSize > 64 && DstVecSize <= 1024 &&
"non-native vectors are not supported"));
assert(DstVecSize < 1024 && "vadd takes a 512-bit argument");

// If our vector is 32-bit we can store it as packed integer vector
if (DstVecSize == 32)
return pack32BitVector(Helper, MI, DstReg);

// We are using an undef since we are building over multiple instructions
const TypeSize VecEltTySize = DstVecEltTy.getSizeInBits();
const LLT VecTy = LLT::fixed_vector(512 / VecEltTySize, DstVecEltTy);
Expand Down Expand Up @@ -554,7 +653,7 @@ bool AIELegalizerInfo::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper,
bool AIELegalizerInfo::legalizeG_UNMERGE_VALUES(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
const MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

const Register FirstReg = MI.getOperand(0).getReg();
const Register LastReg = MI.getOperand(MI.getNumOperands() - 1).getReg();
Expand All @@ -565,6 +664,9 @@ bool AIELegalizerInfo::legalizeG_UNMERGE_VALUES(LegalizerHelper &Helper,
LastTy.getSizeInBits() &&
"This operation is only supported for vectors");

if (LastTy.getSizeInBits() == 32)
return unpack32BitVector(Helper, MI, LastReg);

const unsigned NumOperands = MI.getNumOperands() - 1;
for (unsigned Index = 0; Index < NumOperands; ++Index) {
const Register Current = MI.getOperand(Index).getReg();
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/AIELegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_AIE_AIEMACHINELEGALIZER_H

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/Register.h"

namespace llvm {

Expand Down Expand Up @@ -48,6 +49,12 @@ class AIELegalizerInfo : public LegalizerInfo {
bool legalizeG_FPEXT(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeG_FABS(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeG_FADDSUB(LegalizerHelper &Helper, MachineInstr &MI) const;

// Helper functions for legalization
bool pack32BitVector(LegalizerHelper &Helper, MachineInstr &MI,
Register SourceReg) const;
bool unpack32BitVector(LegalizerHelper &Helper, MachineInstr &MI,
Register SourceReg) const;
};
} // end namespace llvm
#endif
191 changes: 191 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-build-vector.mir
Original file line number Diff line number Diff line change
Expand Up @@ -470,3 +470,194 @@ body: |
%0:_(p0) = G_FRAME_INDEX %stack.0
G_STORE %1(<4 x s32>), %0(p0) :: (store (<4 x s32>))
PseudoRET implicit $lr
...

---
name: test_build_vector_32_16
body: |
bb.1.entry:
; CHECK-LABEL: name: test_build_vector_32_16
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[C]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<2 x s16>)
%0:_(s16) = G_CONSTANT i16 1
%1:_(s16) = G_CONSTANT i16 2
%2:_(<2 x s16>) = G_BUILD_VECTOR %0(s16), %1(s16)
PseudoRET implicit $lr, implicit %2
...

---
name: test_build_vector_32_8
body: |
bb.1.entry:
; CHECK-LABEL: name: test_build_vector_32_8
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C4]], [[C]]
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C5]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C6]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C7]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s8) = G_CONSTANT i8 1
%1:_(s8) = G_CONSTANT i8 2
%2:_(s8) = G_CONSTANT i8 3
%3:_(s8) = G_CONSTANT i8 4
%4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8)
PseudoRET implicit $lr, implicit %4
...

---
name: test_build_vector_32_8_negative
body: |
bb.1.entry:
; CHECK-LABEL: name: test_build_vector_32_8_negative
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[COPY]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 254
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 253
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C5]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 252
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C7]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s8) = G_CONSTANT i8 -1
%1:_(s8) = G_CONSTANT i8 -2
%2:_(s8) = G_CONSTANT i8 -3
%3:_(s8) = G_CONSTANT i8 -4
%4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8)
PseudoRET implicit $lr, implicit %4
...

---
name: test_build_vector_32_8_mixed
body: |
bb.1.entry:
; CHECK-LABEL: name: test_build_vector_32_8_mixed
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[C]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 232
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C5]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 164
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C7]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s8) = G_CONSTANT i8 19
%1:_(s8) = G_CONSTANT i8 -24
%2:_(s8) = G_CONSTANT i8 33
%3:_(s8) = G_CONSTANT i8 -92
%4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8)
PseudoRET implicit $lr, implicit %4
...

---
name: test_build_vector_32_8_from_registers
body: |
bb.1.entry:
liveins: $r0, $r1, $r2, $r3
; CHECK-LABEL: name: test_build_vector_32_8_from_registers
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $r3
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[AND]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s32) = COPY $r0
%1:_(s32) = COPY $r1
%2:_(s32) = COPY $r2
%3:_(s32) = COPY $r3
%4:_(s8) = G_TRUNC %0(s32)
%5:_(s8) = G_TRUNC %1(s32)
%6:_(s8) = G_TRUNC %2(s32)
%7:_(s8) = G_TRUNC %3(s32)
%8:_(<4 x s8>) = G_BUILD_VECTOR %4(s8), %5(s8), %6(s8), %7(s8)
PseudoRET implicit $lr, implicit %8
...

---
name: test_build_vector_32_8_register_constant
body: |
bb.1.entry:
liveins: $r0, $r1
; CHECK-LABEL: name: test_build_vector_32_8_register_constant
; CHECK: liveins: $r0, $r1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[C]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C5]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s32) = COPY $r0
%1:_(s32) = COPY $r1
%2:_(s8) = G_CONSTANT i8 19
%3:_(s8) = G_TRUNC %0(s32)
%4:_(s8) = G_CONSTANT i8 33
%5:_(s8) = G_TRUNC %1(s32)
%6:_(<4 x s8>) = G_BUILD_VECTOR %2(s8), %3(s8), %4(s8), %5(s8)
PseudoRET implicit $lr, implicit %6
Loading
Loading