Skip to content

Commit

Permalink
[AIE2] Legalize G_BUILD_VECTOR for 32-bit vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
ValentijnvdBeek committed May 31, 2024
1 parent b37eccb commit fbdec8c
Show file tree
Hide file tree
Showing 3 changed files with 256 additions and 2 deletions.
62 changes: 60 additions & 2 deletions llvm/lib/Target/AIE/AIELegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,59 @@ bool AIELegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
llvm_unreachable("Un-expected custom legalization");
}

/// Legalizes a G_BUILD_VECTOR that produces a 32-bit vector by packing all of
/// its elements into a single s32 and bitcasting that scalar to the vector
/// type.
///
/// Every element is zero-extended to s32 (unless the elements are already 32
/// bits wide), shifted left to its lane offset (element index times element
/// width, so the first element ends up in the least significant bits) and
/// OR-ed into an accumulator that starts out as the constant 0.
///
/// \param Helper    Supplies the MachineIRBuilder used to emit the replacement
///                  instructions.
/// \param MI        The instruction being replaced. Operand 0 is the
///                  destination register and all remaining operands are the
///                  vector elements. It is erased before returning.
/// \param SourceReg Register whose type is asserted to be exactly 32 bits
///                  wide; it is not used for anything else.
/// \returns true unconditionally.
bool AIELegalizerInfo::pack32BitVector(LegalizerHelper &Helper,
                                       MachineInstr &MI,
                                       Register SourceReg) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // The type lookup lives inside the assert so that builds without assertions
  // are not left with a local variable that is never read.
  assert(MRI.getType(SourceReg).getSizeInBits() == 32 &&
         "cannot pack or unpack vectors larger or smaller than 32-bit");

  const Register DstReg = MI.getOperand(0).getReg();
  const LLT S32 = LLT::scalar(32);
  // The element width is fixed by the destination type, so query it once.
  const unsigned EltSizeInBits = MRI.getType(DstReg).getScalarSizeInBits();

  // Seed the accumulator with zero; every element is OR-ed into it in turn.
  Register PackedReg = MRI.createGenericVirtualRegister(S32);
  MIRBuilder.buildConstant(PackedReg, 0);

  unsigned Offset = 0;
  // uses() skips the destination operand since that is where we are writing
  // to; what remains are the vector elements in lane order.
  for (const MachineOperand &EltOp : MI.uses()) {
    Register EltReg = EltOp.getReg();

    // Widen narrow elements to the accumulator width. Zero-extension keeps
    // the bits above the element width clear so they cannot spill into the
    // neighbouring lanes once the value is OR-ed in.
    if (EltSizeInBits != 32) {
      const Register ExtReg = MRI.createGenericVirtualRegister(S32);
      MIRBuilder.buildZExt(ExtReg, EltReg);
      EltReg = ExtReg;
    }

    // Avoid a useless shift for the first element, since it doesn't get
    // optimized out in O0.
    const Register AccumulatorReg = MRI.createGenericVirtualRegister(S32);
    if (Offset != 0) {
      const MachineInstrBuilder ShiftAmount =
          MIRBuilder.buildConstant(S32, Offset);
      const MachineInstrBuilder Shifted =
          MIRBuilder.buildShl(S32, EltReg, ShiftAmount);
      MIRBuilder.buildOr(AccumulatorReg, PackedReg, Shifted);
    } else {
      MIRBuilder.buildOr(AccumulatorReg, PackedReg, EltReg);
    }

    PackedReg = AccumulatorReg;
    Offset += EltSizeInBits;
  }

  // Reinterpret the packed scalar as the requested vector type and drop the
  // original instruction.
  MIRBuilder.buildBitcast(DstReg, PackedReg);
  MI.eraseFromParent();
  return true;
}

bool AIELegalizerInfo::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
Expand All @@ -504,9 +557,14 @@ bool AIELegalizerInfo::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper,
const unsigned EltSize = DstVecEltTy.getScalarSizeInBits();
assert((EltSize == 8 || EltSize == 16 || EltSize == 32) &&
"non-existent integer size");
assert(DstVecSize > 64 && DstVecSize <= 1024 &&
"non-native vectors are not supported");
assert(DstVecSize == 32 || (DstVecSize > 64 && DstVecSize <= 1024 &&
"non-native vectors are not supported"));
assert(DstVecSize < 1024 && "vadd takes a 512-bit argument");

// A 32-bit vector can be stored as a packed 32-bit integer.
if (DstVecSize == 32)
return pack32BitVector(Helper, MI, DstReg);

// We are using an undef since we are building over multiple instructions
const TypeSize VecEltTySize = DstVecEltTy.getSizeInBits();
const LLT VecTy = LLT::fixed_vector(512 / VecEltTySize, DstVecEltTy);
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AIE/AIELegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_AIE_AIEMACHINELEGALIZER_H

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/Register.h"

namespace llvm {

Expand Down Expand Up @@ -48,6 +49,10 @@ class AIELegalizerInfo : public LegalizerInfo {
bool legalizeG_FPEXT(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeG_FABS(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeG_FADDSUB(LegalizerHelper &Helper, MachineInstr &MI) const;

// Helper functions for legalization
bool pack32BitVector(LegalizerHelper &Helper, MachineInstr &MI,
Register SourceReg) const;
};
} // end namespace llvm
#endif
191 changes: 191 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-build-vector.mir
Original file line number Diff line number Diff line change
Expand Up @@ -470,3 +470,194 @@ body: |
%0:_(p0) = G_FRAME_INDEX %stack.0
G_STORE %1(<4 x s32>), %0(p0) :: (store (<4 x s32>))
PseudoRET implicit $lr
...

---
name: test_build_vector_32_16
body: |
bb.1.entry:
; CHECK-LABEL: name: test_build_vector_32_16
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[C]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<2 x s16>)
%0:_(s16) = G_CONSTANT i16 1
%1:_(s16) = G_CONSTANT i16 2
%2:_(<2 x s16>) = G_BUILD_VECTOR %0(s16), %1(s16)
PseudoRET implicit $lr, implicit %2
...

---
name: test_build_vector_32_8
body: |
bb.1.entry:
; CHECK-LABEL: name: test_build_vector_32_8
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C4]], [[C]]
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C5]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C6]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C7]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s8) = G_CONSTANT i8 1
%1:_(s8) = G_CONSTANT i8 2
%2:_(s8) = G_CONSTANT i8 3
%3:_(s8) = G_CONSTANT i8 4
%4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8)
PseudoRET implicit $lr, implicit %4
...

---
name: test_build_vector_32_8_negative
body: |
bb.1.entry:
; CHECK-LABEL: name: test_build_vector_32_8_negative
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[COPY]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 254
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 253
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C5]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 252
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C7]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s8) = G_CONSTANT i8 -1
%1:_(s8) = G_CONSTANT i8 -2
%2:_(s8) = G_CONSTANT i8 -3
%3:_(s8) = G_CONSTANT i8 -4
%4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8)
PseudoRET implicit $lr, implicit %4
...

---
name: test_build_vector_32_8_mixed
body: |
bb.1.entry:
; CHECK-LABEL: name: test_build_vector_32_8_mixed
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[C]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 232
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C5]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 164
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C7]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s8) = G_CONSTANT i8 19
%1:_(s8) = G_CONSTANT i8 -24
%2:_(s8) = G_CONSTANT i8 33
%3:_(s8) = G_CONSTANT i8 -92
%4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8)
PseudoRET implicit $lr, implicit %4
...

---
name: test_build_vector_32_8_from_registers
body: |
bb.1.entry:
liveins: $r0, $r1, $r2, $r3
; CHECK-LABEL: name: test_build_vector_32_8_from_registers
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $r2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $r3
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[AND]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s32) = COPY $r0
%1:_(s32) = COPY $r1
%2:_(s32) = COPY $r2
%3:_(s32) = COPY $r3
%4:_(s8) = G_TRUNC %0(s32)
%5:_(s8) = G_TRUNC %1(s32)
%6:_(s8) = G_TRUNC %2(s32)
%7:_(s8) = G_TRUNC %3(s32)
%8:_(<4 x s8>) = G_BUILD_VECTOR %4(s8), %5(s8), %6(s8), %7(s8)
PseudoRET implicit $lr, implicit %8
...

---
name: test_build_vector_32_8_register_constant
body: |
bb.1.entry:
liveins: $r0, $r1
; CHECK-LABEL: name: test_build_vector_32_8_register_constant
; CHECK: liveins: $r0, $r1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[C]]
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C4]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C5]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<4 x s8>)
%0:_(s32) = COPY $r0
%1:_(s32) = COPY $r1
%2:_(s8) = G_CONSTANT i8 19
%3:_(s8) = G_TRUNC %0(s32)
%4:_(s8) = G_CONSTANT i8 33
%5:_(s8) = G_TRUNC %1(s32)
%6:_(<4 x s8>) = G_BUILD_VECTOR %2(s8), %3(s8), %4(s8), %5(s8)
PseudoRET implicit $lr, implicit %6

0 comments on commit fbdec8c

Please sign in to comment.