Skip to content

Commit

Permalink
[AVR] Fix an issue of writing 16-bit ports
Browse files Browse the repository at this point in the history
For 16-bit ports, the normal devices reqiure writing high byte first
and then low byte. But the XMEGA devices require the reverse order.

Fixes llvm/llvm-project#58395

Reviewed By: aykevl, jacquesguan

Differential Revision: https://reviews.llvm.org/D141752
  • Loading branch information
benshi001 authored and veselypeta committed Aug 21, 2024
2 parents 25481c7 + 2a52876 commit 347acd6
Show file tree
Hide file tree
Showing 21 changed files with 225 additions and 133 deletions.
11 changes: 9 additions & 2 deletions llvm/lib/Target/AVR/AVRDevices.td
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ def FeatureTinyEncoding
"The device has Tiny core specific "
"instruction encodings">;

// When writing a 16-bit port or storing a 16-bit word, do the low byte first.
def FeatureLowByteFirst
: SubtargetFeature<"lowbytefirst", "m_hasLowByteFirst", "true",
"Do the low byte first when writing a 16-bit port or "
"storing a 16-bit word">;

// The device has CPU registers mapped in data address space
def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR", "true",
"The device has CPU registers "
Expand Down Expand Up @@ -195,14 +201,15 @@ def FamilyXMEGA3 : Family<"xmega3",
[FamilyAVR0, FeatureLPM, FeatureIJMPCALL,
FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL,
FeatureMultiplication, FeatureMOVW, FeatureLPMX,
FeatureBREAK]>;
FeatureBREAK, FeatureLowByteFirst]>;

def FamilyXMEGA : Family<"xmega",
[FamilyAVR0, FeatureLPM, FeatureIJMPCALL,
FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL,
FeatureMultiplication, FeatureMOVW, FeatureLPMX,
FeatureSPM, FeatureBREAK, FeatureEIJMPCALL,
FeatureSPMX, FeatureDES, FeatureELPM, FeatureELPMX]>;
FeatureSPMX, FeatureDES, FeatureELPM, FeatureELPMX,
FeatureLowByteFirst]>;

def FamilyXMEGAU : Family<"xmegau", [FamilyXMEGA, FeatureRMW]>;

Expand Down
140 changes: 91 additions & 49 deletions llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,45 +1057,66 @@ bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {

template <>
bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg;
Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
unsigned OpLo = AVR::STSKRr;
unsigned OpHi = AVR::STSKRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);

// Write the high byte first in case this address belongs to a special
// I/O address with a special temporary register.
auto MIBHI = buildMI(MBB, MBBI, OpHi);
auto MIBLO = buildMI(MBB, MBBI, OpLo);
auto MIB0 = buildMI(MBB, MBBI, AVR::STSKRr);
auto MIB1 = buildMI(MBB, MBBI, AVR::STSKRr);

switch (MI.getOperand(0).getType()) {
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MI.getOperand(0).getGlobal();
int64_t Offs = MI.getOperand(0).getOffset();
unsigned TF = MI.getOperand(0).getTargetFlags();

MIBLO.addGlobalAddress(GV, Offs, TF);
MIBHI.addGlobalAddress(GV, Offs + 1, TF);
if (STI.hasLowByteFirst()) {
// Write the low byte first for XMEGA devices.
MIB0.addGlobalAddress(GV, Offs, TF);
MIB1.addGlobalAddress(GV, Offs + 1, TF);
} else {
// Write the high byte first for traditional devices.
MIB0.addGlobalAddress(GV, Offs + 1, TF);
MIB1.addGlobalAddress(GV, Offs, TF);
}

break;
}
case MachineOperand::MO_Immediate: {
unsigned Imm = MI.getOperand(0).getImm();

MIBLO.addImm(Imm);
MIBHI.addImm(Imm + 1);
if (STI.hasLowByteFirst()) {
// Write the low byte first for XMEGA devices.
MIB0.addImm(Imm);
MIB1.addImm(Imm + 1);
} else {
// Write the high byte first for traditional devices.
MIB0.addImm(Imm + 1);
MIB1.addImm(Imm);
}

break;
}
default:
llvm_unreachable("Unknown operand type!");
}

MIBLO.addReg(SrcLoReg, getKillRegState(SrcIsKill));
MIBHI.addReg(SrcHiReg, getKillRegState(SrcIsKill));

MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
if (STI.hasLowByteFirst()) {
// Write the low byte first for XMEGA devices.
MIB0.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
MIB1.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
} else {
// Write the high byte first for traditional devices.
MIB0.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
MIB1.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
}

MI.eraseFromParent();
return true;
Expand Down Expand Up @@ -1126,16 +1147,27 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
} else {
Register SrcLoReg, SrcHiReg;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
buildMI(MBB, MBBI, AVR::STPtrRr)
.addReg(DstReg, getUndefRegState(DstIsUndef))
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());

buildMI(MBB, MBBI, AVR::STDPtrQRr)
.addReg(DstReg, getUndefRegState(DstIsUndef))
.addImm(1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
if (STI.hasLowByteFirst()) {
buildMI(MBB, MBBI, AVR::STPtrRr)
.addReg(DstReg, getUndefRegState(DstIsUndef))
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
buildMI(MBB, MBBI, AVR::STDPtrQRr)
.addReg(DstReg, getUndefRegState(DstIsUndef))
.addImm(1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
} else {
buildMI(MBB, MBBI, AVR::STDPtrQRr)
.addReg(DstReg, getUndefRegState(DstIsUndef))
.addImm(1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
buildMI(MBB, MBBI, AVR::STPtrRr)
.addReg(DstReg, getUndefRegState(DstIsUndef))
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
}
}

MI.eraseFromParent();
Expand Down Expand Up @@ -1252,23 +1284,32 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
.addImm(Imm + 2);
}
} else {
unsigned OpLo = AVR::STDPtrQRr;
unsigned OpHi = AVR::STDPtrQRr;
Register SrcLoReg, SrcHiReg;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);

auto MIBLO = buildMI(MBB, MBBI, OpLo)
.addReg(DstReg)
.addImm(Imm)
.addReg(SrcLoReg, getKillRegState(SrcIsKill));

auto MIBHI = buildMI(MBB, MBBI, OpHi)
.addReg(DstReg, getKillRegState(DstIsKill))
.addImm(Imm + 1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));

MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
if (STI.hasLowByteFirst()) {
buildMI(MBB, MBBI, AVR::STDPtrQRr)
.addReg(DstReg)
.addImm(Imm)
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
buildMI(MBB, MBBI, AVR::STDPtrQRr)
.addReg(DstReg, getKillRegState(DstIsKill))
.addImm(Imm + 1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
} else {
buildMI(MBB, MBBI, AVR::STDPtrQRr)
.addReg(DstReg)
.addImm(Imm + 1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
buildMI(MBB, MBBI, AVR::STDPtrQRr)
.addReg(DstReg, getKillRegState(DstIsKill))
.addImm(Imm)
.addReg(SrcLoReg, getKillRegState(SrcIsKill))
.setMemRefs(MI.memoperands());
}
}

MI.eraseFromParent();
Expand Down Expand Up @@ -1347,27 +1388,28 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {

template <>
bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg;
unsigned Imm = MI.getOperand(0).getImm();
Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
unsigned OpLo = AVR::OUTARr;
unsigned OpHi = AVR::OUTARr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);

// Since we add 1 to the Imm value for the high byte below, and 63 is the
// highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Address is out of range");

// 16 bit I/O writes need the high byte first
auto MIBHI = buildMI(MBB, MBBI, OpHi)
.addImm(Imm + 1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));

auto MIBLO = buildMI(MBB, MBBI, OpLo)
.addImm(Imm)
.addReg(SrcLoReg, getKillRegState(SrcIsKill));
// 16 bit I/O writes need the high byte first on normal AVR devices,
// and in reverse order for the XMEGA/XMEGA3/XMEGAU families.
auto MIBHI = buildMI(MBB, MBBI, AVR::OUTARr)
.addImm(STI.hasLowByteFirst() ? Imm : Imm + 1)
.addReg(STI.hasLowByteFirst() ? SrcLoReg : SrcHiReg,
getKillRegState(SrcIsKill));
auto MIBLO = buildMI(MBB, MBBI, AVR::OUTARr)
.addImm(STI.hasLowByteFirst() ? Imm + 1 : Imm)
.addReg(STI.hasLowByteFirst() ? SrcHiReg : SrcLoReg,
getKillRegState(SrcIsKill));

MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AVR/AVRISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,9 +1127,15 @@ bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
return false;
} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
if (AVR::isProgramMemoryAccess(ST)) {
// We can not store to program memory.
if (AVR::isProgramMemoryAccess(ST))
return false;
// Since the high byte need to be stored first, we can not emit
// i16 post increment store like:
// st X+, r24
// st X+, r25
if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
return false;
}
} else {
return false;
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AVR/AVRSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class AVRSubtarget : public AVRGenSubtargetInfo {
bool hasBREAK() const { return m_hasBREAK; }
bool hasTinyEncoding() const { return m_hasTinyEncoding; }
bool hasMemMappedGPR() const { return m_hasMemMappedGPR; }
bool hasLowByteFirst() const { return m_hasLowByteFirst; }

uint8_t getIORegisterOffset() const { return hasMemMappedGPR() ? 0x20 : 0x0; }

Expand Down Expand Up @@ -134,6 +135,7 @@ class AVRSubtarget : public AVRGenSubtargetInfo {
bool m_supportsMultiplication = false;
bool m_hasBREAK = false;
bool m_hasTinyEncoding = false;
bool m_hasLowByteFirst = false;
bool m_hasMemMappedGPR = false;

// Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AVR/PR37143.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

; CHECK: ld {{r[0-9]+}}, [[PTR:[XYZ]]]
; CHECK: ldd {{r[0-9]+}}, [[PTR]]+1
; CHECK: st [[PTR2:[XYZ]]], {{r[0-9]+}}
; CHECK: std [[PTR2]]+1, {{r[0-9]+}}
; CHECK: std [[PTR2:[XYZ]]]+1, {{r[0-9]+}}
; CHECK: st [[PTR2]], {{r[0-9]+}}
define void @load_store_16(i16* nocapture %ptr) local_unnamed_addr #1 {
entry:
%0 = load i16, i16* %ptr, align 2
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AVR/alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ define i16 @alloca_write(i16 %x) {
entry:
; CHECK-LABEL: alloca_write:
; Small offset here
; CHECK: std Y+23, {{.*}}
; CHECK: std Y+24, {{.*}}
; CHECK: std Y+23, {{.*}}
; Big offset here
; CHECK: adiw r28, 57
; CHECK: std Y+62, {{.*}}
; CHECK: std Y+63, {{.*}}
; CHECK: std Y+62, {{.*}}
; CHECK: sbiw r28, 57
%p = alloca [15 x i16]
%k = alloca [14 x i16]
Expand All @@ -71,8 +71,8 @@ define void @alloca_write_huge() {
; CHECK-LABEL: alloca_write_huge:
; CHECK: subi r28, 41
; CHECK: sbci r29, 255
; CHECK: std Y+62, {{.*}}
; CHECK: std Y+63, {{.*}}
; CHECK: std Y+62, {{.*}}
; CHECK: subi r28, 215
; CHECK: sbci r29, 0
%k = alloca [140 x i16]
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AVR/atomics/load16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ define i16 @atomic_load_cmp_swap16(i16* %foo) {
; CHECK-NEXT: ldd [[RDH:r[0-9]+]], [[RR]]+1
; CHECK-NEXT: add [[RR1L:r[0-9]+]], [[RDL]]
; CHECK-NEXT: adc [[RR1H:r[0-9]+]], [[RDH]]
; CHECK-NEXT: st [[RR]], [[RR1L]]
; CHECK-NEXT: std [[RR]]+1, [[RR1H]]
; CHECK-NEXT: st [[RR]], [[RR1L]]
; CHECK-NEXT: out 63, r0
define i16 @atomic_load_add16(i16* %foo) {
%val = atomicrmw add i16* %foo, i16 13 seq_cst
Expand All @@ -49,8 +49,8 @@ define i16 @atomic_load_add16(i16* %foo) {
; CHECK-NEXT: movw [[TMPL:r[0-9]+]], [[RDL]]
; CHECK-NEXT: sub [[TMPL]], [[RR1L:r[0-9]+]]
; CHECK-NEXT: sbc [[TMPH:r[0-9]+]], [[RR1H:r[0-9]+]]
; CHECK-NEXT: st [[RR]], [[TMPL]]
; CHECK-NEXT: std [[RR]]+1, [[TMPH]]
; CHECK-NEXT: st [[RR]], [[TMPL]]
; CHECK-NEXT: out 63, r0
define i16 @atomic_load_sub16(i16* %foo) {
%val = atomicrmw sub i16* %foo, i16 13 seq_cst
Expand All @@ -64,8 +64,8 @@ define i16 @atomic_load_sub16(i16* %foo) {
; CHECK-NEXT: ldd [[RDH:r[0-9]+]], [[RR]]+1
; CHECK-NEXT: and [[RD1L:r[0-9]+]], [[RDL]]
; CHECK-NEXT: and [[RD1H:r[0-9]+]], [[RDH]]
; CHECK-NEXT: st [[RR]], [[RD1L]]
; CHECK-NEXT: std [[RR]]+1, [[RD1H]]
; CHECK-NEXT: st [[RR]], [[RD1L]]
; CHECK-NEXT: out 63, r0
define i16 @atomic_load_and16(i16* %foo) {
%val = atomicrmw and i16* %foo, i16 13 seq_cst
Expand All @@ -79,8 +79,8 @@ define i16 @atomic_load_and16(i16* %foo) {
; CHECK-NEXT: ldd [[RDH:r[0-9]+]], [[RR]]+1
; CHECK-NEXT: or [[RD1L:r[0-9]+]], [[RDL]]
; CHECK-NEXT: or [[RD1H:r[0-9]+]], [[RDH]]
; CHECK-NEXT: st [[RR]], [[RD1L]]
; CHECK-NEXT: std [[RR]]+1, [[RD1H]]
; CHECK-NEXT: st [[RR]], [[RD1L]]
; CHECK-NEXT: out 63, r0
define i16 @atomic_load_or16(i16* %foo) {
%val = atomicrmw or i16* %foo, i16 13 seq_cst
Expand All @@ -94,8 +94,8 @@ define i16 @atomic_load_or16(i16* %foo) {
; CHECK-NEXT: ldd [[RDH:r[0-9]+]], [[RR]]+1
; CHECK-NEXT: eor [[RD1L:r[0-9]+]], [[RDL]]
; CHECK-NEXT: eor [[RD1H:r[0-9]+]], [[RDH]]
; CHECK-NEXT: st [[RR]], [[RD1L]]
; CHECK-NEXT: std [[RR]]+1, [[RD1H]]
; CHECK-NEXT: st [[RR]], [[RD1L]]
; CHECK-NEXT: out 63, r0
define i16 @atomic_load_xor16(i16* %foo) {
%val = atomicrmw xor i16* %foo, i16 13 seq_cst
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AVR/atomics/store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ define void @atomic_store8(i8* %foo) {
; CHECK-LABEL: atomic_store16
; CHECK: in r0, 63
; CHECK-NEXT: cli
; CHECK-NEXT: st [[RD:(X|Y|Z)]], [[RR:r[0-9]+]]
; CHECK-NEXT: std [[RD]]+1, [[RR:r[0-9]+]]
; CHECK-NEXT: std [[RD:(X|Y|Z)]]+1, [[RR:r[0-9]+]]
; CHECK-NEXT: st [[RD]], [[RR:r[0-9]+]]
; CHECK-NEXT: out 63, r0
define void @atomic_store16(i16* %foo) {
store atomic i16 1, i16* %foo unordered, align 2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AVR/atomics/store16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
; CHECK-LABEL: atomic_store16
; CHECK: in r0, 63
; CHECK-NEXT: cli
; CHECK-NEXT: st [[RD:(X|Y|Z)]], [[RR:r[0-9]+]]
; CHECK-NEXT: std [[RD:(X|Y|Z)]]+1, [[RR:r[0-9]+]]
; CHECK-NEXT: st [[RD:(X|Y|Z)]], [[RR:r[0-9]+]]
; CHECK-NEXT: out 63, r0
define void @atomic_store16(i16* %foo) {
store atomic i16 1, i16* %foo unordered, align 2
Expand All @@ -14,8 +14,8 @@ define void @atomic_store16(i16* %foo) {
; CHECK-LABEL: monotonic
; CHECK: in r0, 63
; CHECK-NEXT: cli
; CHECK-NEXT: st Z, r24
; CHECK-NEXT: std Z+1, r25
; CHECK-NEXT: st Z, r24
; CHECK-NEXT: out 63, r0
define void @monotonic(i16) {
entry-block:
Expand Down
Loading

0 comments on commit 347acd6

Please sign in to comment.