From e5296d3648ed08eba3e9b48f0323a177afe97d38 Mon Sep 17 00:00:00 2001 From: zhongyunde 00443407 Date: Mon, 20 Nov 2023 01:13:43 -0500 Subject: [PATCH] [AArch64] merge index address with large offset into base address A case for this transformation, https://gcc.godbolt.org/z/nhYcWq1WE ``` Fold mov w8, #56952 movk w8, #15, lsl #16 ldrb w0, [x0, x8] into add x0, x0, 1036288 ldrb w0, [x0, 3704] ``` Only support single use base, multi-use scenes are supported by PR74046. Fix https://github.com/llvm/llvm-project/issues/71917 TODO: support the multiple-uses with reusing common base offset. https://gcc.godbolt.org/z/Mr7srTjnz --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 62 +++++++++++++++++++ llvm/test/CodeGen/AArch64/arm64-addrmode.ll | 15 ++--- 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 2f49e9a6b37cc3..70ba5784edec2d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "AArch64ExpandImm.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -1074,6 +1075,41 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSigned return true; } +// 16-bit optionally shifted immediates are legal for single mov. +static bool isLegalSingleMOVImmediate(int64_t Immed) { + if (Immed == std::numeric_limits<int64_t>::min()) { + LLVM_DEBUG(dbgs() << "Illegal single mov imm " << Immed + << ": avoid UB for INT64_MIN\n"); + return false; + } + + // Calculate how many moves we will need to materialize this constant.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; + AArch64_IMM::expandMOVImm(Immed, 64, Insn); + return Insn.size() == 1; +} + +// Check whether an unsigned value is not in the immediate range of mov but in +// the immediate range of imm24. The "Size" argument is the size in bytes of the +// memory reference. +static bool isPreferredBaseAddrMode(const TargetLowering *TLI, int64_t ImmOff, + unsigned Size) { + if ((ImmOff & (Size - 1)) != 0 || ImmOff < 0) + return false; + + // If the immediate already can be encoded in mov, then just keep the existing + // logic. + if (isLegalSingleMOVImmediate(ImmOff)) + return false; + + // For an imm24, its low imm12 can be folded as the immediate of load or store, + // and its high part can be encoded in an add. + int64_t HighPart = ImmOff & ~0xfffULL; + if (TLI->isLegalAddImmediate(HighPart)) + return true; + return false; +} + /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. @@ -1115,6 +1151,24 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); return true; } + + // Prefer [Reg + imm] mode.
+ // ADD BaseReg, WideImmediate & 0x0fff000 + // LDR X2, [BaseReg, WideImmediate & 0x0fff] + SDValue LHS = N.getOperand(0); + if (isPreferredBaseAddrMode(TLI, RHSC, Size)) { + int64_t ImmOffUnScale = RHSC; + int64_t ImmOffLow = ImmOffUnScale & 0x0fff; + int64_t ImmOffHigh = RHSC - ImmOffLow; + SDValue ImmHighSDV = + CurDAG->getTargetConstant(ImmOffHigh >> 12, dl, MVT::i64); + Base = SDValue(CurDAG->getMachineNode( + AArch64::ADDXri, dl, MVT::i64, LHS, ImmHighSDV, + CurDAG->getTargetConstant(12, dl, MVT::i32)), + 0); + OffImm = CurDAG->getTargetConstant(ImmOffLow >> Scale, dl, MVT::i64); + return true; + } } } @@ -1356,6 +1410,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, return true; } + // Prefer [Reg + imm] mode, so skip these scenarios. + if (auto *OffsetC = dyn_cast<ConstantSDNode>(RHS)) { + int64_t ImmOff = (int64_t)OffsetC->getZExtValue(); + const TargetLowering *TLI = getTargetLowering(); + if (isPreferredBaseAddrMode(TLI, ImmOff, Size)) { + return false; + } + } // Match any non-shifted, non-extend, non-immediate add expression.
Base = LHS; Offset = RHS; diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll index 3d4749a7b8e7df..b0b26c8836d85f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll @@ -213,9 +213,8 @@ define void @t17(i64 %a) { define i32 @LdOffset_i8(ptr %a) { ; CHECK-LABEL: LdOffset_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #56952 // =0xde78 -; CHECK-NEXT: movk w8, #15, lsl #16 -; CHECK-NEXT: ldrb w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT: ldrb w0, [x8, #3704] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -226,9 +225,8 @@ define i32 @LdOffset_i8(ptr %a) { define i32 @LdOffset_i16(ptr %a) { ; CHECK-LABEL: LdOffset_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #48368 // =0xbcf0 -; CHECK-NEXT: movk w8, #31, lsl #16 -; CHECK-NEXT: ldrsh w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #507, lsl #12 // =2076672 +; CHECK-NEXT: ldrsh w0, [x8, #3312] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992 %val = load i16, ptr %arrayidx, align 2 @@ -239,9 +237,8 @@ define i32 @LdOffset_i16(ptr %a) { define i32 @LdOffset_i32(ptr %a) { ; CHECK-LABEL: LdOffset_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31200 // =0x79e0 -; CHECK-NEXT: movk w8, #63, lsl #16 -; CHECK-NEXT: ldr w0, [x0, x8] +; CHECK-NEXT: add x8, x0, #1015, lsl #12 // =4157440 +; CHECK-NEXT: ldr w0, [x8, #2528] ; CHECK-NEXT: ret %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992 %val = load i32, ptr %arrayidx, align 4