From 74c6895b39e37b8d82993f862fe6c943da45962b Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 21 Oct 2021 15:32:12 +0100 Subject: [PATCH] [RISCV] Fix missing cross-block VSETVLI insertion This patch fixes a codegen bug, the test for which was introduced in D112223. When merging VSETVLIInfo across blocks, if the 'exit' VSETVLIInfo produced by a block is found to be compatible with the VSETVLIInfo computed as the intersection of the 'exit' VSETVLIInfo produced by the block's predecessors, that blocks' 'exit' info is discarded and the intersected value is taken in its place. However, we have one authority on what constitutes VSETVLIInfo compatibility and we are using it in two different contexts. Compatibility is used in one context to elide VSETVLIs between straight-line vector instructions. But compatibility when evaluated between two blocks' exit infos ignores any info produced *inside* each respective block before the exit points. As such it does not guarantee that a block will not produce a VSETVLI which is incompatible with the 'previous' block. As such, we must ensure that any merging of VSETVLIInfo is performed using some notion of "strict" compatibility. I've defined this as a full vtype match, but this is perhaps too pessimistic. Given that test coverage in this regard is lacking -- the only change is in the failing test -- I think this is a good starting point. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D112228 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 13 +++++++++---- .../CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir | 5 +---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 3a7c0dfc9b703..c4a63e6e1f02b 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -181,7 +181,7 @@ class VSETVLIInfo { // Determine whether the vector instructions requirements represented by // InstrInfo are compatible with the previous vsetvli instruction represented // by this. - bool isCompatible(const VSETVLIInfo &InstrInfo) const { + bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const { assert(isValid() && InstrInfo.isValid() && "Can't compare invalid VSETVLIInfos"); assert(!InstrInfo.SEWLMULRatioOnly && @@ -196,7 +196,8 @@ class VSETVLIInfo { // If the instruction doesn't need an AVLReg and the SEW matches, consider // it compatible. - if (InstrInfo.hasAVLReg() && InstrInfo.AVLReg == RISCV::NoRegister) { + if (!Strict && InstrInfo.hasAVLReg() && + InstrInfo.AVLReg == RISCV::NoRegister) { if (SEW == InstrInfo.SEW) return true; } @@ -209,6 +210,10 @@ class VSETVLIInfo { if (hasSameVTYPE(InstrInfo)) return true; + // Strict matches must ensure a full VTYPE match. + if (Strict) + return false; + // If this is a mask reg operation, it only cares about VLMAX. // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger // than "InstrInfo". @@ -317,7 +322,7 @@ class VSETVLIInfo { // If the change is compatible with the input, we won't create a VSETVLI // and should keep the predecessor. - if (isCompatible(Other)) + if (isCompatible(Other, /*Strict*/ true)) return *this; // Otherwise just use whatever is in this block. @@ -550,7 +555,7 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo) { - if (CurInfo.isCompatible(Require)) + if (CurInfo.isCompatible(Require, /*Strict*/ false)) return false; // We didn't find a compatible value. If our AVL is a virtual register, diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir index 0110ada5a2045..81a52ee889191 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir @@ -442,10 +442,6 @@ body: | ... --- -# FIXME: We incorrectly merge info for %bb.1 into that for %bb.0, leading us to -# skip a vsetvli for the PseudoVADD_VX_MF2 in %bb.3. In fact, the -# PseudoVPOPC_M_B1 is given a vsetvli (e8,mf8) which, if control flow flows -# through %bb.1 to %bb.3, misconfigures the PseudoVADD_VX_MF2. name: vsetvli_vpopc tracksRegLiveness: true registers: @@ -495,6 +491,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[DEF]], %bb.1, [[LWU]], %bb.2 + ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 87, implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: [[PseudoVADD_VX_MF2_:%[0-9]+]]:vr = nsw PseudoVADD_VX_MF2 [[PseudoVLE32_V_MF2_MASK]], [[PHI]], -1, 5, implicit $vl, implicit $vtype ; CHECK-NEXT: $v0 = COPY [[PseudoVADD_VX_MF2_]] ; CHECK-NEXT: PseudoRET implicit $v0