Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a mechanism to disable GEP->PHI InstCombiner. #231

Open
wants to merge 3 commits into
base: aie-public
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,11 @@ class TargetTransformInfo {
APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const;
/// Can be used to expose a target-specific instruction combining
/// decision related to the folding of GEP into PHIs, as this can
/// generate additional PHI nodes and pointer chains that can induce
/// negative side effects for targets with indexed loads.
bool isProfitableFoldGEPIntoPHI() const;
/// @}

/// \name Scalar Target Information
Expand Down Expand Up @@ -1855,6 +1860,7 @@ class TargetTransformInfo::Concept {
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) = 0;
virtual bool isProfitableFoldGEPIntoPHI() = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
Expand Down Expand Up @@ -2315,6 +2321,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
SimplifyAndSetOp);
}
bool isProfitableFoldGEPIntoPHI() override {
return Impl.isProfitableFoldGEPIntoPHI();
}
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ class TargetTransformInfoImplBase {
return std::nullopt;
}

bool isProfitableFoldGEPIntoPHI() { return true; }

void getUnrollingPreferences(Loop *, ScalarEvolution &,
TTI::UnrollingPreferences &,
OptimizationRemarkEmitter *) const {}
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
SimplifyAndSetOp);
}

bool isProfitableFoldGEPIntoPHI() {
return BaseT::isProfitableFoldGEPIntoPHI();
}

virtual std::optional<unsigned>
getCacheSize(TargetTransformInfo::CacheLevel Level) const {
return std::optional<unsigned>(
Expand Down
16 changes: 10 additions & 6 deletions llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its
// affiliates
//
//===----------------------------------------------------------------------===//
/// \file
///
Expand Down Expand Up @@ -45,11 +48,6 @@ class TargetTransformInfo;
/// This class provides both the logic to recursively visit instructions and
/// combine them.
class LLVM_LIBRARY_VISIBILITY InstCombiner {
/// Only used to call target specific intrinsic combining.
/// It must **NOT** be used for any other purpose, as InstCombine is a
/// target-independent canonicalization transform.
TargetTransformInfo &TTI;

public:
/// Maximum size of array considered when transforming.
uint64_t MaxArraySizeForCombine = 0;
Expand All @@ -60,6 +58,12 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
BuilderTy &Builder;

protected:
/// Only used to call target-specific intrinsic combining and filtering
/// of non-profitable combining cases tied to target needs.
/// It must **NOT** be used for any other purpose, as InstCombine is a
/// target-independent canonicalization transform.
TargetTransformInfo &TTI;

/// A worklist of the instructions that need to be simplified.
InstructionWorklist &Worklist;

Expand Down Expand Up @@ -99,7 +103,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
DominatorTree &DT, OptimizationRemarkEmitter &ORE,
BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI,
ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
: TTI(TTI), Builder(Builder), Worklist(Worklist),
: Builder(Builder), TTI(TTI), Worklist(Worklist),
MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL),
SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true,
/*CanUseUndef*/ true, &DC),
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,10 @@ std::optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
SimplifyAndSetOp);
}

bool TargetTransformInfo::isProfitableFoldGEPIntoPHI() const {
return TTIImpl->isProfitableFoldGEPIntoPHI();
}

void TargetTransformInfo::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) const {
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AIE/AIE2TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ class AIE2TTIImpl : public BasicTTIImplBase<AIE2TTIImpl> {
bool isProfitableOuterLSR(const Loop &L) const;
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;

// By returning false here, we will prevent the following type
// of code from reaching the backend when GEPs are used as incoming values:
// for.body: ; preds = %for.body, %for.preheader
// %phi0 = phi ptr [ %in0, %for.body ], [ %out0, %for.preheader ]
// %phi1 = phi ptr [ %phi0, %for.body ], [ %out1, %for.preheader ]
// This type of code can lead to additional pointer arithmetics and
// and pointer moves (especially due to the pre-pipeliner).
bool isProfitableFoldGEPIntoPHI() const { return false; }
};

} // end namespace llvm
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its
// affiliates
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitPHINode function.
Expand All @@ -15,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -869,7 +873,7 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {

Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));

if (isa<GetElementPtrInst>(FirstInst))
if (TTI.isProfitableFoldGEPIntoPHI() && isa<GetElementPtrInst>(FirstInst))
return foldPHIArgGEPIntoPHI(PN);
if (isa<LoadInst>(FirstInst))
return foldPHIArgLoadIntoPHI(PN);
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/aie2-prevent-gep-phi-folding.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
; RUN: opt -mtriple=aie2 -passes=instcombine -S < %s | FileCheck %s

%struct = type { i32, i32 }

define i32 @dontFoldGEPs(ptr %dm, i1 %arg4, i64 %arg9, i64 %arg19) {
; CHECK-LABEL: define i32 @dontFoldGEPs(
; CHECK-SAME: ptr [[DM:%.*]], i1 [[ARG4:%.*]], i64 [[ARG9:%.*]], i64 [[ARG19:%.*]]) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DM]], align 8
; CHECK-NEXT: br i1 [[ARG4]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[ARG9]] to i20
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT2:%.*]], ptr [[TMP1]], i20 [[TMP0]]
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[ARG19]] to i20
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT2]], ptr [[TMP1]], i20 [[TMP2]]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[TMP10]], [[BB1]] ], [ [[TMP4]], [[BB2]] ]
; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[PHI]], align 4
; CHECK-NEXT: ret i32 [[TMP25]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. Probably the combine is better for very local alias analysis that doesn't want to follow too many PHI nodes

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In our case the PHI in bb3 dominates one of the GEPs?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In our case, one GEP should reach the load. The combiner basically does the following for this case:

%struct = type { i32, i32 }

define i32 @dontFoldGEPs(ptr %dm, i1 %arg4, i64 %arg9, i64 %arg19) {
bb:
  %0 = load ptr, ptr %dm, align 8
  br i1 %arg4, label %bb1, label %bb2

bb1:                                              ; preds = %bb
  %1 = trunc i64 %arg9 to i20
  br label %bb3

bb2:                                              ; preds = %bb
  %2 = trunc i64 %arg19 to i20
  br label %bb3

bb3:                                              ; preds = %bb2, %bb1
  %.pn = phi i20 [ %1, %bb1 ], [ %2, %bb2 ]
  %phi = getelementptr inbounds %struct, ptr %0, i20 %.pn
  %3 = load i32, ptr %phi, align 4
  ret i32 %3
}

;
bb:
%0 = load ptr, ptr %dm, align 8
br i1 %arg4, label %bb1, label %bb2

bb1:
%1 = getelementptr inbounds %struct, ptr %0, i64 %arg9
br label %bb3

bb2:
%2 = getelementptr inbounds %struct, ptr %0, i64 %arg19
br label %bb3

bb3:
%phi = phi ptr [ %1, %bb1 ], [ %2, %bb2 ]
%3 = load i32, ptr %phi, align 4
ret i32 %3
}
Loading