Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR for llvm/llvm-project#79137 #79561

Merged
tstellar merged 2 commits into llvm:release/18.x on
Feb 3, 2024
Merged

Conversation

github-actions[bot]
Copy link

resolves #79137

@github-actions github-actions bot added this to the LLVM 18.X Release milestone Jan 26, 2024
Copy link
Author

@fhahn What do you think about merging this PR to the release branch?

@llvmbot
Copy link

llvmbot commented Jan 28, 2024

@llvm/pr-subscribers-llvm-transforms

Author: None (github-actions[bot])

Changes

resolves llvm/llvm-project#79137


Full diff: https://github.com/llvm/llvm-project/pull/79561.diff

3 Files Affected:

  • (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+46-7)
  • (modified) llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll (+10-1)
  • (added) llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll (+78)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7e67c90152829dc..dd6b88fee415a7a 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -657,16 +657,18 @@ class AccessAnalysis {
 
   AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                  MemoryDepChecker::DepCandidates &DA,
-                 PredicatedScalarEvolution &PSE)
-      : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
+                 PredicatedScalarEvolution &PSE,
+                 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
+      : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
+        LoopAliasScopes(LoopAliasScopes) {
     // We're analyzing dependences across loop iterations.
     BAA.enableCrossIterationMode();
   }
 
   /// Register a load  and whether it is only read from.
   void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
-    Value *Ptr = const_cast<Value*>(Loc.Ptr);
-    AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
+    Value *Ptr = const_cast<Value *>(Loc.Ptr);
+    AST.add(adjustLoc(Loc));
     Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
     if (IsReadOnly)
       ReadOnlyPtr.insert(Ptr);
@@ -674,8 +676,8 @@ class AccessAnalysis {
 
   /// Register a store.
   void addStore(MemoryLocation &Loc, Type *AccessTy) {
-    Value *Ptr = const_cast<Value*>(Loc.Ptr);
-    AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
+    Value *Ptr = const_cast<Value *>(Loc.Ptr);
+    AST.add(adjustLoc(Loc));
     Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
   }
 
@@ -731,6 +733,32 @@ class AccessAnalysis {
 private:
   typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
 
+  /// Adjust the MemoryLocation so that it represents accesses to this
+  /// location across all iterations, rather than a single one.
+  MemoryLocation adjustLoc(MemoryLocation Loc) const {
+    // The accessed location varies within the loop, but remains within the
+    // underlying object.
+    Loc.Size = LocationSize::beforeOrAfterPointer();
+    Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
+    Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
+    return Loc;
+  }
+
+  /// Drop alias scopes that are only valid within a single loop iteration.
+  MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
+    if (!ScopeList)
+      return nullptr;
+
+    // For the sake of simplicity, drop the whole scope list if any scope is
+    // iteration-local.
+    if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
+          return LoopAliasScopes.contains(cast<MDNode>(Scope));
+        }))
+      return nullptr;
+
+    return ScopeList;
+  }
+
   /// Go over all memory access and check whether runtime pointer checks
   /// are needed and build sets of dependency check candidates.
   void processMemAccesses();
@@ -775,6 +803,10 @@ class AccessAnalysis {
   PredicatedScalarEvolution &PSE;
 
   DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
+
+  /// Alias scopes that are declared inside the loop, and as such not valid
+  /// across iterations.
+  SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
 };
 
 } // end anonymous namespace
@@ -2283,6 +2315,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   // Holds the Load and Store instructions.
   SmallVector<LoadInst *, 16> Loads;
   SmallVector<StoreInst *, 16> Stores;
+  SmallPtrSet<MDNode *, 8> LoopAliasScopes;
 
   // Holds all the different accesses in the loop.
   unsigned NumReads = 0;
@@ -2326,6 +2359,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
       if (HasComplexMemInst)
         continue;
 
+      // Record alias scopes defined inside the loop.
+      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+        for (Metadata *Op : Decl->getScopeList()->operands())
+          LoopAliasScopes.insert(cast<MDNode>(Op));
+
       // Many math library functions read the rounding mode. We will only
       // vectorize a loop if it contains known function calls that don't set
       // the flag. Therefore, it is safe to ignore this read from memory.
@@ -2407,7 +2445,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   }
 
   MemoryDepChecker::DepCandidates DependentAccesses;
-  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
+  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
+                          LoopAliasScopes);
 
   // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
   // multiple times on the same object. If the ptr is accessed twice, once
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll b/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
index 98bb5f99a40a1e2..fb296f5089422db 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
@@ -7,8 +7,17 @@
 define void @test_scope_in_loop(ptr %arg, i64 %num) {
 ; CHECK-LABEL: 'test_scope_in_loop'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Backward loop carried data dependence.
 ; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Backward:
+; CHECK-NEXT:            %load.prev = load i8, ptr %prev.ptr, align 1, !alias.scope !0, !noalias !3 ->
+; CHECK-NEXT:            store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
+; CHECK-EMPTY:
+; CHECK-NEXT:        Forward:
+; CHECK-NEXT:            %load.cur = load i8, ptr %cur.ptr, align 1, !alias.scope !3 ->
+; CHECK-NEXT:            store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
+; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll
new file mode 100644
index 000000000000000..5c85c0d21f59f74
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -O3 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define internal void @acc(ptr noalias noundef %val, ptr noalias noundef %prev) {
+entry:
+  %0 = load i8, ptr %prev, align 1
+  %conv = zext i8 %0 to i32
+  %1 = load i8, ptr %val, align 1
+  %conv1 = zext i8 %1 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i8
+  store i8 %conv2, ptr %val, align 1
+  ret void
+}
+
+; This loop should not get vectorized.
+define void @accsum(ptr noundef %vals, i64 noundef %num) #0 {
+; CHECK-LABEL: define void @accsum(
+; CHECK-SAME: ptr nocapture noundef [[VALS:%.*]], i64 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i64 [[NUM]], 1
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[LOAD_INITIAL:%.*]] = load i8, ptr [[VALS]], align 1
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[STORE_FORWARDED:%.*]] = phi i8 [ [[LOAD_INITIAL]], [[FOR_BODY_PREHEADER]] ], [ [[ADD_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[I_02]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT:    [[ADD_I]] = add i8 [[TMP0]], [[STORE_FORWARDED]]
+; CHECK-NEXT:    store i8 [[ADD_I]], ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]], !noalias [[META3]]
+; CHECK-NEXT:    [[INC]] = add nuw i64 [[I_02]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUM]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i64 [ 1, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ult i64 %i.0, %num
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  br label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i8, ptr %vals, i64 %i.0
+  %sub = sub i64 %i.0, 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %vals, i64 %sub
+  call void @acc(ptr noundef %arrayidx, ptr noundef %arrayidx1)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond.cleanup
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87"}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"acc: %val"}
+; CHECK: [[META2]] = distinct !{[[META2]], !"acc"}
+; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"acc: %prev"}
+;.

@llvmbot
Copy link

llvmbot commented Jan 28, 2024

@llvm/pr-subscribers-llvm-analysis

Author: None (github-actions[bot])

Changes

resolves llvm/llvm-project#79137


Full diff: https://github.com/llvm/llvm-project/pull/79561.diff

3 Files Affected:

  • (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+46-7)
  • (modified) llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll (+10-1)
  • (added) llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll (+78)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7e67c90152829dc..dd6b88fee415a7a 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -657,16 +657,18 @@ class AccessAnalysis {
 
   AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                  MemoryDepChecker::DepCandidates &DA,
-                 PredicatedScalarEvolution &PSE)
-      : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
+                 PredicatedScalarEvolution &PSE,
+                 SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
+      : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
+        LoopAliasScopes(LoopAliasScopes) {
     // We're analyzing dependences across loop iterations.
     BAA.enableCrossIterationMode();
   }
 
   /// Register a load  and whether it is only read from.
   void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
-    Value *Ptr = const_cast<Value*>(Loc.Ptr);
-    AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
+    Value *Ptr = const_cast<Value *>(Loc.Ptr);
+    AST.add(adjustLoc(Loc));
     Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
     if (IsReadOnly)
       ReadOnlyPtr.insert(Ptr);
@@ -674,8 +676,8 @@ class AccessAnalysis {
 
   /// Register a store.
   void addStore(MemoryLocation &Loc, Type *AccessTy) {
-    Value *Ptr = const_cast<Value*>(Loc.Ptr);
-    AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
+    Value *Ptr = const_cast<Value *>(Loc.Ptr);
+    AST.add(adjustLoc(Loc));
     Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
   }
 
@@ -731,6 +733,32 @@ class AccessAnalysis {
 private:
   typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
 
+  /// Adjust the MemoryLocation so that it represents accesses to this
+  /// location across all iterations, rather than a single one.
+  MemoryLocation adjustLoc(MemoryLocation Loc) const {
+    // The accessed location varies within the loop, but remains within the
+    // underlying object.
+    Loc.Size = LocationSize::beforeOrAfterPointer();
+    Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
+    Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
+    return Loc;
+  }
+
+  /// Drop alias scopes that are only valid within a single loop iteration.
+  MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
+    if (!ScopeList)
+      return nullptr;
+
+    // For the sake of simplicity, drop the whole scope list if any scope is
+    // iteration-local.
+    if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
+          return LoopAliasScopes.contains(cast<MDNode>(Scope));
+        }))
+      return nullptr;
+
+    return ScopeList;
+  }
+
   /// Go over all memory access and check whether runtime pointer checks
   /// are needed and build sets of dependency check candidates.
   void processMemAccesses();
@@ -775,6 +803,10 @@ class AccessAnalysis {
   PredicatedScalarEvolution &PSE;
 
   DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
+
+  /// Alias scopes that are declared inside the loop, and as such not valid
+  /// across iterations.
+  SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
 };
 
 } // end anonymous namespace
@@ -2283,6 +2315,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   // Holds the Load and Store instructions.
   SmallVector<LoadInst *, 16> Loads;
   SmallVector<StoreInst *, 16> Stores;
+  SmallPtrSet<MDNode *, 8> LoopAliasScopes;
 
   // Holds all the different accesses in the loop.
   unsigned NumReads = 0;
@@ -2326,6 +2359,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
       if (HasComplexMemInst)
         continue;
 
+      // Record alias scopes defined inside the loop.
+      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+        for (Metadata *Op : Decl->getScopeList()->operands())
+          LoopAliasScopes.insert(cast<MDNode>(Op));
+
       // Many math library functions read the rounding mode. We will only
       // vectorize a loop if it contains known function calls that don't set
       // the flag. Therefore, it is safe to ignore this read from memory.
@@ -2407,7 +2445,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   }
 
   MemoryDepChecker::DepCandidates DependentAccesses;
-  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
+  AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
+                          LoopAliasScopes);
 
   // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
   // multiple times on the same object. If the ptr is accessed twice, once
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll b/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
index 98bb5f99a40a1e2..fb296f5089422db 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
@@ -7,8 +7,17 @@
 define void @test_scope_in_loop(ptr %arg, i64 %num) {
 ; CHECK-LABEL: 'test_scope_in_loop'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Backward loop carried data dependence.
 ; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Backward:
+; CHECK-NEXT:            %load.prev = load i8, ptr %prev.ptr, align 1, !alias.scope !0, !noalias !3 ->
+; CHECK-NEXT:            store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
+; CHECK-EMPTY:
+; CHECK-NEXT:        Forward:
+; CHECK-NEXT:            %load.cur = load i8, ptr %cur.ptr, align 1, !alias.scope !3 ->
+; CHECK-NEXT:            store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
+; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll
new file mode 100644
index 000000000000000..5c85c0d21f59f74
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -O3 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define internal void @acc(ptr noalias noundef %val, ptr noalias noundef %prev) {
+entry:
+  %0 = load i8, ptr %prev, align 1
+  %conv = zext i8 %0 to i32
+  %1 = load i8, ptr %val, align 1
+  %conv1 = zext i8 %1 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i8
+  store i8 %conv2, ptr %val, align 1
+  ret void
+}
+
+; This loop should not get vectorized.
+define void @accsum(ptr noundef %vals, i64 noundef %num) #0 {
+; CHECK-LABEL: define void @accsum(
+; CHECK-SAME: ptr nocapture noundef [[VALS:%.*]], i64 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i64 [[NUM]], 1
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[LOAD_INITIAL:%.*]] = load i8, ptr [[VALS]], align 1
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[STORE_FORWARDED:%.*]] = phi i8 [ [[LOAD_INITIAL]], [[FOR_BODY_PREHEADER]] ], [ [[ADD_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[I_02]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT:    [[ADD_I]] = add i8 [[TMP0]], [[STORE_FORWARDED]]
+; CHECK-NEXT:    store i8 [[ADD_I]], ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]], !noalias [[META3]]
+; CHECK-NEXT:    [[INC]] = add nuw i64 [[I_02]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUM]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i64 [ 1, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ult i64 %i.0, %num
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  br label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i8, ptr %vals, i64 %i.0
+  %sub = sub i64 %i.0, 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %vals, i64 %sub
+  call void @acc(ptr noundef %arrayidx, ptr noundef %arrayidx1)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond.cleanup
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87"}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"acc: %val"}
+; CHECK: [[META2]] = distinct !{[[META2]], !"acc"}
+; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"acc: %prev"}
+;.

@tstellar
Copy link
Collaborator

tstellar commented Feb 2, 2024

@fhahn What do you think about merging this PR to the release branch?

Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

Should be low risk and good to have this fixed in the release

…lvm#79161)

LAA currently adds memory locations with their original AATags to AST.
However, scoped alias AATags may be valid only within one loop
iteration, while LAA reasons across iterations.

Fix this by determining which alias scopes are defined inside the loop,
and drop AATags that reference these scopes.

Fixes llvm#79137.

(cherry picked from commit cd7ea4e)
@tstellar tstellar merged commit 42b7061 into llvm:release/18.x Feb 3, 2024
2 of 9 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Development

Successfully merging this pull request may close these issues.

4 participants