diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 444598520c981a..679934d07e36d0 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1900,33 +1900,35 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { // We need each element to be the same type of value, and check that each // element has a single use. - if (all_of(drop_begin(Item), [Item](InstLane IL) { - Value *FrontV = Item.front().first->get(); - if (!IL.first) - return true; - Value *V = IL.first->get(); - if (auto *I = dyn_cast(V); I && !I->hasOneUse()) - return false; - if (V->getValueID() != FrontV->getValueID()) - return false; - if (auto *CI = dyn_cast(V)) - if (CI->getPredicate() != cast(FrontV)->getPredicate()) - return false; - if (auto *CI = dyn_cast(V)) - if (CI->getSrcTy() != cast(FrontV)->getSrcTy()) - return false; - if (auto *SI = dyn_cast(V)) - if (!isa(SI->getOperand(0)->getType()) || - SI->getOperand(0)->getType() != - cast(FrontV)->getOperand(0)->getType()) - return false; - if (isa(V) && !isa(V)) - return false; - auto *II = dyn_cast(V); - return !II || (isa(FrontV) && - II->getIntrinsicID() == - cast(FrontV)->getIntrinsicID()); - })) { + auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) { + Value *FrontV = Item.front().first->get(); + if (!IL.first) + return true; + Value *V = IL.first->get(); + if (auto *I = dyn_cast(V); I && !I->hasOneUse()) + return false; + if (V->getValueID() != FrontV->getValueID()) + return false; + if (auto *CI = dyn_cast(V)) + if (CI->getPredicate() != cast(FrontV)->getPredicate()) + return false; + if (auto *CI = dyn_cast(V)) + if (CI->getSrcTy() != cast(FrontV)->getSrcTy()) + return false; + if (auto *SI = dyn_cast(V)) + if (!isa(SI->getOperand(0)->getType()) || + SI->getOperand(0)->getType() != + cast(FrontV)->getOperand(0)->getType()) + return false; + if (isa(V) && !isa(V)) + return false; + auto *II = dyn_cast(V); + return !II || (isa(FrontV) && + II->getIntrinsicID() == + cast(FrontV)->getIntrinsicID() && + !II->hasOperandBundles()); + }; + if (all_of(drop_begin(Item), CheckLaneIsEquivalentToFirst)) { // Check the operator is one that we support. if (isa(FrontU)) { // We exclude div/rem in case they hit UB from poison lanes. @@ -1954,7 +1956,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2)); continue; } else if (auto *II = dyn_cast(FrontU); - II && isTriviallyVectorizable(II->getIntrinsicID())) { + II && isTriviallyVectorizable(II->getIntrinsicID()) && + !II->hasOperandBundles()) { for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) { if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) { if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) { diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index af04fb0ab4621b..0b91618da64068 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -1066,4 +1066,52 @@ entry: ret <2 x float> %4 } +define <16 x i64> @operandbundles(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) { +; CHECK-LABEL: @operandbundles( +; CHECK-NEXT: [[CALL:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ] +; CHECK-NEXT: [[SHUFFLEVECTOR:%.*]] = shufflevector <4 x i64> [[CALL]], <4 x i64> poison, <16 x i32> +; CHECK-NEXT: [[SHUFFLEVECTOR1:%.*]] = shufflevector <16 x i64> [[SHUFFLEVECTOR]], <16 x i64> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i64> [[SHUFFLEVECTOR1]] +; + %call = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ] + %shufflevector = shufflevector <4 x i64> %call, <4 x i64> poison, <16 x i32> + %shufflevector1 = shufflevector <16 x i64> %shufflevector, <16 x i64> undef, <16 x i32> + ret <16 x i64> %shufflevector1 +} + +define <8 x i8> @operandbundles_first(<8 x i8> %a) { +; CHECK-LABEL: @operandbundles_first( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ] +; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false) +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> + %abt = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %at, i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ] + %abb = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %ab, i1 false) + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> + ret <8 x i8> %r +} + +define <8 x i8> @operandbundles_second(<8 x i8> %a) { +; CHECK-LABEL: @operandbundles_second( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 false) +; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> + %abt = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %at, i1 false) + %abb = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %ab, i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ] + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> + ret <8 x i8> %r +} + +declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) declare void @use(<4 x i8>)