From 60e4698b9aba8d9a2b27ac8a636c95ad1f7d94e0 Mon Sep 17 00:00:00 2001
From: Caroline Concatto
Date: Wed, 25 Nov 2020 12:06:27 +0000
Subject: [PATCH 01/39] [CostModel] Replace FixedVectorType by VectorType in getIntrinsicInstrCost

This patch replaces FixedVectorType with VectorType in getIntrinsicInstrCost
in BasicTTIImpl.h. It also moves the early return for scalable types from the
top of the function into the individual intrinsic cases, and adds tests for
scalable types.

Depends on D91532

Differential Revision: https://reviews.llvm.org/D92094
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 18 +++++++---
 .../sve-getIntrinsicInstrCost-cctz-ctlz.ll    | 33 +++++++++++++++++++
 2 files changed, 47 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 05c5c835d74a4..7dca7cd291c96 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1202,14 +1202,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     if (ICA.isTypeBasedOnly())
       return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

-    // TODO: Handle scalable vectors?
     Type *RetTy = ICA.getReturnType();
-    if (isa<ScalableVectorType>(RetTy))
-      return BaseT::getIntrinsicInstrCost(ICA, CostKind);

     ElementCount VF = ICA.getVectorFactor();
     ElementCount RetVF =
-        (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getElementCount()
+        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
                              : ElementCount::getFixed(1));
     assert((RetVF.isScalar() || VF.isScalar()) &&
            "VF > 1 and RetVF is a vector type");
@@ -1238,6 +1235,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return thisT()->getMemcpyCost(ICA.getInst());

     case Intrinsic::masked_scatter: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       assert(VF.isScalar() && "Can't vectorize types here.");
       const Value *Mask = Args[3];
       bool VarMask = !isa<Constant>(Mask);
@@ -1247,6 +1246,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
                                              VarMask, Alignment, CostKind, I);
     }
     case Intrinsic::masked_gather: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       assert(VF.isScalar() && "Can't vectorize types here.");
       const Value *Mask = Args[2];
       bool VarMask = !isa<Constant>(Mask);
@@ -1265,17 +1266,23 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::vector_reduce_fmin:
     case Intrinsic::vector_reduce_umax:
     case Intrinsic::vector_reduce_umin: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
       return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
     }
     case Intrinsic::vector_reduce_fadd:
     case Intrinsic::vector_reduce_fmul: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       IntrinsicCostAttributes Attrs(
           IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
       return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
     }
     case Intrinsic::fshl:
     case Intrinsic::fshr: {
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
       const Value *X = Args[0];
       const Value *Y = Args[1];
       const Value *Z = Args[2];
@@ -1316,6 +1323,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return Cost;
     }
     }
+    // TODO: Handle the remaining intrinsics with scalable vector types
+    if (isa<ScalableVectorType>(RetTy))
+      return BaseT::getIntrinsicInstrCost(ICA, CostKind);

     // Assume that we need to scalarize this intrinsic.
    SmallVector<Type *, 4> Types;
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
new file mode 100644
index 0000000000000..484aa2a011301
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
@@ -0,0 +1,33 @@
+; Checks getIntrinsicInstrCost in BasicTTIImpl.h with SVE for CTLZ and CTTZ
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Check for CTLZ
+
+define void @ctlz_nxv4i32(<vscale x 4 x i32> %A) {
+; CHECK-LABEL: 'ctlz_nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = tail call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+
+  %1 = tail call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+  ret void
+}
+
+; Check for CTTZ
+
+define void @cttz_nxv4i32(<vscale x 4 x i32> %A) {
+; CHECK-LABEL: 'cttz_nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = tail call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+
+  %1 = tail call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+  ret void
+}
+
+declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)

From 07217e0a1b7e40bb9c4e9953f9c7366c84d30c93 Mon Sep 17 00:00:00 2001
From: Caroline Concatto
Date: Thu, 10 Dec 2020 13:52:35 +0000
Subject: [PATCH 02/39] [CostModel] Migrate getTreeCost() to use InstructionCost

This patch changes the type of cost variables (for instance: Cost,
ExtractCost, SpillCost) to use InstructionCost. It also changes the type of
cost variables to InstructionCost in other functions that use the result of
getTreeCost().

This patch is part of a series of patches to use InstructionCost instead of
unsigned/int for the cost model functions.

See this thread for context:
http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html

Depends on D91174

Differential Revision: https://reviews.llvm.org/D93049
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 43 +++++++++++--------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cda43521e9ba1..c0d7d078a3853 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -587,11 +587,11 @@ class BoUpSLP {

   /// \returns the cost incurred by unwanted spills and fills, caused by
   /// holding live values over call sites.
-  int getSpillCost() const;
+  InstructionCost getSpillCost() const;

   /// \returns the vectorization cost of the subtree that starts at \p VL.
   /// A negative number means that this is profitable.
-  int getTreeCost();
+  InstructionCost getTreeCost();

   /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
   /// the purpose of scheduling and extraction in the \p UserIgnoreLst.
@@ -3949,13 +3949,13 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
   return true;
 }

-int BoUpSLP::getSpillCost() const {
+InstructionCost BoUpSLP::getSpillCost() const {
   // Walk from the bottom of the tree to the top, tracking which values are
   // live.
When we see a call instruction that is not part of our tree, // query TTI to see if there is a cost to keeping values live over it // (for example, if spills and fills are required). unsigned BundleWidth = VectorizableTree.front()->Scalars.size(); - int Cost = 0; + InstructionCost Cost = 0; SmallPtrSet LiveValues; Instruction *PrevInst = nullptr; @@ -4031,8 +4031,8 @@ int BoUpSLP::getSpillCost() const { return Cost; } -int BoUpSLP::getTreeCost() { - int Cost = 0; +InstructionCost BoUpSLP::getTreeCost() { + InstructionCost Cost = 0; LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size " << VectorizableTree.size() << ".\n"); @@ -4062,7 +4062,7 @@ int BoUpSLP::getTreeCost() { })) continue; - int C = getEntryCost(&TE); + InstructionCost C = getEntryCost(&TE); Cost += C; LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " << *TE.Scalars[0] @@ -4071,7 +4071,7 @@ int BoUpSLP::getTreeCost() { } SmallPtrSet ExtractCostCalculated; - int ExtractCost = 0; + InstructionCost ExtractCost = 0; for (ExternalUser &EU : ExternalUses) { // We only add extract cost once for the same scalar. if (!ExtractCostCalculated.insert(EU.Scalar).second) @@ -4101,7 +4101,7 @@ int BoUpSLP::getTreeCost() { } } - int SpillCost = getSpillCost(); + InstructionCost SpillCost = getSpillCost(); Cost += SpillCost + ExtractCost; #ifndef NDEBUG @@ -6009,10 +6009,10 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, R.computeMinimumValueSizes(); - int Cost = R.getTreeCost(); + InstructionCost Cost = R.getTreeCost(); LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF =" << VF << "\n"); - if (Cost < -SLPCostThreshold) { + if (Cost.isValid() && Cost < -SLPCostThreshold) { LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n"); using namespace ore; @@ -6213,7 +6213,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, bool Changed = false; bool CandidateFound = false; - int MinCost = SLPCostThreshold; + InstructionCost MinCost = SLPCostThreshold.getValue(); bool CompensateUseCost = !InsertUses.empty() && llvm::all_of(InsertUses, [](const Value *V) { @@ -6269,7 +6269,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, continue; R.computeMinimumValueSizes(); - int Cost = R.getTreeCost(); + InstructionCost Cost = R.getTreeCost(); CandidateFound = true; if (CompensateUseCost) { // TODO: Use TTI's getScalarizationOverhead for sequence of inserts @@ -6299,7 +6299,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, // Switching to the TTI interface might help a bit. // Alternative solution could be pattern-match to detect a no-op or // shuffle. - unsigned UserCost = 0; + InstructionCost UserCost = 0; for (unsigned Lane = 0; Lane < OpsWidth; Lane++) { auto *IE = cast(InsertUses[I + Lane]); if (auto *CI = dyn_cast(IE->getOperand(2))) @@ -6311,9 +6311,9 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, Cost -= UserCost; } - MinCost = std::min(MinCost, Cost); + MinCost = InstructionCost::min(MinCost, Cost); - if (Cost < -SLPCostThreshold) { + if (Cost.isValid() && Cost < -SLPCostThreshold) { LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList", cast(Ops[0])) @@ -7088,9 +7088,14 @@ class HorizontalReduction { V.computeMinimumValueSizes(); // Estimate cost. 
- int TreeCost = V.getTreeCost(); - int ReductionCost = getReductionCost(TTI, ReducedVals[i], ReduxWidth); - int Cost = TreeCost + ReductionCost; + InstructionCost TreeCost = V.getTreeCost(); + InstructionCost ReductionCost = + getReductionCost(TTI, ReducedVals[i], ReduxWidth); + InstructionCost Cost = TreeCost + ReductionCost; + if (!Cost.isValid()) { + LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n"); + return false; + } if (Cost >= -SLPCostThreshold) { V.getORE()->emit([&]() { return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial", From 6551c9ac365ca46e83354703d1a63c671a50258a Mon Sep 17 00:00:00 2001 From: ergawy Date: Wed, 16 Dec 2020 08:20:24 -0500 Subject: [PATCH 03/39] [mlir][spirv] Add parsing and printing support for SpecConstantOperation Adds more support for `SpecConstantOperation` by defining a custom syntax for the op and implementing its parsing and printing. Reviewed By: mravishankar, antiagainst Differential Revision: https://reviews.llvm.org/D92919 --- .../mlir/Dialect/SPIRV/SPIRVStructureOps.td | 37 +++--- mlir/lib/Dialect/SPIRV/SPIRVOps.cpp | 62 ++++----- mlir/test/Dialect/SPIRV/structure-ops.mlir | 121 ++---------------- 3 files changed, 59 insertions(+), 161 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td index b8e76c3662ece..1ae7d285cd93d 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVStructureOps.td @@ -608,9 +608,12 @@ def SPV_SpecConstantCompositeOp : SPV_Op<"specConstantComposite", [InModuleScope let autogenSerialization = 0; } -def SPV_YieldOp : SPV_Op<"mlir.yield", [NoSideEffect, Terminator]> { - let summary = "Yields the result computed in `spv.SpecConstantOperation`'s" - "region back to the parent op."; +def SPV_YieldOp : SPV_Op<"mlir.yield", [ + HasParent<"SpecConstantOperationOp">, NoSideEffect, Terminator]> { + let summary = [{ + Yields the result computed in `spv.SpecConstantOperation`'s + region back to the parent op. + }]; let description = [{ This op is a special terminator whose only purpose is to terminate @@ -639,12 +642,16 @@ def SPV_YieldOp : SPV_Op<"mlir.yield", [NoSideEffect, Terminator]> { let autogenSerialization = 0; let assemblyFormat = "attr-dict $operand `:` type($operand)"; + + let verifier = [{ return success(); }]; } def SPV_SpecConstantOperationOp : SPV_Op<"SpecConstantOperation", [ - InFunctionScope, NoSideEffect, - IsolatedFromAbove]> { - let summary = "Declare a new specialization constant that results from doing an operation."; + NoSideEffect, InFunctionScope, + SingleBlockImplicitTerminator<"YieldOp">]> { + let summary = [{ + Declare a new specialization constant that results from doing an operation. 
+ }]; let description = [{ This op declares a SPIR-V specialization constant that results from @@ -653,12 +660,8 @@ def SPV_SpecConstantOperationOp : SPV_Op<"SpecConstantOperation", [ In the `spv` dialect, this op is modelled as follows: ``` - spv-spec-constant-operation-op ::= `"spv.SpecConstantOperation"` - `(`ssa-id (`, ` ssa-id)`)` - `({` - ssa-id = spirv-op - `spv.mlir.yield` ssa-id - `})` `:` function-type + spv-spec-constant-operation-op ::= `spv.SpecConstantOperation` `wraps` + generic-spirv-op `:` function-type ``` In particular, an `spv.SpecConstantOperation` contains exactly one @@ -712,17 +715,15 @@ def SPV_SpecConstantOperationOp : SPV_Op<"SpecConstantOperation", [ #### Example: ```mlir %0 = spv.constant 1: i32 + %1 = spv.constant 1: i32 - %1 = "spv.SpecConstantOperation"(%0) ({ - %ret = spv.IAdd %0, %0 : i32 - spv.mlir.yield %ret : i32 - }) : (i32) -> i32 + %2 = spv.SpecConstantOperation wraps "spv.IAdd"(%0, %1) : (i32, i32) -> i32 ``` }]; - let arguments = (ins Variadic:$operands); + let arguments = (ins); - let results = (outs AnyType:$results); + let results = (outs AnyType:$result); let regions = (region SizedRegion<1>:$body); diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index 03e416e954414..43b3c517a4c62 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -3396,35 +3396,39 @@ static LogicalResult verify(spirv::SpecConstantCompositeOp constOp) { } //===----------------------------------------------------------------------===// -// spv.mlir.yield +// spv.SpecConstantOperation //===----------------------------------------------------------------------===// -static LogicalResult verify(spirv::YieldOp yieldOp) { - Operation *parentOp = yieldOp->getParentOp(); +static ParseResult parseSpecConstantOperationOp(OpAsmParser &parser, + OperationState &state) { + Region *body = state.addRegion(); - if (!parentOp || !isa(parentOp)) - return yieldOp.emitOpError( - "expected parent op to be 'spv.SpecConstantOperation'"); + if (parser.parseKeyword("wraps")) + return failure(); - Block &block = parentOp->getRegion(0).getBlocks().front(); - Operation &enclosedOp = block.getOperations().front(); + body->push_back(new Block); + Block &block = body->back(); + Operation *wrappedOp = parser.parseGenericOperation(&block, block.begin()); - if (yieldOp.getOperand().getDefiningOp() != &enclosedOp) - return yieldOp.emitOpError( - "expected operand to be defined by preceeding op"); + if (!wrappedOp) + return failure(); - return success(); -} + OpBuilder builder(parser.getBuilder().getContext()); + builder.setInsertionPointToEnd(&block); + builder.create(wrappedOp->getLoc(), wrappedOp->getResult(0)); + state.location = wrappedOp->getLoc(); -static ParseResult parseSpecConstantOperationOp(OpAsmParser &parser, - OperationState &state) { - // TODO: For now, only generic form is supported. 
- return failure(); + state.addTypes(wrappedOp->getResult(0).getType()); + + if (parser.parseOptionalAttrDict(state.attributes)) + return failure(); + + return success(); } static void print(spirv::SpecConstantOperationOp op, OpAsmPrinter &printer) { - // TODO - printer.printGenericOp(op); + printer << op.getOperationName() << " wraps "; + printer.printGenericOp(&op.body().front().front()); } static LogicalResult verify(spirv::SpecConstantOperationOp constOp) { @@ -3433,11 +3437,6 @@ static LogicalResult verify(spirv::SpecConstantOperationOp constOp) { if (block.getOperations().size() != 2) return constOp.emitOpError("expected exactly 2 nested ops"); - Operation &yieldOp = block.getOperations().back(); - - if (!isa(yieldOp)) - return constOp.emitOpError("expected terminator to be a yield op"); - Operation &enclosedOp = block.getOperations().front(); // TODO Add a `UsableInSpecConstantOp` trait and mark ops from the list below @@ -3457,21 +3456,12 @@ static LogicalResult verify(spirv::SpecConstantOperationOp constOp) { spirv::UGreaterThanEqualOp, spirv::SGreaterThanEqualOp>(enclosedOp)) return constOp.emitOpError("invalid enclosed op"); - if (enclosedOp.getNumOperands() != constOp.getOperands().size()) - return constOp.emitOpError("invalid number of operands; expected ") - << enclosedOp.getNumOperands() << ", actual " - << constOp.getOperands().size(); - - if (enclosedOp.getNumOperands() != constOp.getRegion().getNumArguments()) - return constOp.emitOpError("invalid number of region arguments; expected ") - << enclosedOp.getNumOperands() << ", actual " - << constOp.getRegion().getNumArguments(); - - for (auto operand : constOp.getOperands()) + for (auto operand : enclosedOp.getOperands()) if (!isa( operand.getDefiningOp())) - return constOp.emitOpError("invalid operand"); + return constOp.emitOpError( + "invalid operand, must be defined by a constant operation"); return success(); } diff --git a/mlir/test/Dialect/SPIRV/structure-ops.mlir b/mlir/test/Dialect/SPIRV/structure-ops.mlir index 89a30e23dec9f..c0b495115d6c1 100644 --- a/mlir/test/Dialect/SPIRV/structure-ops.mlir +++ b/mlir/test/Dialect/SPIRV/structure-ops.mlir @@ -757,6 +757,7 @@ spv.module Logical GLSL450 { // expected-error @+1 {{unsupported composite type}} spv.specConstantComposite @scc (@sc1) : !spv.coopmatrix<8x16xf32, Device> } + //===----------------------------------------------------------------------===// // spv.SpecConstantOperation //===----------------------------------------------------------------------===// @@ -765,34 +766,15 @@ spv.module Logical GLSL450 { spv.module Logical GLSL450 { spv.func @foo() -> i32 "None" { + // CHECK: [[LHS:%.*]] = spv.constant %0 = spv.constant 1: i32 - %2 = spv.constant 1: i32 - - %1 = "spv.SpecConstantOperation"(%0, %0) ({ - ^bb(%lhs : i32, %rhs : i32): - %ret = spv.IAdd %lhs, %rhs : i32 - spv.mlir.yield %ret : i32 - }) : (i32, i32) -> i32 - - spv.ReturnValue %1 : i32 - } -} - -// ----- - -spv.module Logical GLSL450 { - spv.func @foo() -> i32 "None" { - %0 = spv.constant 1: i32 - %2 = spv.constant 1: i32 + // CHECK: [[RHS:%.*]] = spv.constant + %1 = spv.constant 1: i32 - // expected-error @+1 {{invalid number of operands; expected 2, actual 1}} - %1 = "spv.SpecConstantOperation"(%0) ({ - ^bb(%lhs : i32, %rhs : i32): - %ret = spv.IAdd %lhs, %rhs : i32 - spv.mlir.yield %ret : i32 - }) : (i32) -> i32 + // CHECK: spv.SpecConstantOperation wraps "spv.IAdd"([[LHS]], [[RHS]]) : (i32, i32) -> i32 + %2 = spv.SpecConstantOperation wraps "spv.IAdd"(%0, %1) : (i32, i32) -> i32 - spv.ReturnValue 
%1 : i32 + spv.ReturnValue %2 : i32 } } @@ -801,93 +783,20 @@ spv.module Logical GLSL450 { spv.module Logical GLSL450 { spv.func @foo() -> i32 "None" { %0 = spv.constant 1: i32 - %2 = spv.constant 1: i32 - - // expected-error @+1 {{invalid number of region arguments; expected 2, actual 1}} - %1 = "spv.SpecConstantOperation"(%0, %0) ({ - ^bb(%lhs : i32): - %ret = spv.IAdd %lhs, %lhs : i32 - spv.mlir.yield %ret : i32 - }) : (i32, i32) -> i32 - - spv.ReturnValue %1 : i32 - } -} - -// ----- - -spv.module Logical GLSL450 { - spv.func @foo() -> i32 "None" { - %0 = spv.constant 1: i32 - // expected-error @+1 {{expected parent op to be 'spv.SpecConstantOperation'}} + // expected-error @+1 {{op expects parent op 'spv.SpecConstantOperation'}} spv.mlir.yield %0 : i32 } } // ----- -spv.module Logical GLSL450 { - spv.func @foo() -> i32 "None" { - %0 = spv.constant 1: i32 - - %1 = "spv.SpecConstantOperation"(%0, %0) ({ - ^bb(%lhs : i32, %rhs : i32): - %ret = spv.ISub %lhs, %rhs : i32 - // expected-error @+1 {{expected operand to be defined by preceeding op}} - spv.mlir.yield %lhs : i32 - }) : (i32, i32) -> i32 - - spv.ReturnValue %1 : i32 - } -} - -// ----- - -spv.module Logical GLSL450 { - spv.func @foo() -> i32 "None" { - %0 = spv.constant 1: i32 - - // expected-error @+1 {{expected exactly 2 nested ops}} - %1 = "spv.SpecConstantOperation"(%0, %0) ({ - ^bb(%lhs : i32, %rhs : i32): - %ret = spv.IAdd %lhs, %rhs : i32 - %ret2 = spv.IAdd %lhs, %rhs : i32 - spv.mlir.yield %ret : i32 - }) : (i32, i32) -> i32 - - spv.ReturnValue %1 : i32 - } -} - -// ----- - -spv.module Logical GLSL450 { - spv.func @foo() -> i32 "None" { - %0 = spv.constant 1: i32 - - // expected-error @+1 {{expected terminator to be a yield op}} - %1 = "spv.SpecConstantOperation"(%0, %0) ({ - ^bb(%lhs : i32, %rhs : i32): - %ret = spv.IAdd %lhs, %rhs : i32 - spv.ReturnValue %ret : i32 - }) : (i32, i32) -> i32 - - spv.ReturnValue %1 : i32 - } -} - -// ----- - spv.module Logical GLSL450 { spv.func @foo() -> () "None" { %0 = spv.Variable : !spv.ptr // expected-error @+1 {{invalid enclosed op}} - %2 = "spv.SpecConstantOperation"(%0) ({ - ^bb(%arg0 : !spv.ptr): - %ret = spv.Load "Function" %arg0 : i32 - spv.mlir.yield %ret : i32 - }) : (!spv.ptr) -> i32 + %1 = spv.SpecConstantOperation wraps "spv.Load"(%0) {memory_access = 0 : i32} : (!spv.ptr) -> i32 + spv.Return } } @@ -898,11 +807,9 @@ spv.module Logical GLSL450 { %0 = spv.Variable : !spv.ptr %1 = spv.Load "Function" %0 : i32 - // expected-error @+1 {{invalid operand}} - %2 = "spv.SpecConstantOperation"(%1, %1) ({ - ^bb(%lhs: i32, %rhs: i32): - %ret = spv.IAdd %lhs, %lhs : i32 - spv.mlir.yield %ret : i32 - }) : (i32, i32) -> i32 + // expected-error @+1 {{invalid operand, must be defined by a constant operation}} + %2 = spv.SpecConstantOperation wraps "spv.IAdd"(%1, %1) : (i32, i32) -> i32 + + spv.Return } } From e0b9c5df260c9d778be3b6ee56e6d0ffd7af9be7 Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Wed, 9 Dec 2020 11:03:54 +0000 Subject: [PATCH 04/39] [CostModel] Add costs for llvm.experimental.vector.{extract,insert} intrinsics Adds cost model support for the new llvm.experimental.vector.{extract,insert} intrinsics, using the existing getExtractSubvectorOverhead and getInsertSubvectorOverhead functions for shuffles. Previously this case would throw an assertion. 
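For illustration, the calls whose cost is now modelled look like this at the
IR level (the concrete types here are only an example, taken from the test
added by this patch; the value names %e, %i, %vec, %sub are placeholders):

  %e = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
  %i = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %sub, i64 0)

The extract is costed as a TTI::SK_ExtractSubvector shuffle and the insert as
a TTI::SK_InsertSubvector shuffle; extracting a scalable result or inserting a
scalable subvector still falls back to the base implementation.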
Differential Revision: https://reviews.llvm.org/D93043
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      | 36 +++++++++++++---
 ...etIntrinsicInstrCost-vec-insert-extract.ll | 42 +++++++++++++++++++
 2 files changed, 72 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 7dca7cd291c96..02f1b73226fc4 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -114,12 +114,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {

   /// Estimate a cost of subvector extraction as a sequence of extract and
   /// insert operations.
-  unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index,
+  unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
                                        FixedVectorType *SubVTy) {
     assert(VTy && SubVTy && "Can only extract subvectors from vectors");
     int NumSubElts = SubVTy->getNumElements();
-    assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
+    assert((!isa<FixedVectorType>(VTy) ||
+            (Index + NumSubElts) <=
+                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
            "SK_ExtractSubvector index out of range");

     unsigned Cost = 0;
@@ -137,12 +139,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {

   /// Estimate a cost of subvector insertion as a sequence of extract and
   /// insert operations.
-  unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index,
+  unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
                                       FixedVectorType *SubVTy) {
     assert(VTy && SubVTy && "Can only insert subvectors into vectors");
     int NumSubElts = SubVTy->getNumElements();
-    assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
+    assert((!isa<FixedVectorType>(VTy) ||
+            (Index + NumSubElts) <=
+                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
            "SK_InsertSubvector index out of range");

     unsigned Cost = 0;
@@ -723,10 +727,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case TTI::SK_PermuteTwoSrc:
       return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
     case TTI::SK_ExtractSubvector:
-      return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index,
+      return getExtractSubvectorOverhead(Tp, Index,
                                          cast<FixedVectorType>(SubTp));
     case TTI::SK_InsertSubvector:
-      return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index,
+      return getInsertSubvectorOverhead(Tp, Index,
                                         cast<FixedVectorType>(SubTp));
     }
     llvm_unreachable("Unknown TTI::ShuffleKind");
@@ -1255,6 +1259,26 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                              VarMask, Alignment, CostKind, I);
     }
+    case Intrinsic::experimental_vector_extract: {
+      // FIXME: Handle case where a scalable vector is extracted from a scalable
+      // vector
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
+      return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
+                                     cast<VectorType>(Args[0]->getType()),
+                                     Index, cast<FixedVectorType>(RetTy));
+    }
+    case Intrinsic::experimental_vector_insert: {
+      // FIXME: Handle case where a scalable vector is inserted into a scalable
+      // vector
+      if (isa<ScalableVectorType>(Args[1]->getType()))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
+      return thisT()->getShuffleCost(
+          TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index,
+          cast<FixedVectorType>(Args[1]->getType()));
+    }
     case Intrinsic::vector_reduce_add:
     case Intrinsic::vector_reduce_mul:
     case Intrinsic::vector_reduce_and:
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll
new file mode 100644
index 0000000000000..9523e17cb5de2
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s
+
+define <16 x i32> @extract_cost(<vscale x 4 x i32> %vec) {
+; CHECK-LABEL: 'extract_cost'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ret
+
+  %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
+  ret <16 x i32> %ret
+}
+
+define <vscale x 4 x i32> @insert_cost(<vscale x 4 x i32> %vec, <16 x i32> %subVec) {
+; CHECK-LABEL: 'insert_cost'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret
+
+  %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @extract_cost_scalable(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: 'extract_cost_scalable'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret
+
+  %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+  ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 16 x i32> @insert_cost_scalable(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec) {
+; CHECK-LABEL: 'insert_cost_scalable'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 16 x i32> %ret
+
+  %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0)
+  ret <vscale x 16 x i32> %ret
+}
+
+declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64)
+declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64)
+declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64)
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64)

From 75f98f0f8c813a0cffb130bc0589e4609ab09076 Mon Sep 17 00:00:00 2001
From: Stephan Bergmann
Date: Wed, 16 Dec 2020 12:46:10 +0100
Subject: [PATCH] [compiler-rt] [builtins] Fix name of __aarch64_have_lse_atomics on Darwin

...where the name of that variable defined in
compiler-rt/lib/builtins/cpu_model.c is decorated with a leading underscore.

Differential Revision: https://reviews.llvm.org/D93390
---
 compiler-rt/lib/builtins/aarch64/lse.S | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/lse.S b/compiler-rt/lib/builtins/aarch64/lse.S
index 7a9433fd89cf9..5dc0d5320b5ab 100644
--- a/compiler-rt/lib/builtins/aarch64/lse.S
+++ b/compiler-rt/lib/builtins/aarch64/lse.S
@@ -28,7 +28,11 @@
 .arch armv8-a
 #endif

+#if !defined(__APPLE__)
 HIDDEN(__aarch64_have_lse_atomics)
+#else
+HIDDEN(___aarch64_have_lse_atomics)
+#endif

 // Generate mnemonics for
 // L_cas:
SIZE: 1,2,4,8,16 MODEL: 1,2,3,4 @@ -106,8 +110,8 @@ HIDDEN(__aarch64_have_lse_atomics) adrp x(tmp0), __aarch64_have_lse_atomics ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics] #else - adrp x(tmp0), __aarch64_have_lse_atomics@page - ldrb w(tmp0), [x(tmp0), __aarch64_have_lse_atomics@pageoff] + adrp x(tmp0), ___aarch64_have_lse_atomics@page + ldrb w(tmp0), [x(tmp0), ___aarch64_have_lse_atomics@pageoff] #endif cbz w(tmp0), \label .endm From c5771a2f2df995b312a7d5dfc899d7869d6f26d1 Mon Sep 17 00:00:00 2001 From: Denis Antrushin Date: Tue, 15 Dec 2020 19:14:39 +0700 Subject: [PATCH 06/39] [Statepoints] Extract invoke tests into separate file. NFC. Extract VReg lowering tests with invokes into separate file for easier maintenance/modification. Check MIR after register allocation - at this point all transformations we're interested in has been applied and verifying of MIR is simpler than that of assembly. --- .../CodeGen/X86/statepoint-vreg-details.ll | 45 ----- .../CodeGen/X86/statepoint-vreg-invoke.ll | 156 ++++++++++++++++ llvm/test/CodeGen/X86/statepoint-vreg.ll | 171 +----------------- 3 files changed, 162 insertions(+), 210 deletions(-) create mode 100644 llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll index 5f6f9c7091e40..39ee1506a39ed 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll @@ -16,8 +16,6 @@ declare dso_local void @consume(i32 addrspace(1)*) declare dso_local void @consume2(i32 addrspace(1)*, i32 addrspace(1)*) declare dso_local void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*) declare dso_local void @use1(i32 addrspace(1)*, i8 addrspace(1)*) -declare dso_local i32* @fake_personality_function() -declare dso_local i32 @foo(i32, i8 addrspace(1)*, i32, i32, i32) declare dso_local void @bar(i8 addrspace(1)*, i8 addrspace(1)*) ; test most simple relocate @@ -317,46 +315,6 @@ entry: ret void } -; Different IR Values which maps to the same SDValue must be assigned to the same VReg. 
-; This is test is similar to test_gcptr_uniqueing but explicitly uses invokes for which this is important -; Otherwise we may get a copy of statepoint result, inserted at the end ot statepoint block and used at landing pad -define void @test_duplicate_ir_values() gc "statepoint-example" personality i32* ()* @fake_personality_function{ -;CHECK-VREG-LABEL: name: test_duplicate_ir_values -;CHECK-VREG: bb.0.entry: -;CHECK-VREG: %0:gr64 = STATEPOINT 1, 16, 5, %8, $edi, $rsi, $edx, $ecx, $r8d, 2, 0, 2, 0, 2, 0, 2, 1, killed %1(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $eax -;CHECK-VREG: JMP_1 %bb.1 -;CHECK-VREG: bb.1.normal_continue: -;CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) -;CHECK-VREG: %13:gr32 = MOV32ri 10 -;CHECK-VREG: $edi = COPY %13 -;CHECK-VREG: STATEPOINT 2882400000, 0, 1, @__llvm_deoptimize, $edi, 2, 0, 2, 2, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) -;CHECK-VREG: bb.2.exceptional_return (landing-pad): -;CHECK-VREG: EH_LABEL -;CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) -;CHECK-VREG: %12:gr32 = MOV32ri -271 -;CHECK-VREG: $edi = COPY %12 -;CHECK-VREG: STATEPOINT 2882400000, 0, 1, @__llvm_deoptimize, $edi, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) - -entry: - %local.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 - %local.9 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 - %statepoint_token1 = invoke token (i64, i32, i32 (i32, i8 addrspace(1)*, i32, i32, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32p1i8i32i32i32f(i64 1, i32 16, i32 (i32, i8 addrspace(1)*, i32, i32, i32)* nonnull @foo, i32 5, i32 0, i32 undef, i8 addrspace(1)* undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0) [ "deopt"(), "gc-live"(i8 addrspace(1)* %local.0, i8 addrspace(1)* %local.9) ] - to label %normal_continue unwind label %exceptional_return - -normal_continue: ; preds = %entry - %local.0.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token1, i32 0, i32 0) ; (%local.0, %local.0) - %local.9.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token1, i32 1, i32 1) ; (%local.9, %local.9) - %safepoint_token2 = call token (i64, i32, void (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64 2882400000, i32 0, void (i32)* nonnull @__llvm_deoptimize, i32 1, i32 2, i32 10, i32 0, i32 0) [ "deopt"(i8 addrspace(1)* %local.0.relocated1, i8 addrspace(1)* %local.9.relocated1), "gc-live"() ] - unreachable - -exceptional_return: ; preds = %entry - %lpad_token11090 = landingpad token - cleanup - %local.9.relocated2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %lpad_token11090, i32 1, i32 1) ; (%local.9, %local.9) - %safepoint_token3 = call token (i64, i32, void (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64 2882400000, i32 0, void (i32)* nonnull @__llvm_deoptimize, i32 1, i32 0, i32 -271, i32 0, i32 0) [ "deopt"(i8 addrspace(1)* %local.9.relocated2), "gc-live"() ] - unreachable -} - ; Test that CopyFromReg emitted during ISEL processing of gc.relocate are properly ordered w.r.t. statepoint. 
define i8 addrspace(1)* @test_isel_sched(i8 addrspace(1)* %0, i8 addrspace(1)* %1, i32 %2) gc "statepoint-example" { ;CHECK-VREG-LABEL: name: test_isel_sched @@ -391,8 +349,5 @@ declare dso_local i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, declare dso_local i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32) declare dso_local i1 @llvm.experimental.gc.result.i1(token) -declare dso_local void @__llvm_deoptimize(i32) -declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64 immarg, i32 immarg, void (i32)*, i32 immarg, i32 immarg, ...) -declare token @llvm.experimental.gc.statepoint.p0f_i32i32p1i8i32i32i32f(i64 immarg, i32 immarg, i32 (i32, i8 addrspace(1)*, i32, i32, i32)*, i32 immarg, i32 immarg, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i8p1i8f(i64 immarg, i32 immarg, void (i8 addrspace(1)*, i8 addrspace(1)*)*, i32 immarg, i32 immarg, ...) diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll new file mode 100644 index 0000000000000..b734dca622ae0 --- /dev/null +++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll @@ -0,0 +1,156 @@ +; RUN: llc -max-registers-for-gc-values=4 -stop-after virtregrewriter < %s | FileCheck %s + +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare dso_local void @"some_call"(i64 addrspace(1)*) +declare dso_local i32 @foo(i32, i8 addrspace(1)*, i32, i32, i32) +declare dso_local i32* @personality_function() + +define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) +; CHECK-LABEL: name: test_basic_invoke +; CHECK: bb.0.entry: +; CHECK: renamable $r14, renamable $rbx = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 5, 2, 0, 2, -1, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $r14(tied-def 0), killed renamable $rbx(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK: JMP_1 %bb.1 +; CHECK: bb.1.safepoint_normal_dest: +; CHECK: bb.2.normal_return: +; CHECK: $rax = COPY killed renamable $rbx +; CHECK: RET 0, $rax +; CHECK: bb.3.exceptional_return (landing-pad): +; CHECK: $rax = COPY killed renamable $r14 +; CHECK: RET 0, $rax + gc "statepoint-example" personality i32* ()* @"personality_function" { +entry: + %0 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0) ["gc-live" (i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)] + to label %safepoint_normal_dest unwind label %exceptional_return + +safepoint_normal_dest: + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 0, i32 0) + %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 1, i32 1) + br label %normal_return + +normal_return: + ret i64 addrspace(1)* %obj.relocated + +exceptional_return: + %landing_pad = landingpad token + cleanup + %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0) + %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 1, i32 1) + ret i64 addrspace(1)* %obj1.relocated1 +} + +define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) +; CHECK-LABEL: name: test_invoke_same_val +; CHECK: bb.0.entry: +; CHECK: renamable $r15 = COPY $rcx +; CHECK: renamable $rbx = COPY $rdx +; CHECK: renamable $rbp = COPY $rsi +; CHECK: renamable $r14d = COPY $edi +; CHECK: TEST8ri renamable $r14b, 1, implicit-def $eflags +; CHECK: JCC_1 %bb.3, 4, implicit killed $eflags +; CHECK: JMP_1 %bb.1 +; CHECK: bb.1.left: +; CHECK: $rdi = COPY renamable $rbp +; CHECK: renamable $rbx, renamable $rbp = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbx(tied-def 0), killed renamable $rbp(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK: JMP_1 %bb.2 +; CHECK: bb.2.left.relocs: +; CHECK: JMP_1 %bb.5 +; CHECK: bb.3.right: +; CHECK: $rdi = COPY killed renamable $rbp +; CHECK: renamable $r15, renamable $rbx = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $r15(tied-def 0), killed renamable $rbx(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK: JMP_1 %bb.4 +; CHECK: bb.4.right.relocs: +; CHECK: renamable $rbp = COPY killed renamable $r15 +; CHECK: bb.5.normal_return: +; CHECK: TEST8ri renamable $r14b, 1, implicit-def $eflags, implicit killed $r14d +; CHECK: renamable $rbp = CMOV64rr killed renamable $rbp, killed renamable $rbx, 4, implicit killed $eflags +; CHECK: $rax = COPY killed renamable $rbp +; CHECK: RET 0, $rax +; CHECK: bb.6.exceptional_return.left (landing-pad): +; CHECK: $rax = COPY killed renamable $rbp +; CHECK: RET 0, $rax +; CHECK: bb.7.exceptional_return.right (landing-pad): +; CHECK: $rax = COPY killed renamable $rbx +; CHECK: RET 0, $rax + gc "statepoint-example" personality i32* ()* @"personality_function" { +entry: + br i1 %cond, label %left, label %right + +left: + %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* %val1, i64 addrspace(1)* %val2)] + to label %left.relocs unwind label %exceptional_return.left + +left.relocs: + %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 0, i32 0) + %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 1, i32 1) + br label %normal_return + +right: + %sp2 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)] + to label %right.relocs unwind label %exceptional_return.right + +right.relocs: + %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 0, i32 0) + %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 1, i32 1) + br label %normal_return + +normal_return: + %a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs] + %a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs] + %ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2 + ret i64 addrspace(1)* %ret + +exceptional_return.left: + %landing_pad = landingpad token + cleanup + %val1.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0) + ret i64 addrspace(1)* %val1.relocated2 + +exceptional_return.right: + %landing_pad1 = landingpad token + cleanup + %val2.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad1, i32 0, i32 0) + ret i64 addrspace(1)* %val2.relocated3 +} + +define void @test_duplicate_ir_values() gc "statepoint-example" personality i32* ()* @personality_function { +; CHECK-LABEL: name: test_duplicate_ir_values +; CHECK: bb.0.entry: +; CHECK: renamable $rbx = MOV64rm undef renamable $rax, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) +; CHECK: renamable $rbx = STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax +; CHECK: JMP_1 %bb.1 +; CHECK: bb.1.normal_continue: +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rbx :: (store 8 into %stack.0) +; CHECK: $edi = MOV32ri 10 +; CHECK: STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 2, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK: bb.2.exceptional_return (landing-pad): +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rbx :: (store 8 into %stack.0) +; CHECK: $edi = MOV32ri -271 +; CHECK: STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +entry: + %val1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 + %val2 = load i8 
addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef, align 8 + %statepoint_token1 = invoke token (i64, i32, i32 (i32, i8 addrspace(1)*, i32, i32, i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32p1i8i32i32i32f(i64 1, i32 16, i32 (i32, i8 addrspace(1)*, i32, i32, i32)* nonnull @foo, i32 5, i32 0, i32 undef, i8 addrspace(1)* undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0) [ "deopt"(), "gc-live"(i8 addrspace(1)* %val1, i8 addrspace(1)* %val2) ] + to label %normal_continue unwind label %exceptional_return + +normal_continue: ; preds = %entry + %val1.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token1, i32 0, i32 0) ; (%val1, %val1) + %val2.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token1, i32 1, i32 1) ; (%val2, %val2) + %safepoint_token2 = call token (i64, i32, void (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64 2882400000, i32 0, void (i32)* nonnull @__llvm_deoptimize, i32 1, i32 2, i32 10, i32 0, i32 0) [ "deopt"(i8 addrspace(1)* %val1.relocated1, i8 addrspace(1)* %val2.relocated1), "gc-live"() ] + unreachable + +exceptional_return: ; preds = %entry + %lpad_token11090 = landingpad token + cleanup + %val2.relocated2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %lpad_token11090, i32 1, i32 1) ; (%val2, %val2) + %safepoint_token3 = call token (i64, i32, void (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64 2882400000, i32 0, void (i32)* nonnull @__llvm_deoptimize, i32 1, i32 0, i32 -271, i32 0, i32 0) [ "deopt"(i8 addrspace(1)* %val2.relocated2), "gc-live"() ] + unreachable +} + +declare void @__llvm_deoptimize(i32) +declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64 immarg, i32 immarg, void (i32)*, i32 immarg, i32 immarg, ...) +declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) +declare token @llvm.experimental.gc.statepoint.p0f_i32i32p1i8i32i32i32f(i64 immarg, i32 immarg, i32 (i32, i8 addrspace(1)*, i32, i32, i32)*, i32 immarg, i32 immarg, ...) 
+declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token, i32, i32) +declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll index a91b5153cecb8..8f3279200ad98 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -6,7 +6,6 @@ target triple = "x86_64-pc-linux-gnu" declare i1 @return_i1() declare void @func() -declare void @"some_call"(i64 addrspace(1)*) declare void @consume(i32 addrspace(1)*) declare i32 @consume1(i32) gc "statepoint-example" declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*) @@ -15,7 +14,6 @@ declare float @consume4(i64) gc "statepoint-example" declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*) declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*) -declare i32 @"personality_function"() ; test most simple relocate define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" { @@ -386,162 +384,6 @@ entry: ret void } -define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj, -; CHECK-LABEL: test_basic_invoke: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %rbx, -24 -; CHECK-NEXT: .cfi_offset %r14, -16 -; CHECK-NEXT: movq %rsi, %r14 -; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: .Ltmp12: -; CHECK-NEXT: callq some_call -; CHECK-NEXT: .Ltmp15: -; CHECK-NEXT: .Ltmp13: -; CHECK-NEXT: # %bb.1: # %normal_return -; CHECK-NEXT: movq %rbx, %rax -; CHECK-NEXT: .LBB11_2: # %normal_return -; CHECK-NEXT: addq $8, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB11_3: # %exceptional_return -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .Ltmp14: -; CHECK-NEXT: movq %r14, %rax -; CHECK-NEXT: jmp .LBB11_2 - i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @"personality_function" { -entry: - %0 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0) ["gc-live" (i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)] - to label %invoke_safepoint_normal_dest unwind label %exceptional_return - -invoke_safepoint_normal_dest: - %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 0, i32 0) - %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 1, i32 1) - br label %normal_return - -normal_return: - ret i64 addrspace(1)* %obj.relocated - -exceptional_return: - %landing_pad = landingpad token - cleanup - %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0) - %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 1, i32 1) - ret i64 addrspace(1)* %obj1.relocated1 -} - -define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) gc "statepoint-example" personality i32 ()* @"personality_function" { -; CHECK-LABEL: test_invoke_same_val: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: pushq %r15 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset %rbx, -40 -; CHECK-NEXT: .cfi_offset %r14, -32 -; CHECK-NEXT: .cfi_offset %r15, -24 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rsi, %rbp -; CHECK-NEXT: movl %edi, %r14d -; CHECK-NEXT: testb $1, %r14b -; CHECK-NEXT: je .LBB12_2 -; CHECK-NEXT: # %bb.1: # %left -; CHECK-NEXT: .Ltmp19: -; CHECK-NEXT: movq %rbp, %rdi -; CHECK-NEXT: callq some_call -; CHECK-NEXT: .Ltmp22: -; CHECK-NEXT: .Ltmp20: -; CHECK-NEXT: jmp .LBB12_4 -; CHECK-NEXT: .LBB12_2: # %right -; CHECK-NEXT: movq %rcx, %r15 -; CHECK-NEXT: .Ltmp16: -; CHECK-NEXT: movq %rbp, %rdi -; CHECK-NEXT: callq some_call -; CHECK-NEXT: .Ltmp23: -; CHECK-NEXT: .Ltmp17: -; CHECK-NEXT: # %bb.3: # %right.relocs -; CHECK-NEXT: movq %r15, %rbp -; CHECK-NEXT: .LBB12_4: # %normal_return -; CHECK-NEXT: testb $1, %r14b -; CHECK-NEXT: cmoveq %rbx, %rbp -; CHECK-NEXT: .LBB12_6: # %exceptional_return.left -; CHECK-NEXT: movq %rbp, %rax -; CHECK-NEXT: .LBB12_7: # %exceptional_return.left -; CHECK-NEXT: addq $8, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: popq %r15 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB12_8: # %exceptional_return.right -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .Ltmp18: -; CHECK-NEXT: movq %rbx, %rax -; CHECK-NEXT: jmp .LBB12_7 -; CHECK-NEXT: .LBB12_5: # %exceptional_return.left -; CHECK-NEXT: .Ltmp21: -; CHECK-NEXT: jmp .LBB12_6 -entry: - br i1 %cond, label %left, label %right - -left: - %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* %val1, i64 addrspace(1)* %val2)] - to label %left.relocs unwind label %exceptional_return.left - -left.relocs: - %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 0, i32 0) - %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 1, i32 1) - br label %normal_return - -right: - %sp2 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)] - to label %right.relocs unwind label %exceptional_return.right - -right.relocs: - %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 0, i32 0) - %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 1, i32 1) - br label %normal_return - -normal_return: - %a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs] - %a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs] - %ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2 - ret i64 addrspace(1)* %ret - -exceptional_return.left: - %landing_pad = landingpad token - cleanup - %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0) - ret i64 addrspace(1)* %val.relocated2 - -exceptional_return.right: - %landing_pad1 = landingpad token - cleanup - %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad1, i32 0, i32 0) - ret i64 addrspace(1)* %val.relocated3 -} - ; test ISEL for constant base pointer - must properly tie operands define void @test_const_base(i32 addrspace(1)* %a) gc "statepoint-example" { ; CHECK-LABEL: test_const_base: @@ -551,7 +393,7 @@ define void @test_const_base(i32 addrspace(1)* %a) gc "statepoint-example" { ; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: callq func -; CHECK-NEXT: .Ltmp24: +; CHECK-NEXT: .Ltmp12: ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: callq consume ; CHECK-NEXT: popq %rbx @@ -581,12 +423,12 @@ define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-ex ; CHECK-NEXT: movl %edi, %ebp ; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: callq consume3 -; CHECK-NEXT: .Ltmp25: +; CHECK-NEXT: .Ltmp13: ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: cvtsi2sd %ebp, %xmm0 ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: nopl 8(%rax,%rax) -; CHECK-NEXT: .Ltmp26: +; CHECK-NEXT: .Ltmp14: ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) @@ -594,7 +436,7 @@ define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-ex ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss %xmm0, (%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) -; CHECK-NEXT: .Ltmp27: +; CHECK-NEXT: .Ltmp15: ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) @@ -602,7 +444,7 @@ define 
void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-ex ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss %xmm0, (%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) -; CHECK-NEXT: .Ltmp28: +; CHECK-NEXT: .Ltmp16: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorpd %xmm0, %xmm0 ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -615,7 +457,7 @@ define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-ex ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss %xmm0, (%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) -; CHECK-NEXT: .Ltmp29: +; CHECK-NEXT: .Ltmp17: ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: popq %rbx @@ -646,7 +488,6 @@ declare token @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 immarg, i32 immar declare token @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 immarg, i32 immarg, void (float)*, i32 immarg, i32 immarg, ...) declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) -declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token, i32, i32) declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) From be9184bc557ae4000cd785fe369347817e5cbad4 Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Thu, 10 Dec 2020 17:23:46 +0000 Subject: [PATCH 07/39] [SLPVectorizer]Migrate getEntryCost to return InstructionCost This patch also changes: the return type of getGatherCost and the signature of the debug function dumpTreeCosts to use InstructionCost. This patch is part of a series of patches to use InstructionCost instead of unsigned/int for the cost model functions. See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html See this patch for the introduction of the type: https://reviews.llvm.org/D91174 Depends on D93049 Differential Revision: https://reviews.llvm.org/D93127 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 111 +++++++++--------- 1 file changed, 58 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c0d7d078a3853..9ab89e0915963 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -79,6 +79,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/InstructionCost.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -1493,7 +1494,7 @@ class BoUpSLP { bool areAllUsersVectorized(Instruction *I) const; /// \returns the cost of the vectorizable entry. - int getEntryCost(TreeEntry *E); + InstructionCost getEntryCost(TreeEntry *E); /// This is the recursive part of buildTree. void buildTree_rec(ArrayRef Roots, unsigned Depth, @@ -1515,13 +1516,14 @@ class BoUpSLP { /// \returns the scalarization cost for this type. Scalarization in this /// context means the creation of vectors from a group of scalars. 
- int getGatherCost(FixedVectorType *Ty, - const DenseSet &ShuffledIndices) const; + InstructionCost + getGatherCost(FixedVectorType *Ty, + const DenseSet &ShuffledIndices) const; /// \returns the scalarization cost for this list of values. Assuming that /// this subtree gets vectorized, we may need to extract the values from the /// roots. This method calculates the cost of extracting the values. - int getGatherCost(ArrayRef VL) const; + InstructionCost getGatherCost(ArrayRef VL) const; /// Set the Builder insert point to one after the last instruction in /// the bundle @@ -1755,8 +1757,9 @@ class BoUpSLP { }; #ifndef NDEBUG - void dumpTreeCosts(TreeEntry *E, int ReuseShuffleCost, int VecCost, - int ScalarCost) const { + void dumpTreeCosts(TreeEntry *E, InstructionCost ReuseShuffleCost, + InstructionCost VecCost, + InstructionCost ScalarCost) const { dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump(); dbgs() << "SLP: Costs:\n"; dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n"; @@ -3423,7 +3426,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy, return {IntrinsicCost, LibCost}; } -int BoUpSLP::getEntryCost(TreeEntry *E) { +InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) { ArrayRef VL = E->Scalars; Type *ScalarTy = VL[0]->getType(); @@ -3442,7 +3445,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size(); bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); - int ReuseShuffleCost = 0; + InstructionCost ReuseShuffleCost = 0; if (NeedToShuffleReuses) { ReuseShuffleCost = TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy); @@ -3458,7 +3461,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { allSameType(VL) && allSameBlock(VL)) { Optional ShuffleKind = isShuffle(VL); if (ShuffleKind.hasValue()) { - int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy); + InstructionCost Cost = + TTI->getShuffleCost(ShuffleKind.getValue(), VecTy); for (auto *V : VL) { // If all users of instruction are going to be vectorized and this // instruction itself is not going to be vectorized, consider this @@ -3490,7 +3494,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::ExtractValue: case Instruction::ExtractElement: { - int DeadCost = 0; + InstructionCost DeadCost = 0; if (NeedToShuffleReuses) { unsigned Idx = 0; for (unsigned I : E->ReuseShuffleIndices) { @@ -3565,7 +3569,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = VL0->getOperand(0)->getType(); - int ScalarEltCost = + InstructionCost ScalarEltCost = TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, TTI::getCastContextHint(VL0), CostKind, VL0); if (NeedToShuffleReuses) { @@ -3573,10 +3577,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } // Calculate the cost of this instruction. - int ScalarCost = VL.size() * ScalarEltCost; + InstructionCost ScalarCost = VL.size() * ScalarEltCost; auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size()); - int VecCost = 0; + InstructionCost VecCost = 0; // Check if the values are candidates to demote. if (!MinBWs.count(VL0) || VecTy != SrcVecTy) { VecCost = @@ -3591,14 +3595,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::ICmp: case Instruction::Select: { // Calculate the cost of this instruction. 
- int ScalarEltCost = + InstructionCost ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(), CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); - int ScalarCost = VecTy->getNumElements() * ScalarEltCost; + InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost; // Check if all entries in VL are either compares or selects with compares // as condition that have the same predicates. @@ -3617,8 +3621,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { VecPred = CurrentPred; } - int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, - VecPred, CostKind, VL0); + InstructionCost VecCost = TTI->getCmpSelInstrCost( + E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0); // Check if it is possible and profitable to use min/max for selects in // VL. // @@ -3626,7 +3630,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) { IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy, {VecTy, VecTy}); - int IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind); + InstructionCost IntrinsicCost = + TTI->getIntrinsicInstrCost(CostAttrs, CostKind); // If the selects are the only uses of the compares, they will be dead // and we can adjust the cost by removing their cost. if (IntrinsicAndUse.second) @@ -3695,16 +3700,16 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } SmallVector Operands(VL0->operand_values()); - int ScalarEltCost = TTI->getArithmeticInstrCost( - E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP, - Operands, VL0); + InstructionCost ScalarEltCost = + TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK, + Op2VK, Op1VP, Op2VP, Operands, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } - int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getArithmeticInstrCost( - E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP, - Operands, VL0); + InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost; + InstructionCost VecCost = + TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK, + Op2VK, Op1VP, Op2VP, Operands, VL0); LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost)); return ReuseShuffleCost + VecCost - ScalarCost; } @@ -3714,30 +3719,27 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { TargetTransformInfo::OperandValueKind Op2VK = TargetTransformInfo::OK_UniformConstantValue; - int ScalarEltCost = - TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind, - Op1VK, Op2VK); + InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost( + Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } - int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = - TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind, - Op1VK, Op2VK); + InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost; + InstructionCost VecCost = TTI->getArithmeticInstrCost( + Instruction::Add, VecTy, CostKind, Op1VK, Op2VK); LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost)); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::Load: { // Cost of wide load - cost of scalar loads. 
Align alignment = cast(VL0)->getAlign(); - int ScalarEltCost = - TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, - CostKind, VL0); + InstructionCost ScalarEltCost = TTI->getMemoryOpCost( + Instruction::Load, ScalarTy, alignment, 0, CostKind, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } - int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost; - int VecLdCost; + InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost; + InstructionCost VecLdCost; if (E->State == TreeEntry::Vectorize) { VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0, CostKind, VL0); @@ -3759,12 +3761,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { auto *SI = cast(IsReorder ? VL[E->ReorderIndices.front()] : VL0); Align Alignment = SI->getAlign(); - int ScalarEltCost = - TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, - CostKind, VL0); - int ScalarStCost = VecTy->getNumElements() * ScalarEltCost; - int VecStCost = TTI->getMemoryOpCost(Instruction::Store, - VecTy, Alignment, 0, CostKind, VL0); + InstructionCost ScalarEltCost = TTI->getMemoryOpCost( + Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0); + InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost; + InstructionCost VecStCost = TTI->getMemoryOpCost( + Instruction::Store, VecTy, Alignment, 0, CostKind, VL0); if (IsReorder) VecStCost += TTI->getShuffleCost( TargetTransformInfo::SK_PermuteSingleSrc, VecTy); @@ -3777,14 +3778,16 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Calculate the cost of the scalar and vector calls. IntrinsicCostAttributes CostAttrs(ID, *CI, ElementCount::getFixed(1), 1); - int ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind); + InstructionCost ScalarEltCost = + TTI->getIntrinsicInstrCost(CostAttrs, CostKind); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } - int ScalarCallCost = VecTy->getNumElements() * ScalarEltCost; + InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost; auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI); - int VecCallCost = std::min(VecCallCosts.first, VecCallCosts.second); + InstructionCost VecCallCost = + std::min(VecCallCosts.first, VecCallCosts.second); LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost << " (" << VecCallCost << "-" << ScalarCallCost << ")" @@ -3799,7 +3802,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { (Instruction::isCast(E->getOpcode()) && Instruction::isCast(E->getAltOpcode()))) && "Invalid Shuffle Vector Operand"); - int ScalarCost = 0; + InstructionCost ScalarCost = 0; if (NeedToShuffleReuses) { for (unsigned Idx : E->ReuseShuffleIndices) { Instruction *I = cast(VL[Idx]); @@ -3823,7 +3826,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle. 
- int VecCost = 0; + InstructionCost VecCost = 0; if (Instruction::isBinaryOp(E->getOpcode())) { VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, @@ -4120,21 +4123,23 @@ InstructionCost BoUpSLP::getTreeCost() { return Cost; } -int BoUpSLP::getGatherCost(FixedVectorType *Ty, - const DenseSet &ShuffledIndices) const { +InstructionCost +BoUpSLP::getGatherCost(FixedVectorType *Ty, + const DenseSet &ShuffledIndices) const { unsigned NumElts = Ty->getNumElements(); APInt DemandedElts = APInt::getNullValue(NumElts); for (unsigned I = 0; I < NumElts; ++I) if (!ShuffledIndices.count(I)) DemandedElts.setBit(I); - int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true, - /*Extract*/ false); + InstructionCost Cost = + TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true, + /*Extract*/ false); if (!ShuffledIndices.empty()) Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty); return Cost; } -int BoUpSLP::getGatherCost(ArrayRef VL) const { +InstructionCost BoUpSLP::getGatherCost(ArrayRef VL) const { // Find the type of the operands in VL. Type *ScalarTy = VL[0]->getType(); if (StoreInst *SI = dyn_cast(VL[0])) From a1e1dcabe4fb1f24401f3153409583fe133ffd46 Mon Sep 17 00:00:00 2001 From: diggerlin Date: Wed, 16 Dec 2020 09:34:59 -0500 Subject: [PATCH 08/39] [XCOFF][AIX] Emit EH information in traceback table SUMMARY: In order for the runtime on AIX to find the compact unwind section(EHInfo table), we would need to set the following on the traceback table: The 6th byte's longtbtable field to true to signal there is an Extended TB Table Flag. The Extended TB Table Flag to be 0x08 to signal there is an exception handling info presents. Emit the offset between ehinfo TC entry and TOC base after all other optional portions of traceback table. The patch is authored by Jason Liu. Reviewers: David Tenty, Digger Lin Differential Revision: https://reviews.llvm.org/D92766 --- llvm/include/llvm/BinaryFormat/XCOFF.h | 12 ++++--- .../CodeGen/TargetLoweringObjectFileImpl.h | 5 +++ llvm/lib/BinaryFormat/XCOFF.cpp | 25 ++++++++++++++ llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 23 ++++++------- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 23 +++++++++++++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 33 +++++++++++++++++++ llvm/test/CodeGen/PowerPC/aix-exception.ll | 26 +++++++++++++++ 7 files changed, 129 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index f2e11efef5cd1..48e1baf72689a 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -395,14 +395,16 @@ struct TracebackTable { // Extended Traceback table flags. enum ExtendedTBTableFlag : uint8_t { - TB_OS1 = 0x80, ///< Reserved for OS use - TB_RESERVED = 0x40, ///< Reserved for compiler - TB_SSP_CANARY = 0x20, ///< stack smasher canary present on stack - TB_OS2 = 0x10, ///< Reserved for OS use - TB_LONGTBTABLE2 = 0x01 ///< Additional tbtable extension exists + TB_OS1 = 0x80, ///< Reserved for OS use. + TB_RESERVED = 0x40, ///< Reserved for compiler. + TB_SSP_CANARY = 0x20, ///< stack smasher canary present on stack. + TB_OS2 = 0x10, ///< Reserved for OS use. + TB_EH_INFO = 0x08, ///< Exception handling info present. + TB_LONGTBTABLE2 = 0x01 ///< Additional tbtable extension exists. 
}; StringRef getNameForTracebackTableLanguageId(TracebackTable::LanguageID LangId); +SmallString<32> getExtendedTBTableFlagString(uint8_t Flag); } // end namespace XCOFF } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index a7389bb213716..31e08b7d1e634 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -21,6 +21,7 @@ namespace llvm { class GlobalValue; class MachineModuleInfo; +class MachineFunction; class MCContext; class MCExpr; class MCSection; @@ -219,6 +220,10 @@ class TargetLoweringObjectFileXCOFF : public TargetLoweringObjectFile { TargetLoweringObjectFileXCOFF() = default; ~TargetLoweringObjectFileXCOFF() override = default; + static bool ShouldEmitEHBlock(const MachineFunction *MF); + + static MCSymbol *getEHInfoTableSymbol(const MachineFunction *MF); + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, diff --git a/llvm/lib/BinaryFormat/XCOFF.cpp b/llvm/lib/BinaryFormat/XCOFF.cpp index fda4595971f36..0f270a5cea1b9 100644 --- a/llvm/lib/BinaryFormat/XCOFF.cpp +++ b/llvm/lib/BinaryFormat/XCOFF.cpp @@ -128,4 +128,29 @@ SmallString<32> XCOFF::parseParmsType(uint32_t Value, unsigned ParmsNum) { return ParmsType; } +SmallString<32> XCOFF::getExtendedTBTableFlagString(uint8_t Flag) { + SmallString<32> Res; + + if (Flag & ExtendedTBTableFlag::TB_OS1) + Res += "TB_OS1 "; + if (Flag & ExtendedTBTableFlag::TB_RESERVED) + Res += "TB_RESERVED "; + if (Flag & ExtendedTBTableFlag::TB_SSP_CANARY) + Res += "TB_SSP_CANARY "; + if (Flag & ExtendedTBTableFlag::TB_OS2) + Res += "TB_OS2 "; + if (Flag & ExtendedTBTableFlag::TB_EH_INFO) + Res += "TB_EH_INFO "; + if (Flag & ExtendedTBTableFlag::TB_LONGTBTABLE2) + Res += "TB_LONGTBTABLE2 "; + + // Two of the bits that haven't got used in the mask. + if (Flag & 0x06) + Res += "Unknown "; + + // Pop the last space. + Res.pop_back(); + return Res; +} + #undef RELOC_CASE diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 21615d47f634d..95d878e65be41 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -13,6 +13,7 @@ #include "DwarfException.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -37,8 +38,8 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getCompactUnwindSection()); - MCSymbol *EHInfoLabel = MMI->getContext().getOrCreateSymbol( - "__ehinfo." + Twine(Asm->getFunctionNumber())); + MCSymbol *EHInfoLabel = + TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF); Asm->OutStreamer->emitLabel(EHInfoLabel); // Version number. 
@@ -60,20 +61,16 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, } void AIXException::endFunction(const MachineFunction *MF) { - const Function &F = MF->getFunction(); - bool HasLandingPads = !MF->getLandingPads().empty(); - const Function *Per = nullptr; - if (F.hasPersonalityFn()) - Per = dyn_cast(F.getPersonalityFn()->stripPointerCasts()); - bool EmitEHBlock = - HasLandingPads || (F.hasPersonalityFn() && - !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && - F.needsUnwindTableEntry()); - - if (!EmitEHBlock) + if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF)) return; const MCSymbol *LSDALabel = emitExceptionTable(); + + const Function &F = MF->getFunction(); + assert(F.hasPersonalityFn() && + "Landingpads are presented, but no personality routine is found."); + const Function *Per = + dyn_cast(F.getPersonalityFn()->stripPointerCasts()); const MCSymbol *PerSym = Asm->TM.getSymbol(Per); emitExceptionInfoTable(LSDALabel, PerSym); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index b4d88fcb0702f..6567aaaa3d829 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -2054,6 +2054,29 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( //===----------------------------------------------------------------------===// // XCOFF //===----------------------------------------------------------------------===// +bool TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock( + const MachineFunction *MF) { + if (!MF->getLandingPads().empty()) + return true; + + const Function &F = MF->getFunction(); + if (!F.hasPersonalityFn() || !F.needsUnwindTableEntry()) + return false; + + const Function *Per = + dyn_cast(F.getPersonalityFn()->stripPointerCasts()); + if (isNoOpWithoutInvoke(classifyEHPersonality(Per))) + return false; + + return true; +} + +MCSymbol * +TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) { + return MF->getMMI().getContext().getOrCreateSymbol( + "__ehinfo." + Twine(MF->getFunctionNumber())); +} + MCSymbol * TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, const TargetMachine &TM) const { diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 3c6db3002f63a..87c4d1bd3ebc4 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1881,6 +1881,10 @@ void PPCAIXAsmPrinter::emitTracebackTable() { (SecondHalfOfMandatoryField & 0xff000000) >> 24, 1); // Set the 6th byte of mandatory field. + bool ShouldEmitEHBlock = TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF); + if (ShouldEmitEHBlock) + SecondHalfOfMandatoryField |= TracebackTable::HasExtensionTableMask; + uint32_t GPRSaved = 0; // X13 is reserved under 64-bit environment. 
@@ -1977,6 +1981,35 @@ void PPCAIXAsmPrinter::emitTracebackTable() { OutStreamer->AddComment("AllocaUsed"); OutStreamer->emitIntValueInHex(AllocReg, sizeof(AllocReg)); } + + uint8_t ExtensionTableFlag = 0; + if (SecondHalfOfMandatoryField & TracebackTable::HasExtensionTableMask) { + if (ShouldEmitEHBlock) + ExtensionTableFlag |= ExtendedTBTableFlag::TB_EH_INFO; + + CommentOS << "ExtensionTableFlag = " + << getExtendedTBTableFlagString(ExtensionTableFlag); + EmitCommentAndValue(ExtensionTableFlag, sizeof(ExtensionTableFlag)); + } + + if (ExtensionTableFlag & ExtendedTBTableFlag::TB_EH_INFO) { + auto &Ctx = OutStreamer->getContext(); + MCSymbol *EHInfoSym = + TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF); + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym); + const MCSymbol *TOCBaseSym = + cast(getObjFileLowering().getTOCBaseSection()) + ->getQualNameSymbol(); + const MCExpr *Exp = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx), + MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx); + + const DataLayout &DL = getDataLayout(); + OutStreamer->emitValueToAlignment(4); + OutStreamer->AddComment("EHInfo Table"); + OutStreamer->emitValue(Exp, DL.getPointerSize()); + } + #undef GENBOOLCOMMENT #undef GENVALUECOMMENT } diff --git a/llvm/test/CodeGen/PowerPC/aix-exception.ll b/llvm/test/CodeGen/PowerPC/aix-exception.ll index f2fc323e166fb..4c9d15cafe0ca 100644 --- a/llvm/test/CodeGen/PowerPC/aix-exception.ll +++ b/llvm/test/CodeGen/PowerPC/aix-exception.ll @@ -98,6 +98,29 @@ eh.resume: ; preds = %catch.dispatch ; ASM: bl .__cxa_end_catch[PR] ; ASM: nop ; ASM: b L..BB1_2 + +; ASM: L.._Z9catchFuncv0: +; ASM: .vbyte 4, 0x00000000 # Traceback table begin +; ASM: .byte 0x00 # Version = 0 +; ASM: .byte 0x09 # Language = CPlusPlus +; ASM: .byte 0x22 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue +; ASM: # +HasTraceBackTableOffset, -IsInternalProcedure +; ASM: # -HasControlledStorage, -IsTOCless +; ASM: # +IsFloatingPointPresent +; ASM: # -IsFloatingPointOperationLogOrAbortEnabled +; ASM: .byte 0x41 # -IsInterruptHandler, +IsFunctionNamePresent, -IsAllocaUsed +; ASM: # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved +; ASM: .byte 0x80 # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0 +; ASM: .byte 0x40 # -HasVectorInfo, +HasExtensionTable, NumOfGPRsSaved = 0 +; ASM: .byte 0x00 # NumberOfFixedParms = 0 +; ASM: .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack +; ASM: .vbyte 4, L.._Z9catchFuncv0-._Z9catchFuncv # Function size +; ASM: .vbyte 2, 0x000d # Function name len = 13 +; ASM: .byte '_,'Z,'9,'c,'a,'t,'c,'h,'F,'u,'n,'c,'v # Function Name +; ASM: .byte 0x08 # ExtensionTableFlag = TB_EH_INFO +; ASM: .align 2 +; ASM32: .vbyte 4, L..C1-TOC[TC0] # EHInfo Table +; ASM64: .vbyte 8, L..C1-TOC[TC0] # EHInfo Table ; ASM: L..func_end0: ; ASM: .csect .gcc_except_table[RO],2 @@ -129,6 +152,7 @@ eh.resume: ; preds = %catch.dispatch ; ASM64: .vbyte 8, L..C0-TOC[TC0] # TypeInfo 1 ; ASM: L..ttbase0: ; ASM: .align 2 + ; ASM: .csect .eh_info_table[RW],2 ; ASM: __ehinfo.1: ; ASM: .vbyte 4, 0 @@ -142,6 +166,8 @@ eh.resume: ; preds = %catch.dispatch ; ASM: .toc ; ASM: L..C0: ; ASM: .tc _ZTIi[TC],_ZTIi[UA] +; ASM: L..C1: +; ASM: .tc __ehinfo.1[TC],__ehinfo.1 declare i8* @__cxa_allocate_exception(i32) declare void @__cxa_throw(i8*, i8*, i8*) From 66ee0d3d84a6ea04e895249aef2ea8a812664728 Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Wed, 16 Dec 2020 06:36:22 -0800 Subject: [PATCH 09/39] [sanitizer-common] Force pickup of llvm-symbolizer from new 
binaries. It's possible currently that the sanitizer runtimes when testing grab the path to the symbolizer through *SAN_SYMBOLIZER_PATH=... This can be polluted by things like Android's setup script. This patch forces external_symbolizer_path=$new_build_out_dir/llvm-symbolizer when %env_tool_options is used. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D93352 --- compiler-rt/test/sanitizer_common/lit.common.cfg.py | 8 ++++++++ compiler-rt/test/sanitizer_common/lit.site.cfg.py.in | 1 + 2 files changed, 9 insertions(+) diff --git a/compiler-rt/test/sanitizer_common/lit.common.cfg.py b/compiler-rt/test/sanitizer_common/lit.common.cfg.py index b4f0670f99594..f7c526b6b37bb 100644 --- a/compiler-rt/test/sanitizer_common/lit.common.cfg.py +++ b/compiler-rt/test/sanitizer_common/lit.common.cfg.py @@ -43,6 +43,14 @@ # which does not work for abort()-terminated programs. default_tool_options += ['abort_on_error=0'] +# If the user has a poisoned *SAN_SYMBOLIZER_PATH (like what's setup by +# build/envsetup.sh on Android), then they can end up with an out-of-date +# symbolizer for the tests. Ensure they get the one from the recent build tree. +symbolizer_path="''" +if len(config.binary_path): + symbolizer_path = os.path.join(config.binary_path, "llvm-symbolizer") +default_tool_options += ['external_symbolizer_path=' + symbolizer_path] + default_tool_options_str = ':'.join(default_tool_options) if default_tool_options_str: config.environment[tool_options] = default_tool_options_str diff --git a/compiler-rt/test/sanitizer_common/lit.site.cfg.py.in b/compiler-rt/test/sanitizer_common/lit.site.cfg.py.in index 38f5ca1584631..3ff7c44aeab45 100644 --- a/compiler-rt/test/sanitizer_common/lit.site.cfg.py.in +++ b/compiler-rt/test/sanitizer_common/lit.site.cfg.py.in @@ -5,6 +5,7 @@ config.name_suffix = "@CONFIG_NAME@" config.tool_name = "@SANITIZER_COMMON_LIT_TEST_MODE@" config.target_cflags = "@SANITIZER_COMMON_TEST_TARGET_CFLAGS@" config.target_arch = "@SANITIZER_COMMON_TEST_TARGET_ARCH@" +config.binary_path = "@CMAKE_RUNTIME_OUTPUT_DIRECTORY@" # Load common config for all compiler-rt lit tests. lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured") From 553808d45620e12dad7dcab553d4444b74e6010d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Dec 2020 13:31:10 +0000 Subject: [PATCH 10/39] [X86] Rename reduction combiners to make it clearer whats happening. NFCI. Since these are all working on reduction patterns, actually use that term in the function name to make them easier to search for. At some point we're likely to start working with the ISD::VECREDUCE_* opcodes directly in the x86 backend, but that is still some way off. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7be12238956ef..2562cc2d37e1c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39514,8 +39514,8 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0, // Attempt to replace an min/max v8i16/v16i8 horizontal reduction with // PHMINPOSUW. -static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // Bail without SSE41. 
if (!Subtarget.hasSSE41()) return SDValue(); @@ -39588,9 +39588,8 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, } // Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK. -static SDValue combineHorizontalPredicateResult(SDNode *Extract, - SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // Bail without SSE2. if (!Subtarget.hasSSE2()) return SDValue(); @@ -40081,8 +40080,8 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { /// Try to convert a vector reduction sequence composed of binops and shuffles /// into horizontal ops. -static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller"); // We need at least SSE2 to anything here. @@ -40282,7 +40281,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, } // TODO - Remove this once we can handle the implicit zero-extension of - // X86ISD::PEXTRW/X86ISD::PEXTRB in combineHorizontalPredicateResult and + // X86ISD::PEXTRW/X86ISD::PEXTRB in combinePredicateReduction and // combineBasicSADPattern. return SDValue(); } @@ -40314,14 +40313,15 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return SAD; // Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK. - if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget)) + if (SDValue Cmp = combinePredicateReduction(N, DAG, Subtarget)) return Cmp; // Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW. - if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget)) + if (SDValue MinMax = combineMinMaxReduction(N, DAG, Subtarget)) return MinMax; - if (SDValue V = combineReductionToHorizontal(N, DAG, Subtarget)) + // Attempt to optimize ADD/FADD/MUL reductions with HADD, promotion etc.. + if (SDValue V = combineArithReduction(N, DAG, Subtarget)) return V; if (SDValue V = scalarizeExtEltFP(N, DAG)) From e5039aad45740a3017d774673867eccbbe6b0ce0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 16 Dec 2020 14:42:08 +0000 Subject: [PATCH 11/39] [X86] Regenerate bit extraction tests, cleaning up check-prefixes. As noticed on D92965, we needed to simplify the prefixes to ensure all RUNs were properly covered. We should never have a target with BMI2 without BMI1, so use that as the 'BMI level' and then check with/without TBM (all TBM targets have at least BMI1). 
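
For orientation only (this note and sketch are not part of the patch): the bextr32_a* tests below all exercise the classic shift-and-mask bit extraction that BEXTR/BZHI are matched against (the *_arithmetic variants use an arithmetic right shift, and the bextr64_* tests are the 64-bit analogue). A rough C++ equivalent of the IR pattern, with names mirroring the IR operands, would be:

    #include <cstdint>

    // Extract `numlowbits` bits of `val` starting at bit `numskipbits`.
    // Mirrors the test IR: lshr; shl 1, n; add -1; and.
    // As in the IR (where an oversized shl is poison), this is only
    // well-defined for numlowbits < 32.
    uint32_t bextr32(uint32_t val, uint32_t numskipbits, uint32_t numlowbits) {
      uint32_t shifted = val >> numskipbits;    // lshr i32 %val, %numskipbits
      uint32_t mask = (1u << numlowbits) - 1u;  // shl i32 1, %numlowbits ; add -1
      return shifted & mask;                    // and i32 %mask, %shifted
    }

With +bmi this pattern is selected to BEXTR (after packing the start/length control word), and with +bmi2 to SHRX plus BZHI, which is what the X86-BMI1/X86-BMI2 and X64-BMI1/X64-BMI2 check lines below assert.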
--- llvm/test/CodeGen/X86/extract-bits.ll | 9537 +++++++++++----------- llvm/test/CodeGen/X86/extract-lowbits.ll | 4740 +++++------ 2 files changed, 7092 insertions(+), 7185 deletions(-) diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll index c128a00fc91a7..408307439c9dd 100644 --- a/llvm/test/CodeGen/X86/extract-bits.ll +++ b/llvm/test/CodeGen/X86/extract-bits.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu 
-mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI2 ; *Please* keep in sync with test/CodeGen/AArch64/extract-bits.ll @@ -46,22 +46,22 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a0: ; X64-NOBMI: # %bb.0: @@ -75,19 +75,19 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -111,23 +111,23 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a0_arithmetic: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: sarl %cl, %edx -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a0_arithmetic: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: sarxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a0_arithmetic: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb 
{{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: sarl %cl, %edx +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a0_arithmetic: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: sarxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a0_arithmetic: ; X64-NOBMI: # %bb.0: @@ -141,20 +141,20 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a0_arithmetic: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: sarl %cl, %edi -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: bextrl %edx, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a0_arithmetic: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: sarxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a0_arithmetic: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: sarl %cl, %edi +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: bextrl %edx, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a0_arithmetic: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: sarxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = ashr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -178,22 +178,22 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a1_indexzext: ; X64-NOBMI: # %bb.0: @@ -207,19 +207,19 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: 
movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip %conv = zext i8 %numlowbits to i32 @@ -246,24 +246,24 @@ define i32 @bextr32_a2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a2_load: ; X64-NOBMI: # %bb.0: @@ -278,19 +278,19 @@ define i32 @bextr32_a2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: andl %esi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits @@ -316,24 +316,24 @@ define i32 @bextr32_a3_load_indexzext(i32* %w, 
i8 zeroext %numskipbits, i8 zeroe ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -348,19 +348,19 @@ define i32 @bextr32_a3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe ; X64-NOBMI-NEXT: andl %esi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -387,22 +387,22 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a4_commutative: +; X86-BMI1: # %bb.0: +; 
X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a4_commutative: ; X64-NOBMI: # %bb.0: @@ -416,19 +416,19 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -460,37 +460,37 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_a5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl %al, %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %eax, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_a5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %esi -; X86-BMI1BMI2-NEXT: movl %ecx, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_a5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl %al, %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %eax, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: 
addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_a5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: bzhil %eax, %edx, %esi +; X86-BMI2-NEXT: movl %ecx, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_a5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -508,29 +508,29 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_a5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %ebx -; X64-BMI1NOTBM-NEXT: movl %esi, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_a5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %ebx -; X64-BMI1BMI2-NEXT: movl %esi, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_a5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %ebx +; X64-BMI1-NEXT: movl %esi, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_a5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %ebx +; X64-BMI2-NEXT: movl %esi, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -578,78 +578,78 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB7_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB7_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB7_4 -; 
X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB7_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB7_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB7_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB7_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB7_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB7_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB7_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB7_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB7_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB7_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB7_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: 
movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB7_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB7_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a0: ; X64-NOBMI: # %bb.0: @@ -663,19 +663,19 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -721,80 +721,80 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) n ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a0_arithmetic: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: sarl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB8_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: sarl $31, %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: .LBB8_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB8_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB8_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %edi, %eax -; X86-BMI1NOTBM-NEXT: andl %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a0_arithmetic: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), 
%eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: sarxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB8_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: sarl $31, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl %eax, %edi -; X86-BMI1BMI2-NEXT: .LBB8_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB8_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB8_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a0_arithmetic: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: sarl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %eax, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB8_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: sarl $31, %eax +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: .LBB8_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB8_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB8_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %edi, %eax +; X86-BMI1-NEXT: andl %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a0_arithmetic: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: sarxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB8_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: sarl $31, %eax +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: .LBB8_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB8_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB8_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a0_arithmetic: ; X64-NOBMI: # %bb.0: @@ -808,20 +808,20 @@ define 
i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) n ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a0_arithmetic: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: sarq %cl, %rdi -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: bextrq %rdx, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a0_arithmetic: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: sarxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a0_arithmetic: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: sarq %cl, %rdi +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: bextrq %rdx, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a0_arithmetic: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: sarxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = ashr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -866,78 +866,78 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB9_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB9_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB9_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB9_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB9_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB9_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: 
shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB9_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB9_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB9_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB9_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB9_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB9_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB9_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB9_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB9_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB9_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a1_indexzext: ; X64-NOBMI: # %bb.0: @@ -951,21 +951,21 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def 
$rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip %conv = zext i8 %numlowbits to i64 @@ -1013,80 +1013,80 @@ define i64 @bextr64_a2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB10_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB10_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB10_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB10_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %esi -; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB10_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB10_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB10_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB10_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; 
X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB10_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB10_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB10_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB10_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %esi +; X86-BMI2-NEXT: movl 4(%eax), %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB10_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB10_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB10_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB10_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a2_load: ; X64-NOBMI: # %bb.0: @@ -1101,19 +1101,19 @@ define i64 @bextr64_a2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: andq %rsi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; 
X64-BMI2-NEXT: retq %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits @@ -1160,80 +1160,80 @@ define i64 @bextr64_a3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB11_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB11_2: -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB11_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB11_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %esi -; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB11_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB11_2: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB11_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB11_4: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl %esi, %eax -; X86-BMI1BMI2-NEXT: andl %edi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB11_2 +; 
X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB11_2: +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB11_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB11_4: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %esi +; X86-BMI2-NEXT: movl 4(%eax), %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB11_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB11_2: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB11_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB11_4: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -1248,21 +1248,21 @@ define i64 @bextr64_a3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe ; X64-NOBMI-NEXT: andq %rsi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -1310,78 +1310,78 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: 
bextr64_a4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB12_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB12_2: -; X86-BMI1NOTBM-NEXT: movl $1, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: movb %ch, %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %ch -; X86-BMI1NOTBM-NEXT: je .LBB12_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: .LBB12_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %esi -; X86-BMI1NOTBM-NEXT: adcl $-1, %edi -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: andl %edi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB12_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB12_2: -; X86-BMI1BMI2-NEXT: movl $1, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %edi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB12_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB12_4: -; X86-BMI1BMI2-NEXT: addl $-1, %ecx -; X86-BMI1BMI2-NEXT: adcl $-1, %esi -; X86-BMI1BMI2-NEXT: andl %ecx, %eax -; X86-BMI1BMI2-NEXT: andl %esi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %esi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB12_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB12_2: +; X86-BMI1-NEXT: movl $1, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: movb %ch, %cl +; X86-BMI1-NEXT: shldl %cl, %esi, %edi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: testb $32, %ch +; X86-BMI1-NEXT: je .LBB12_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: 
movl %esi, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: .LBB12_4: +; X86-BMI1-NEXT: addl $-1, %esi +; X86-BMI1-NEXT: adcl $-1, %edi +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: andl %edi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB12_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB12_2: +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ebx, %edi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB12_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB12_4: +; X86-BMI2-NEXT: addl $-1, %ecx +; X86-BMI2-NEXT: adcl $-1, %esi +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a4_commutative: ; X64-NOBMI: # %bb.0: @@ -1395,19 +1395,19 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -1466,104 +1466,104 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_a5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %ebp -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB13_2 -; 
X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx -; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp -; X86-BMI1NOTBM-NEXT: .LBB13_2: -; X86-BMI1NOTBM-NEXT: movl $1, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: movl %edx, %ecx -; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: je .LBB13_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: .LBB13_4: -; X86-BMI1NOTBM-NEXT: addl $-1, %esi -; X86-BMI1NOTBM-NEXT: adcl $-1, %edi -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi -; X86-BMI1NOTBM-NEXT: andl %ebp, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_a5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %ebx -; X86-BMI1BMI2-NEXT: shrxl %eax, %esi, %ebp -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB13_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp -; X86-BMI1BMI2-NEXT: .LBB13_2: -; X86-BMI1BMI2-NEXT: movl $1, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: movl %edx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %esi -; X86-BMI1BMI2-NEXT: shlxl %edx, %edi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB13_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB13_4: -; X86-BMI1BMI2-NEXT: addl $-1, %edi -; X86-BMI1BMI2-NEXT: adcl $-1, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_a5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %esi, %ebp +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shrl %cl, %ebp +; X86-BMI1-NEXT: shrdl %cl, %esi, %ebx +; X86-BMI1-NEXT: testb $32, %al +; 
X86-BMI1-NEXT: je .LBB13_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %ebp, %ebx +; X86-BMI1-NEXT: xorl %ebp, %ebp +; X86-BMI1-NEXT: .LBB13_2: +; X86-BMI1-NEXT: movl $1, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: movl %edx, %ecx +; X86-BMI1-NEXT: shldl %cl, %esi, %edi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: testb $32, %dl +; X86-BMI1-NEXT: je .LBB13_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: .LBB13_4: +; X86-BMI1-NEXT: addl $-1, %esi +; X86-BMI1-NEXT: adcl $-1, %edi +; X86-BMI1-NEXT: andl %ebx, %esi +; X86-BMI1-NEXT: andl %ebp, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_a5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: shrdl %cl, %esi, %ebx +; X86-BMI2-NEXT: shrxl %eax, %esi, %ebp +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB13_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB13_2: +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %edx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %dl +; X86-BMI2-NEXT: je .LBB13_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB13_4: +; X86-BMI2-NEXT: addl $-1, %edi +; X86-BMI2-NEXT: adcl $-1, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_a5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -1581,29 +1581,29 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_a5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_a5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rbx -; 
X64-BMI1BMI2-NEXT: movq %rsi, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_a5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rbx +; X64-BMI1-NEXT: movq %rsi, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_a5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rbx +; X64-BMI2-NEXT: movq %rsi, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -1647,61 +1647,61 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB14_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: .LBB14_2: -; X86-BMI1NOTBM-NEXT: movl $1, %edi -; X86-BMI1NOTBM-NEXT: movl %edx, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: jne .LBB14_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: .LBB14_4: -; X86-BMI1NOTBM-NEXT: decl %eax -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB14_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB14_2: -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: jne .LBB14_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB14_4: -; X86-BMI1BMI2-NEXT: decl %eax -; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: shrl %cl, 
%esi +; X86-BMI1-NEXT: shrdl %cl, %edi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB14_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: .LBB14_2: +; X86-BMI1-NEXT: movl $1, %edi +; X86-BMI1-NEXT: movl %edx, %ecx +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: testb $32, %dl +; X86-BMI1-NEXT: jne .LBB14_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: .LBB14_4: +; X86-BMI1-NEXT: decl %eax +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB14_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx +; X86-BMI2-NEXT: .LBB14_2: +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB14_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: .LBB14_4: +; X86-BMI2-NEXT: decl %eax +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a0: ; X64-NOBMI: # %bb.0: @@ -1716,20 +1716,20 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -1765,44 +1765,44 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB15_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB15_2: -; X86-BMI1NOTBM-NEXT: shll 
$8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB15_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB15_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB15_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB15_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB15_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB15_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a1: ; X64-NOBMI: # %bb.0: @@ -1816,20 +1816,20 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 %onebit = shl i32 1, %numlowbits @@ -1870,54 +1870,54 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a1_trunc_extrause: -; 
X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB16_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: .LBB16_2: -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: shll $8, %ebx -; X86-BMI1NOTBM-NEXT: bextrl %ebx, %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a1_trunc_extrause: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB16_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %esi -; X86-BMI1BMI2-NEXT: .LBB16_2: -; X86-BMI1BMI2-NEXT: movl %esi, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a1_trunc_extrause: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB16_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: .LBB16_2: +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: shll $8, %ebx +; X86-BMI1-NEXT: bextrl %ebx, %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a1_trunc_extrause: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB16_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi +; X86-BMI2-NEXT: .LBB16_2: +; X86-BMI2-NEXT: movl %esi, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a1_trunc_extrause: ; X64-NOBMI: # %bb.0: @@ -1941,39 +1941,39 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 
%numskipbits, i32 %numlow ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a1_trunc_extrause: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %edx, %ebp -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %rbx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movl %ebx, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: shll $8, %ebp -; X64-BMI1NOTBM-NEXT: bextrl %ebp, %ebx, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a1_trunc_extrause: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebp -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rbx -; X64-BMI1BMI2-NEXT: movl %ebx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebp, %ebx, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a1_trunc_extrause: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %edx, %ebp +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %rbx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movl %ebx, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: shll $8, %ebp +; X64-BMI1-NEXT: bextrl %ebp, %ebx, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a1_trunc_extrause: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebp +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rbx +; X64-BMI2-NEXT: movl %ebx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebp, %ebx, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 call void @use32(i32 %truncshifted) @@ -2011,44 +2011,44 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB17_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB17_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: 
movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB17_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB17_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB17_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB17_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB17_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB17_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a2: ; X64-NOBMI: # %bb.0: @@ -2062,20 +2062,20 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -2119,61 +2119,61 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_a3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; 
X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB18_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: .LBB18_2: -; X86-BMI1NOTBM-NEXT: movl $1, %edi -; X86-BMI1NOTBM-NEXT: movl %edx, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: jne .LBB18_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: .LBB18_4: -; X86-BMI1NOTBM-NEXT: decl %eax -; X86-BMI1NOTBM-NEXT: andl %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_a3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB18_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB18_2: -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: jne .LBB18_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB18_4: -; X86-BMI1BMI2-NEXT: decl %eax -; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_a3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB18_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: .LBB18_2: +; X86-BMI1-NEXT: movl $1, %edi +; X86-BMI1-NEXT: movl %edx, %ecx +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: testb $32, %dl +; X86-BMI1-NEXT: jne .LBB18_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: .LBB18_4: +; X86-BMI1-NEXT: decl %eax +; X86-BMI1-NEXT: andl %esi, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_a3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB18_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx +; X86-BMI2-NEXT: .LBB18_2: +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB18_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ebx, %eax, %eax +; X86-BMI2-NEXT: .LBB18_4: +; X86-BMI2-NEXT: decl %eax +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_a3: ; X64-NOBMI: # %bb.0: @@ 
-2188,20 +2188,20 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_a3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_a3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_a3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_a3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, 4294967295 @@ -2230,22 +2230,22 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b0: ; X64-NOBMI: # %bb.0: @@ -2259,19 +2259,19 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; 
X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 @@ -2295,22 +2295,22 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b1_indexzext: ; X64-NOBMI: # %bb.0: @@ -2324,19 +2324,19 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip %conv = zext i8 %numlowbits to i32 @@ -2363,24 +2363,24 @@ define i32 @bextr32_b2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, 
%eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b2_load: ; X64-NOBMI: # %bb.0: @@ -2395,19 +2395,19 @@ define i32 @bextr32_b2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: andl %esi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits %notmask = shl i32 -1, %numlowbits @@ -2433,24 +2433,24 @@ define i32 @bextr32_b3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -2465,19 +2465,19 @@ define i32 @bextr32_b3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe ; 
X64-NOBMI-NEXT: andl %esi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -2504,22 +2504,22 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b4_commutative: ; X64-NOBMI: # %bb.0: @@ -2533,19 +2533,19 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_b4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %notmask = shl 
i32 -1, %numlowbits %mask = xor i32 %notmask, -1 @@ -2577,37 +2577,37 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_b5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl %al, %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %eax, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_b5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %esi -; X86-BMI1BMI2-NEXT: movl %ecx, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_b5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl %al, %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %eax, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_b5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: bzhil %eax, %edx, %esi +; X86-BMI2-NEXT: movl %ecx, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_b5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -2625,29 +2625,29 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_b5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %ebx -; X64-BMI1NOTBM-NEXT: movl %esi, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_b5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %ebx -; X64-BMI1BMI2-NEXT: movl %esi, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: 
bextr32_b5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %ebx +; X64-BMI1-NEXT: movl %esi, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_b5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %ebx +; X64-BMI2-NEXT: movl %esi, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 @@ -2699,70 +2699,70 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB25_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB25_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB25_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB25_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB25_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB25_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB25_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB25_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; 
X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB25_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB25_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB25_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB25_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB25_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB25_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB25_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB25_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b0: ; X64-NOBMI: # %bb.0: @@ -2776,19 +2776,19 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 @@ -2837,70 +2837,70 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; 
X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB26_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB26_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB26_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB26_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB26_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB26_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB26_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB26_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB26_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB26_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB26_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB26_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, 
%edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB26_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB26_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB26_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB26_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b1_indexzext: ; X64-NOBMI: # %bb.0: @@ -2914,21 +2914,21 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip %conv = zext i8 %numlowbits to i64 @@ -2980,72 +2980,72 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl (%edx), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB27_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB27_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB27_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB27_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; 
X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %eax -; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB27_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB27_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB27_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB27_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl (%edx), %esi +; X86-BMI1-NEXT: movl 4(%edx), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB27_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB27_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB27_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB27_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: movl 4(%edx), %esi +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: shrdl %cl, %esi, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB27_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB27_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB27_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB27_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b2_load: ; X64-NOBMI: # %bb.0: @@ -3060,19 +3060,19 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: andq %rsi, %rax ; X64-NOBMI-NEXT: retq 
; -; X64-BMI1NOTBM-LABEL: bextr64_b2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits %notmask = shl i64 -1, %numlowbits @@ -3123,72 +3123,72 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl (%edx), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB28_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB28_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB28_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB28_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %eax -; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB28_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB28_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB28_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB28_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: 
popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl (%edx), %esi +; X86-BMI1-NEXT: movl 4(%edx), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB28_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB28_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB28_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB28_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: movl 4(%edx), %esi +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: shrdl %cl, %esi, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB28_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB28_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB28_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB28_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -3203,21 +3203,21 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe ; X64-NOBMI-NEXT: andq %rsi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax 
+; X64-BMI2-NEXT: retq %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -3269,70 +3269,70 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB29_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB29_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB29_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB29_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB29_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB29_2: -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB29_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ecx, %esi -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB29_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx -; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB29_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB29_2: +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB29_4 +; X86-BMI1-NEXT: # %bb.3: +; 
X86-BMI1-NEXT: movl %ebx, %edi +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB29_4: +; X86-BMI1-NEXT: andnl %edx, %edi, %edx +; X86-BMI1-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB29_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB29_2: +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB29_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: .LBB29_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI2-NEXT: andnl %eax, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b4_commutative: ; X64-NOBMI: # %bb.0: @@ -3346,19 +3346,19 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 @@ -3419,96 +3419,96 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_b5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %ebx, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB30_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: .LBB30_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; 
X86-BMI1NOTBM-NEXT: movl %edx, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebp -; X86-BMI1NOTBM-NEXT: testb $32, %dl -; X86-BMI1NOTBM-NEXT: je .LBB30_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx -; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp -; X86-BMI1NOTBM-NEXT: .LBB30_4: -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %esi -; X86-BMI1NOTBM-NEXT: andnl %edi, %ebp, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_b5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB30_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB30_2: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB30_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB30_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %esi -; X86-BMI1BMI2-NEXT: andnl %eax, %edi, %edi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ecx -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_b5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %ebx, %esi +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %ebx, %edi +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB30_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: .LBB30_2: +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl %edx, %ecx +; X86-BMI1-NEXT: shll %cl, %ebp +; X86-BMI1-NEXT: testb $32, %dl +; X86-BMI1-NEXT: je .LBB30_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebp, %ebx +; X86-BMI1-NEXT: xorl %ebp, %ebp +; X86-BMI1-NEXT: .LBB30_4: +; X86-BMI1-NEXT: andnl %esi, %ebx, 
%esi +; X86-BMI1-NEXT: andnl %edi, %ebp, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_b5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB30_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB30_2: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shlxl %ebx, %esi, %edi +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB30_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB30_4: +; X86-BMI2-NEXT: andnl %edx, %esi, %esi +; X86-BMI2-NEXT: andnl %eax, %edi, %edi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ecx +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_b5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -3526,29 +3526,29 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_b5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_b5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rbx -; X64-BMI1BMI2-NEXT: movq %rsi, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_b5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rbx +; X64-BMI1-NEXT: movq %rsi, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_b5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rbx +; X64-BMI2-NEXT: movq 
%rsi, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 @@ -3592,59 +3592,59 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB31_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB31_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: jne .LBB31_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ecx -; X86-BMI1NOTBM-NEXT: .LBB31_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %ecx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB31_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB31_2: -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: jne .LBB31_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB31_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB31_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB31_2: +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: jne .LBB31_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ecx +; X86-BMI1-NEXT: .LBB31_4: +; X86-BMI1-NEXT: andnl %edx, %ecx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB31_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB31_2: +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB31_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: .LBB31_4: +; X86-BMI2-NEXT: andnl %edx, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_b0: ; X64-NOBMI: # %bb.0: @@ -3659,20 +3659,20 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shiftedval = lshr i64 %val, %numskipbits %widenumlowbits = zext i8 %numlowbits to i64 %notmask = shl nsw i64 -1, %widenumlowbits @@ -3709,44 +3709,44 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_b1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB32_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB32_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_b1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB32_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB32_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_b1: +; 
X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB32_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB32_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_b1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB32_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB32_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_b1: ; X64-NOBMI: # %bb.0: @@ -3760,20 +3760,20 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_b1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_b1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_b1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_b1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shiftedval = lshr i64 %val, %numskipbits %truncshiftedval = trunc i64 %shiftedval to i32 %widenumlowbits = zext i8 %numlowbits to i32 @@ -3811,44 +3811,44 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_b2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB33_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB33_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_b2: 
-; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB33_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB33_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_b2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB33_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB33_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_b2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB33_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB33_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_b2: ; X64-NOBMI: # %bb.0: @@ -3862,20 +3862,20 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_b2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_b2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_b2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_b2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shiftedval = lshr i64 %val, %numskipbits %widenumlowbits = zext i8 %numlowbits to i32 %notmask = shl nsw i32 -1, %widenumlowbits @@ -3920,59 +3920,59 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_b3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; 
X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB34_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB34_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: jne .LBB34_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ecx -; X86-BMI1NOTBM-NEXT: .LBB34_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %ecx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_b3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB34_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB34_2: -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: jne .LBB34_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB34_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_b3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB34_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB34_2: +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: jne .LBB34_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ecx +; X86-BMI1-NEXT: .LBB34_4: +; X86-BMI1-NEXT: andnl %edx, %ecx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_b3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB34_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB34_2: +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB34_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: .LBB34_4: +; X86-BMI2-NEXT: andnl %edx, %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_b3: ; X64-NOBMI: # 
%bb.0: @@ -3988,20 +3988,20 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_b3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_b3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_b3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_b3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shiftedval = lshr i64 %val, %numskipbits %widenumlowbits = zext i8 %numlowbits to i64 %notmask = shl nsw i64 4294967295, %widenumlowbits @@ -4038,47 +4038,47 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; 
X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c0: ; X64-NOBMI: # %bb.0: @@ -4102,45 +4102,45 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; 
X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -4172,47 +4172,47 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c1_indexzext: ; X64-NOBMI: # %bb.0: @@ -4236,45 +4236,45 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; 
X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip %numhighbits = sub i8 32, %numlowbits @@ -4309,49 +4309,49 @@ define i32 @bextr32_c2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, 
%eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, (%eax), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c2_load: ; X64-NOBMI: # %bb.0: @@ -4375,45 +4375,45 @@ define i32 @bextr32_c2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebp, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; 
X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl (%rdi), %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: movl %ebx, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebp, %ebx +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -4447,49 +4447,49 @@ define i32 @bextr32_c3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, (%eax), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; 
X86-BMI1-LABEL: bextr32_c3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -4513,45 +4513,45 @@ define i32 @bextr32_c3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebp, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl (%rdi), %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; 
X64-BMI1-NEXT: movl %ebx, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebp, %ebx +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -4586,47 +4586,47 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; 
X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c4_commutative: ; X64-NOBMI: # %bb.0: @@ -4650,45 +4650,45 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -4725,57 +4725,57 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: 
popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_c5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $16, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl %ebx, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %ebx, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_c5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $16, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %esi -; X86-BMI1BMI2-NEXT: movl %edi, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_c5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $16, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl %ebx, %ecx +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl %edi, %esi +; X86-BMI1-NEXT: movl %ebx, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_c5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $16, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %esi +; X86-BMI2-NEXT: movl %edi, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl 
%esi, %eax +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_c5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -4801,51 +4801,51 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_c5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movl %esi, %r14d -; X64-BMI1NOTBM-NEXT: movl %edi, %ebp -; X64-BMI1NOTBM-NEXT: movl %r14d, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X64-BMI1NOTBM-NEXT: movl %ebx, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebp, %ebx -; X64-BMI1NOTBM-NEXT: movl %r14d, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_c5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: movl %esi, %ebp -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %r14d -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %r14d, %ebx -; X64-BMI1BMI2-NEXT: movl %ebp, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_c5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movl %esi, %r14d +; X64-BMI1-NEXT: movl %edi, %ebp +; X64-BMI1-NEXT: movl %r14d, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $-1, %ebx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrl %cl, %ebx +; X64-BMI1-NEXT: movl %ebx, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebp, %ebx +; X64-BMI1-NEXT: movl %r14d, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_c5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: movl %esi, %ebp +; X64-BMI2-NEXT: shrxl %esi, %edi, %r14d +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %r14d, %ebx +; X64-BMI2-NEXT: movl %ebp, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -4904,95 +4904,95 @@ define 
i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB41_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB41_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB41_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB41_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB41_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB41_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB41_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB41_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi 
+; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB41_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB41_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB41_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB41_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB41_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB41_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB41_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB41_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c0: ; X64-NOBMI: # %bb.0: @@ -5016,45 +5016,45 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; 
X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rdx, %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rdx, %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -5110,95 +5110,95 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB42_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB42_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB42_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB42_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; 
X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB42_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB42_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB42_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB42_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB42_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB42_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB42_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB42_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl 
%edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB42_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB42_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB42_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB42_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c1_indexzext: ; X64-NOBMI: # %bb.0: @@ -5222,46 +5222,46 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, 
%rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip %numhighbits = sub i8 64, %numlowbits @@ -5320,97 +5320,97 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB43_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB43_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB43_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB43_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %esi -; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB43_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB43_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx 
-; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB43_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB43_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB43_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB43_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB43_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB43_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %esi +; X86-BMI2-NEXT: movl 4(%eax), %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB43_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB43_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB43_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB43_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; 
X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c2_load: ; X64-NOBMI: # %bb.0: @@ -5434,45 +5434,45 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq (%rdi), %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rdx, %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq (%rdi), %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rdx, %rbx +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -5530,97 +5530,97 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), 
%eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB44_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB44_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB44_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB44_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %esi -; X86-BMI1BMI2-NEXT: movl 4(%eax), %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB44_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB44_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB44_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB44_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB44_2 
+; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB44_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB44_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB44_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %esi +; X86-BMI2-NEXT: movl 4(%eax), %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB44_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB44_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB44_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB44_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -5644,46 +5644,46 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movq (%rdi), %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: 
pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %edx, %ebx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movq (%rdi), %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %edx, %ebx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -5742,95 +5742,95 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB45_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB45_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB45_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB45_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebp, %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: movl %esi, 
%eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB45_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB45_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB45_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB45_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB45_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB45_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB45_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB45_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebp, %esi +; X86-BMI1-NEXT: andl %ebx, %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movb 
{{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB45_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB45_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB45_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB45_4: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c4_commutative: ; X64-NOBMI: # %bb.0: @@ -5854,45 +5854,45 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r14 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rdx, %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %r14 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c4_commutative: +; X64-BMI2: 
# %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rdx, %rbx +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -5953,105 +5953,105 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_c5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB46_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB46_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB46_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebp, %ebx -; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp -; X86-BMI1NOTBM-NEXT: .LBB46_4: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi -; X86-BMI1NOTBM-NEXT: andl %ebp, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_c5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB46_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi -; X86-BMI1BMI2-NEXT: .LBB46_2: -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx -; X86-BMI1BMI2-NEXT: 
testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB46_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx -; X86-BMI1BMI2-NEXT: .LBB46_4: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %ebp -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebp, %esi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx -; X86-BMI1BMI2-NEXT: addl $12, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: popl %ebp -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_c5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB46_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB46_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: movl $-1, %ebp +; X86-BMI1-NEXT: shrl %cl, %ebp +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB46_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %ebp, %ebx +; X86-BMI1-NEXT: xorl %ebp, %ebp +; X86-BMI1-NEXT: .LBB46_4: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl %ebx, %esi +; X86-BMI1-NEXT: andl %ebp, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_c5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $12, %esp +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB46_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB46_2: +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %ebp +; X86-BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB46_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: .LBB46_4: +; X86-BMI2-NEXT: subl $8, %esp 
+; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl %ebp, %esi +; X86-BMI2-NEXT: andl %ebx, %edi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: pushl {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: addl $12, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_c5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -6077,51 +6077,51 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X64-NOBMI-NEXT: popq %r15 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_c5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r15 -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %r14 -; X64-BMI1NOTBM-NEXT: movq %rdi, %r15 -; X64-BMI1NOTBM-NEXT: movl %r14d, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %r15 -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r15, %rbx -; X64-BMI1NOTBM-NEXT: movq %r14, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: popq %r15 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_c5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r15 -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: movq %rdx, %rbx -; X64-BMI1BMI2-NEXT: movq %rsi, %r14 -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r15 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r15, %rbx -; X64-BMI1BMI2-NEXT: movq %r14, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: popq %r15 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_c5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r15 +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movq %rsi, %r14 +; X64-BMI1-NEXT: movq %rdi, %r15 +; X64-BMI1-NEXT: movl %r14d, %ecx +; X64-BMI1-NEXT: shrq %cl, %r15 +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r15, %rbx +; X64-BMI1-NEXT: movq %r14, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: popq %r15 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_c5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r15 +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: movq %rdx, %rbx +; X64-BMI2-NEXT: movq %rsi, %r14 +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r15 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: 
shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r15, %rbx +; X64-BMI2-NEXT: movq %r14, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: popq %r15 +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -6162,54 +6162,54 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB47_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB47_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB47_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: .LBB47_4: -; X86-BMI1NOTBM-NEXT: andl %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB47_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB47_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB47_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB47_4: -; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %esi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB47_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB47_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB47_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: .LBB47_4: +; X86-BMI1-NEXT: andl %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB47_2 +; X86-BMI2-NEXT: # 
%bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx +; X86-BMI2-NEXT: .LBB47_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB47_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: .LBB47_4: +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c0: ; X64-NOBMI: # %bb.0: @@ -6224,20 +6224,20 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -6270,44 +6270,44 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_c1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB48_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB48_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_c1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB48_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB48_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_c1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), 
%esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB48_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB48_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_c1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB48_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB48_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c1: ; X64-NOBMI: # %bb.0: @@ -6322,20 +6322,20 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_c1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_c1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_c1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_c1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 %numhighbits = sub i32 32, %numlowbits @@ -6369,44 +6369,44 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_c2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB49_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB49_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_c2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), 
%edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB49_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB49_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_c2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB49_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB49_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_c2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB49_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB49_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c2: ; X64-NOBMI: # %bb.0: @@ -6421,20 +6421,20 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_c2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_c2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_c2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_c2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -6475,59 +6475,59 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_c3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: 
jne .LBB50_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB50_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB50_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB50_4: -; X86-BMI1NOTBM-NEXT: andl %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_c3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB50_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB50_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB50_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: .LBB50_4: -; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_c3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %esi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB50_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB50_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: shrdl %cl, %eax, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB50_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB50_4: +; X86-BMI1-NEXT: andl %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_c3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrdl %cl, %eax, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB50_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx +; X86-BMI2-NEXT: .LBB50_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: shrdl %cl, %eax, %esi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB50_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: .LBB50_4: +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c3: ; X64-NOBMI: # %bb.0: @@ -6542,28 +6542,28 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; 
X64-BMI1NOTBM-LABEL: bextr64_32_c3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rdi -; X64-BMI1NOTBM-NEXT: negb %dl -; X64-BMI1NOTBM-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-BMI1NOTBM-NEXT: movl %edx, %ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rax -; X64-BMI1NOTBM-NEXT: andl %edi, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_c3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rcx -; X64-BMI1BMI2-NEXT: negb %dl -; X64-BMI1BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-BMI1BMI2-NEXT: shrxq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: andl %ecx, %eax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_c3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rdi +; X64-BMI1-NEXT: negb %dl +; X64-BMI1-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; X64-BMI1-NEXT: movl %edx, %ecx +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: andl %edi, %eax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_c3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rcx +; X64-BMI2-NEXT: negb %dl +; X64-BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; X64-BMI2-NEXT: shrxq %rdx, %rax, %rax +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 4294967295, %numhighbits @@ -6589,22 +6589,22 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d0: ; X64-NOBMI: # %bb.0: @@ -6618,19 +6618,19 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: 
bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %shifted, %numhighbits @@ -6651,22 +6651,22 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: orl %eax, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: orl %eax, %ecx +; X86-BMI1-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d1_indexzext: ; X64-NOBMI: # %bb.0: @@ -6680,19 +6680,19 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip %numhighbits = sub i8 32, %numlowbits @@ -6716,24 +6716,24 @@ define i32 @bextr32_d2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d2_load: -; X86-BMI1NOTBM: # %bb.0: -; 
X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d2_load: ; X64-NOBMI: # %bb.0: @@ -6747,19 +6747,19 @@ define i32 @bextr32_d2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -6782,24 +6782,24 @@ define i32 @bextr32_d3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx -; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl 
{{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx +; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -6813,19 +6813,19 @@ define i32 @bextr32_d3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, (%rdi), %eax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -6857,37 +6857,37 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr32_d5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: movzbl %al, %edx -; X86-BMI1NOTBM-NEXT: orl %ecx, %edx -; X86-BMI1NOTBM-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %eax, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr32_d5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %esi -; X86-BMI1BMI2-NEXT: movl %ecx, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr32_d5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: movzbl %al, %edx +; X86-BMI1-NEXT: orl %ecx, %edx +; X86-BMI1-NEXT: bextrl %edx, {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %eax, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; 
X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr32_d5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: bzhil %eax, %edx, %esi +; X86-BMI2-NEXT: movl %ecx, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr32_d5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -6905,29 +6905,29 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr32_d5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %ebx -; X64-BMI1NOTBM-NEXT: movl %esi, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr32_d5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %ebx -; X64-BMI1BMI2-NEXT: movl %esi, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr32_d5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrl %eax, %edi, %ebx +; X64-BMI1-NEXT: movl %esi, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr32_d5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: bzhil %edx, %eax, %ebx +; X64-BMI2-NEXT: movl %esi, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %shifted, %numhighbits @@ -6987,94 +6987,94 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB56_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB56_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne 
.LBB56_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx -; X86-BMI1NOTBM-NEXT: .LBB56_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB56_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB56_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB56_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB56_8: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB56_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB56_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB56_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB56_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB56_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB56_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB56_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB56_8: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB56_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB56_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edi, %eax +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: jne .LBB56_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %ebx +; X86-BMI1-NEXT: .LBB56_4: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB56_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB56_6: +; X86-BMI1-NEXT: shrdl %cl, %ebx, %esi +; 
X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB56_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB56_8: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %esi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB56_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB56_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB56_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB56_4: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB56_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB56_6: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB56_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB56_8: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d0: ; X64-NOBMI: # %bb.0: @@ -7088,19 +7088,19 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %shifted, %numhighbits @@ -7157,94 +7157,94 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB57_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; 
X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB57_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB57_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx -; X86-BMI1NOTBM-NEXT: .LBB57_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB57_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB57_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB57_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB57_8: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB57_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB57_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB57_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB57_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB57_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB57_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB57_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB57_8: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB57_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB57_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edi, %eax +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: jne .LBB57_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, 
%ebx +; X86-BMI1-NEXT: .LBB57_4: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB57_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB57_6: +; X86-BMI1-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB57_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB57_8: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %esi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB57_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB57_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB57_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB57_4: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB57_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB57_6: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB57_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB57_8: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d1_indexzext: ; X64-NOBMI: # %bb.0: @@ -7258,21 +7258,21 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip %numhighbits = sub i8 64, %numlowbits @@ -7332,96 +7332,96 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; 
X86-BMI1NOTBM-LABEL: bextr64_d2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB58_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB58_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB58_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx -; X86-BMI1NOTBM-NEXT: .LBB58_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB58_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB58_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB58_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB58_8: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %eax -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB58_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB58_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB58_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB58_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB58_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB58_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB58_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB58_8: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 
+; X86-BMI1-NEXT: movl (%eax), %edi +; X86-BMI1-NEXT: movl 4(%eax), %edx +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB58_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB58_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edi, %eax +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: jne .LBB58_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %ebx +; X86-BMI1-NEXT: .LBB58_4: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB58_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB58_6: +; X86-BMI1-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB58_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB58_8: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: movl 4(%edx), %edx +; X86-BMI2-NEXT: shrxl %ecx, %edx, %esi +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB58_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB58_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB58_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB58_4: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB58_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB58_6: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB58_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB58_8: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d2_load: ; X64-NOBMI: # %bb.0: @@ -7435,19 +7435,19 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), 
%rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -7506,96 +7506,96 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edi -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %edx -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB59_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB59_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %edi, %ebx -; X86-BMI1NOTBM-NEXT: jne .LBB59_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %eax, %ebx -; X86-BMI1NOTBM-NEXT: .LBB59_4: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB59_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB59_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB59_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: .LBB59_8: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_d3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %eax -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB59_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB59_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB59_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB59_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB59_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB59_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; 
X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB59_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB59_8: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edi +; X86-BMI1-NEXT: movl 4(%eax), %edx +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %edx, %edi +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB59_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB59_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %edi, %eax +; X86-BMI1-NEXT: shll %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %edi, %ebx +; X86-BMI1-NEXT: jne .LBB59_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %eax, %ebx +; X86-BMI1-NEXT: .LBB59_4: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB59_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB59_6: +; X86-BMI1-NEXT: shrdl %cl, %ebx, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB59_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: .LBB59_8: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: movl 4(%edx), %edx +; X86-BMI2-NEXT: shrxl %ecx, %edx, %esi +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB59_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB59_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB59_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB59_4: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB59_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB59_6: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB59_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB59_8: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -7609,21 +7609,21 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; 
X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $edx killed $edx def $rdx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $edx killed $edx def $rdx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -7696,118 +7696,118 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: popl %ebp ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_d5_skipextrauses: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $12, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %ebx -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %al -; X86-BMI1NOTBM-NEXT: je .LBB60_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi -; X86-BMI1NOTBM-NEXT: .LBB60_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %ebx, %ebp -; X86-BMI1NOTBM-NEXT: jne .LBB60_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebp -; X86-BMI1NOTBM-NEXT: .LBB60_4: -; X86-BMI1NOTBM-NEXT: movl %ebp, %esi -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB60_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edx -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: .LBB60_6: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: jne .LBB60_8 -; X86-BMI1NOTBM-NEXT: # %bb.7: -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: .LBB60_8: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ecx -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $12, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: popl %ebp -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: 
bextr64_d5_skipextrauses: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %edx, %edx -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB60_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB60_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edi, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB60_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edx -; X86-BMI1BMI2-NEXT: movl $0, %ebx -; X86-BMI1BMI2-NEXT: .LBB60_4: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: jne .LBB60_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: .LBB60_6: -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: jne .LBB60_8 -; X86-BMI1BMI2-NEXT: # %bb.7: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: .LBB60_8: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ecx -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_d5_skipextrauses: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $12, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: movl %eax, %ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edx, %ebx +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %al +; X86-BMI1-NEXT: je .LBB60_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: .LBB60_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shldl %cl, %ebx, %esi +; X86-BMI1-NEXT: shll %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %ebx, %ebp +; X86-BMI1-NEXT: jne .LBB60_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebp +; X86-BMI1-NEXT: .LBB60_4: +; X86-BMI1-NEXT: movl %ebp, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edi +; X86-BMI1-NEXT: jne .LBB60_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %edx +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: .LBB60_6: +; X86-BMI1-NEXT: shrdl %cl, %ebp, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: jne .LBB60_8 +; X86-BMI1-NEXT: # %bb.7: +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: .LBB60_8: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ecx +; X86-BMI1-NEXT: pushl %eax +; 
X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $12, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: popl %ebp +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_d5_skipextrauses: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %edi +; X86-BMI2-NEXT: shrxl %eax, %edx, %edx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB60_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB60_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %edi, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edi, %ebx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB60_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %ebx, %edx +; X86-BMI2-NEXT: movl $0, %ebx +; X86-BMI2-NEXT: .LBB60_4: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edi +; X86-BMI2-NEXT: jne .LBB60_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: .LBB60_6: +; X86-BMI2-NEXT: shrdl %cl, %edx, %ebx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: jne .LBB60_8 +; X86-BMI2-NEXT: # %bb.7: +; X86-BMI2-NEXT: movl %ebx, %edi +; X86-BMI2-NEXT: .LBB60_8: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %ecx +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_d5_skipextrauses: ; X64-NOBMI: # %bb.0: @@ -7825,29 +7825,29 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_d5_skipextrauses: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_d5_skipextrauses: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rbx -; X64-BMI1BMI2-NEXT: movq %rsi, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_d5_skipextrauses: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rbx +; X64-BMI1-NEXT: movq %rsi, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_d5_skipextrauses: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; 
X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rbx +; X64-BMI2-NEXT: movq %rsi, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %shifted, %numhighbits @@ -7895,71 +7895,71 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB61_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB61_2: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB61_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB61_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB61_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: .LBB61_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB61_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB61_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB61_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB61_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB61_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %eax -; X86-BMI1BMI2-NEXT: .LBB61_6: -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: shrdl %cl, %esi, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB61_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB61_2: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), 
%cl +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: shll %cl, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB61_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB61_4: +; X86-BMI1-NEXT: shrdl %cl, %eax, %edx +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB61_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: .LBB61_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB61_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB61_2: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB61_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB61_4: +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB61_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB61_6: +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_d0: ; X64-NOBMI: # %bb.0: @@ -7974,21 +7974,21 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhiq %rdx, %rax, %rax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhiq %rdx, %rax, %rax +; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %shifted, %numhighbits @@ -8021,44 +8021,44 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bextr64_32_d1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; 
X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %esi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB62_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: .LBB62_2: -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, %edx, %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bextr64_32_d1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB62_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx -; X86-BMI1BMI2-NEXT: .LBB62_2: -; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bextr64_32_d1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB62_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: .LBB62_2: +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, %edx, %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bextr64_32_d1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: shrdl %cl, %esi, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB62_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI2-NEXT: .LBB62_2: +; X86-BMI2-NEXT: bzhil %eax, %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_d1: ; X64-NOBMI: # %bb.0: @@ -8073,20 +8073,20 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bextr64_32_d1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %edx -; X64-BMI1NOTBM-NEXT: movzbl %sil, %eax -; X64-BMI1NOTBM-NEXT: orl %edx, %eax -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bextr64_32_d1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: bzhil %edx, %eax, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bextr64_32_d1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %edx +; X64-BMI1-NEXT: movzbl %sil, %eax +; X64-BMI1-NEXT: orl %edx, %eax +; X64-BMI1-NEXT: bextrq %rax, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bextr64_32_d1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: bzhil %edx, %eax, %eax +; X64-BMI2-NEXT: retq %shifted = lshr i64 %val, 
%numskipbits %truncshifted = trunc i64 %shifted to i32 %numhighbits = sub i32 32, %numlowbits @@ -8111,31 +8111,22 @@ define void @pr38938(i32* %a0, i64* %a1) nounwind { ; X86-NOBMI-NEXT: incl (%eax,%ecx) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: pr38938: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $2581, %edx # imm = 0xA15 -; X86-BMI1NOTBM-NEXT: bextrl %edx, (%ecx), %ecx -; X86-BMI1NOTBM-NEXT: incl (%eax,%ecx,4) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: pr38938: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1TBM-NEXT: bextrl $2581, (%ecx), %ecx # imm = 0xA15 -; X86-BMI1TBM-NEXT: incl (%eax,%ecx,4) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: pr38938: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl $2581, %edx # imm = 0xA15 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %edx, (%ecx), %ecx -; X86-BMI1NOTBMBMI2-NEXT: incl (%eax,%ecx,4) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: pr38938: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl $2581, %edx # imm = 0xA15 +; X86-BMINOTBM-NEXT: bextrl %edx, (%ecx), %ecx +; X86-BMINOTBM-NEXT: incl (%eax,%ecx,4) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: pr38938: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMITBM-NEXT: bextrl $2581, (%ecx), %ecx # imm = 0xA15 +; X86-BMITBM-NEXT: incl (%eax,%ecx,4) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: pr38938: ; X64-NOBMI: # %bb.0: @@ -8145,25 +8136,18 @@ define void @pr38938(i32* %a0, i64* %a1) nounwind { ; X64-NOBMI-NEXT: incl (%rdi,%rax) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: pr38938: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2581, %eax # imm = 0xA15 -; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rsi), %rax -; X64-BMI1NOTBM-NEXT: incl (%rdi,%rax,4) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: pr38938: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrq $2581, (%rsi), %rax # imm = 0xA15 -; X64-BMI1TBM-NEXT: incl (%rdi,%rax,4) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: pr38938: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2581, %eax # imm = 0xA15 -; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, (%rsi), %rax -; X64-BMI1NOTBMBMI2-NEXT: incl (%rdi,%rax,4) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: pr38938: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2581, %eax # imm = 0xA15 +; X64-BMINOTBM-NEXT: bextrq %rax, (%rsi), %rax +; X64-BMINOTBM-NEXT: incl (%rdi,%rax,4) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: pr38938: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrq $2581, (%rsi), %rax # imm = 0xA15 +; X64-BMITBM-NEXT: incl (%rdi,%rax,4) +; X64-BMITBM-NEXT: retq %tmp = load i64, i64* %a1, align 8 %tmp1 = lshr i64 %tmp, 21 %tmp2 = and i64 %tmp1, 1023 @@ -8183,22 +8167,16 @@ define i32 @c0_i32(i32 %arg) nounwind { ; X86-NOBMI-NEXT: andl $1023, %eax # imm = 0x3FF ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c0_i32: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMINOTBM-LABEL: 
c0_i32: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X86-BMINOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: retl ; -; X86-BMI1TBM-LABEL: c0_i32: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c0_i32: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMITBM-LABEL: c0_i32: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c0_i32: ; X64-NOBMI: # %bb.0: @@ -8207,22 +8185,16 @@ define i32 @c0_i32(i32 %arg) nounwind { ; X64-NOBMI-NEXT: andl $1023, %eax # imm = 0x3FF ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c0_i32: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c0_i32: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c0_i32: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c0_i32: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X64-BMINOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c0_i32: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 +; X64-BMITBM-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 ret i32 %tmp1 @@ -8301,25 +8273,18 @@ define i64 @c0_i64(i64 %arg) nounwind { ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c0_i64: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c0_i64: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 -; X86-BMI1TBM-NEXT: xorl %edx, %edx -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c0_i64: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: xorl %edx, %edx -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c0_i64: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X86-BMINOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: xorl %edx, %edx +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c0_i64: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13 +; X86-BMITBM-NEXT: xorl %edx, %edx +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c0_i64: ; X64-NOBMI: # %bb.0: @@ -8328,22 +8293,16 @@ define i64 @c0_i64(i64 %arg) nounwind { ; X64-NOBMI-NEXT: andl $1023, %eax # imm = 0x3FF ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c0_i64: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2611, %eax # imm = 0xA33 -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c0_i64: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrq $2611, %rdi, %rax # imm = 
0xA33 -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c0_i64: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2611, %eax # imm = 0xA33 -; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c0_i64: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2611, %eax # imm = 0xA33 +; X64-BMINOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c0_i64: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 +; X64-BMITBM-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 ret i64 %tmp1 @@ -8430,28 +8389,20 @@ define void @c5_i32(i32 %arg, i32* %ptr) nounwind { ; X86-NOBMI-NEXT: movl %ecx, (%eax) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c5_i32: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c5_i32: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 -; X86-BMI1TBM-NEXT: movl %ecx, (%eax) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c5_i32: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %ecx # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c5_i32: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 +; X86-BMINOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl %ecx, (%eax) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c5_i32: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 +; X86-BMITBM-NEXT: movl %ecx, (%eax) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c5_i32: ; X64-NOBMI: # %bb.0: @@ -8460,25 +8411,18 @@ define void @c5_i32(i32 %arg, i32* %ptr) nounwind { ; X64-NOBMI-NEXT: movl %edi, (%rsi) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c5_i32: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: movl %eax, (%rsi) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c5_i32: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 -; X64-BMI1TBM-NEXT: movl %eax, (%rsi) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c5_i32: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2579, %eax # imm = 0xA13 -; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBMBMI2-NEXT: movl %eax, (%rsi) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c5_i32: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2579, %eax # imm = 0xA13 +; X64-BMINOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMINOTBM-NEXT: movl %eax, (%rsi) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c5_i32: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrl $2579, %edi, %eax # imm = 0xA13 +; X64-BMITBM-NEXT: movl %eax, (%rsi) +; X64-BMITBM-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 1023 store i32 %tmp1, i32* %ptr @@ -8496,28 +8440,20 @@ define void @c6_i32(i32 %arg, i32* 
%ptr) nounwind { ; X86-NOBMI-NEXT: movl %ecx, (%eax) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c6_i32: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl $3091, %ecx # imm = 0xC13 -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c6_i32: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13 -; X86-BMI1TBM-NEXT: movl %ecx, (%eax) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c6_i32: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl $3091, %ecx # imm = 0xC13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c6_i32: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl $3091, %ecx # imm = 0xC13 +; X86-BMINOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl %ecx, (%eax) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c6_i32: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13 +; X86-BMITBM-NEXT: movl %ecx, (%eax) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c6_i32: ; X64-NOBMI: # %bb.0: @@ -8526,25 +8462,18 @@ define void @c6_i32(i32 %arg, i32* %ptr) nounwind { ; X64-NOBMI-NEXT: movl %edi, (%rsi) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c6_i32: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $3091, %eax # imm = 0xC13 -; X64-BMI1NOTBM-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBM-NEXT: movl %eax, (%rsi) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c6_i32: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrl $3091, %edi, %eax # imm = 0xC13 -; X64-BMI1TBM-NEXT: movl %eax, (%rsi) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c6_i32: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $3091, %eax # imm = 0xC13 -; X64-BMI1NOTBMBMI2-NEXT: bextrl %eax, %edi, %eax -; X64-BMI1NOTBMBMI2-NEXT: movl %eax, (%rsi) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c6_i32: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $3091, %eax # imm = 0xC13 +; X64-BMINOTBM-NEXT: bextrl %eax, %edi, %eax +; X64-BMINOTBM-NEXT: movl %eax, (%rsi) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c6_i32: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrl $3091, %edi, %eax # imm = 0xC13 +; X64-BMITBM-NEXT: movl %eax, (%rsi) +; X64-BMITBM-NEXT: retq %tmp0 = lshr i32 %arg, 19 %tmp1 = and i32 %tmp0, 4095 store i32 %tmp1, i32* %ptr @@ -8589,31 +8518,22 @@ define void @c5_i64(i64 %arg, i64* %ptr) nounwind { ; X86-NOBMI-NEXT: movl $0, 4(%eax) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c5_i64: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBM-NEXT: movl $0, 4(%eax) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c5_i64: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 -; X86-BMI1TBM-NEXT: movl %ecx, (%eax) -; X86-BMI1TBM-NEXT: movl $0, 4(%eax) -; X86-BMI1TBM-NEXT: retl -; -; 
X86-BMI1NOTBMBMI2-LABEL: c5_i64: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl $2579, %ecx # imm = 0xA13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBMBMI2-NEXT: movl $0, 4(%eax) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c5_i64: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl $2579, %ecx # imm = 0xA13 +; X86-BMINOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl %ecx, (%eax) +; X86-BMINOTBM-NEXT: movl $0, 4(%eax) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c5_i64: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13 +; X86-BMITBM-NEXT: movl %ecx, (%eax) +; X86-BMITBM-NEXT: movl $0, 4(%eax) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c5_i64: ; X64-NOBMI: # %bb.0: @@ -8622,25 +8542,18 @@ define void @c5_i64(i64 %arg, i64* %ptr) nounwind { ; X64-NOBMI-NEXT: movq %rdi, (%rsi) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c5_i64: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $2611, %eax # imm = 0xA33 -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: movq %rax, (%rsi) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c5_i64: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 -; X64-BMI1TBM-NEXT: movq %rax, (%rsi) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c5_i64: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $2611, %eax # imm = 0xA33 -; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBMBMI2-NEXT: movq %rax, (%rsi) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c5_i64: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $2611, %eax # imm = 0xA33 +; X64-BMINOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMINOTBM-NEXT: movq %rax, (%rsi) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c5_i64: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrq $2611, %rdi, %rax # imm = 0xA33 +; X64-BMITBM-NEXT: movq %rax, (%rsi) +; X64-BMITBM-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 1023 store i64 %tmp1, i64* %ptr @@ -8659,31 +8572,22 @@ define void @c6_i64(i64 %arg, i64* %ptr) nounwind { ; X86-NOBMI-NEXT: movl $0, 4(%eax) ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: c6_i64: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl $3091, %ecx # imm = 0xC13 -; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBM-NEXT: movl $0, 4(%eax) -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1TBM-LABEL: c6_i64: -; X86-BMI1TBM: # %bb.0: -; X86-BMI1TBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1TBM-NEXT: bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13 -; X86-BMI1TBM-NEXT: movl %ecx, (%eax) -; X86-BMI1TBM-NEXT: movl $0, 4(%eax) -; X86-BMI1TBM-NEXT: retl -; -; X86-BMI1NOTBMBMI2-LABEL: c6_i64: -; X86-BMI1NOTBMBMI2: # %bb.0: -; X86-BMI1NOTBMBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBMBMI2-NEXT: movl $3091, %ecx # imm = 0xC13 -; X86-BMI1NOTBMBMI2-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBMBMI2-NEXT: movl %ecx, (%eax) -; X86-BMI1NOTBMBMI2-NEXT: movl $0, 4(%eax) -; X86-BMI1NOTBMBMI2-NEXT: retl +; X86-BMINOTBM-LABEL: c6_i64: +; X86-BMINOTBM: # %bb.0: +; X86-BMINOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMINOTBM-NEXT: movl 
$3091, %ecx # imm = 0xC13 +; X86-BMINOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %ecx +; X86-BMINOTBM-NEXT: movl %ecx, (%eax) +; X86-BMINOTBM-NEXT: movl $0, 4(%eax) +; X86-BMINOTBM-NEXT: retl +; +; X86-BMITBM-LABEL: c6_i64: +; X86-BMITBM: # %bb.0: +; X86-BMITBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMITBM-NEXT: bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13 +; X86-BMITBM-NEXT: movl %ecx, (%eax) +; X86-BMITBM-NEXT: movl $0, 4(%eax) +; X86-BMITBM-NEXT: retl ; ; X64-NOBMI-LABEL: c6_i64: ; X64-NOBMI: # %bb.0: @@ -8692,25 +8596,18 @@ define void @c6_i64(i64 %arg, i64* %ptr) nounwind { ; X64-NOBMI-NEXT: movq %rdi, (%rsi) ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: c6_i64: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movl $3123, %eax # imm = 0xC33 -; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBM-NEXT: movq %rax, (%rsi) -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1TBM-LABEL: c6_i64: -; X64-BMI1TBM: # %bb.0: -; X64-BMI1TBM-NEXT: bextrq $3123, %rdi, %rax # imm = 0xC33 -; X64-BMI1TBM-NEXT: movq %rax, (%rsi) -; X64-BMI1TBM-NEXT: retq -; -; X64-BMI1NOTBMBMI2-LABEL: c6_i64: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movl $3123, %eax # imm = 0xC33 -; X64-BMI1NOTBMBMI2-NEXT: bextrq %rax, %rdi, %rax -; X64-BMI1NOTBMBMI2-NEXT: movq %rax, (%rsi) -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMINOTBM-LABEL: c6_i64: +; X64-BMINOTBM: # %bb.0: +; X64-BMINOTBM-NEXT: movl $3123, %eax # imm = 0xC33 +; X64-BMINOTBM-NEXT: bextrq %rax, %rdi, %rax +; X64-BMINOTBM-NEXT: movq %rax, (%rsi) +; X64-BMINOTBM-NEXT: retq +; +; X64-BMITBM-LABEL: c6_i64: +; X64-BMITBM: # %bb.0: +; X64-BMITBM-NEXT: bextrq $3123, %rdi, %rax # imm = 0xC33 +; X64-BMITBM-NEXT: movq %rax, (%rsi) +; X64-BMITBM-NEXT: retq %tmp0 = lshr i64 %arg, 51 %tmp1 = and i64 %tmp0, 4095 store i64 %tmp1, i64* %ptr diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll index 4c11d55140223..177f99e7660c8 100644 --- a/llvm/test/CodeGen/X86/extract-lowbits.ll +++ b/llvm/test/CodeGen/X86/extract-lowbits.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s 
--check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1NOTBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1TBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2TBM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2NOTBM ; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll @@ -37,18 +37,18 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a0: ; X64-NOBMI: # %bb.0: @@ -60,16 +60,16 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %masked = and i32 %mask, %val @@ -86,18 +86,18 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl 
{{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_a1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_a1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a1_indexzext: ; X64-NOBMI: # %bb.0: @@ -109,16 +109,16 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %conv = zext i8 %numlowbits to i32 %onebit = shl i32 1, %conv %mask = add nsw i32 %onebit, -1 @@ -137,20 +137,20 @@ define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_a2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_a2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a2_load: ; X64-NOBMI: # %bb.0: @@ -162,16 +162,16 @@ define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a2_load: -; 
X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -190,20 +190,20 @@ define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_a3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -215,16 +215,16 @@ define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %conv = zext i8 %numlowbits to i32 %onebit = shl i32 1, %conv @@ -243,18 +243,18 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_a4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_a4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_a4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_a4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_a4_commutative: ; X64-NOBMI: # %bb.0: @@ 
-266,16 +266,16 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_a4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_a4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_a4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_a4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %masked = and i32 %val, %mask ; swapped order @@ -304,43 +304,43 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB5_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB5_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB5_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB5_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB5_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB5_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB5_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB5_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a0: ; X64-NOBMI: # %bb.0: @@ -352,16 +352,16 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 %masked = and i64 %mask, %val @@ -388,43 +388,43 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB6_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB6_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB6_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB6_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB6_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB6_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB6_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB6_2: +; 
X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a1_indexzext: ; X64-NOBMI: # %bb.0: @@ -436,18 +436,18 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_a1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_a1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %conv = zext i8 %numlowbits to i64 %onebit = shl i64 1, %conv %mask = add nsw i64 %onebit, -1 @@ -478,49 +478,49 @@ define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB7_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB7_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl 4(%esi), %edx -; X86-BMI1NOTBM-NEXT: andl (%esi), %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB7_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB7_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx -; X86-BMI1BMI2-NEXT: andl (%esi), %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB7_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: 
.LBB7_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl 4(%esi), %edx +; X86-BMI1-NEXT: andl (%esi), %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB7_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB7_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl 4(%esi), %edx +; X86-BMI2-NEXT: andl (%esi), %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a2_load: ; X64-NOBMI: # %bb.0: @@ -532,16 +532,16 @@ define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_a2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_a2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 @@ -572,49 +572,49 @@ define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB8_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB8_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl 4(%esi), %edx -; X86-BMI1NOTBM-NEXT: andl (%esi), %eax -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB8_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB8_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl 4(%esi), %edx -; X86-BMI1BMI2-NEXT: andl (%esi), %eax -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl 
+; X86-BMI1-LABEL: bzhi64_a3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB8_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB8_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl 4(%esi), %edx +; X86-BMI1-NEXT: andl (%esi), %eax +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB8_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB8_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl 4(%esi), %edx +; X86-BMI2-NEXT: andl (%esi), %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -626,18 +626,18 @@ define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_a3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %conv = zext i8 %numlowbits to i64 %onebit = shl i64 1, %conv @@ -666,43 +666,43 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_a4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB9_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB9_2: -; X86-BMI1NOTBM-NEXT: addl $-1, %eax -; X86-BMI1NOTBM-NEXT: adcl $-1, %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_a4_commutative: -; X86-BMI1BMI2: # %bb.0: -; 
X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB9_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB9_2: -; X86-BMI1BMI2-NEXT: addl $-1, %eax -; X86-BMI1BMI2-NEXT: adcl $-1, %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_a4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB9_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB9_2: +; X86-BMI1-NEXT: addl $-1, %eax +; X86-BMI1-NEXT: adcl $-1, %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_a4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB9_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB9_2: +; X86-BMI2-NEXT: addl $-1, %eax +; X86-BMI2-NEXT: adcl $-1, %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_a4_commutative: ; X64-NOBMI: # %bb.0: @@ -714,16 +714,16 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_a4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_a4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_a4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_a4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 %masked = and i64 %val, %mask ; swapped order @@ -749,34 +749,34 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB10_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: .LBB10_2: -; X86-BMI1NOTBM-NEXT: decl %eax -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_a0: -; X86-BMI1BMI2: # %bb.0: -; 
X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB10_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB10_2: -; X86-BMI1BMI2-NEXT: decl %eax -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %edx +; X86-BMI1-NEXT: shll %cl, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB10_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: .LBB10_2: +; X86-BMI1-NEXT: decl %eax +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_a0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB10_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: .LBB10_2: +; X86-BMI2-NEXT: decl %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a0: ; X64-NOBMI: # %bb.0: @@ -789,16 +789,16 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_a0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_a0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, -1 %masked = and i64 %mask, %val @@ -817,18 +817,18 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_a1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_a1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a1: ; X64-NOBMI: # %bb.0: @@ -840,16 +840,16 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; 
X64-BMI1-LABEL: bzhi64_32_a1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_a1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_a1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 @@ -879,36 +879,36 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: shll $8, %ebx -; X86-BMI1NOTBM-NEXT: bextrl %ebx, %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %esi, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: shll $8, %ebx +; X86-BMI1-NEXT: bextrl %ebx, %esi, %eax +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %esi, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause: ; X64-NOBMI: # %bb.0: @@ -928,34 +928,34 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ebx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: shll $8, %ebx -; X64-BMI1NOTBM-NEXT: bextrl %ebx, %r14d, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause: -; X64-BMI1BMI2: # 
%bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebp -; X64-BMI1BMI2-NEXT: movq %rdi, %rbx -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebp, %ebx, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ebx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: shll $8, %ebx +; X64-BMI1-NEXT: bextrl %ebx, %r14d, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebp +; X64-BMI2-NEXT: movq %rdi, %rbx +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebp, %ebx, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 call void @use32(i32 %truncval) %onebit = shl i32 1, %numlowbits @@ -976,18 +976,18 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_a2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_a2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a2: ; X64-NOBMI: # %bb.0: @@ -999,16 +999,16 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_a2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_a2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %zextmask = zext i32 %mask to i64 @@ -1035,34 +1035,34 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_a3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $1, %edx -; X86-BMI1NOTBM-NEXT: shll 
%cl, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB14_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: .LBB14_2: -; X86-BMI1NOTBM-NEXT: decl %eax -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_a3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB14_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB14_2: -; X86-BMI1BMI2-NEXT: decl %eax -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_a3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $1, %edx +; X86-BMI1-NEXT: shll %cl, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB14_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: .LBB14_2: +; X86-BMI1-NEXT: decl %eax +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_a3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB14_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: .LBB14_2: +; X86-BMI2-NEXT: decl %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_a3: ; X64-NOBMI: # %bb.0: @@ -1075,16 +1075,16 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_a3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_a3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_a3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_a3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %onebit = shl i64 1, %numlowbits %mask = add nsw i64 %onebit, 4294967295 %masked = and i64 %mask, %val @@ -1106,18 +1106,18 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; 
X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b0: ; X64-NOBMI: # %bb.0: @@ -1129,16 +1129,16 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 %masked = and i32 %mask, %val @@ -1155,18 +1155,18 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_b1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b1_indexzext: ; X64-NOBMI: # %bb.0: @@ -1178,16 +1178,16 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %conv = zext i8 %numlowbits to i32 %notmask = shl i32 -1, %conv %mask = xor i32 %notmask, -1 @@ -1206,20 +1206,20 @@ define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_b2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b2_load: +; 
X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_b2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b2_load: ; X64-NOBMI: # %bb.0: @@ -1231,16 +1231,16 @@ define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 @@ -1259,20 +1259,20 @@ define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_b3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -1284,16 +1284,16 @@ define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %conv = zext i8 %numlowbits to i32 %notmask = shl i32 -1, %conv @@ -1312,18 +1312,18 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 
%numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_b4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_b4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_b4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_b4_commutative: ; X64-NOBMI: # %bb.0: @@ -1335,16 +1335,16 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_b4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_b4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_b4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 %masked = and i32 %val, %mask ; swapped order @@ -1377,36 +1377,36 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB20_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB20_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB20_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB20_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB20_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB20_2: 
+; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI2-NEXT: testb $32, %dl +; X86-BMI2-NEXT: je .LBB20_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB20_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b0: ; X64-NOBMI: # %bb.0: @@ -1418,16 +1418,16 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 %masked = and i64 %mask, %val @@ -1458,36 +1458,36 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB21_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB21_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB21_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB21_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB21_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB21_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI2-NEXT: testb $32, %dl +; X86-BMI2-NEXT: je .LBB21_2 
+; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB21_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b1_indexzext: ; X64-NOBMI: # %bb.0: @@ -1499,18 +1499,18 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_b1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %conv = zext i8 %numlowbits to i64 %notmask = shl i64 -1, %conv %mask = xor i64 %notmask, -1 @@ -1545,42 +1545,42 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB22_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB22_2: -; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl $-1, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB22_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB22_2: -; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB22_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB22_2: +; X86-BMI1-NEXT: andnl (%edx), %eax, %eax +; X86-BMI1-NEXT: andnl 4(%edx), %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b2_load: +; X86-BMI2: # 
%bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB22_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB22_2: +; X86-BMI2-NEXT: andnl (%ecx), %eax, %eax +; X86-BMI2-NEXT: andnl 4(%ecx), %edx, %edx +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b2_load: ; X64-NOBMI: # %bb.0: @@ -1592,16 +1592,16 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_b2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_b2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 @@ -1636,42 +1636,42 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB23_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %esi -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB23_2: -; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl $-1, %edx -; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB23_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB23_2: -; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB23_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB23_2: +; X86-BMI1-NEXT: andnl (%edx), %eax, %eax +; X86-BMI1-NEXT: andnl 4(%edx), %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; 
X86-BMI2-LABEL: bzhi64_b3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB23_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB23_2: +; X86-BMI2-NEXT: andnl (%ecx), %eax, %eax +; X86-BMI2-NEXT: andnl 4(%ecx), %edx, %edx +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -1683,18 +1683,18 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_b3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %conv = zext i8 %numlowbits to i64 %notmask = shl i64 -1, %conv @@ -1727,36 +1727,36 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB24_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: xorl %eax, %eax -; X86-BMI1NOTBM-NEXT: .LBB24_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %dl -; X86-BMI1BMI2-NEXT: je .LBB24_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB24_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_b4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB24_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax +; X86-BMI1-NEXT: .LBB24_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1-NEXT: 
andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_b4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI2-NEXT: testb $32, %dl +; X86-BMI2-NEXT: je .LBB24_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB24_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b4_commutative: ; X64-NOBMI: # %bb.0: @@ -1768,16 +1768,16 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_b4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_b4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_b4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %notmask = shl i64 -1, %numlowbits %mask = xor i64 %notmask, -1 %masked = and i64 %val, %mask ; swapped order @@ -1803,32 +1803,32 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_b0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB25_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB25_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_b0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: jne .LBB25_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB25_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_b0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB25_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB25_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_b0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB25_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: .LBB25_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_b0: ; X64-NOBMI: # %bb.0: @@ -1841,16 +1841,16 @@ define i32 
@bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_b0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_b0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_b0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_b0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %widenumlowbits = zext i8 %numlowbits to i64 %notmask = shl nsw i64 -1, %widenumlowbits %mask = xor i64 %notmask, -1 @@ -1870,18 +1870,18 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_b1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_b1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_b1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_b1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_b1: ; X64-NOBMI: # %bb.0: @@ -1893,16 +1893,16 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_b1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_b1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_b1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_b1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 %widenumlowbits = zext i8 %numlowbits to i32 %notmask = shl nsw i32 -1, %widenumlowbits @@ -1923,18 +1923,18 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_b2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_b2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_b2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: 
bzhi64_32_b2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_b2: ; X64-NOBMI: # %bb.0: @@ -1946,16 +1946,16 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_b2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_b2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_b2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_b2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %widenumlowbits = zext i8 %numlowbits to i32 %notmask = shl nsw i32 -1, %widenumlowbits %mask = xor i32 %notmask, -1 @@ -1983,32 +1983,32 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_b3: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shll %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB28_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB28_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_b3: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: jne .LBB28_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %ecx -; X86-BMI1BMI2-NEXT: .LBB28_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_b3: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB28_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB28_2: +; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_b3: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB28_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: .LBB28_2: +; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_b3: ; X64-NOBMI: # %bb.0: @@ -2022,16 +2022,16 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_b3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_b3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: 
retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_b3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_b3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %widenumlowbits = zext i8 %numlowbits to i64 %notmask = shl nsw i64 4294967295, %widenumlowbits %mask = xor i64 %notmask, 4294967295 @@ -2064,38 +2064,38 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c0: ; X64-NOBMI: # %bb.0: @@ -2117,43 +2117,43 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: 
addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebx -; X64-BMI1BMI2-NEXT: movl %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebx +; X64-BMI2-NEXT: movl %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits call void @use32(i32 %mask) @@ -2179,38 +2179,38 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # 
kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c1_indexzext: ; X64-NOBMI: # %bb.0: @@ -2232,43 +2232,43 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebx -; X64-BMI1BMI2-NEXT: movl %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebx +; X64-BMI2-NEXT: movl %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 
%numhighbits to i32 %mask = lshr i32 -1, %sh_prom @@ -2297,42 +2297,42 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: andl %edx, %esi -; X86-BMI1NOTBM-NEXT: movl %edx, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %esi -; X86-BMI1BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X86-BMI1BMI2-NEXT: negb %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: andl %edx, %esi +; X86-BMI1-NEXT: movl %edx, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %esi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: negb %cl +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c2_load: ; X64-NOBMI: # %bb.0: @@ -2350,34 +2350,34 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %eax -; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebx -; X64-BMI1NOTBM-NEXT: andl %eax, %ebx -; X64-BMI1NOTBM-NEXT: movl %eax, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx 
-; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %ebx -; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movl $-1, %eax -; X64-BMI1BMI2-NEXT: shrxl %esi, %eax, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %eax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: movl (%rdi), %ebx +; X64-BMI1-NEXT: andl %eax, %ebx +; X64-BMI1-NEXT: movl %eax, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %ebx +; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: shrxl %esi, %eax, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -2406,42 +2406,42 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %edx -; X86-BMI1NOTBM-NEXT: movl (%eax), %esi -; X86-BMI1NOTBM-NEXT: andl %edx, %esi -; X86-BMI1NOTBM-NEXT: movl %edx, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %esi -; X86-BMI1BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X86-BMI1BMI2-NEXT: negb %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %edx +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %edx +; X86-BMI1-NEXT: movl (%eax), %esi +; X86-BMI1-NEXT: andl %edx, %esi +; X86-BMI1-NEXT: movl %edx, (%esp) +; 
X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %esi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: negb %cl +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -2459,34 +2459,34 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %eax -; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebx -; X64-BMI1NOTBM-NEXT: andl %eax, %ebx -; X64-BMI1NOTBM-NEXT: movl %eax, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: movl %ebx, %eax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %ebx -; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movl $-1, %eax -; X64-BMI1BMI2-NEXT: shrxl %esi, %eax, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %eax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: movl (%rdi), %ebx +; X64-BMI1-NEXT: andl %eax, %ebx +; X64-BMI1-NEXT: movl %eax, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: movl %ebx, %eax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %ebx +; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: shrxl %esi, %eax, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 @@ -2514,38 +2514,38 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_c4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl 
killed $ecx -; X86-BMI1NOTBM-NEXT: shrl %cl, %esi -; X86-BMI1NOTBM-NEXT: movl %esi, (%esp) -; X86-BMI1NOTBM-NEXT: calll use32@PLT -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: addl $8, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: negb %al -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI1BMI2-NEXT: movl %eax, (%esp) -; X86-BMI1BMI2-NEXT: calll use32@PLT -; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: addl $8, %esp -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_c4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: xorl %ecx, %ecx +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: movl %esi, (%esp) +; X86-BMI1-NEXT: calll use32@PLT +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: addl $8, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_c4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl %ebx, %eax +; X86-BMI2-NEXT: negb %al +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) +; X86-BMI2-NEXT: calll use32@PLT +; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_c4_commutative: ; X64-NOBMI: # %bb.0: @@ -2567,43 +2567,43 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbp ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_c4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbp -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movl %edi, %ebx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $-1, %ebp -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %edi -; X64-BMI1NOTBM-NEXT: callq use32@PLT -; X64-BMI1NOTBM-NEXT: andl %ebx, %ebp -; X64-BMI1NOTBM-NEXT: movl %ebp, %eax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %rbp -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbp -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebx -; X64-BMI1BMI2-NEXT: movl %edi, %ebp -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movl $-1, %ecx -; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi -; X64-BMI1BMI2-NEXT: callq use32@PLT -; X64-BMI1BMI2-NEXT: bzhil %ebx, %ebp, %eax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %rbp -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi32_c4_commutative: +; 
X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbp +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movl %edi, %ebx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $-1, %ebp +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrl %cl, %ebp +; X64-BMI1-NEXT: movl %ebp, %edi +; X64-BMI1-NEXT: callq use32@PLT +; X64-BMI1-NEXT: andl %ebx, %ebp +; X64-BMI1-NEXT: movl %ebp, %eax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %rbp +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi32_c4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbp +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebx +; X64-BMI2-NEXT: movl %edi, %ebp +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movl $-1, %ecx +; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi +; X64-BMI2-NEXT: callq use32@PLT +; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %rbp +; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits call void @use32(i32 %mask) @@ -2646,64 +2646,64 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB34_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB34_2: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB34_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB34_2: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi 
+; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB34_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB34_2: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB34_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB34_2: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c0: ; X64-NOBMI: # %bb.0: @@ -2725,43 +2725,43 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rsi, %rbx -; X64-BMI1BMI2-NEXT: movq %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; 
X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rsi, %rbx +; X64-BMI2-NEXT: movq %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits call void @use64(i64 %mask) @@ -2800,64 +2800,64 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB35_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB35_2: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB35_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB35_2: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je 
.LBB35_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB35_2: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB35_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB35_2: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c1_indexzext: ; X64-NOBMI: # %bb.0: @@ -2879,43 +2879,43 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movl %esi, %ebx -; X64-BMI1BMI2-NEXT: movq %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; 
X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movl %esi, %ebx +; X64-BMI2-NEXT: movq %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 %mask = lshr i64 -1, %sh_prom @@ -2958,70 +2958,70 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB36_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB36_2: -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi -; X86-BMI1NOTBM-NEXT: movl (%edx), %edi -; X86-BMI1NOTBM-NEXT: andl %eax, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb $64, %bl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB36_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %ecx -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB36_2: -; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi -; X86-BMI1BMI2-NEXT: andl %edx, %esi -; X86-BMI1BMI2-NEXT: movl (%eax), %edi -; X86-BMI1BMI2-NEXT: andl %ecx, %edi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %edx -; X86-BMI1BMI2-NEXT: pushl %ecx -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; 
X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB36_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB36_2: +; X86-BMI1-NEXT: movl 4(%edx), %esi +; X86-BMI1-NEXT: andl %ebx, %esi +; X86-BMI1-NEXT: movl (%edx), %edi +; X86-BMI1-NEXT: andl %eax, %edi +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb $64, %bl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %ebx, %ecx, %edx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB36_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB36_2: +; X86-BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI2-NEXT: andl %edx, %esi +; X86-BMI2-NEXT: movl (%eax), %edi +; X86-BMI2-NEXT: andl %ecx, %edi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %edx +; X86-BMI2-NEXT: pushl %ecx +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c2_load: ; X64-NOBMI: # %bb.0: @@ -3039,34 +3039,34 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rax -; X64-BMI1NOTBM-NEXT: movq (%rdi), %rbx -; X64-BMI1NOTBM-NEXT: andq %rax, %rbx -; X64-BMI1NOTBM-NEXT: movq %rax, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx -; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movq $-1, %rax -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: movq (%rdi), %rbx +; X64-BMI1-NEXT: andq %rax, %rbx +; X64-BMI1-NEXT: movq %rax, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx +; X64-BMI2-NEXT: # kill: def 
$sil killed $sil killed $rsi def $rsi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -3109,70 +3109,70 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB37_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: .LBB37_2: -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi -; X86-BMI1NOTBM-NEXT: andl %ebx, %esi -; X86-BMI1NOTBM-NEXT: movl (%edx), %edi -; X86-BMI1NOTBM-NEXT: andl %eax, %edi -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb $64, %bl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl -; X86-BMI1BMI2-NEXT: movl $-1, %ecx -; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %bl -; X86-BMI1BMI2-NEXT: je .LBB37_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %ecx -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: .LBB37_2: -; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi -; X86-BMI1BMI2-NEXT: andl %edx, %esi -; X86-BMI1BMI2-NEXT: movl (%eax), %edi -; X86-BMI1BMI2-NEXT: andl %ecx, %edi -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %edx -; X86-BMI1BMI2-NEXT: pushl %ecx -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: popl %ebx -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: movl $-1, %ebx +; X86-BMI1-NEXT: shrl %cl, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB37_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: .LBB37_2: +; X86-BMI1-NEXT: movl 4(%edx), %esi +; X86-BMI1-NEXT: andl %ebx, %esi +; X86-BMI1-NEXT: movl (%edx), %edi +; X86-BMI1-NEXT: andl %eax, %edi +; 
X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb $64, %bl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movl $-1, %ecx +; X86-BMI2-NEXT: shrxl %ebx, %ecx, %edx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: je .LBB37_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB37_2: +; X86-BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI2-NEXT: andl %edx, %esi +; X86-BMI2-NEXT: movl (%eax), %edi +; X86-BMI2-NEXT: andl %ecx, %edi +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %edx +; X86-BMI2-NEXT: pushl %ecx +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -3190,35 +3190,35 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %rbx ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: movl %esi, %ecx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rax -; X64-BMI1NOTBM-NEXT: movq (%rdi), %rbx -; X64-BMI1NOTBM-NEXT: andq %rax, %rbx -; X64-BMI1NOTBM-NEXT: movq %rax, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx -; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movq $-1, %rax -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: movq %rbx, %rax -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: movl %esi, %ecx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rax +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: movq (%rdi), %rbx +; X64-BMI1-NEXT: andq %rax, %rbx +; X64-BMI1-NEXT: movq %rax, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx +; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rdi +; 
X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: movq %rbx, %rax +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 @@ -3259,64 +3259,64 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_c4_commutative: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: pushl %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB38_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi -; X86-BMI1NOTBM-NEXT: .LBB38_2: -; X86-BMI1NOTBM-NEXT: subl $8, %esp -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: calll use64@PLT -; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx -; X86-BMI1NOTBM-NEXT: addl $4, %esp -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %al -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al -; X86-BMI1BMI2-NEXT: je .LBB38_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi -; X86-BMI1BMI2-NEXT: .LBB38_2: -; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: calll use64@PLT -; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: addl $4, %esp -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_c4_commutative: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: pushl %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %esi +; X86-BMI1-NEXT: movl $-1, %edi +; X86-BMI1-NEXT: shrl %cl, %edi +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB38_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edi, %esi +; X86-BMI1-NEXT: xorl %edi, %edi +; X86-BMI1-NEXT: .LBB38_2: +; X86-BMI1-NEXT: subl $8, %esp +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: calll use64@PLT +; X86-BMI1-NEXT: addl $16, %esp +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI1-NEXT: movl %esi, %eax +; X86-BMI1-NEXT: movl %edi, %edx +; X86-BMI1-NEXT: addl $4, %esp +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_c4_commutative: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; 
X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: movb $64, %al +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB38_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: .LBB38_2: +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: calll use64@PLT +; X86-BMI2-NEXT: addl $16, %esp +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_c4_commutative: ; X64-NOBMI: # %bb.0: @@ -3338,43 +3338,43 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: popq %r14 ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_c4_commutative: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: pushq %r14 -; X64-BMI1NOTBM-NEXT: pushq %rbx -; X64-BMI1NOTBM-NEXT: pushq %rax -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: movq %rdi, %r14 -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movq $-1, %rbx -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rdi -; X64-BMI1NOTBM-NEXT: callq use64@PLT -; X64-BMI1NOTBM-NEXT: andq %r14, %rbx -; X64-BMI1NOTBM-NEXT: movq %rbx, %rax -; X64-BMI1NOTBM-NEXT: addq $8, %rsp -; X64-BMI1NOTBM-NEXT: popq %rbx -; X64-BMI1NOTBM-NEXT: popq %r14 -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: pushq %r14 -; X64-BMI1BMI2-NEXT: pushq %rbx -; X64-BMI1BMI2-NEXT: pushq %rax -; X64-BMI1BMI2-NEXT: movq %rsi, %rbx -; X64-BMI1BMI2-NEXT: movq %rdi, %r14 -; X64-BMI1BMI2-NEXT: movl %ebx, %eax -; X64-BMI1BMI2-NEXT: negb %al -; X64-BMI1BMI2-NEXT: movq $-1, %rcx -; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi -; X64-BMI1BMI2-NEXT: callq use64@PLT -; X64-BMI1BMI2-NEXT: bzhiq %rbx, %r14, %rax -; X64-BMI1BMI2-NEXT: addq $8, %rsp -; X64-BMI1BMI2-NEXT: popq %rbx -; X64-BMI1BMI2-NEXT: popq %r14 -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_c4_commutative: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: pushq %r14 +; X64-BMI1-NEXT: pushq %rbx +; X64-BMI1-NEXT: pushq %rax +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: movq %rdi, %r14 +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movq $-1, %rbx +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rbx +; X64-BMI1-NEXT: movq %rbx, %rdi +; X64-BMI1-NEXT: callq use64@PLT +; X64-BMI1-NEXT: andq %r14, %rbx +; X64-BMI1-NEXT: movq %rbx, %rax +; X64-BMI1-NEXT: addq $8, %rsp +; X64-BMI1-NEXT: popq %rbx +; X64-BMI1-NEXT: popq %r14 +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_c4_commutative: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: pushq %r14 +; X64-BMI2-NEXT: pushq %rbx +; X64-BMI2-NEXT: pushq %rax +; X64-BMI2-NEXT: movq %rsi, %rbx +; X64-BMI2-NEXT: movq %rdi, %r14 +; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: negb %al +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi +; X64-BMI2-NEXT: callq use64@PLT +; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax +; X64-BMI2-NEXT: addq $8, %rsp +; X64-BMI2-NEXT: popq %rbx +; X64-BMI2-NEXT: popq %r14 +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, 
%numhighbits call void @use64(i64 %mask) @@ -3400,32 +3400,32 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_c0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB39_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: .LBB39_2: -; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_c0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB39_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB39_2: -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_c0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB39_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl $-1, %eax +; X86-BMI1-NEXT: .LBB39_2: +; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_c0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl $-1, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB39_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: .LBB39_2: +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_c0: ; X64-NOBMI: # %bb.0: @@ -3438,16 +3438,16 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_c0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_c0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_c0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_c0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits %masked = and i64 %mask, %val @@ -3467,18 +3467,18 @@ define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_c1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_c1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_c1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb 
{{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_c1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_c1: ; X64-NOBMI: # %bb.0: @@ -3491,16 +3491,16 @@ define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_c1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_c1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_c1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_c1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -3521,18 +3521,18 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_c2: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_c2: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_c2: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_c2: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_c2: ; X64-NOBMI: # %bb.0: @@ -3545,16 +3545,16 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_c2: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_c2: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_c2: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_c2: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits %zextmask = zext i32 %mask to i64 @@ -3592,25 +3592,25 @@ define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_c3: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx -; X64-BMI1NOTBM-NEXT: negb %cl -; X64-BMI1NOTBM-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1NOTBM-NEXT: shrq 
%cl, %rax -; X64-BMI1NOTBM-NEXT: andl %edi, %eax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_32_c3: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: negb %sil -; X64-BMI1BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rax -; X64-BMI1BMI2-NEXT: andl %edi, %eax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_c3: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: movq %rsi, %rcx +; X64-BMI1-NEXT: negb %cl +; X64-BMI1-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI1-NEXT: shrq %cl, %rax +; X64-BMI1-NEXT: andl %edi, %eax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_32_c3: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl %edi, %eax +; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 4294967295, %numhighbits %masked = and i64 %mask, %val @@ -3633,18 +3633,18 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi32_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_d0: ; X64-NOBMI: # %bb.0: @@ -3656,16 +3656,16 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %val, %numhighbits %masked = lshr i32 %highbitscleared, %numhighbits @@ -3683,18 +3683,18 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_d1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: 
bzhi32_d1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi32_d1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_d1_indexzext: ; X64-NOBMI: # %bb.0: @@ -3706,16 +3706,16 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_d1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_d1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_d1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 %highbitscleared = shl i32 %val, %sh_prom @@ -3735,20 +3735,20 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_d2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_d2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_d2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_d2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_d2_load: ; X64-NOBMI: # %bb.0: @@ -3760,16 +3760,16 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_d2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_d2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_d2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_d2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i32 32, %numlowbits %highbitscleared = 
shl i32 %val, %numhighbits @@ -3789,20 +3789,20 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi32_d3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: shll $8, %ecx -; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi32_d3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: shll $8, %ecx +; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi32_d3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -3814,16 +3814,16 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi32_d3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, (%rdi), %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi32_d3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi32_d3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 @@ -3874,76 +3874,76 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB47_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: .LBB47_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB47_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB47_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB47_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl 
%ebx, %eax -; X86-BMI1NOTBM-NEXT: .LBB47_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB47_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB47_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB47_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB47_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB47_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB47_6: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_d0: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: jne .LBB47_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: .LBB47_2: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB47_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB47_4: +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB47_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: .LBB47_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB47_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB47_2: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB47_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB47_4: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB47_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB47_6: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; 
X64-NOBMI-LABEL: bzhi64_d0: ; X64-NOBMI: # %bb.0: @@ -3955,16 +3955,16 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %val, %numhighbits %masked = lshr i64 %highbitscleared, %numhighbits @@ -4011,76 +4011,76 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_d1_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB48_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: .LBB48_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB48_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB48_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB48_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: .LBB48_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_d1_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB48_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB48_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB48_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB48_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB48_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB48_6: -; X86-BMI1BMI2-NEXT: popl 
%esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_d1_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: jne .LBB48_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: .LBB48_2: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB48_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB48_4: +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB48_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: .LBB48_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_d1_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB48_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB48_2: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB48_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB48_4: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB48_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB48_6: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_d1_indexzext: ; X64-NOBMI: # %bb.0: @@ -4092,18 +4092,18 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_d1_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_d1_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_d1_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: retq %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 %highbitscleared = shl i64 %val, %sh_prom @@ -4152,78 +4152,78 @@ define i64 
@bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_d2_load: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edx -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB49_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: .LBB49_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB49_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB49_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB49_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: .LBB49_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_d2_load: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %edx -; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB49_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB49_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB49_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB49_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB49_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB49_6: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_d2_load: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edx +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: jne .LBB49_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: .LBB49_2: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne 
.LBB49_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB49_4: +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB49_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: .LBB49_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_d2_load: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %edx +; X86-BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %edx, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB49_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB49_2: +; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB49_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB49_4: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB49_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB49_6: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_d2_load: ; X64-NOBMI: # %bb.0: @@ -4235,16 +4235,16 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_d2_load: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_d2_load: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_d2_load: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_d2_load: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %val, %numhighbits @@ -4293,78 +4293,78 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %ebx -; X86-BMI1NOTBM-NEXT: pushl %edi -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movl (%eax), %edx -; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %edx, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edi -; X86-BMI1NOTBM-NEXT: jne .LBB50_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %eax, %edi -; X86-BMI1NOTBM-NEXT: .LBB50_2: -; X86-BMI1NOTBM-NEXT: movl %edi, %eax -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: movl $0, %edx -; X86-BMI1NOTBM-NEXT: jne .LBB50_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; 
X86-BMI1NOTBM-NEXT: movl %esi, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %edx -; X86-BMI1NOTBM-NEXT: .LBB50_4: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %ebx -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB50_6 -; X86-BMI1NOTBM-NEXT: # %bb.5: -; X86-BMI1NOTBM-NEXT: movl %ebx, %eax -; X86-BMI1NOTBM-NEXT: .LBB50_6: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: popl %edi -; X86-BMI1NOTBM-NEXT: popl %ebx -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %edi -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl (%eax), %edx -; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi -; X86-BMI1BMI2-NEXT: xorl %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB50_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: movl $0, %edi -; X86-BMI1BMI2-NEXT: .LBB50_2: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax -; X86-BMI1BMI2-NEXT: jne .LBB50_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: .LBB50_4: -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: jne .LBB50_6 -; X86-BMI1BMI2-NEXT: # %bb.5: -; X86-BMI1BMI2-NEXT: movl %edi, %eax -; X86-BMI1BMI2-NEXT: .LBB50_6: -; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_d3_load_indexzext: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: pushl %ebx +; X86-BMI1-NEXT: pushl %edi +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl (%eax), %edx +; X86-BMI1-NEXT: movl 4(%eax), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %edx, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl %esi, %edi +; X86-BMI1-NEXT: jne .LBB50_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %eax, %edi +; X86-BMI1-NEXT: .LBB50_2: +; X86-BMI1-NEXT: movl %edi, %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %ebx, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: movl $0, %edx +; X86-BMI1-NEXT: jne .LBB50_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %esi, %ebx +; X86-BMI1-NEXT: movl %eax, %edx +; X86-BMI1-NEXT: .LBB50_4: +; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB50_6 +; X86-BMI1-NEXT: # %bb.5: +; X86-BMI1-NEXT: movl %ebx, %eax +; X86-BMI1-NEXT: .LBB50_6: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: popl %edi +; X86-BMI1-NEXT: popl %ebx +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_d3_load_indexzext: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%eax), %edx +; X86-BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %edx, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB50_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: movl $0, %edi +; X86-BMI2-NEXT: .LBB50_2: +; X86-BMI2-NEXT: shrxl 
%ecx, %esi, %eax +; X86-BMI2-NEXT: jne .LBB50_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: .LBB50_4: +; X86-BMI2-NEXT: shrdl %cl, %esi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB50_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: .LBB50_6: +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext: ; X64-NOBMI: # %bb.0: @@ -4376,18 +4376,18 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, (%rdi), %rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_d3_load_indexzext: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_d3_load_indexzext: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 @@ -4426,53 +4426,53 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_d0: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: movb $64, %cl -; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: shll %cl, %edx -; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: je .LBB51_2 -; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx -; X86-BMI1NOTBM-NEXT: .LBB51_2: -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: testb $32, %cl -; X86-BMI1NOTBM-NEXT: jne .LBB51_4 -; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax -; X86-BMI1NOTBM-NEXT: .LBB51_4: -; X86-BMI1NOTBM-NEXT: popl %esi -; X86-BMI1NOTBM-NEXT: retl -; -; X86-BMI1BMI2-LABEL: bzhi64_32_d0: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB51_2 -; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %eax, %edx -; X86-BMI1BMI2-NEXT: xorl %eax, %eax -; X86-BMI1BMI2-NEXT: .LBB51_2: -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl -; X86-BMI1BMI2-NEXT: je .LBB51_4 -; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %eax -; X86-BMI1BMI2-NEXT: .LBB51_4: -; X86-BMI1BMI2-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_d0: +; X86-BMI1: # 
%bb.0: +; X86-BMI1-NEXT: pushl %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb $64, %cl +; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl %esi, %edx +; X86-BMI1-NEXT: shll %cl, %edx +; X86-BMI1-NEXT: shldl %cl, %esi, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: je .LBB51_2 +; X86-BMI1-NEXT: # %bb.1: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: .LBB51_2: +; X86-BMI1-NEXT: shrdl %cl, %eax, %edx +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: jne .LBB51_4 +; X86-BMI1-NEXT: # %bb.3: +; X86-BMI1-NEXT: movl %edx, %eax +; X86-BMI1-NEXT: .LBB51_4: +; X86-BMI1-NEXT: popl %esi +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: bzhi64_32_d0: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb $64, %cl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB51_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB51_2: +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB51_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB51_4: +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_d0: ; X64-NOBMI: # %bb.0: @@ -4485,18 +4485,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_d0: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrq %rsi, %rdi, %rax -; X64-BMI1NOTBM-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1NOTBM-NEXT: retq -; -; X64-BMI1BMI2-LABEL: bzhi64_32_d0: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; X64-BMI1BMI2-NEXT: # kill: def $eax killed $eax killed $rax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_d0: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax +; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI1-NEXT: retq +; +; X64-BMI2-LABEL: bzhi64_32_d0: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %highbitscleared = shl i64 %val, %numhighbits %masked = lshr i64 %highbitscleared, %numhighbits @@ -4516,18 +4516,18 @@ define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; -; X86-BMI1NOTBM-LABEL: bzhi64_32_d1: -; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1NOTBM-NEXT: shll $8, %eax -; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1NOTBM-NEXT: retl +; X86-BMI1-LABEL: bzhi64_32_d1: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: shll $8, %eax +; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: retl ; -; X86-BMI1BMI2-LABEL: bzhi64_32_d1: -; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: retl +; X86-BMI2-LABEL: bzhi64_32_d1: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: 
movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_32_d1: ; X64-NOBMI: # %bb.0: @@ -4540,16 +4540,16 @@ define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind { ; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax ; X64-NOBMI-NEXT: retq ; -; X64-BMI1NOTBM-LABEL: bzhi64_32_d1: -; X64-BMI1NOTBM: # %bb.0: -; X64-BMI1NOTBM-NEXT: shll $8, %esi -; X64-BMI1NOTBM-NEXT: bextrl %esi, %edi, %eax -; X64-BMI1NOTBM-NEXT: retq +; X64-BMI1-LABEL: bzhi64_32_d1: +; X64-BMI1: # %bb.0: +; X64-BMI1-NEXT: shll $8, %esi +; X64-BMI1-NEXT: bextrl %esi, %edi, %eax +; X64-BMI1-NEXT: retq ; -; X64-BMI1BMI2-LABEL: bzhi64_32_d1: -; X64-BMI1BMI2: # %bb.0: -; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax -; X64-BMI1BMI2-NEXT: retq +; X64-BMI2-LABEL: bzhi64_32_d1: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: bzhil %esi, %edi, %eax +; X64-BMI2-NEXT: retq %truncval = trunc i64 %val to i32 %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %truncval, %numhighbits @@ -4692,11 +4692,16 @@ define i64 @bzhi64_constant_mask64(i64 %val) nounwind { ; X64-BMI1TBM-NEXT: bextrq $15872, %rdi, %rax # imm = 0x3E00 ; X64-BMI1TBM-NEXT: retq ; -; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movb $62, %al -; X64-BMI1NOTBMBMI2-NEXT: bzhiq %rax, %rdi, %rax -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMI2TBM-LABEL: bzhi64_constant_mask64: +; X64-BMI2TBM: # %bb.0: +; X64-BMI2TBM-NEXT: bextrq $15872, %rdi, %rax # imm = 0x3E00 +; X64-BMI2TBM-NEXT: retq +; +; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64: +; X64-BMI2NOTBM: # %bb.0: +; X64-BMI2NOTBM-NEXT: movb $62, %al +; X64-BMI2NOTBM-NEXT: bzhiq %rax, %rdi, %rax +; X64-BMI2NOTBM-NEXT: retq %masked = and i64 %val, 4611686018427387903 ret i64 %masked } @@ -4727,11 +4732,16 @@ define i64 @bzhi64_constant_mask64_load(i64* %val) nounwind { ; X64-BMI1TBM-NEXT: bextrq $15872, (%rdi), %rax # imm = 0x3E00 ; X64-BMI1TBM-NEXT: retq ; -; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64_load: -; X64-BMI1NOTBMBMI2: # %bb.0: -; X64-BMI1NOTBMBMI2-NEXT: movb $62, %al -; X64-BMI1NOTBMBMI2-NEXT: bzhiq %rax, (%rdi), %rax -; X64-BMI1NOTBMBMI2-NEXT: retq +; X64-BMI2TBM-LABEL: bzhi64_constant_mask64_load: +; X64-BMI2TBM: # %bb.0: +; X64-BMI2TBM-NEXT: bextrq $15872, (%rdi), %rax # imm = 0x3E00 +; X64-BMI2TBM-NEXT: retq +; +; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64_load: +; X64-BMI2NOTBM: # %bb.0: +; X64-BMI2NOTBM-NEXT: movb $62, %al +; X64-BMI2NOTBM-NEXT: bzhiq %rax, (%rdi), %rax +; X64-BMI2NOTBM-NEXT: retq %val1 = load i64, i64* %val %masked = and i64 %val1, 4611686018427387903 ret i64 %masked From d61ccda76965ebb9f4aa24e87899a8b0e65b2d54 Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Fri, 11 Dec 2020 16:19:52 -0500 Subject: [PATCH 12/39] [TableGen] Slim down the data structures in xxxGenInstrInfo.inc, step 1 --- llvm/utils/TableGen/InstrInfoEmitter.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 025c5354514ce..156fa6d18d2ee 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -371,7 +371,7 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "namespace " << Namespace << " {\n"; OS << "LLVM_READONLY\n"; OS << "static int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n"; - // TODO: Factor out instructions with same operands to compress the tables. 
+ // TODO: Factor out duplicate operand lists to compress the tables. if (!NumberedInstructions.empty()) { std::vector OperandOffsets; std::vector OperandRecords; @@ -393,16 +393,26 @@ void InstrInfoEmitter::emitOperandTypeMappings( } } - // Emit the table of offsets for the opcode lookup. - OS << " const int Offsets[] = {\n"; + // Emit the table of offsets (indexes) into the operand type table. + // Size the unsigned integer offset to save space. + assert(OperandRecords.size() <= UINT32_MAX && + "Too many operands for offset table"); + OS << ((OperandRecords.size() <= UINT16_MAX) ? " const uint16_t" + : " const uint32_t"); + OS << " Offsets[] = {\n"; for (int I = 0, E = OperandOffsets.size(); I != E; ++I) OS << " " << OperandOffsets[I] << ",\n"; OS << " };\n"; // Add an entry for the end so that we don't need to special case it below. OperandOffsets.push_back(OperandRecords.size()); + // Emit the actual operand types in a flat table. - OS << " const int OpcodeOperandTypes[] = {\n "; + // Size the signed integer operand type to save space. + assert(EnumVal <= INT16_MAX && + "Too many operand types for operand types table"); + OS << ((EnumVal <= INT8_MAX) ? " const int8_t" : " const int16_t"); + OS << " OpcodeOperandTypes[] = {\n "; for (int I = 0, E = OperandRecords.size(), CurOffset = 1; I != E; ++I) { // We print each Opcode's operands in its own row. if (I == OperandOffsets[CurOffset]) { From 95b2dab199100f5a86d3f73a995afea879886d65 Mon Sep 17 00:00:00 2001 From: Erik Pilkington Date: Tue, 15 Dec 2020 17:00:20 -0500 Subject: [PATCH 13/39] [Sema] Fix a miscompile by retaining array qualifiers when folding VLAs to constant arrays rdar://72243125 Differential revision: https://reviews.llvm.org/D93247 --- clang/lib/Sema/SemaDecl.cpp | 5 +++-- clang/test/SemaObjC/arc.m | 12 ++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 0031f874c05aa..6c438f319991e 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -5965,8 +5965,9 @@ static QualType TryToFixInvalidVariablyModifiedType(QualType T, return QualType(); } - return Context.getConstantArrayType(ElemTy, Res, VLATy->getSizeExpr(), - ArrayType::Normal, 0); + QualType FoldedArrayType = Context.getConstantArrayType( + ElemTy, Res, VLATy->getSizeExpr(), ArrayType::Normal, 0); + return Qs.apply(Context, FoldedArrayType); } static void diff --git a/clang/test/SemaObjC/arc.m b/clang/test/SemaObjC/arc.m index fe5db9ce53840..bcd2f995622c8 100644 --- a/clang/test/SemaObjC/arc.m +++ b/clang/test/SemaObjC/arc.m @@ -839,3 +839,15 @@ void block_capture_autoreleasing(A * __autoreleasing *a, (void)*l; }(); } + +void test_vla_fold_keeps_strong(void) { + const unsigned bounds = 1; + + static id array[bounds]; // expected-warning {{variable length array folded to constant array as an extension}} + typedef __typeof__(array) array_type; + typedef id __strong array_type[1]; + + static id weak_array[bounds] __weak; // expected-warning {{variable length array folded to constant array as an extension}} + typedef __typeof__(weak_array) weak_array_type; + typedef id __weak weak_array_type[1]; +} From 92d6e8001e20d6d0f457ac7cab8b64f3b1a131bf Mon Sep 17 00:00:00 2001 From: Erik Pilkington Date: Tue, 15 Dec 2020 17:01:20 -0500 Subject: [PATCH 14/39] NFC: balance a quote in AttrDocs.td This was confusing my editor. 
--- clang/include/clang/Basic/AttrDocs.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index a65964e94bf14..4f8cd8ecd86f3 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3346,7 +3346,7 @@ As ``global_device`` and ``global_host`` are a subset of ``__global/opencl_global`` address spaces it is allowed to convert ``global_device`` and ``global_host`` address spaces to ``__global/opencl_global`` address spaces (following ISO/IEC TR 18037 5.1.3 -"Address space nesting and rules for pointers). +"Address space nesting and rules for pointers"). }]; } From 7082de56b7ad4b4eeb75e59e0d4c28bed44b5d23 Mon Sep 17 00:00:00 2001 From: Tim Keith Date: Wed, 16 Dec 2020 07:06:53 -0800 Subject: [PATCH 15/39] [flang] Handle multiple names for same operator Some operators have more than one name, e.g. operator(==), operator(.eq). That was working correctly in generic definitions but they can also appear in other contexts: USE statements and access statements, for example. This changes FindInScope to always look for each of the names for a symbol. So an operator may be use-associated under one name but declared private under another name and it will be the same symbol. This replaces GenericSpecInfo::FindInScope which was only usable in some cases. Add a version of FindInScope() that looks in the current scope to simplify many of the calls. Differential Revision: https://reviews.llvm.org/D93344 --- flang/lib/Semantics/resolve-names-utils.cpp | 74 ++++++++++++--------- flang/lib/Semantics/resolve-names-utils.h | 12 ++-- flang/lib/Semantics/resolve-names.cpp | 48 +++++++------ flang/test/Semantics/modfile07.f90 | 49 ++++++++++++++ 4 files changed, 126 insertions(+), 57 deletions(-) diff --git a/flang/lib/Semantics/resolve-names-utils.cpp b/flang/lib/Semantics/resolve-names-utils.cpp index 8dbd25e163acb..83bff78f426ab 100644 --- a/flang/lib/Semantics/resolve-names-utils.cpp +++ b/flang/lib/Semantics/resolve-names-utils.cpp @@ -29,6 +29,8 @@ using common::NumericOperator; using common::RelationalOperator; using IntrinsicOperator = parser::DefinedOperator::IntrinsicOperator; +static constexpr const char *operatorPrefix{"operator("}; + static GenericKind MapIntrinsicOperator(IntrinsicOperator); Symbol *Resolve(const parser::Name &name, Symbol *symbol) { @@ -65,6 +67,37 @@ bool IsIntrinsicOperator( return false; } +template +std::forward_list GetOperatorNames( + const SemanticsContext &context, E opr) { + std::forward_list result; + for (const char *name : context.languageFeatures().GetNames(opr)) { + result.emplace_front(std::string{operatorPrefix} + name + ')'); + } + return result; +} + +std::forward_list GetAllNames( + const SemanticsContext &context, const SourceName &name) { + std::string str{name.ToString()}; + if (!name.empty() && name.end()[-1] == ')' && + name.ToString().rfind(std::string{operatorPrefix}, 0) == 0) { + for (int i{0}; i != common::LogicalOperator_enumSize; ++i) { + auto names{GetOperatorNames(context, LogicalOperator{i})}; + if (std::find(names.begin(), names.end(), str) != names.end()) { + return names; + } + } + for (int i{0}; i != common::RelationalOperator_enumSize; ++i) { + auto names{GetOperatorNames(context, RelationalOperator{i})}; + if (std::find(names.begin(), names.end(), str) != names.end()) { + return names; + } + } + } + return {str}; +} + bool IsLogicalConstant( const SemanticsContext &context, const SourceName &name) { std::string 
str{name.ToString()}; @@ -73,37 +106,6 @@ bool IsLogicalConstant( (str == ".t" || str == ".f.")); } -// The operators <, <=, >, >=, ==, and /= always have the same interpretations -// as the operators .LT., .LE., .GT., .GE., .EQ., and .NE., respectively. -std::forward_list GenericSpecInfo::GetAllNames( - SemanticsContext &context) const { - auto getNames{[&](auto opr) { - std::forward_list result; - for (const char *name : context.languageFeatures().GetNames(opr)) { - result.emplace_front("operator("s + name + ')'); - } - return result; - }}; - return std::visit( - common::visitors{[&](const LogicalOperator &x) { return getNames(x); }, - [&](const RelationalOperator &x) { return getNames(x); }, - [&](const auto &) -> std::forward_list { - return {symbolName_.value().ToString()}; - }}, - kind_.u); -} - -Symbol *GenericSpecInfo::FindInScope( - SemanticsContext &context, const Scope &scope) const { - for (const auto &name : GetAllNames(context)) { - auto iter{scope.find(SourceName{name})}; - if (iter != scope.end()) { - return &*iter->second; - } - } - return nullptr; -} - void GenericSpecInfo::Resolve(Symbol *symbol) const { if (symbol) { if (auto *details{symbol->detailsIf()}) { @@ -162,6 +164,16 @@ void GenericSpecInfo::Analyze(const parser::GenericSpec &x) { x.u); } +llvm::raw_ostream &operator<<( + llvm::raw_ostream &os, const GenericSpecInfo &info) { + os << "GenericSpecInfo: kind=" << info.kind_.ToString(); + os << " parseName=" + << (info.parseName_ ? info.parseName_->ToString() : "null"); + os << " symbolName=" + << (info.symbolName_ ? info.symbolName_->ToString() : "null"); + return os; +} + // parser::DefinedOperator::IntrinsicOperator -> GenericKind static GenericKind MapIntrinsicOperator(IntrinsicOperator op) { switch (op) { diff --git a/flang/lib/Semantics/resolve-names-utils.h b/flang/lib/Semantics/resolve-names-utils.h index 17462d111d970..89011ff3b9565 100644 --- a/flang/lib/Semantics/resolve-names-utils.h +++ b/flang/lib/Semantics/resolve-names-utils.h @@ -19,6 +19,7 @@ #include "flang/Semantics/semantics.h" #include "flang/Semantics/symbol.h" #include "flang/Semantics/type.h" +#include "llvm/Support/raw_ostream.h" #include namespace Fortran::parser { @@ -50,6 +51,11 @@ parser::MessageFixedText WithIsFatal( bool IsIntrinsicOperator(const SemanticsContext &, const SourceName &); bool IsLogicalConstant(const SemanticsContext &, const SourceName &); +// Some intrinsic operators have more than one name (e.g. `operator(.eq.)` and +// `operator(==)`). GetAllNames() returns them all, including symbolName. +std::forward_list GetAllNames( + const SemanticsContext &, const SourceName &); + template MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { if (MaybeExpr maybeExpr{ @@ -75,13 +81,11 @@ class GenericSpecInfo { GenericKind kind() const { return kind_; } const SourceName &symbolName() const { return symbolName_.value(); } - // Some intrinsic operators have more than one name (e.g. `operator(.eq.)` and - // `operator(==)`). GetAllNames() returns them all, including symbolName. 
- std::forward_list GetAllNames(SemanticsContext &) const; // Set the GenericKind in this symbol and resolve the corresponding // name if there is one void Resolve(Symbol *) const; - Symbol *FindInScope(SemanticsContext &, const Scope &) const; + friend llvm::raw_ostream &operator<<( + llvm::raw_ostream &, const GenericSpecInfo &); private: GenericKind kind_; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 5ac787b61d68b..1288b11a7727a 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -502,6 +502,9 @@ class ScopeHandler : public ImplicitRulesVisitor { // Search for name only in scope, not in enclosing scopes. Symbol *FindInScope(const Scope &, const parser::Name &); Symbol *FindInScope(const Scope &, const SourceName &); + template Symbol *FindInScope(const T &name) { + return FindInScope(currScope(), name); + } // Search for name in a derived type scope and its parents. Symbol *FindInTypeOrParents(const Scope &, const parser::Name &); Symbol *FindInTypeOrParents(const parser::Name &); @@ -533,7 +536,7 @@ class ScopeHandler : public ImplicitRulesVisitor { const SourceName &name, const Attrs &attrs, D &&details) { // Note: don't use FindSymbol here. If this is a derived type scope, // we want to detect whether the name is already declared as a component. - auto *symbol{FindInScope(currScope(), name)}; + auto *symbol{FindInScope(name)}; if (!symbol) { symbol = &MakeSymbol(name, attrs); symbol->set_details(std::move(details)); @@ -2048,7 +2051,7 @@ Symbol &ScopeHandler::MakeHostAssocSymbol( return symbol; } Symbol &ScopeHandler::CopySymbol(const SourceName &name, const Symbol &symbol) { - CHECK(!FindInScope(currScope(), name)); + CHECK(!FindInScope(name)); return MakeSymbol(currScope(), name, symbol.attrs()); } @@ -2058,11 +2061,14 @@ Symbol *ScopeHandler::FindInScope( return Resolve(name, FindInScope(scope, name.source)); } Symbol *ScopeHandler::FindInScope(const Scope &scope, const SourceName &name) { - if (auto it{scope.find(name)}; it != scope.end()) { - return &*it->second; - } else { - return nullptr; + // all variants of names, e.g. "operator(.ne.)" for "operator(/=)" + for (const std::string &n : GetAllNames(context(), name)) { + auto it{scope.find(SourceName{n})}; + if (it != scope.end()) { + return &*it->second; + } } + return nullptr; } // Find a component or type parameter by name in a derived type or its parents. 
@@ -2318,7 +2324,7 @@ void ModuleVisitor::Post(const parser::UseStmt &x) { !symbol->attrs().test(Attr::INTRINSIC) && !symbol->has() && useNames.count(name) == 0) { SourceName location{x.moduleName.source}; - if (auto *localSymbol{FindInScope(currScope(), name)}) { + if (auto *localSymbol{FindInScope(name)}) { DoAddUse(location, localSymbol->name(), *localSymbol, *symbol); } else { DoAddUse(location, location, CopySymbol(name, *symbol), *symbol); @@ -2397,8 +2403,7 @@ void ModuleVisitor::DoAddUse(const SourceName &location, generic1.CopyFrom(generic2); } EraseSymbol(localSymbol); - MakeSymbol( - localSymbol.name(), localUltimate.attrs(), std::move(generic1)); + MakeSymbol(localSymbol.name(), localSymbol.attrs(), std::move(generic1)); } else { ConvertToUseError(localSymbol, location, *useModuleScope_); } @@ -2435,8 +2440,7 @@ void ModuleVisitor::DoAddUse(const SourceName &location, void ModuleVisitor::AddUse(const GenericSpecInfo &info) { if (useModuleScope_) { const auto &name{info.symbolName()}; - auto rename{ - AddUse(name, name, info.FindInScope(context(), *useModuleScope_))}; + auto rename{AddUse(name, name, FindInScope(*useModuleScope_, name))}; info.Resolve(rename.use); } } @@ -2523,7 +2527,7 @@ void InterfaceVisitor::Post(const parser::EndInterfaceStmt &) { // Create a symbol in genericSymbol_ for this GenericSpec. bool InterfaceVisitor::Pre(const parser::GenericSpec &x) { - if (auto *symbol{GenericSpecInfo{x}.FindInScope(context(), currScope())}) { + if (auto *symbol{FindInScope(GenericSpecInfo{x}.symbolName())}) { SetGenericSymbol(*symbol); } return false; @@ -3402,7 +3406,7 @@ Symbol &DeclarationVisitor::HandleAttributeStmt( if (attr == Attr::INTRINSIC && !IsIntrinsic(name.source, std::nullopt)) { Say(name.source, "'%s' is not a known intrinsic procedure"_err_en_US); } - auto *symbol{FindInScope(currScope(), name)}; + auto *symbol{FindInScope(name)}; if (attr == Attr::ASYNCHRONOUS || attr == Attr::VOLATILE) { // these can be set on a symbol that is host-assoc or use-assoc if (!symbol && @@ -4065,7 +4069,7 @@ void DeclarationVisitor::CheckBindings( CHECK(currScope().IsDerivedType()); for (auto &declaration : tbps.declarations) { auto &bindingName{std::get(declaration.t)}; - if (Symbol * binding{FindInScope(currScope(), bindingName)}) { + if (Symbol * binding{FindInScope(bindingName)}) { if (auto *details{binding->detailsIf()}) { const Symbol *procedure{FindSubprogram(details->symbol())}; if (!CanBeTypeBoundProc(procedure)) { @@ -4134,7 +4138,7 @@ bool DeclarationVisitor::Pre(const parser::TypeBoundGenericStmt &x) { SourceName symbolName{info.symbolName()}; bool isPrivate{accessSpec ? 
accessSpec->v == parser::AccessSpec::Kind::Private : derivedTypeInfo_.privateBindings}; - auto *genericSymbol{info.FindInScope(context(), currScope())}; + auto *genericSymbol{FindInScope(symbolName)}; if (genericSymbol) { if (!genericSymbol->has()) { genericSymbol = nullptr; // MakeTypeSymbol will report the error below @@ -4142,7 +4146,7 @@ bool DeclarationVisitor::Pre(const parser::TypeBoundGenericStmt &x) { } else { // look in parent types: Symbol *inheritedSymbol{nullptr}; - for (const auto &name : info.GetAllNames(context())) { + for (const auto &name : GetAllNames(context(), symbolName)) { inheritedSymbol = currScope().FindComponent(SourceName{name}); if (inheritedSymbol) { break; @@ -4298,7 +4302,7 @@ bool DeclarationVisitor::Pre(const parser::NamelistStmt::Group &x) { } const auto &groupName{std::get(x.t)}; - auto *groupSymbol{FindInScope(currScope(), groupName)}; + auto *groupSymbol{FindInScope(groupName)}; if (!groupSymbol || !groupSymbol->has()) { groupSymbol = &MakeSymbol(groupName, std::move(details)); groupSymbol->ReplaceName(groupName.source); @@ -4397,7 +4401,7 @@ bool DeclarationVisitor::Pre(const parser::SaveStmt &x) { void DeclarationVisitor::CheckSaveStmts() { for (const SourceName &name : saveInfo_.entities) { - auto *symbol{FindInScope(currScope(), name)}; + auto *symbol{FindInScope(name)}; if (!symbol) { // error was reported } else if (saveInfo_.saveAll) { @@ -5159,7 +5163,7 @@ bool ConstructVisitor::Pre(const parser::ChangeTeamStmt &x) { void ConstructVisitor::Post(const parser::CoarrayAssociation &x) { const auto &decl{std::get(x.t)}; const auto &name{std::get(decl.t)}; - if (auto *symbol{FindInScope(currScope(), name)}) { + if (auto *symbol{FindInScope(name)}) { const auto &selector{std::get(x.t)}; if (auto sel{ResolveSelector(selector)}) { const Symbol *whole{UnwrapWholeSymbolDataRef(sel.expr)}; @@ -5962,7 +5966,7 @@ bool ModuleVisitor::Pre(const parser::AccessStmt &x) { [=](const Indirection &y) { auto info{GenericSpecInfo{y.value()}}; const auto &symbolName{info.symbolName()}; - if (auto *symbol{info.FindInScope(context(), currScope())}) { + if (auto *symbol{FindInScope(symbolName)}) { info.Resolve(&SetAccess(symbolName, accessAttr, symbol)); } else if (info.kind().IsName()) { info.Resolve(&SetAccess(symbolName, accessAttr)); @@ -6084,7 +6088,7 @@ void ResolveNamesVisitor::CreateGeneric(const parser::GenericSpec &x) { return; } GenericDetails genericDetails; - if (Symbol * existing{info.FindInScope(context(), currScope())}) { + if (Symbol * existing{FindInScope(symbolName)}) { if (existing->has()) { info.Resolve(existing); return; // already have generic, add to it @@ -6204,7 +6208,7 @@ void ResolveNamesVisitor::CheckImports() { void ResolveNamesVisitor::CheckImport( const SourceName &location, const SourceName &name) { - if (auto *symbol{FindInScope(currScope(), name)}) { + if (auto *symbol{FindInScope(name)}) { Say(location, "'%s' from host is not accessible"_err_en_US, name) .Attach(symbol->name(), "'%s' is hidden by this entity"_en_US, symbol->name()); diff --git a/flang/test/Semantics/modfile07.f90 b/flang/test/Semantics/modfile07.f90 index 0809da471a602..f3e98bf195f9f 100644 --- a/flang/test/Semantics/modfile07.f90 +++ b/flang/test/Semantics/modfile07.f90 @@ -549,3 +549,52 @@ subroutine test() ! end !end +! Verify that equivalent names are used when generic operators are merged + +module m10a + interface operator(.ne.) + end interface +end +!Expect: m10a.mod +!module m10a +! interface operator(.ne.) +! 
end interface +!end + +module m10b + interface operator(<>) + end interface +end +!Expect: m10b.mod +!module m10b +! interface operator(<>) +! end interface +!end + +module m10c + use m10a + use m10b + interface operator(/=) + end interface +end +!Expect: m10c.mod +!module m10c +! use m10b,only:operator(.ne.) +! use m10a,only:operator(.ne.) +! interface operator(.ne.) +! end interface +!end + +module m10d + use m10a + use m10c + private :: operator(<>) +end +!Expect: m10d.mod +!module m10d +! use m10c,only:operator(.ne.) +! use m10a,only:operator(.ne.) +! interface operator(.ne.) +! end interface +! private::operator(.ne.) +!end From f3e0431b763979c373258f7222668e87bb5d28cb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 16 Dec 2020 10:25:07 -0500 Subject: [PATCH 16/39] LangRef: Update byval/sret description for required types --- llvm/docs/LangRef.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index b29eb589e2d72..cde9ed5196262 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1045,7 +1045,7 @@ Currently, only the following parameter attributes are defined: opposed to memory, though some targets use it to distinguish between two different kinds of registers). Use of this attribute is target-specific. -``byval`` or ``byval()`` +``byval()`` This indicates that the pointer parameter should really be passed by value to the function. The attribute implies that a hidden copy of the pointee is made between the caller and the callee, so the callee @@ -1057,7 +1057,7 @@ Currently, only the following parameter attributes are defined: ``byval`` parameters). This is not a valid attribute for return values. - The byval attribute also supports an optional type argument, which + The byval type argument indicates the in-memory value type, and must be the same as the pointee type of the argument. The byval attribute also supports specifying an alignment with the @@ -1144,7 +1144,7 @@ Currently, only the following parameter attributes are defined: See :doc:`InAlloca` for more information on how to use this attribute. -``sret`` or ``sret()`` +``sret()`` This indicates that the pointer parameter specifies the address of a structure that is the return value of the function in the source program. This pointer must be guaranteed by the caller to be valid: @@ -1152,9 +1152,8 @@ Currently, only the following parameter attributes are defined: to trap and to be properly aligned. This is not a valid attribute for return values. - The sret attribute also supports an optional type argument, which - must be the same as the pointee type of the argument. In the - future this will be required. + The sret type argument specifies the in memory type, which must be + the same as the pointee type of the argument. .. _attr_align: From 4a6a4e573fe6dc000d717824106515459f2ff432 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 16 Dec 2020 11:30:01 +0000 Subject: [PATCH 17/39] [InstCombine] Precommit tests for !annotation metadata handling. 
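For reference (this simply restates what the new test file below exercises), the metadata is attached to individual instructions, e.g.

  %c = icmp uge i32* %a.c, %b.c, !annotation !0
  ...
  !0 = !{ !"auto-init" }

and the tests record in which folds InstCombine currently keeps or drops it.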
--- .../Transforms/InstCombine/annotations.ll | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/annotations.ll diff --git a/llvm/test/Transforms/InstCombine/annotations.ll b/llvm/test/Transforms/InstCombine/annotations.ll new file mode 100644 index 0000000000000..1530b867bfb68 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/annotations.ll @@ -0,0 +1,153 @@ +; RUN: opt < %s -instcombine -S | FileCheck --match-full-lines %s + +; Test cases to make sure !annotation metadata is preserved, if possible. +; Currently we fail to preserve !annotation metadata in many cases. + +; Make sure !annotation metadata is added to new instructions, if the source +; instruction has !annotation metadata. +define i1 @fold_to_new_instruction(i8* %a, i8* %b) { +; CHECK-LABEL: define {{.+}} @fold_to_new_instruction({{.+}} +; CHECK-NEXT: [[C:%.*]] = icmp uge i8* [[A:%.*]], [[B:%[a-z]*]] +; CHECK-NEXT: ret i1 [[C]] +; + %a.c = bitcast i8* %a to i32*, !annotation !0 + %b.c = bitcast i8* %b to i32*, !annotation !0 + %c = icmp uge i32* %a.c, %b.c, !annotation !0 + ret i1 %c +} + +; Make sure !annotation is not added to new instructions if the source +; instruction does not have it (even if some folded operands do have +; !annotation). +define i1 @fold_to_new_instruction2(i8* %a, i8* %b) { +; CHECK-LABEL: define {{.+}} @fold_to_new_instruction2({{.+}} +; CHECK-NEXT: [[C:%.*]] = icmp uge i8* [[A:%.*]], [[B:%[a-z]+]] +; CHECK-NEXT: ret i1 [[C]] +; + %a.c = bitcast i8* %a to i32*, !annotation !0 + %b.c = bitcast i8* %b to i32*, !annotation !0 + %c = icmp uge i32* %a.c, %b.c + ret i1 %c +} + +; Make sure !annotation metadata is *not* added if we replace an instruction +; with !annotation with an existing one without. +define i32 @do_not_add_annotation_to_existing_instr(i32 %a, i32 %b) { +; CHECK-LABEL: define {{.+}} @do_not_add_annotation_to_existing_instr({{.+}} +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%[a-z]+]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %add = add i32 %a, %b + %res = add i32 0, %add, !annotation !0 + ret i32 %res +} + +; memcpy can be expanded inline with load/store. Verify that we keep the +; !annotation metadata. 
+ +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind + +define void @copy_1_byte(i8* %d, i8* %s) { +; CHECK-LABEL: define {{.+}} @copy_1_byte({{.+}} +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: store i8 [[TMP1]], i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 1, i1 false), !annotation !0 + ret void +} + +declare i8* @memcpy(i8* noalias returned, i8* noalias nocapture readonly, i64) nofree nounwind + +define void @libcallcopy_1_byte(i8* %d, i8* %s) { +; CHECK-LABEL: define {{.+}} @libcallcopy_1_byte({{.+}} +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: store i8 [[TMP1]], i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call i8* @memcpy(i8* %d, i8* %s, i64 1), !annotation !0 + ret void +} + +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) nofree nounwind + +define void @libcallcopy_1_byte_chk(i8* %d, i8* %s) { +; CHECK-LABEL: define {{.+}} @libcallcopy_1_byte_chk({{.+}} +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: store i8 [[TMP1]], i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call i8* @__memcpy_chk(i8* %d, i8* %s, i64 1, i64 1), !annotation !0 + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) nounwind + +define void @move_1_byte(i8* %d, i8* %s) { +; CHECK-LABEL: define {{.+}} @move_1_byte({{.+}} +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: store i8 [[TMP1]], i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.memmove.p0i8.p0i8.i32(i8* %d, i8* %s, i32 1, i1 false), !annotation !0 + ret void +} + +declare i8* @memmove(i8* returned, i8* nocapture readonly, i64) nofree nounwind + +define void @libcallmove_1_byte(i8* %d, i8* %s) { +; CHECK-LABEL: define {{.+}} @libcallmove_1_byte({{.+}} +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: store i8 [[TMP1]], i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call i8* @memmove(i8* %d, i8* %s, i64 1), !annotation !0 + ret void +} + +declare i8* @__memmove_chk(i8*, i8*, i64, i64) nofree nounwind + +define void @libcallmove_1_byte_chk(i8* %d, i8* %s) { +; CHECK-LABEL: define {{.+}} @libcallmove_1_byte_chk({{.+}} +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: store i8 [[TMP1]], i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call i8* @__memmove_chk(i8* %d, i8* %s, i64 1, i64 1), !annotation !0 + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) argmemonly nounwind + +define void @set_1_byte(i8* %d) { +; CHECK-LABEL: define {{.+}} @set_1_byte({{.+}} +; CHECK-NEXT: store i8 1, i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.memset.p0i8.i32(i8* %d, i8 1, i32 1, i1 false), !annotation !0 + ret void +} + +declare i8* @memset(i8*, i32, i64) nofree + +define void @libcall_set_1_byte(i8* %d) { +; CHECK-LABEL: define {{.+}} @libcall_set_1_byte({{.+}} +; CHECK-NEXT: store i8 1, i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call i8* @memset(i8* %d, i32 1, i64 1), !annotation !0 + ret void +} + +declare i8* @__memset_chk(i8*, i32, i64, i64) nofree + +define void @libcall_set_1_byte_chk(i8* %d) { +; CHECK-LABEL: define {{.+}} @libcall_set_1_byte_chk({{.+}} +; CHECK-NEXT: store i8 1, i8* [[D:%.*]], align 1 +; CHECK-NEXT: ret void +; + call i8* @__memset_chk(i8* %d, i32 1, i64 1, i64 1), !annotation !0 + ret void +} + +!0 = !{ !"auto-init" } 
From e2e86f4e77ec2fd79743f4d0e94689e9668600ad Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Wed, 16 Dec 2020 06:01:08 -0500 Subject: [PATCH 18/39] [Doc][SystemZ] Add Linux/SystemZ to Getting Started guide. The Linux/SystemZ platform is missing in the Getting Started guide as platform on which LLVM is known to work. Reviewed by: uweigand Differential Revision: https://reviews.llvm.org/D93388 --- llvm/docs/GettingStarted.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst index 70e12b0f877a0..05d2994fed2b1 100644 --- a/llvm/docs/GettingStarted.rst +++ b/llvm/docs/GettingStarted.rst @@ -121,6 +121,7 @@ Linux amd64 GCC, Clang Linux ARM GCC, Clang Linux Mips GCC, Clang Linux PowerPC GCC, Clang +Linux SystemZ GCC, Clang Solaris V9 (Ultrasparc) GCC FreeBSD x86\ :sup:`1` GCC, Clang FreeBSD amd64 GCC, Clang From 07751310580fa5b7b94b6efa85d7964af0f699a6 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Tue, 15 Dec 2020 10:59:26 -0800 Subject: [PATCH 19/39] [flang] Fix crash in folding (#48437) Elemental intrinsic function folding was not taking the lower bounds of constant array arguments into account; these lower bounds can be distinct from 1 when named constants appear as arguments. LLVM bugzilla #48437. Differential Revision: https://reviews.llvm.org/D93321 --- flang/include/flang/Evaluate/constant.h | 5 ++-- flang/lib/Evaluate/fold-implementation.h | 36 +++++++++--------------- flang/test/Evaluate/folding16.f90 | 8 ++++++ 3 files changed, 25 insertions(+), 24 deletions(-) create mode 100644 flang/test/Evaluate/folding16.f90 diff --git a/flang/include/flang/Evaluate/constant.h b/flang/include/flang/Evaluate/constant.h index a9f6e87c9db03..89a5867722f72 100644 --- a/flang/include/flang/Evaluate/constant.h +++ b/flang/include/flang/Evaluate/constant.h @@ -140,7 +140,8 @@ template class Constant : public ConstantBase { } } - // Apply subscripts. + // Apply subscripts. An empty subscript list is allowed for + // a scalar constant. Element At(const ConstantSubscripts &) const; Constant Reshape(ConstantSubscripts &&) const; @@ -177,7 +178,7 @@ class Constant> : public ConstantBounds { } } - // Apply subscripts + // Apply subscripts, if any. Scalar At(const ConstantSubscripts &) const; Constant Reshape(ConstantSubscripts &&) const; diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h index 4fa5f6a4c8837..7232715600fd5 100644 --- a/flang/lib/Evaluate/fold-implementation.h +++ b/flang/lib/Evaluate/fold-implementation.h @@ -443,9 +443,8 @@ Expr FoldElementalIntrinsicHelper(FoldingContext &context, // Compute the shape of the result based on shapes of arguments ConstantSubscripts shape; int rank{0}; - const ConstantSubscripts *shapes[sizeof...(TA)]{ - &std::get(*args)->shape()...}; - const int ranks[sizeof...(TA)]{std::get(*args)->Rank()...}; + const ConstantSubscripts *shapes[]{&std::get(*args)->shape()...}; + const int ranks[]{std::get(*args)->Rank()...}; for (unsigned int i{0}; i < sizeof...(TA); ++i) { if (ranks[i] > 0) { if (rank == 0) { @@ -470,20 +469,19 @@ Expr FoldElementalIntrinsicHelper(FoldingContext &context, std::vector> results; if (TotalElementCount(shape) > 0) { ConstantBounds bounds{shape}; - ConstantSubscripts index(rank, 1); + ConstantSubscripts resultIndex(rank, 1); + ConstantSubscripts argIndex[]{std::get(*args)->lbounds()...}; do { if constexpr (std::is_same_v, ScalarFuncWithContext>) { - results.emplace_back(func(context, - (ranks[I] ? 
std::get(*args)->At(index) - : std::get(*args)->GetScalarValue().value())...)); + results.emplace_back( + func(context, std::get(*args)->At(argIndex[I])...)); } else if constexpr (std::is_same_v, ScalarFunc>) { - results.emplace_back(func( - (ranks[I] ? std::get(*args)->At(index) - : std::get(*args)->GetScalarValue().value())...)); + results.emplace_back(func(std::get(*args)->At(argIndex[I])...)); } - } while (bounds.IncrementSubscripts(index)); + (std::get(*args)->IncrementSubscripts(argIndex[I]), ...); + } while (bounds.IncrementSubscripts(resultIndex)); } // Build and return constant result if constexpr (TR::category == TypeCategory::Character) { @@ -732,17 +730,11 @@ template class ArrayConstructorFolder { Expr folded{Fold(context_, common::Clone(expr.value()))}; if (const auto *c{UnwrapConstantValue(folded)}) { // Copy elements in Fortran array element order - ConstantSubscripts shape{c->shape()}; - int rank{c->Rank()}; - ConstantSubscripts index(GetRank(shape), 1); - for (std::size_t n{c->size()}; n-- > 0;) { - elements_.emplace_back(c->At(index)); - for (int d{0}; d < rank; ++d) { - if (++index[d] <= shape[d]) { - break; - } - index[d] = 1; - } + if (c->size() > 0) { + ConstantSubscripts index{c->lbounds()}; + do { + elements_.emplace_back(c->At(index)); + } while (c->IncrementSubscripts(index)); } return true; } else { diff --git a/flang/test/Evaluate/folding16.f90 b/flang/test/Evaluate/folding16.f90 new file mode 100644 index 0000000000000..0918381040bfe --- /dev/null +++ b/flang/test/Evaluate/folding16.f90 @@ -0,0 +1,8 @@ +! RUN: %S/test_folding.sh %s %t %f18 +! Ensure that lower bounds are accounted for in intrinsic folding; +! this is a regression test for a bug in which they were not +real, parameter :: a(-1:-1) = 1. +real, parameter :: b(-1:-1) = log(a) +logical, parameter :: test = lbound(a,1)==-1 .and. lbound(b,1)==-1 .and. & + lbound(log(a),1)==1 .and. all(b==0) +end From 6e890ec7beb0874464a0af9f84e41a987f968b23 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Sat, 5 Dec 2020 12:52:38 +0100 Subject: [PATCH 20/39] [CMake] Avoid __FakeVCSRevision.h with no git repository Set the return variable to "" in find_first_existing_vc_file to say that there is a repository, but no file to depend on. This works transparently for all other callers that handle undefinedness and equality to an empty string the same way. Use the knowledge to avoid depending on __FakeVCSRevision.h if there is no git repository at all (for example when building a release) as there is no point in regenerating an empty VCSRevision.h. Differential Revision: https://reviews.llvm.org/D92718 --- llvm/cmake/modules/AddLLVM.cmake | 8 ++++++++ llvm/include/llvm/Support/CMakeLists.txt | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index a3e9eaeeb2622..b86fbdaaa6d83 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -2135,6 +2135,13 @@ function(setup_dependency_debugging name) set_target_properties(${name} PROPERTIES RULE_LAUNCH_COMPILE ${sandbox_command}) endfunction() +# If the sources at the given `path` are under version control, set `out_var` +# to the the path of a file which will be modified when the VCS revision +# changes, attempting to create that file if it does not exist; if no such +# file exists and one cannot be created, instead set `out_var` to the +# empty string. +# +# If the sources are not under version control, do not define `out_var`. 
function(find_first_existing_vc_file path out_var) if(NOT EXISTS "${path}") return() @@ -2156,6 +2163,7 @@ function(find_first_existing_vc_file path out_var) RESULT_VARIABLE touch_head_result ERROR_QUIET) if (NOT touch_head_result EQUAL 0) + set(${out_var} "" PARENT_SCOPE) return() endif() endif() diff --git a/llvm/include/llvm/Support/CMakeLists.txt b/llvm/include/llvm/Support/CMakeLists.txt index aa71b55721810..aeb5866ecbdde 100644 --- a/llvm/include/llvm/Support/CMakeLists.txt +++ b/llvm/include/llvm/Support/CMakeLists.txt @@ -11,7 +11,7 @@ if(LLVM_APPEND_VC_REV) # A fake version file and is not expected to exist. It is being used to # force regeneration of VCSRevision.h for source directory with no write # permission available. - if (NOT llvm_vc) + if (llvm_vc STREQUAL "") set(fake_version_inc "${CMAKE_CURRENT_BINARY_DIR}/__FakeVCSRevision.h") endif() endif() From b607837c75d04cc007dcf855983dfa3b69f63d73 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Wed, 16 Dec 2020 16:40:21 +0000 Subject: [PATCH 21/39] [libomptarget][nfc] Replace static const with enum [libomptarget][nfc] Replace static const with enum Semantically identical. Replaces 0xff... with ~0 to spare counting the f. Has the advantage that the compiler doesn't need to prove the 4/8 byte value dead before discarding it, and sidesteps the compilation question associated with what static means for a single source language. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93328 --- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h | 5 +++-- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h index 34794587e0fe7..d25ea8559c05d 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -74,8 +74,9 @@ INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) { return (((uint64_t)hi) << 32) | (uint64_t)lo; } -static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes = - UINT64_C(0xffffffffffffffff); +enum : __kmpc_impl_lanemask_t { + __kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0 +}; DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt(); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index 46ce751c44c4d..411e1676b7c7d 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -91,8 +91,9 @@ INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) { return val; } -static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes = - UINT32_C(0xffffffff); +enum : __kmpc_impl_lanemask_t { + __kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0 +}; INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() { __kmpc_impl_lanemask_t res; From 16cb7910f51f0c2570b1f3406bcd8d4069e52a3e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 16 Dec 2020 08:59:38 -0800 Subject: [PATCH 22/39] [ELF] --emit-relocs: fix a crash if .rela.dyn is an empty output section Fix PR48357: If .rela.dyn appears as an output section description, its type may be SHT_RELA (due to the empty synthetic .rela.plt) while there is no input section. The empty .rela.dyn may be retained due to a reference in a linker script. Don't crash. 
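As an illustration (this mirrors the new test below rather than adding a separate scenario), a linker script along the lines of

  SECTIONS {
    .rela.dyn : { *(.rela*) }
    __rela_offset = ABSOLUTE(ADDR(.rela.dyn));
  }

keeps the otherwise-empty .rela.dyn output section alive through the ABSOLUTE(ADDR(...)) reference even though no input .rela sections remain, which is the case that used to crash.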
Reviewed By: grimar Differential Revision: https://reviews.llvm.org/D93367 --- lld/ELF/OutputSections.cpp | 6 +++++- .../ELF/linkerscript/emit-relocs-rela-dyn.s | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 lld/test/ELF/linkerscript/emit-relocs-rela-dyn.s diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 5eea3bc8f6a51..9fd226fbba2c8 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -418,7 +418,11 @@ void OutputSection::finalize() { if (!config->copyRelocs || (type != SHT_RELA && type != SHT_REL)) return; - if (isa(first)) + // Skip if 'first' is synthetic, i.e. not a section created by --emit-relocs. + // Normally 'type' was changed by 'first' so 'first' should be non-null. + // However, if the output section is .rela.dyn, 'type' can be set by the empty + // synthetic .rela.plt and first can be null. + if (!first || isa(first)) return; link = in.symTab->getParent()->sectionIndex; diff --git a/lld/test/ELF/linkerscript/emit-relocs-rela-dyn.s b/lld/test/ELF/linkerscript/emit-relocs-rela-dyn.s new file mode 100644 index 0000000000000..a6c627fb18ce3 --- /dev/null +++ b/lld/test/ELF/linkerscript/emit-relocs-rela-dyn.s @@ -0,0 +1,17 @@ +# REQUIRES: x86 +## PR48357: If .rela.dyn appears as an output section description, its type may +## be SHT_RELA (due to the empty synthetic .rela.plt) while there is no input +## section. The empty .rela.dyn may be retained due to a reference. Don't crash. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o +# RUN: ld.lld -shared --emit-relocs -T %s %t.o -o %t +# RUN: llvm-readelf -S %t | FileCheck %s + +## Note, sh_link of such an empty .rela.dyn is 0. +# CHECK: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK: .rela.dyn RELA 0000000000000000 001000 000000 18 A 0 0 8 + +SECTIONS { + .rela.dyn : { *(.rela*) } + __rela_offset = ABSOLUTE(ADDR(.rela.dyn)); +} From c0619d3b21cd420b9faf15f14db0816787c44ded Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Wed, 16 Dec 2020 17:00:18 +0000 Subject: [PATCH 23/39] [NFC] Use regex for code object version in hip tests [NFC] Use regex for code object version in hip tests Extracted from D93258. Makes tests robust to changes in default code object version. 
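For example (the same pattern is applied throughout the updated tests), a check line such as

  // CHECK: "-mllvm" "--amdhsa-code-object-version=4"

becomes

  // CHECK: "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}"

so the tests keep passing when the default code object version changes.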
Reviewed By: t-tye Differential Revision: https://reviews.llvm.org/D93398 --- clang/test/Driver/hip-autolink.hip | 2 +- clang/test/Driver/hip-code-object-version.hip | 11 +++++-- clang/test/Driver/hip-device-compile.hip | 2 +- clang/test/Driver/hip-host-cpu-features.hip | 6 ++-- clang/test/Driver/hip-rdc-device-only.hip | 32 +++++++++---------- clang/test/Driver/hip-target-id.hip | 6 ++-- clang/test/Driver/hip-toolchain-mllvm.hip | 4 +-- clang/test/Driver/hip-toolchain-no-rdc.hip | 8 ++--- clang/test/Driver/hip-toolchain-opt.hip | 2 +- .../Driver/hip-toolchain-rdc-separate.hip | 8 ++--- .../Driver/hip-toolchain-rdc-static-lib.hip | 8 ++--- clang/test/Driver/hip-toolchain-rdc.hip | 8 ++--- 12 files changed, 51 insertions(+), 46 deletions(-) diff --git a/clang/test/Driver/hip-autolink.hip b/clang/test/Driver/hip-autolink.hip index 073c6c4d244a6..5f9311d7ba734 100644 --- a/clang/test/Driver/hip-autolink.hip +++ b/clang/test/Driver/hip-autolink.hip @@ -7,7 +7,7 @@ // RUN: %clang --target=i386-pc-windows-msvc --cuda-gpu-arch=gfx906 -nogpulib \ // RUN: --cuda-host-only %s -### 2>&1 | FileCheck --check-prefix=HOST %s -// DEV: "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// DEV: "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // DEV-SAME: "-fno-autolink" // HOST: "-cc1" "-triple" "i386-pc-windows-msvc{{.*}}" diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip index 26ad6f8710cc2..51d9004b0cbf5 100644 --- a/clang/test/Driver/hip-code-object-version.hip +++ b/clang/test/Driver/hip-code-object-version.hip @@ -44,12 +44,17 @@ // RUN: --offload-arch=gfx906 -nogpulib \ // RUN: %s 2>&1 | FileCheck -check-prefix=V4 %s +// V4: "-mllvm" "--amdhsa-code-object-version=4" +// V4: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906" + +// Check bundle ID for code object version default + // RUN: %clang -### -target x86_64-linux-gnu \ // RUN: --offload-arch=gfx906 -nogpulib \ -// RUN: %s 2>&1 | FileCheck -check-prefix=V4 %s +// RUN: %s 2>&1 | FileCheck -check-prefix=VD %s -// V4: "-mllvm" "--amdhsa-code-object-version=4" -// V4: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906" +// VD: "-mllvm" "--amdhsa-code-object-version=4" +// VD: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906" // Check invalid code object version option. 
diff --git a/clang/test/Driver/hip-device-compile.hip b/clang/test/Driver/hip-device-compile.hip index 5fbcbc97bd805..c460ff7e8c67d 100644 --- a/clang/test/Driver/hip-device-compile.hip +++ b/clang/test/Driver/hip-device-compile.hip @@ -26,7 +26,7 @@ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM %s -// CHECK: {{".*clang.*"}} "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: {{".*clang.*"}} "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // BC-SAME: "-emit-llvm-bc" // LL-SAME: "-emit-llvm" diff --git a/clang/test/Driver/hip-host-cpu-features.hip b/clang/test/Driver/hip-host-cpu-features.hip index 235f0f1f22c24..8addfb11dc0b6 100644 --- a/clang/test/Driver/hip-host-cpu-features.hip +++ b/clang/test/Driver/hip-host-cpu-features.hip @@ -6,14 +6,14 @@ // RUN: %clang -### -c -target x86_64-linux-gnu -msse3 --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTSSE3 // RUN: %clang -### -c -target x86_64-linux-gnu --gpu-use-aux-triple-only -march=znver2 --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=NOHOSTCPU -// HOSTCPU: "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// HOSTCPU: "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // HOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // HOSTCPU-SAME: "-aux-target-cpu" "znver2" -// HOSTSSE3: "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// HOSTSSE3: "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // HOSTSSE3-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // HOSTSSE3-SAME: "-aux-target-feature" "+sse3" -// NOHOSTCPU: "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// NOHOSTCPU: "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // NOHOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // NOHOSTCPU-NOT: "-aux-target-cpu" "znver2" diff --git a/clang/test/Driver/hip-rdc-device-only.hip b/clang/test/Driver/hip-rdc-device-only.hip index c2e3cf2b41768..541cbf848d660 100644 --- a/clang/test/Driver/hip-rdc-device-only.hip +++ b/clang/test/Driver/hip-rdc-device-only.hip @@ -47,7 +47,7 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck -check-prefix=SAVETEMP %s -// COMMON: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// COMMON: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" @@ -59,7 +59,7 @@ // EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip" // CHECK-SAME: {{.*}} {{".*a.cu"}} -// COMMON: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// COMMON: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" @@ -71,7 +71,7 @@ // EMITLL-SAME: {{.*}} "-o" {{"a.*ll"}} "-x" "hip" // COMMON-SAME: {{.*}} {{".*a.cu"}} -// COMMON: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// COMMON: [[CLANG]] "-cc1" "-mllvm" 
"--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" @@ -83,7 +83,7 @@ // EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip" // COMMON-SAME: {{.*}} {{".*b.hip"}} -// COMMON: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// COMMON: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // EMITBC-SAME: "-emit-llvm-bc" // EMITLL-SAME: "-emit-llvm" @@ -95,54 +95,54 @@ // EMITLL-SAME: {{.*}} "-o" {{"b.*ll"}} "-x" "hip" // COMMON-SAME: {{.*}} {{".*b.hip"}} -// SAVETEMP: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-E" // SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" // SAVETEMP-SAME: {{.*}} "-o" [[A_GFX803_CUI:"a.*cui"]] "-x" "hip" {{".*a.cu"}} -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-emit-llvm-bc" // SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" // SAVETEMP-SAME: {{.*}} "-o" [[A_GFX803_TMP_BC:"a.*tmp.bc"]] "-x" "hip-cpp-output" [[A_GFX803_CUI]] -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-emit-llvm" // SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803" // SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX803_TMP_BC]] -// SAVETEMP: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-E" // SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" // SAVETEMP-SAME: {{.*}} "-o" [[A_GFX900_CUI:"a.*cui"]] "-x" "hip" {{".*a.cu"}} -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-emit-llvm-bc" // SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" // SAVETEMP-SAME: {{.*}} "-o" [[A_GFX900_TMP_BC:"a.*tmp.bc"]] "-x" "hip-cpp-output" [[A_GFX900_CUI]] -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-emit-llvm" // SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900" // SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX900_TMP_BC]] -// SAVETEMP: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-E" // SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" // SAVETEMP-SAME: {{.*}} "-o" [[B_GFX803_CUI:"b.*cui"]] "-x" "hip" {{".*b.hip"}} -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-emit-llvm-bc" // SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" // SAVETEMP-SAME: {{.*}} "-o" [[B_GFX803_TMP_BC:"b.*tmp.bc"]] "-x" "hip-cpp-output" [[B_GFX803_CUI]] -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-emit-llvm" // SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803" // SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX803_TMP_BC]] -// SAVETEMP: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-E" // SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" // SAVETEMP-SAME: {{.*}} "-o" [[B_GFX900_CUI:"b.*cui"]] "-x" "hip" {{".*b.hip"}} -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-emit-llvm-bc" // SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" // SAVETEMP-SAME: {{.*}} "-o" [[B_GFX900_TMP_BC:"b.*tmp.bc"]] "-x" "hip-cpp-output" [[B_GFX900_CUI]] -// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" +// SAVETEMP-NEXT: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" // SAVETEMP-SAME: "-emit-llvm" // SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900" // SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX900_TMP_BC]] diff --git a/clang/test/Driver/hip-target-id.hip b/clang/test/Driver/hip-target-id.hip index 9f36b47d63ebc..aee44a1dcd1c0 100644 --- a/clang/test/Driver/hip-target-id.hip +++ b/clang/test/Driver/hip-target-id.hip @@ -25,12 +25,12 @@ // RUN: -fgpu-rdc \ // RUN: %s 2>&1 | FileCheck --check-prefixes=CHECK %s -// CHECK: [[CLANG:"[^"]*clang[^"]*"]] 
"-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG:"[^"]*clang[^"]*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-target-cpu" "gfx908" // CHECK-SAME: "-target-feature" "+sramecc" // CHECK-SAME: "-target-feature" "+xnack" -// TMP: [[CLANG:"[^"]*clang[^"]*"]] "-cc1as" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// TMP: [[CLANG:"[^"]*clang[^"]*"]] "-cc1as" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // TMP-SAME: "-target-cpu" "gfx908" // TMP-SAME: "-target-feature" "+sramecc" // TMP-SAME: "-target-feature" "+xnack" @@ -38,7 +38,7 @@ // CHECK: [[LLD:"[^"]*lld[^"]*"]] {{.*}} "-plugin-opt=mcpu=gfx908" // CHECK-SAME: "-plugin-opt=-mattr=+sramecc,+xnack" -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-target-cpu" "gfx908" // CHECK-SAME: "-target-feature" "-sramecc" // CHECK-SAME: "-target-feature" "+xnack" diff --git a/clang/test/Driver/hip-toolchain-mllvm.hip b/clang/test/Driver/hip-toolchain-mllvm.hip index 5ea6bb36179e2..4298b787fe94e 100644 --- a/clang/test/Driver/hip-toolchain-mllvm.hip +++ b/clang/test/Driver/hip-toolchain-mllvm.hip @@ -12,7 +12,7 @@ // RUN: -fgpu-rdc -mllvm -amdgpu-function-calls=0 \ // RUN: %s 2>&1 | FileCheck -check-prefixes=CHECK,RDC %s -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: {{.*}} "-target-cpu" "gfx803" // CHECK-SAME: {{.*}} "-mllvm" "-amdgpu-function-calls=0" {{.*}} @@ -21,7 +21,7 @@ // CHECK-NOT: {{".*llc"}} // RDC: [[LLD:".*lld.*"]] {{.*}} "-plugin-opt=-amdgpu-function-calls=0" -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: {{.*}} "-target-cpu" "gfx900" // CHECK-SAME: {{.*}} "-mllvm" "-amdgpu-function-calls=0" {{.*}} diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip index 3cda314a8569e..659ae25c418ee 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -34,7 +34,7 @@ // Compile device code in a.cu to code object for gfx803. // -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -57,7 +57,7 @@ // Compile device code in a.cu to code object for gfx900. 
// -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -97,7 +97,7 @@ // Compile device code in b.hip to code object for gfx803. // -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" @@ -120,7 +120,7 @@ // Compile device code in b.hip to code object for gfx900. // -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" diff --git a/clang/test/Driver/hip-toolchain-opt.hip b/clang/test/Driver/hip-toolchain-opt.hip index e027ebc40ea3e..a43456f1d5e36 100644 --- a/clang/test/Driver/hip-toolchain-opt.hip +++ b/clang/test/Driver/hip-toolchain-opt.hip @@ -58,7 +58,7 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck --check-prefixes=ALL,Og %s -// ALL: "{{.*}}clang{{.*}}" "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// ALL: "{{.*}}clang{{.*}}" "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // DEFAULT-NOT: "-O{{.}}" // O0-SAME: "-O0" // O1-SAME: "-O1" diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip index 96493c8627d1b..5cf1b4f26850b 100644 --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -12,7 +12,7 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -23,7 +23,7 @@ // CHECK-SAME: {{.*}} "-o" "[[A_BC1:.*bc]]" "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -44,7 +44,7 @@ // CHECK-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900,host-x86_64-unknown-linux-gnu" // CHECK-SAME: "-outputs=[[A_O:.*a.o]]" "-inputs=[[A_BC1]],[[A_BC2]],[[A_OBJ_HOST]]" -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // 
CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" @@ -55,7 +55,7 @@ // CHECK-SAME: {{.*}} "-o" "[[B_BC1:.*bc]]" "-x" "hip" // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" diff --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip index 630cb2f059629..b11cb88a64285 100644 --- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip +++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip @@ -26,7 +26,7 @@ // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] // generate image for device side path on gfx803 -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -35,7 +35,7 @@ // CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC]] -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" @@ -52,7 +52,7 @@ // CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[A_BC1]] [[B_BC1]] // generate image for device side path on gfx900 -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -61,7 +61,7 @@ // CHECK-SAME: {{.*}} "-o" [[A_BC2:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC]] -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index 826554f4feca3..7a8f29afda3b5 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -32,7 +32,7 @@ // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]] // generate image for device side path on gfx803 -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -43,7 +43,7 @@ // CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC]] -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" 
"--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" @@ -62,7 +62,7 @@ // CHECK-SAME: "-o" "[[IMG_DEV1:.*.out]]" [[A_BC1]] [[B_BC1]] // generate image for device side path on gfx900 -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -72,7 +72,7 @@ // CHECK-SAME: {{.*}} "-o" [[A_BC2:".*bc"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC]] -// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version=4" "-triple" "amdgcn-amd-amdhsa" +// CHECK: [[CLANG]] "-cc1" "-mllvm" "--amdhsa-code-object-version={{[0-9]+}}" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" From fa3693ad0b34ef1d64f49e3d3dd10865b9fb7a8b Mon Sep 17 00:00:00 2001 From: Whitney Tsang Date: Wed, 16 Dec 2020 17:05:44 +0000 Subject: [PATCH 24/39] [LoopNest] Handle loop-nest passes in LoopPassManager Per http://llvm.org/OpenProjects.html#llvm_loopnest, the goal of this patch (and other following patches) is to create facilities that allow implementing loop nest passes that run on top-level loop nests for the New Pass Manager. This patch extends the functionality of LoopPassManager to handle loop-nest passes by specializing the definition of LoopPassManager that accepts both kinds of passes in addPass. Only loop passes are executed if L is not a top-level one, and both kinds of passes are executed if L is top-level. Currently, loop nest passes should have the following run method: PreservedAnalyses run(LoopNest &, LoopAnalysisManager &, LoopStandardAnalysisResults &, LPMUpdater &); Reviewed By: Whitney, ychen Differential Revision: https://reviews.llvm.org/D87045 --- llvm/include/llvm/Analysis/LoopNestAnalysis.h | 2 + .../llvm/Transforms/Scalar/LoopPassManager.h | 161 +++++++++++++++++- llvm/lib/Analysis/LoopNestAnalysis.cpp | 2 + .../lib/Transforms/Scalar/LoopPassManager.cpp | 148 ++++++++++++---- .../unittests/IR/PassBuilderCallbacksTest.cpp | 152 ++++++++++++++++- .../Transforms/Scalar/LoopPassManagerTest.cpp | 38 +++++ 6 files changed, 452 insertions(+), 51 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 792958a312ce3..4d77d735819fd 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -128,6 +128,8 @@ class LoopNest { [](const Loop *L) { return L->isLoopSimplifyForm(); }); } + StringRef getName() const { return Loops.front()->getName(); } + protected: const unsigned MaxPerfectDepth; // maximum perfect nesting depth level. LoopVectorTy Loops; // the loops in the nest (in breadth first order). 
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 4ac12061e79e1..a1f43aa6d4044 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -45,6 +45,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" @@ -67,13 +68,136 @@ class LPMUpdater; // See the comments on the definition of the specialization for details on how // it differs from the primary template. template <> -PreservedAnalyses -PassManager::run(Loop &InitialL, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AnalysisResults, - LPMUpdater &U); -extern template class PassManager; +class PassManager + : public PassInfoMixin< + PassManager> { +private: + template + using HasRunOnLoopT = decltype(std::declval().run( + std::declval(), std::declval(), + std::declval(), + std::declval())); + +public: + /// Construct a pass manager. + /// + /// If \p DebugLogging is true, we'll log our progress to llvm::dbgs(). + explicit PassManager(bool DebugLogging = false) + : DebugLogging(DebugLogging) {} + + // FIXME: These are equivalent to the default move constructor/move + // assignment. However, using = default triggers linker errors due to the + // explicit instantiations below. Find a way to use the default and remove the + // duplicated code here. + PassManager(PassManager &&Arg) + : IsLoopNestPass(std::move(Arg.IsLoopNestPass)), + LoopPasses(std::move(Arg.LoopPasses)), + LoopNestPasses(std::move(Arg.LoopNestPasses)), + DebugLogging(std::move(Arg.DebugLogging)) {} + + PassManager &operator=(PassManager &&RHS) { + IsLoopNestPass = std::move(RHS.IsLoopNestPass); + LoopPasses = std::move(RHS.LoopPasses); + LoopNestPasses = std::move(RHS.LoopNestPasses); + DebugLogging = std::move(RHS.DebugLogging); + return *this; + } + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); + + /// Add either a loop pass or a loop-nest pass to the pass manager. Append \p + /// Pass to the list of loop passes if it has a dedicated \fn run() method for + /// loops and to the list of loop-nest passes if the \fn run() method is for + /// loop-nests instead. Also append whether \p Pass is loop-nest pass or not + /// to the end of \var IsLoopNestPass so we can easily identify the types of + /// passes in the pass manager later. + template + std::enable_if_t::value> + addPass(PassT Pass) { + using LoopPassModelT = + detail::PassModel; + IsLoopNestPass.push_back(false); + LoopPasses.emplace_back(new LoopPassModelT(std::move(Pass))); + } + + template + std::enable_if_t::value> + addPass(PassT Pass) { + using LoopNestPassModelT = + detail::PassModel; + IsLoopNestPass.push_back(true); + LoopNestPasses.emplace_back(new LoopNestPassModelT(std::move(Pass))); + } + + // Specializations of `addPass` for `RepeatedPass`. These are necessary since + // `RepeatedPass` has a templated `run` method that will result in incorrect + // detection of `HasRunOnLoopT`. 
+ template + std::enable_if_t::value> + addPass(RepeatedPass Pass) { + using RepeatedLoopPassModelT = + detail::PassModel, PreservedAnalyses, + LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>; + IsLoopNestPass.push_back(false); + LoopPasses.emplace_back(new RepeatedLoopPassModelT(std::move(Pass))); + } + + template + std::enable_if_t::value> + addPass(RepeatedPass Pass) { + using RepeatedLoopNestPassModelT = + detail::PassModel, PreservedAnalyses, + LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>; + IsLoopNestPass.push_back(true); + LoopNestPasses.emplace_back( + new RepeatedLoopNestPassModelT(std::move(Pass))); + } + + bool isEmpty() const { return LoopPasses.empty() && LoopNestPasses.empty(); } + + static bool isRequired() { return true; } + +protected: + using LoopPassConceptT = + detail::PassConcept; + using LoopNestPassConceptT = + detail::PassConcept; + + // BitVector that identifies whether the passes are loop passes or loop-nest + // passes (true for loop-nest passes). + BitVector IsLoopNestPass; + std::vector> LoopPasses; + std::vector> LoopNestPasses; + + /// Flag indicating whether we should do debug logging. + bool DebugLogging; + + /// Run either a loop pass or a loop-nest pass. Returns `None` if + /// PassInstrumentation's BeforePass returns false. Otherwise, returns the + /// preserved analyses of the pass. + template + Optional + runSinglePass(IRUnitT &IR, PassT &Pass, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U, + PassInstrumentation &PI); + + PreservedAnalyses runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U); + PreservedAnalyses runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U); +}; /// The Loop pass manager. /// @@ -223,6 +347,29 @@ class LPMUpdater { : Worklist(Worklist), LAM(LAM) {} }; +template +Optional LoopPassManager::runSinglePass( + IRUnitT &IR, PassT &Pass, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U, PassInstrumentation &PI) { + // Check the PassInstrumentation's BeforePass callbacks before running the + // pass, skip its execution completely if asked to (callback returns false). + if (!PI.runBeforePass(*Pass, IR)) + return None; + + PreservedAnalyses PA; + { + TimeTraceScope TimeScope(Pass->name(), IR.getName()); + PA = Pass->run(IR, AM, AR, U); + } + + // do not pass deleted Loop into the instrumentation + if (U.skipCurrentLoop()) + PI.runAfterPassInvalidated(*Pass, PA); + else + PI.runAfterPass(*Pass, IR, PA); + return PA; +} + /// Adaptor that maps from a function to its loops. 
/// /// Designed to allow composition of a LoopPass(Manager) and a diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp index 1e322e15f74c4..ef10b7e974614 100644 --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -306,6 +306,8 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, return true; } +AnalysisKey LoopNestAnalysis::Key; + raw_ostream &llvm::operator<<(raw_ostream &OS, const LoopNest &LN) { OS << "IsPerfect="; if (LN.getMaxPerfectDepth() == LN.getNestDepth()) diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index 90e23c88cb846..809f43eb4dd8a 100644 --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -12,58 +12,101 @@ using namespace llvm; -// Explicit template instantiations and specialization defininitions for core -// template typedefs. namespace llvm { -template class PassManager; /// Explicitly specialize the pass manager's run method to handle loop nest /// structure updates. -template <> PreservedAnalyses PassManager::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { - PreservedAnalyses PA = PreservedAnalyses::all(); if (DebugLogging) dbgs() << "Starting Loop pass manager run.\n"; + // Runs loop-nest passes only when the current loop is a top-level one. + PreservedAnalyses PA = (L.isOutermost() && !LoopNestPasses.empty()) + ? runWithLoopNestPasses(L, AM, AR, U) + : runWithoutLoopNestPasses(L, AM, AR, U); + + // Invalidation for the current loop should be handled above, and other loop + // analysis results shouldn't be impacted by runs over this loop. Therefore, + // the remaining analysis results in the AnalysisManager are preserved. We + // mark this with a set so that we don't need to inspect each one + // individually. + // FIXME: This isn't correct! This loop and all nested loops' analyses should + // be preserved, but unrolling should invalidate the parent loop's analyses. + PA.preserveSet>(); + + if (DebugLogging) + dbgs() << "Finished Loop pass manager run.\n"; + + return PA; +} + +// Run both loop passes and loop-nest passes on top-level loop \p L. +PreservedAnalyses +LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + assert(L.isOutermost() && + "Loop-nest passes should only run on top-level loops."); + PreservedAnalyses PA = PreservedAnalyses::all(); + // Request PassInstrumentation from analysis manager, will use it to run // instrumenting callbacks for the passes later. PassInstrumentation PI = AM.getResult(L, AR); - for (auto &Pass : Passes) { - // Check the PassInstrumentation's BeforePass callbacks before running the - // pass, skip its execution completely if asked to (callback returns false). - if (!PI.runBeforePass(*Pass, L)) - continue; - PreservedAnalyses PassPA; - { - TimeTraceScope TimeScope(Pass->name(), L.getName()); - PassPA = Pass->run(L, AM, AR, U); + unsigned LoopPassIndex = 0, LoopNestPassIndex = 0; + + // `LoopNestPtr` points to the `LoopNest` object for the current top-level + // loop and `IsLoopNestPtrValid` indicates whether the pointer is still valid. + // The `LoopNest` object will have to be re-constructed if the pointer is + // invalid when encountering a loop-nest pass. 
+ std::unique_ptr LoopNestPtr; + bool IsLoopNestPtrValid = false; + + for (size_t I = 0, E = IsLoopNestPass.size(); I != E; ++I) { + Optional PassPA; + if (!IsLoopNestPass[I]) { + // The `I`-th pass is a loop pass. + auto &Pass = LoopPasses[LoopPassIndex++]; + PassPA = runSinglePass(L, Pass, AM, AR, U, PI); + } else { + // The `I`-th pass is a loop-nest pass. + auto &Pass = LoopNestPasses[LoopNestPassIndex++]; + + // If the loop-nest object calculated before is no longer valid, + // re-calculate it here before running the loop-nest pass. + if (!IsLoopNestPtrValid) { + LoopNestPtr = LoopNest::getLoopNest(L, AR.SE); + IsLoopNestPtrValid = true; + } + PassPA = runSinglePass(*LoopNestPtr, Pass, AM, AR, U, PI); } - // do not pass deleted Loop into the instrumentation - if (U.skipCurrentLoop()) - PI.runAfterPassInvalidated(*Pass, PassPA); - else - PI.runAfterPass(*Pass, L, PassPA); + // `PassPA` is `None` means that the before-pass callbacks in + // `PassInstrumentation` return false. The pass does not run in this case, + // so we can skip the following procedure. + if (!PassPA) + continue; // If the loop was deleted, abort the run and return to the outer walk. if (U.skipCurrentLoop()) { - PA.intersect(std::move(PassPA)); + PA.intersect(std::move(*PassPA)); break; } // Update the analysis manager as each pass runs and potentially // invalidates analyses. - AM.invalidate(L, PassPA); + AM.invalidate(L, *PassPA); // Finally, we intersect the final preserved analyses to compute the // aggregate preserved set for this pass manager. - PA.intersect(std::move(PassPA)); + PA.intersect(std::move(*PassPA)); + + // Check if the current pass preserved the loop-nest object or not. + IsLoopNestPtrValid &= PassPA->getChecker().preserved(); // FIXME: Historically, the pass managers all called the LLVM context's // yield function here. We don't have a generic way to acquire the @@ -71,22 +114,53 @@ PassManager>(); +// Run all loop passes on loop \p L. Loop-nest passes don't run either because +// \p L is not a top-level one or simply because there are no loop-nest passes +// in the pass manager at all. +PreservedAnalyses +LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + PreservedAnalyses PA = PreservedAnalyses::all(); - if (DebugLogging) - dbgs() << "Finished Loop pass manager run.\n"; + // Request PassInstrumentation from analysis manager, will use it to run + // instrumenting callbacks for the passes later. + PassInstrumentation PI = AM.getResult(L, AR); + for (auto &Pass : LoopPasses) { + Optional PassPA = runSinglePass(L, Pass, AM, AR, U, PI); + // `PassPA` is `None` means that the before-pass callbacks in + // `PassInstrumentation` return false. The pass does not run in this case, + // so we can skip the following procedure. + if (!PassPA) + continue; + + // If the loop was deleted, abort the run and return to the outer walk. + if (U.skipCurrentLoop()) { + PA.intersect(std::move(*PassPA)); + break; + } + + // Update the analysis manager as each pass runs and potentially + // invalidates analyses. + AM.invalidate(L, *PassPA); + + // Finally, we intersect the final preserved analyses to compute the + // aggregate preserved set for this pass manager. + PA.intersect(std::move(*PassPA)); + + // FIXME: Historically, the pass managers all called the LLVM context's + // yield function here. 
We don't have a generic way to acquire the + // context and it isn't yet clear what the right pattern is for yielding + // in the new pass manager so it is currently omitted. + // ...getContext().yield(); + } return PA; } -} +} // namespace llvm PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, FunctionAnalysisManager &AM) { @@ -152,8 +226,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, PI.pushBeforeNonSkippedPassCallback([&LAR, &LI](StringRef PassID, Any IR) { if (isSpecialPass(PassID, {"PassManager"})) return; - assert(any_isa(IR)); - const Loop *L = any_cast(IR); + assert(any_isa(IR) || any_isa(IR)); + const Loop *L = any_isa(IR) + ? any_cast(IR) + : &any_cast(IR)->getOutermostLoop(); assert(L && "Loop should be valid for printing"); // Verify the loop structure and LCSSA form before visiting the loop. diff --git a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp index a4366e10bd68f..edd46b8521d6d 100644 --- a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp +++ b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp @@ -174,6 +174,22 @@ struct MockPassHandle MockPassHandle() { setDefaults(); } }; +template <> +struct MockPassHandle + : MockPassHandleBase, LoopNest, + LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &> { + MOCK_METHOD4(run, + PreservedAnalyses(LoopNest &, LoopAnalysisManager &, + LoopStandardAnalysisResults &, LPMUpdater &)); + static void invalidateLoopNest(LoopNest &L, LoopAnalysisManager &, + LoopStandardAnalysisResults &, + LPMUpdater &Updater) { + Updater.markLoopAsDeleted(L.getOutermostLoop(), L.getName()); + } + MockPassHandle() { setDefaults(); } +}; + template <> struct MockPassHandle : MockPassHandleBase, Function> { @@ -284,6 +300,8 @@ template <> std::string getName(const llvm::Any &WrappedIR) { return any_cast(WrappedIR)->getName().str(); if (any_isa(WrappedIR)) return any_cast(WrappedIR)->getName().str(); + if (any_isa(WrappedIR)) + return any_cast(WrappedIR)->getName().str(); if (any_isa(WrappedIR)) return any_cast(WrappedIR)->getName(); return ""; @@ -384,6 +402,11 @@ struct MockPassInstrumentationCallbacks { } }; +template +using ExtraMockPassHandle = + std::conditional_t::value, + MockPassHandle, MockPassHandle>; + template class PassBuilderCallbacksTest; /// This test fixture is shared between all the actual tests below and @@ -416,6 +439,8 @@ class PassBuilderCallbacksTest PassHandle; + ExtraMockPassHandle ExtraPassHandle; + MockAnalysisHandle AnalysisHandle; static PreservedAnalyses getAnalysisResult(IRUnitT &U, AnalysisManagerT &AM, @@ -475,6 +500,8 @@ class PassBuilderCallbacksTest::value) + PM.addPass(ExtraPassHandle.getPass()); return true; } return false; @@ -781,6 +808,7 @@ TEST_F(LoopCallbacksTest, Passes) { EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _)); EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _)) .WillOnce(WithArgs<0, 1, 2>(Invoke(getAnalysisResult))); + EXPECT_CALL(ExtraPassHandle, run(HasName("loop"), _, _, _)); StringRef PipelineText = "test-transform"; ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) @@ -798,6 +826,7 @@ TEST_F(LoopCallbacksTest, InstrumentedPasses) { EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _)); EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _)) .WillOnce(WithArgs<0, 1, 2>(Invoke(getAnalysisResult))); + EXPECT_CALL(ExtraPassHandle, run(HasName("loop"), _, _, _)); // PassInstrumentation calls should happen in-sequence, in the same order // as passes/analyses are scheduled. 
@@ -821,6 +850,19 @@ TEST_F(LoopCallbacksTest, InstrumentedPasses) { runAfterPass(HasNameRegex("MockPassHandle"), HasName("loop"), _)) .InSequence(PISequence); + EXPECT_CALL(CallbacksHandle, + runBeforePass(HasNameRegex("MockPassHandle<.*LoopNest>"), + HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL(CallbacksHandle, + runBeforeNonSkippedPass( + HasNameRegex("MockPassHandle<.*LoopNest>"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL(CallbacksHandle, + runAfterPass(HasNameRegex("MockPassHandle<.*LoopNest>"), + HasName("loop"), _)) + .InSequence(PISequence); + // Our mock pass does not invalidate IR. EXPECT_CALL(CallbacksHandle, runAfterPassInvalidated(HasNameRegex("MockPassHandle"), _)) @@ -887,6 +929,77 @@ TEST_F(LoopCallbacksTest, InstrumentedInvalidatingPasses) { PM.run(*M, AM); } +TEST_F(LoopCallbacksTest, InstrumentedInvalidatingLoopNestPasses) { + CallbacksHandle.registerPassInstrumentation(); + // Non-mock instrumentation not specifically mentioned below can be ignored. + CallbacksHandle.ignoreNonMockPassInstrumentation(""); + CallbacksHandle.ignoreNonMockPassInstrumentation("foo"); + CallbacksHandle.ignoreNonMockPassInstrumentation("loop"); + + EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _)); + EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _)) + .WillOnce(WithArgs<0, 1, 2>(Invoke(getAnalysisResult))); + EXPECT_CALL(ExtraPassHandle, run(HasName("loop"), _, _, _)) + .WillOnce(DoAll(Invoke(ExtraPassHandle.invalidateLoopNest), + Invoke([&](LoopNest &, LoopAnalysisManager &, + LoopStandardAnalysisResults &, LPMUpdater &) { + return PreservedAnalyses::all(); + }))); + + // PassInstrumentation calls should happen in-sequence, in the same order + // as passes/analyses are scheduled. + ::testing::Sequence PISequence; + EXPECT_CALL(CallbacksHandle, + runBeforePass(HasNameRegex("MockPassHandle"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL( + CallbacksHandle, + runBeforeNonSkippedPass(HasNameRegex("MockPassHandle"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL( + CallbacksHandle, + runBeforeAnalysis(HasNameRegex("MockAnalysisHandle"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL( + CallbacksHandle, + runAfterAnalysis(HasNameRegex("MockAnalysisHandle"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL(CallbacksHandle, + runAfterPass(HasNameRegex("MockPassHandle"), HasName("loop"), _)) + .InSequence(PISequence); + + EXPECT_CALL(CallbacksHandle, + runBeforePass(HasNameRegex("MockPassHandle<.*LoopNest>"), + HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL(CallbacksHandle, + runBeforeNonSkippedPass( + HasNameRegex("MockPassHandle<.*LoopNest>"), HasName("loop"))) + .InSequence(PISequence); + EXPECT_CALL( + CallbacksHandle, + runAfterPassInvalidated(HasNameRegex("MockPassHandle<.*LoopNest>"), _)) + .InSequence(PISequence); + + EXPECT_CALL(CallbacksHandle, + runAfterPassInvalidated(HasNameRegex("^PassManager"), _)) + .InSequence(PISequence); + + // Our mock pass invalidates IR, thus normal runAfterPass is never called. 
+ EXPECT_CALL(CallbacksHandle, runAfterPassInvalidated( + HasNameRegex("MockPassHandle<.*Loop>"), _)) + .Times(0); + EXPECT_CALL(CallbacksHandle, + runAfterPass(HasNameRegex("MockPassHandle<.*LoopNest>"), + HasName("loop"), _)) + .Times(0); + + StringRef PipelineText = "test-transform"; + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()) + << "Pipeline was: " << PipelineText; + PM.run(*M, AM); +} + TEST_F(LoopCallbacksTest, InstrumentedSkippedPasses) { CallbacksHandle.registerPassInstrumentation(); // Non-mock instrumentation run here can safely be ignored. @@ -895,28 +1008,51 @@ TEST_F(LoopCallbacksTest, InstrumentedSkippedPasses) { CallbacksHandle.ignoreNonMockPassInstrumentation("loop"); // Skip the pass by returning false. + EXPECT_CALL( + CallbacksHandle, + runBeforePass(HasNameRegex("MockPassHandle<.*Loop>"), HasName("loop"))) + .WillOnce(Return(false)); + EXPECT_CALL(CallbacksHandle, - runBeforePass(HasNameRegex("MockPassHandle"), HasName("loop"))) + runBeforeSkippedPass(HasNameRegex("MockPassHandle<.*Loop>"), + HasName("loop"))) + .Times(1); + + EXPECT_CALL(CallbacksHandle, + runBeforePass(HasNameRegex("MockPassHandle<.*LoopNest>"), + HasName("loop"))) .WillOnce(Return(false)); - EXPECT_CALL( - CallbacksHandle, - runBeforeSkippedPass(HasNameRegex("MockPassHandle"), HasName("loop"))) + EXPECT_CALL(CallbacksHandle, + runBeforeSkippedPass(HasNameRegex("MockPassHandle<.*LoopNest>"), + HasName("loop"))) .Times(1); EXPECT_CALL(AnalysisHandle, run(HasName("loop"), _, _)).Times(0); EXPECT_CALL(PassHandle, run(HasName("loop"), _, _, _)).Times(0); + EXPECT_CALL(ExtraPassHandle, run(HasName("loop"), _, _, _)).Times(0); // As the pass is skipped there is no afterPass, beforeAnalysis/afterAnalysis // as well. - EXPECT_CALL(CallbacksHandle, - runBeforeNonSkippedPass(HasNameRegex("MockPassHandle"), _)) + EXPECT_CALL(CallbacksHandle, runBeforeNonSkippedPass( + HasNameRegex("MockPassHandle<.*Loop>"), _)) .Times(0); EXPECT_CALL(CallbacksHandle, - runAfterPass(HasNameRegex("MockPassHandle"), _, _)) + runAfterPass(HasNameRegex("MockPassHandle<.*Loop>"), _, _)) + .Times(0); + EXPECT_CALL(CallbacksHandle, runAfterPassInvalidated( + HasNameRegex("MockPassHandle<.*Loop>"), _)) + .Times(0); + EXPECT_CALL( + CallbacksHandle, + runBeforeNonSkippedPass(HasNameRegex("MockPassHandle<.*LoopNest>"), _)) .Times(0); EXPECT_CALL(CallbacksHandle, - runAfterPassInvalidated(HasNameRegex("MockPassHandle"), _)) + runAfterPass(HasNameRegex("MockPassHandle<.*LoopNest>"), _, _)) + .Times(0); + EXPECT_CALL( + CallbacksHandle, + runAfterPassInvalidated(HasNameRegex("MockPassHandle<.*LoopNest>"), _)) .Times(0); EXPECT_CALL(CallbacksHandle, runBeforeAnalysis(HasNameRegex("MockAnalysisHandle"), _)) diff --git a/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp b/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp index c5b3e29d2a78a..fc41bfa00ead6 100644 --- a/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp +++ b/llvm/unittests/Transforms/Scalar/LoopPassManagerTest.cpp @@ -193,6 +193,16 @@ struct MockLoopPassHandle MockLoopPassHandle() { setDefaults(); } }; +struct MockLoopNestPassHandle + : MockPassHandleBase { + MOCK_METHOD4(run, + PreservedAnalyses(LoopNest &, LoopAnalysisManager &, + LoopStandardAnalysisResults &, LPMUpdater &)); + + MockLoopNestPassHandle() { setDefaults(); } +}; + struct MockFunctionPassHandle : MockPassHandleBase { MOCK_METHOD2(run, PreservedAnalyses(Function &, FunctionAnalysisManager &)); @@ -242,6 +252,7 @@ class LoopPassManagerTest : public ::testing::Test { 
MockLoopAnalysisHandle MLAHandle; MockLoopPassHandle MLPHandle; + MockLoopNestPassHandle MLNPHandle; MockFunctionPassHandle MFPHandle; MockModulePassHandle MMPHandle; @@ -1590,4 +1601,31 @@ TEST_F(LoopPassManagerTest, LoopDeletion) { MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); MPM.run(*M, MAM); } + +TEST_F(LoopPassManagerTest, HandleLoopNestPass) { + ::testing::InSequence MakeExpectationsSequenced; + + EXPECT_CALL(MLPHandle, run(HasName("loop.0.0"), _, _, _)).Times(2); + EXPECT_CALL(MLPHandle, run(HasName("loop.0.1"), _, _, _)).Times(2); + EXPECT_CALL(MLPHandle, run(HasName("loop.0"), _, _, _)); + EXPECT_CALL(MLNPHandle, run(HasName("loop.0"), _, _, _)); + EXPECT_CALL(MLPHandle, run(HasName("loop.0"), _, _, _)); + EXPECT_CALL(MLNPHandle, run(HasName("loop.0"), _, _, _)); + EXPECT_CALL(MLPHandle, run(HasName("loop.g.0"), _, _, _)); + EXPECT_CALL(MLNPHandle, run(HasName("loop.g.0"), _, _, _)); + EXPECT_CALL(MLPHandle, run(HasName("loop.g.0"), _, _, _)); + EXPECT_CALL(MLNPHandle, run(HasName("loop.g.0"), _, _, _)); + + LoopPassManager LPM(true); + LPM.addPass(MLPHandle.getPass()); + LPM.addPass(MLNPHandle.getPass()); + LPM.addPass(MLPHandle.getPass()); + LPM.addPass(MLNPHandle.getPass()); + + ModulePassManager MPM(true); + MPM.addPass(createModuleToFunctionPassAdaptor( + createFunctionToLoopPassAdaptor(std::move(LPM)))); + MPM.run(*M, MAM); } + +} // namespace From 2ea7210e3946b25ea52af061e8ae45d26877507a Mon Sep 17 00:00:00 2001 From: Esme-Yi Date: Wed, 16 Dec 2020 17:12:24 +0000 Subject: [PATCH 25/39] Revert "[PowerPC] Extend folding RLWINM + RLWINM to post-RA." This reverts commit 1c0941e1524f499e3fbde48fc3bdd0e70fc8f2e4. --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 77 ++------- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 3 +- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 2 +- .../lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 9 - .../CodeGen/PowerPC/fold-rlwinm-after-ra.mir | 163 ------------------ llvm/test/CodeGen/PowerPC/vsx_builtins.ll | 3 +- 6 files changed, 22 insertions(+), 235 deletions(-) delete mode 100644 llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index ed84377ade930..deac690a76118 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3244,64 +3244,18 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } -// This function tries to combine two RLWINMs. We not only perform such -// optimization in SSA, but also after RA, since some RLWINM is generated after -// RA. 
-bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI, - MachineInstr *&ToErase) const { - bool Is64Bit = false; - switch (MI.getOpcode()) { - case PPC::RLWINM: - case PPC::RLWINM_rec: - break; - case PPC::RLWINM8: - case PPC::RLWINM8_rec: - Is64Bit = true; - break; - default: - return false; - } +bool PPCInstrInfo::combineRLWINM(MachineInstr &MI, + MachineInstr **ToErase) const { MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); - Register FoldingReg = MI.getOperand(1).getReg(); - MachineInstr *SrcMI = nullptr; - bool CanErase = false; - bool OtherIntermediateUse = true; - if (MRI->isSSA()) { - if (!Register::isVirtualRegister(FoldingReg)) - return false; - SrcMI = MRI->getVRegDef(FoldingReg); - } else { - SrcMI = getDefMIPostRA(FoldingReg, MI, OtherIntermediateUse); - } - if (!SrcMI) + unsigned FoldingReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(FoldingReg)) return false; - // TODO: The pairs of RLWINM8(RLWINM) or RLWINM(RLWINM8) never occur before - // RA, but after RA. And We can fold RLWINM8(RLWINM) -> RLWINM8, or - // RLWINM(RLWINM8) -> RLWINM. - switch (SrcMI->getOpcode()) { - case PPC::RLWINM: - case PPC::RLWINM_rec: - if (Is64Bit) - return false; - break; - case PPC::RLWINM8: - case PPC::RLWINM8_rec: - if (!Is64Bit) - return false; - break; - default: + MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); + if (SrcMI->getOpcode() != PPC::RLWINM && + SrcMI->getOpcode() != PPC::RLWINM_rec && + SrcMI->getOpcode() != PPC::RLWINM8 && + SrcMI->getOpcode() != PPC::RLWINM8_rec) return false; - } - if (MRI->isSSA()) { - CanErase = !SrcMI->hasImplicitDef() && MRI->hasOneNonDBGUse(FoldingReg); - } else { - CanErase = !OtherIntermediateUse && MI.getOperand(1).isKill() && - !SrcMI->hasImplicitDef(); - // In post-RA, if SrcMI also defines the register to be forwarded, we can - // only do the folding if SrcMI is going to be erased. - if (!CanErase && SrcMI->definesRegister(SrcMI->getOperand(1).getReg())) - return false; - } assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && @@ -3312,6 +3266,7 @@ bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI, uint64_t MBMI = MI.getOperand(3).getImm(); uint64_t MESrc = SrcMI->getOperand(4).getImm(); uint64_t MEMI = MI.getOperand(4).getImm(); + assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) && "Invalid PPC::RLWINM Instruction!"); // If MBMI is bigger than MEMI, we always can not get run of ones. @@ -3355,6 +3310,8 @@ bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI, // If final mask is 0, MI result should be 0 too. if (FinalMask.isNullValue()) { + bool Is64Bit = + (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec); Simplified = true; LLVM_DEBUG(dbgs() << "Replace Instr: "); LLVM_DEBUG(MI.dump()); @@ -3412,10 +3369,12 @@ bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI, LLVM_DEBUG(dbgs() << "To: "); LLVM_DEBUG(MI.dump()); } - if (Simplified && CanErase) { - // If SrcMI has no implicit def, and FoldingReg has no non-debug use or - // its flag is "killed", it's safe to delete SrcMI. Otherwise keep it. - ToErase = SrcMI; + if (Simplified & MRI->use_nodbg_empty(FoldingReg) && + !SrcMI->hasImplicitDef()) { + // If FoldingReg has no non-debug use and it has no implicit def (it + // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI. + // Otherwise keep it. 
+ *ToErase = SrcMI; LLVM_DEBUG(dbgs() << "Delete dead instruction: "); LLVM_DEBUG(SrcMI->dump()); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index db63b013a923a..962456e784fae 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -585,8 +585,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef = nullptr) const; bool foldFrameOffset(MachineInstr &MI) const; - bool simplifyRotateAndMaskInstr(MachineInstr &MI, - MachineInstr *&ToErase) const; + bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const; bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const; bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const; bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 307fffae870ca..c28e09fc047eb 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -987,7 +987,7 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::RLWINM_rec: case PPC::RLWINM8: case PPC::RLWINM8_rec: { - Simplified = TII->simplifyRotateAndMaskInstr(MI, ToErase); + Simplified = TII->combineRLWINM(MI, &ToErase); if (Simplified) ++NumRotatesCollapsed; break; diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index f3def378a93c5..04749cdd61c41 100644 --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -37,8 +37,6 @@ STATISTIC(NumberOfSelfCopies, "Number of self copy instructions eliminated"); STATISTIC(NumFrameOffFoldInPreEmit, "Number of folding frame offset by using r+r in pre-emit peephole"); -STATISTIC(NumRotateInstrFoldInPreEmit, - "Number of folding Rotate instructions in pre-emit peephole"); static cl::opt EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), @@ -474,13 +472,6 @@ static bool hasPCRelativeForm(MachineInstr &Use) { LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); LLVM_DEBUG(MI.dump()); } - MachineInstr *ToErase = nullptr; - if (TII->simplifyRotateAndMaskInstr(MI, ToErase)) { - Changed = true; - NumRotateInstrFoldInPreEmit++; - if (ToErase) - InstrsToErase.push_back(ToErase); - } } // Eliminate conditional branch based on a constant CR bit by diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir deleted file mode 100644 index 182929053c63e..0000000000000 --- a/llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir +++ /dev/null @@ -1,163 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -stop-after \ -# RUN: ppc-pre-emit-peephole %s -o - | FileCheck %s - ---- -name: testFoldRLWINM -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINM - ; CHECK: liveins: $r3 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 27, 5, 31 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... 
---- -name: testFoldRLWINMSrcFullMask1 -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMSrcFullMask1 - ; CHECK: liveins: $r3 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 27, 0, 31 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINMSrcFullMask2 -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r2, $r3 - ; CHECK-LABEL: name: testFoldRLWINMSrcFullMask2 - ; CHECK: liveins: $r2, $r3 - ; CHECK: renamable $r3 = RLWINM $r2, 14, 10, 1, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r2, 27, 10, 9 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 1, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINMSrcWrapped -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMSrcWrapped - ; CHECK: liveins: $r3 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 11, 12, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 27, 30, 10 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINMUserWrapped -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMUserWrapped - ; CHECK: liveins: $r3 - ; CHECK: $r3 = RLWINM killed $r3, 10, 5, 31 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 10, 30, 5, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 10, 5, 31 - dead renamable $r3 = RLWINM killed renamable $r3, 10, 30, 5, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINMResultWrapped -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMResultWrapped - ; CHECK: liveins: $r3 - ; CHECK: $r3 = RLWINM killed $r3, 10, 20, 10 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 10, 0, 31, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 10, 20, 10 - dead renamable $r3 = RLWINM killed renamable $r3, 10, 0, 31, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINMToZero -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMToZero - ; CHECK: liveins: $r3 - ; CHECK: renamable $r3 = LI 0, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 27, 5, 10 - dead renamable $r3 = RLWINM killed renamable $r3, 8, 5, 10, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... 
---- -name: testFoldRLWINM_recToZero -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINM_recToZero - ; CHECK: liveins: $r3 - ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 - $r3 = RLWINM killed $r3, 27, 5, 10 - dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0 - BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 -... ---- -name: testFoldRLWINMInvalidMask -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMInvalidMask - ; CHECK: liveins: $r3 - ; CHECK: $r3 = RLWINM killed $r3, 20, 5, 31 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 20, 5, 31 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINCanNotBeDeleted -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r2, $r3 - ; CHECK-LABEL: name: testFoldRLWINCanNotBeDeleted - ; CHECK: liveins: $r2, $r3 - ; CHECK: $r3 = RLWINM_rec $r2, 27, 5, 10, implicit-def dead $cr0 - ; CHECK: dead renamable $r3 = ANDI_rec $r2, 0, implicit-def $cr0 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 - $r3 = RLWINM_rec $r2, 27, 5, 10, implicit-def $cr0 - dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0 - BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 -... ---- -name: testCanNotFoldRLWINM -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testCanNotFoldRLWINM - ; CHECK: liveins: $r3 - ; CHECK: $r3 = RLWINM_rec killed $r3, 27, 5, 10, implicit-def dead $cr0 - ; CHECK: dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 - $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def $cr0 - dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0 - BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 -... diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll index b40a84a7e95c5..0aae50af26490 100644 --- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll @@ -131,7 +131,8 @@ define i32 @xvtdivdp_shift(<2 x double> %a, <2 x double> %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvtdivdp cr0, v2, v3 ; CHECK-NEXT: mfocrf r3, 128 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: rlwinm r3, r3, 28, 31, 31 ; CHECK-NEXT: blr entry: %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b) From 66bcbdbc9c584becff35f6795cd2836a7a0141b2 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 16 Dec 2020 09:20:55 -0800 Subject: [PATCH 26/39] [AArch64InstPrinter] Change printADRPLabel to print the target address in hexadecimal form Similar to D77853. Change ADRP to print the target address in hex, instead of the raw immediate. The behavior is similar to GNU objdump but we also include `0x`. Note: GNU objdump is not consistent whether or not to emit `0x` for different architectures. We try emitting 0x consistently for all targets. 
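For readers unfamiliar with the encoding: `adrp Xd, <label>` writes the 4 KiB page of the target into `Xd`, i.e. `(PC & ~0xFFF) + (imm21 << 12)`, so the value printed in the new form is that page-aligned target address rather than the raw page immediate. Below is a minimal standalone sketch of that computation; the helper name and the example values are illustrative assumptions, not the actual AArch64InstPrinter code.

```
#include <cstdint>
#include <cstdio>

// Sketch only: adrp Xd, <page> computes
//   Xd = (PC & ~0xFFF) + (SignExtend(imm21) * 4096)
// so the printed target is the page base addressed by the instruction.
static uint64_t adrpTarget(uint64_t InstAddr, int64_t PageImm) {
  return (InstAddr & ~uint64_t(0xFFF)) + uint64_t(PageImm * 4096);
}

int main() {
  // An adrp at 0x140001010 with a page immediate of 1 targets 0x140002000,
  // which is what the new disassembly prints instead of "#4096".
  std::printf("0x%llx\n",
              static_cast<unsigned long long>(adrpTarget(0x140001010ULL, 1)));
  return 0;
}
```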
``` GNU objdump: adrp x16, 10000000 Old llvm-objdump: adrp x16, #0 New llvm-objdump: adrp x16, 0x10000000 ``` `adrp Xd, 0x...` assembles to a relocation referencing `*ABS*+0x10000` which is not intended. We need to use a linker or use yaml2obj. The main test is `test/tools/llvm-objdump/ELF/AArch64/pcrel-address.yaml` Differential Revision: https://reviews.llvm.org/D93241 --- lld/test/COFF/arm64-delayimport.yaml | 4 +- lld/test/COFF/arm64-import2.test | 4 +- lld/test/COFF/arm64-relocs-imports.test | 10 ++-- lld/test/COFF/arm64-thunks.s | 4 +- lld/test/ELF/aarch64-condb-reloc.s | 8 +-- lld/test/ELF/aarch64-copy.s | 2 +- .../ELF/aarch64-cortex-a53-843419-address.s | 10 ++-- .../ELF/aarch64-cortex-a53-843419-large.s | 6 +-- .../ELF/aarch64-cortex-a53-843419-recognize.s | 54 +++++++++---------- .../ELF/aarch64-cortex-a53-843419-thunk.s | 2 +- lld/test/ELF/aarch64-feature-bti.s | 24 ++++----- lld/test/ELF/aarch64-feature-btipac.s | 18 +++---- lld/test/ELF/aarch64-feature-pac.s | 12 ++--- lld/test/ELF/aarch64-fpic-got.s | 2 +- lld/test/ELF/aarch64-gnu-ifunc-address.s | 2 +- .../ELF/aarch64-gnu-ifunc-nonpreemptable.s | 8 +-- lld/test/ELF/aarch64-gnu-ifunc-plt.s | 10 ++-- lld/test/ELF/aarch64-gnu-ifunc.s | 4 +- lld/test/ELF/aarch64-gnu-ifunc2.s | 4 +- lld/test/ELF/aarch64-hi21-nc.s | 2 +- lld/test/ELF/aarch64-ifunc-bti.s | 6 +-- lld/test/ELF/aarch64-plt.s | 14 ++--- lld/test/ELF/aarch64-relocs.s | 2 +- lld/test/ELF/aarch64-thunk-pi.s | 14 ++--- lld/test/ELF/aarch64-tls-gdie.s | 2 +- lld/test/ELF/aarch64-tls-ie.s | 4 +- lld/test/ELF/aarch64-tlsdesc.s | 6 +-- lld/test/ELF/aarch64-tstbr14-reloc.s | 6 +-- lld/test/ELF/aarch64-undefined-weak.s | 2 +- .../lib/Target/AArch64/AArch64InstrFormats.td | 1 + .../MCTargetDesc/AArch64InstPrinter.cpp | 9 +++- .../AArch64/MCTargetDesc/AArch64InstPrinter.h | 2 +- llvm/test/MC/AArch64/adr.s | 10 ++-- llvm/test/MC/AArch64/coff-relocations.s | 2 +- .../AArch64/elf-aarch64-mapping-symbols.test | 2 +- .../ELF/AArch64/pcrel-address.yaml | 26 +++++++++ 36 files changed, 165 insertions(+), 133 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/ELF/AArch64/pcrel-address.yaml diff --git a/lld/test/COFF/arm64-delayimport.yaml b/lld/test/COFF/arm64-delayimport.yaml index 6b16a54dfa117..583103f064a07 100644 --- a/lld/test/COFF/arm64-delayimport.yaml +++ b/lld/test/COFF/arm64-delayimport.yaml @@ -5,7 +5,7 @@ # RUN: llvm-objdump -d %t.exe | FileCheck %s --check-prefix DISASM # RUN: llvm-readobj --coff-imports %t.exe | FileCheck %s -check-prefix IMPORTS -# DISASM: 140001014: 11 00 00 d0 adrp x17, #8192 +# DISASM: 140001014: 11 00 00 d0 adrp x17, 0x140003000 # DISASM: 140001018: 31 22 00 91 add x17, x17, #8 # DISASM: 14000101c: 01 00 00 14 b 0x140001020 <.text+0x20> # DISASM: 140001020: fd 7b b3 a9 stp x29, x30, [sp, #-208]! 
@@ -19,7 +19,7 @@ # DISASM: 140001040: e4 97 04 ad stp q4, q5, [sp, #144] # DISASM: 140001044: e6 9f 05 ad stp q6, q7, [sp, #176] # DISASM: 140001048: e1 03 11 aa mov x1, x17 -# DISASM: 14000104c: 00 00 00 b0 adrp x0, #4096 +# DISASM: 14000104c: 00 00 00 b0 adrp x0, 0x140002000 # DISASM: 140001050: 00 00 00 91 add x0, x0, #0 # DISASM: 140001054: eb ff ff 97 bl 0x140001000 <.text> # DISASM: 140001058: f0 03 00 aa mov x16, x0 diff --git a/lld/test/COFF/arm64-import2.test b/lld/test/COFF/arm64-import2.test index b3fa6382642fc..1122ab8e0dbc9 100644 --- a/lld/test/COFF/arm64-import2.test +++ b/lld/test/COFF/arm64-import2.test @@ -19,10 +19,10 @@ # AFTER: 140001004: 06 00 00 94 bl 0x14000101c # AFTER: 140001008: c0 03 5f d6 ret # AFTER: 14000100c: ff cc cc cc -# AFTER: 140001010: 10 00 00 b0 adrp x16, #4096 +# AFTER: 140001010: 10 00 00 b0 adrp x16, 0x140002000 # AFTER: 140001014: 10 32 40 f9 ldr x16, [x16, #96] # AFTER: 140001018: 00 02 1f d6 br x16 -# AFTER: 14000101c: 10 00 00 b0 adrp x16, #4096 +# AFTER: 14000101c: 10 00 00 b0 adrp x16, 0x140002000 # AFTER: 140001020: 10 3a 40 f9 ldr x16, [x16, #112] # AFTER: 140001024: 00 02 1f d6 br x16 diff --git a/lld/test/COFF/arm64-relocs-imports.test b/lld/test/COFF/arm64-relocs-imports.test index 8682088bf4654..bdad54b2159db 100644 --- a/lld/test/COFF/arm64-relocs-imports.test +++ b/lld/test/COFF/arm64-relocs-imports.test @@ -8,7 +8,7 @@ # BEFORE: Disassembly of section .text: # BEFORE-EMPTY: # BEFORE: 0: fe 0f 1f f8 str x30, [sp, #-16]! -# BEFORE: 4: 00 00 00 90 adrp x0, #0 +# BEFORE: 4: 00 00 00 90 adrp x0, 0x0 # BEFORE: 8: 00 08 00 91 add x0, x0, #2 # BEFORE: c: 00 00 00 94 bl 0xc # BEFORE: 10: 00 01 40 39 ldrb w0, [x8] @@ -30,7 +30,7 @@ # BEFORE: 50: 00 01 00 fd str d0, [x8] # BEFORE: 54: 00 01 80 3d str q0, [x8] # BEFORE: 58: 00 05 40 f9 ldr x0, [x8, #8] -# BEFORE: 5c: 20 1a 01 b0 adrp x0, #36982784 +# BEFORE: 5c: 20 1a 01 b0 adrp x0, 0x2345000 # BEFORE: 60: 00 fc 4f f9 ldr x0, [x0, #8184] # BEFORE: 64: e0 03 1f 2a mov w0, wzr # BEFORE: 68: fe 07 41 f8 ldr x30, [sp], #16 @@ -51,7 +51,7 @@ # AFTER: Disassembly of section .text: # AFTER-EMPTY: # AFTER: 140001000: fe 0f 1f f8 str x30, [sp, #-16]! 
-# AFTER: 140001004: 00 00 00 b0 adrp x0, #4096 +# AFTER: 140001004: 00 00 00 b0 adrp x0, 0x140002000 # AFTER: 140001008: 00 18 00 91 add x0, x0, #6 # AFTER: 14000100c: 25 00 00 94 bl 0x1400010a0 # AFTER: 140001010: 00 21 40 39 ldrb w0, [x8, #8] @@ -73,7 +73,7 @@ # AFTER: 140001050: 00 09 00 fd str d0, [x8, #16] # AFTER: 140001054: 00 05 80 3d str q0, [x8, #16] # AFTER: 140001058: 00 09 40 f9 ldr x0, [x8, #16] -# AFTER: 14000105c: 00 00 00 f0 adrp x0, #12288 +# AFTER: 14000105c: 00 00 00 f0 adrp x0, 0x140004000 # AFTER: 140001060: 00 fc 47 f9 ldr x0, [x0, #4088] # AFTER: 140001064: e0 03 1f 2a mov w0, wzr # AFTER: 140001068: fe 07 41 f8 ldr x30, [sp], #16 @@ -90,7 +90,7 @@ # AFTER: 140001094: 61 00 00 54 b.ne 0x1400010a0 # AFTER: 140001098: 40 00 00 36 tbz w0, #0, 0x1400010a0 # AFTER: 14000109c: 61 ff ff ff -# AFTER: 1400010a0: 10 00 00 b0 adrp x16, #4096 +# AFTER: 1400010a0: 10 00 00 b0 adrp x16, 0x140002000 # AFTER: 1400010a4: 10 2a 40 f9 ldr x16, [x16, #80] # AFTER: 1400010a8: 00 02 1f d6 br x16 diff --git a/lld/test/COFF/arm64-thunks.s b/lld/test/COFF/arm64-thunks.s index 9ddae6281d726..4428bf79ae9e2 100644 --- a/lld/test/COFF/arm64-thunks.s +++ b/lld/test/COFF/arm64-thunks.s @@ -29,14 +29,14 @@ func2: // DISASM: 0000000140001000 <.text>: // DISASM: 140001000: 40 00 00 36 tbz w0, #0, 0x140001008 <.text+0x8> // DISASM: 140001004: c0 03 5f d6 ret -// DISASM: 140001008: 50 00 00 90 adrp x16, #32768 +// DISASM: 140001008: 50 00 00 90 adrp x16, 0x140009000 // DISASM: 14000100c: 10 52 00 91 add x16, x16, #20 // DISASM: 140001010: 00 02 1f d6 br x16 // DISASM: 140009014: 60 00 00 36 tbz w0, #0, 0x140009020 <.text+0x8020> // DISASM: 140009018: c0 03 5f d6 ret -// DISASM: 140009020: 50 00 00 90 adrp x16, #32768 +// DISASM: 140009020: 50 00 00 90 adrp x16, 0x140011000 // DISASM: 140009024: 10 b2 00 91 add x16, x16, #44 // DISASM: 140009028: 00 02 1f d6 br x16 diff --git a/lld/test/ELF/aarch64-condb-reloc.s b/lld/test/ELF/aarch64-condb-reloc.s index 06c7b0a822106..0b90d5c9937a9 100644 --- a/lld/test/ELF/aarch64-condb-reloc.s +++ b/lld/test/ELF/aarch64-condb-reloc.s @@ -76,7 +76,7 @@ #DSO-EMPTY: #DSO-NEXT: <.plt>: #DSO-NEXT: 10370: stp x16, x30, [sp, #-16]! 
-#DSO-NEXT: 10374: adrp x16, #131072 +#DSO-NEXT: 10374: adrp x16, 0x30000 #DSO-NEXT: 10378: ldr x17, [x16, #1152] #DSO-NEXT: 1037c: add x16, x16, #1152 #DSO-NEXT: 10380: br x17 @@ -85,19 +85,19 @@ #DSO-NEXT: 1038c: nop #DSO-EMPTY: #DSO-NEXT: <_foo@plt>: -#DSO-NEXT: 10390: adrp x16, #131072 +#DSO-NEXT: 10390: adrp x16, 0x30000 #DSO-NEXT: 10394: ldr x17, [x16, #1160] #DSO-NEXT: 10398: add x16, x16, #1160 #DSO-NEXT: 1039c: br x17 #DSO-EMPTY: #DSO-NEXT: <_bar@plt>: -#DSO-NEXT: 103a0: adrp x16, #131072 +#DSO-NEXT: 103a0: adrp x16, 0x30000 #DSO-NEXT: 103a4: ldr x17, [x16, #1168] #DSO-NEXT: 103a8: add x16, x16, #1168 #DSO-NEXT: 103ac: br x17 #DSO-EMPTY: #DSO-NEXT: <_dah@plt>: -#DSO-NEXT: 103b0: adrp x16, #131072 +#DSO-NEXT: 103b0: adrp x16, 0x30000 #DSO-NEXT: 103b4: ldr x17, [x16, #1176] #DSO-NEXT: 103b8: add x16, x16, #1176 #DSO-NEXT: 103bc: br x17 diff --git a/lld/test/ELF/aarch64-copy.s b/lld/test/ELF/aarch64-copy.s index ba7c8afd5ee13..591186f1470f0 100644 --- a/lld/test/ELF/aarch64-copy.s +++ b/lld/test/ELF/aarch64-copy.s @@ -67,7 +67,7 @@ _start: // S + A - P = 0x2303f0 + 0 - 0x21031c = 131284 // CODE-NEXT: 21031c: adr x1, #131284 // Page(S + A) - Page(P) = Page(0x230400) - Page(0x210320) = 131072 -// CODE-NEXT: 210320: adrp x2, #131072 +// CODE-NEXT: 210320: adrp x2, 0x230000 // (S + A) & 0xFFF = (0x230400 + 0) & 0xFFF = 1024 // CODE-NEXT: 210324: add x2, x2, #1024 diff --git a/lld/test/ELF/aarch64-cortex-a53-843419-address.s b/lld/test/ELF/aarch64-cortex-a53-843419-address.s index 186ff550aa881..fcf5f407c1428 100644 --- a/lld/test/ELF/aarch64-cortex-a53-843419-address.s +++ b/lld/test/ELF/aarch64-cortex-a53-843419-address.s @@ -38,7 +38,7 @@ // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at FF8 in unpatched output. // CHECK: : -// CHECK-NEXT: ff8: 20 00 00 d0 adrp x0, #24576 +// CHECK-NEXT: ff8: 20 00 00 d0 adrp x0, 0x6000 // CHECK-NEXT: ffc: 21 00 40 f9 ldr x1, [x1] // CHECK-NEXT: 1000: f9 0f 00 14 b 0x4fe4 // CHECK-NEXT: 1004: c0 03 5f d6 ret @@ -60,7 +60,7 @@ t3_ff8_ldr: $x.999: // CHECK-PRINT-NEXT: detected cortex-a53-843419 erratum sequence starting at 1FFC in unpatched output. // CHECK: : -// CHECK-NEXT: 1ffc: 20 00 00 b0 adrp x0, #20480 +// CHECK-NEXT: 1ffc: 20 00 00 b0 adrp x0, 0x6000 // CHECK-NEXT: 2000: 21 00 40 bd ldr s1, [x1] // CHECK-NEXT: 2004: fa 0b 00 14 b 0x4fec // CHECK-NEXT: 2008: c0 03 5f d6 ret @@ -97,7 +97,7 @@ t3_ff8_ldralldata: // CHECK-PRINT-NEXT: detected cortex-a53-843419 erratum sequence starting at 3FF8 in unpatched output. 
// CHECK: : -// CHECK-NEXT: 3ff8: 00 00 00 f0 adrp x0, #12288 +// CHECK-NEXT: 3ff8: 00 00 00 f0 adrp x0, 0x6000 // CHECK-NEXT: 3ffc: 21 00 40 f9 ldr x1, [x1] // CHECK-NEXT: 4000: fd 03 00 14 b 0x4ff4 // CHECK-NEXT: 4004: c0 03 5f d6 ret @@ -129,7 +129,7 @@ t3_ff8_ldralldata: // CHECK-PRINT-NEXT: detected cortex-a53-843419 erratum sequence starting at 4FFC in unpatched output // CHECK: : -// CHECK-NEXT: 4ffc: 00 00 00 d0 adrp x0, #8192 +// CHECK-NEXT: 4ffc: 00 00 00 d0 adrp x0, 0x6000 // CHECK-NEXT: 5000: 21 00 00 f9 str x1, [x1] // CHECK-NEXT: 5004: fb 03 00 14 b 0x5ff0 // CHECK-NEXT: 5008: c0 03 5f d6 ret @@ -154,7 +154,7 @@ t3_ffc_str: //CHECK-PRINT-NEXT: detected cortex-a53-843419 erratum sequence starting at 5FF8 in unpatched output // CHECK: : -// CHECK-NEXT: 5ff8: 00 00 00 b0 adrp x0, #4096 +// CHECK-NEXT: 5ff8: 00 00 00 b0 adrp x0, 0x6000 // CHECK-NEXT: 5ffc: 21 00 00 f9 str x1, [x1] // CHECK-NEXT: 6000: 03 00 00 14 b 0x600c // CHECK-NEXT: 6004: c0 03 5f d6 ret diff --git a/lld/test/ELF/aarch64-cortex-a53-843419-large.s b/lld/test/ELF/aarch64-cortex-a53-843419-large.s index 35b25323d415b..a1aafbaaff684 100644 --- a/lld/test/ELF/aarch64-cortex-a53-843419-large.s +++ b/lld/test/ELF/aarch64-cortex-a53-843419-large.s @@ -43,7 +43,7 @@ t3_ff8_ldr: ret // CHECK3: : -// CHECK3-NEXT: 211ff8: e0 00 04 f0 adrp x0, #134344704 +// CHECK3-NEXT: 211ff8: e0 00 04 f0 adrp x0, 0x8230000 // CHECK3-NEXT: 211ffc: 21 00 40 f9 ldr x1, [x1] // CHECK3-NEXT: 212000: 02 08 80 15 b 0x6214008 // CHECK3-NEXT: 212004: c0 03 5f d6 ret @@ -64,7 +64,7 @@ t3_ff8_str: ret // CHECK4: : -// CHECK4-NEXT: 4213ff8: e0 00 02 b0 adrp x0, #67227648 +// CHECK4-NEXT: 4213ff8: e0 00 02 b0 adrp x0, 0x8230000 // CHECK4-NEXT: 4213ffc: 21 00 40 f9 ldr x1, [x1] // CHECK4-NEXT: 4214000: 04 00 80 14 b 0x6214010 // CHECK4-NEXT: 4214004: c0 03 5f d6 ret @@ -103,7 +103,7 @@ t3_ffc_ldr: ret // CHECK7: : -// CHECK7-NEXT: 8211ffc: e0 00 00 f0 adrp x0, #126976 +// CHECK7-NEXT: 8211ffc: e0 00 00 f0 adrp x0, 0x8230000 // CHECK7-NEXT: 8212000: 21 00 40 f9 ldr x1, [x1] // CHECK7-NEXT: 8212004: 02 00 00 14 b 0x821200c // CHECK7-NEXT: 8212008: c0 03 5f d6 ret diff --git a/lld/test/ELF/aarch64-cortex-a53-843419-recognize.s b/lld/test/ELF/aarch64-cortex-a53-843419-recognize.s index 823c7525ab2ed..3d7aef78a6893 100644 --- a/lld/test/ELF/aarch64-cortex-a53-843419-recognize.s +++ b/lld/test/ELF/aarch64-cortex-a53-843419-recognize.s @@ -30,13 +30,13 @@ // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 211FF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 211ff8: 60 02 00 f0 adrp x0, #323584 +// CHECK-NEXT: 211ff8: 60 02 00 f0 adrp x0, 0x260000 // CHECK-NEXT: 211ffc: 21 00 40 f9 ldr x1, [x1] // CHECK-FIX: 212000: 03 c8 00 14 b 0x24400c // CHECK-NOFIX: 212000: 00 00 40 f9 ldr x0, [x0] // CHECK-NEXT: 212004: c0 03 5f d6 ret // CHECK-RELOCATABLE: : -// CHECK-RELOCATABLE-NEXT: ff8: 00 00 00 90 adrp x0, #0 +// CHECK-RELOCATABLE-NEXT: ff8: 00 00 00 90 adrp x0, 0x0 // CHECK-RELOCATABLE-NEXT: ffc: 21 00 40 f9 ldr x1, [x1] // CHECK-RELOCATABLE-NEXT: 1000: 00 00 40 f9 ldr x0, [x0] // CHECK-RELOCATABLE-NEXT: 1004: c0 03 5f d6 ret @@ -54,7 +54,7 @@ t3_ff8_ldr: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 213FF8 in unpatched output. 
// CHECK: : -// CHECK-NEXT: 213ff8: 60 02 00 b0 adrp x0, #315392 +// CHECK-NEXT: 213ff8: 60 02 00 b0 adrp x0, 0x260000 // CHECK-NEXT: 213ffc: 21 00 40 bd ldr s1, [x1] // CHECK-FIX: 214000: 05 c0 00 14 b 0x244014 // CHECK-NOFIX: 214000: 02 04 40 f9 ldr x2, [x0, #8] @@ -72,7 +72,7 @@ t3_ff8_ldrsimd: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 215FFC in unpatched output. // CHECK: : -// CHECK-NEXT: 215ffc: 40 02 00 f0 adrp x0, #307200 +// CHECK-NEXT: 215ffc: 40 02 00 f0 adrp x0, 0x260000 // CHECK-NEXT: 216000: 21 84 40 bc ldr s1, [x1], #8 // CHECK-FIX: 216004: 06 b8 00 14 b 0x24401c // CHECK-NOFIX: 216004: 03 08 40 f9 ldr x3, [x0, #16] @@ -90,7 +90,7 @@ t3_ffc_ldrpost: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 217FF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 217ff8: 40 02 00 b0 adrp x0, #299008 +// CHECK-NEXT: 217ff8: 40 02 00 b0 adrp x0, 0x260000 // CHECK-NEXT: 217ffc: 21 8c 00 bc str s1, [x1, #8]! // CHECK-FIX: 218000: 09 b0 00 14 b 0x244024 // CHECK-NOFIX: 218000: 02 0c 40 f9 ldr x2, [x0, #24] @@ -108,7 +108,7 @@ t3_ff8_strpre: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 219FFC in unpatched output. // CHECK: : -// CHECK-NEXT: 219ffc: 3c 02 00 f0 adrp x28, #290816 +// CHECK-NEXT: 219ffc: 3c 02 00 f0 adrp x28, 0x260000 // CHECK-NEXT: 21a000: 42 00 00 f9 str x2, [x2] // CHECK-FIX: 21a004: 0a a8 00 14 b 0x24402c // CHECK-NOFIX: 21a004: 9c 13 00 f9 str x28, [x28, #32] @@ -126,7 +126,7 @@ t3_ffc_str: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 21BFFC in unpatched output. // CHECK: : -// CHECK-NEXT: 21bffc: 3c 02 00 b0 adrp x28, #282624 +// CHECK-NEXT: 21bffc: 3c 02 00 b0 adrp x28, 0x260000 // CHECK-NEXT: 21c000: 44 00 00 b9 str w4, [x2] // CHECK-FIX: 21c004: 0c a0 00 14 b 0x244034 // CHECK-NOFIX: 21c004: 84 17 00 f9 str x4, [x28, #40] @@ -144,7 +144,7 @@ t3_ffc_strsimd: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 21DFF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 21dff8: 1d 02 00 f0 adrp x29, #274432 +// CHECK-NEXT: 21dff8: 1d 02 00 f0 adrp x29, 0x260000 // CHECK-NEXT: 21dffc: 41 08 40 38 ldtrb w1, [x2] // CHECK-FIX: 21e000: 0f 98 00 14 b 0x24403c // CHECK-NOFIX: 21e000: bd 03 40 f9 ldr x29, [x29] @@ -162,7 +162,7 @@ t3_ff8_ldrunpriv: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 21FFFC in unpatched output. // CHECK: : -// CHECK-NEXT: 21fffc: 1d 02 00 b0 adrp x29, #266240 +// CHECK-NEXT: 21fffc: 1d 02 00 b0 adrp x29, 0x260000 // CHECK-NEXT: 220000: 42 40 40 b8 ldur w2, [x2, #4] // CHECK-FIX: 220004: 10 90 00 14 b 0x244044 // CHECK-NOFIX: 220004: bd 07 40 f9 ldr x29, [x29, #8] @@ -179,7 +179,7 @@ t3_ffc_ldur: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 221FFC in unpatched output. // CHECK: : -// CHECK-NEXT: 221ffc: f2 01 00 f0 adrp x18, #258048 +// CHECK-NEXT: 221ffc: f2 01 00 f0 adrp x18, 0x260000 // CHECK-NEXT: 222000: 43 40 00 78 sturh w3, [x2, #4] // CHECK-FIX: 222004: 12 88 00 14 b 0x24404c // CHECK-NOFIX: 222004: 41 0a 40 f9 ldr x1, [x18, #16] @@ -197,7 +197,7 @@ t3_ffc_sturh: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 223FF8 in unpatched output. 
// CHECK: : -// CHECK-NEXT: 223ff8: f2 01 00 b0 adrp x18, #249856 +// CHECK-NEXT: 223ff8: f2 01 00 b0 adrp x18, 0x260000 // CHECK-NEXT: 223ffc: e3 ff ff 58 ldr x3, 0x223ff8 // CHECK-FIX: 224000: 15 80 00 14 b 0x244054 // CHECK-NOFIX: 224000: 52 0e 40 f9 ldr x18, [x18, #24] @@ -215,7 +215,7 @@ t3_ff8_literal: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 225FFC in unpatched output. // CHECK: : -// CHECK-NEXT: 225ffc: cf 01 00 f0 adrp x15, #241664 +// CHECK-NEXT: 225ffc: cf 01 00 f0 adrp x15, 0x260000 // CHECK-NEXT: 226000: 43 68 61 f8 ldr x3, [x2, x1] // CHECK-FIX: 226004: 16 78 00 14 b 0x24405c // CHECK-NOFIX: 226004: ea 11 40 f9 ldr x10, [x15, #32] @@ -233,7 +233,7 @@ t3_ffc_register: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 227FF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 227ff8: d0 01 00 b0 adrp x16, #233472 +// CHECK-NEXT: 227ff8: d0 01 00 b0 adrp x16, 0x260000 // CHECK-NEXT: 227ffc: 61 08 00 a9 stp x1, x2, [x3] // CHECK-FIX: 228000: 19 70 00 14 b 0x244064 // CHECK-NOFIX: 228000: 0d 16 40 f9 ldr x13, [x16, #40] @@ -251,7 +251,7 @@ t3_ff8_stp: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 229FFC in unpatched output. // CHECK: : -// CHECK-NEXT: 229ffc: a7 01 00 f0 adrp x7, #225280 +// CHECK-NEXT: 229ffc: a7 01 00 f0 adrp x7, 0x260000 // CHECK-NEXT: 22a000: 61 08 00 a8 stnp x1, x2, [x3] // CHECK-FIX: 22a004: 1a 68 00 14 b 0x24406c // CHECK-NOFIX: 22a004: e9 0c 40 f9 ldr x9, [x7, #24] @@ -269,7 +269,7 @@ t3_ffc_stnp: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 22BFFC in unpatched output. // CHECK: : -// CHECK-NEXT: 22bffc: b7 01 00 b0 adrp x23, #217088 +// CHECK-NEXT: 22bffc: b7 01 00 b0 adrp x23, 0x260000 // CHECK-NEXT: 22c000: 20 04 82 0d st1 { v0.b }[1], [x1], x2 // CHECK-FIX: 22c004: 1c 60 00 14 b 0x244074 // CHECK-NOFIX: 22c004: f6 12 40 f9 ldr x22, [x23, #32] @@ -287,7 +287,7 @@ t3_ffc_st1singlepost: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 22DFF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 22dff8: 97 01 00 f0 adrp x23, #208896 +// CHECK-NEXT: 22dff8: 97 01 00 f0 adrp x23, 0x260000 // CHECK-NEXT: 22dffc: 20 a0 00 4c st1 { v0.16b, v1.16b }, [x1] // CHECK-FIX: 22e000: 1f 58 00 14 b 0x24407c // CHECK-NOFIX: 22e000: f8 16 40 f9 ldr x24, [x23, #40] @@ -305,7 +305,7 @@ t3_ff8_st1multiple: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 22FFF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 22fff8: 80 01 00 b0 adrp x0, #200704 +// CHECK-NEXT: 22fff8: 80 01 00 b0 adrp x0, 0x260000 // CHECK-NEXT: 22fffc: 21 00 40 f9 ldr x1, [x1] // CHECK-NEXT: 230000: 42 00 00 8b add x2, x2, x0 // CHECK-FIX: 230004: 20 50 00 14 b 0x244084 @@ -325,7 +325,7 @@ t4_ff8_ldr: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 231FFC in unpatched output. // CHECK: : -// CHECK-NEXT: 231ffc: 7c 01 00 f0 adrp x28, #192512 +// CHECK-NEXT: 231ffc: 7c 01 00 f0 adrp x28, 0x260000 // CHECK-NEXT: 232000: 42 00 00 f9 str x2, [x2] // CHECK-NEXT: 232004: 20 00 02 cb sub x0, x1, x2 // CHECK-FIX: 232008: 21 48 00 14 b 0x24408c @@ -345,7 +345,7 @@ t4_ffc_str: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 233FF8 in unpatched output. 
// CHECK: : -// CHECK-NEXT: 233ff8: 70 01 00 b0 adrp x16, #184320 +// CHECK-NEXT: 233ff8: 70 01 00 b0 adrp x16, 0x260000 // CHECK-NEXT: 233ffc: 61 08 00 a9 stp x1, x2, [x3] // CHECK-NEXT: 234000: 03 7e 10 9b mul x3, x16, x16 // CHECK-FIX: 234004: 24 40 00 14 b 0x244094 @@ -365,7 +365,7 @@ t4_ff8_stp: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 235FF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 235ff8: 50 01 00 f0 adrp x16, #176128 +// CHECK-NEXT: 235ff8: 50 01 00 f0 adrp x16, 0x260000 // CHECK-NEXT: 235ffc: 61 08 81 a9 stp x1, x2, [x3, #16]! // CHECK-NEXT: 236000: 03 7e 10 9b mul x3, x16, x16 // CHECK-FIX: 236004: 26 38 00 14 b 0x24409c @@ -385,7 +385,7 @@ t4_ff8_stppre: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 237FF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 237ff8: 50 01 00 b0 adrp x16, #167936 +// CHECK-NEXT: 237ff8: 50 01 00 b0 adrp x16, 0x260000 // CHECK-NEXT: 237ffc: 61 08 81 a8 stp x1, x2, [x3], #16 // CHECK-NEXT: 238000: 03 7e 10 9b mul x3, x16, x16 // CHECK-FIX: 238004: 28 30 00 14 b 0x2440a4 @@ -405,7 +405,7 @@ t4_ff8_stppost: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 239FFC in unpatched output. // CHECK: : -// CHECK-NEXT: 239ffc: 30 01 00 f0 adrp x16, #159744 +// CHECK-NEXT: 239ffc: 30 01 00 f0 adrp x16, 0x260000 // CHECK-NEXT: 23a000: 61 08 00 ad stp q1, q2, [x3] // CHECK-NEXT: 23a004: 03 7e 10 9b mul x3, x16, x16 // CHECK-FIX: 23a008: 29 28 00 14 b 0x2440ac @@ -425,7 +425,7 @@ t4_ffc_stpsimd: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 23BFFC in unpatched output. // CHECK: : -// CHECK-NEXT: 23bffc: 27 01 00 b0 adrp x7, #151552 +// CHECK-NEXT: 23bffc: 27 01 00 b0 adrp x7, 0x260000 // CHECK-NEXT: 23c000: 61 08 00 a8 stnp x1, x2, [x3] // CHECK-NEXT: 23c004: 1f 20 03 d5 nop // CHECK-FIX: 23c008: 2b 20 00 14 b 0x2440b4 @@ -445,7 +445,7 @@ t4_ffc_stnp: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 23DFFC in unpatched output. // CHECK: : -// CHECK-NEXT: 23dffc: 18 01 00 f0 adrp x24, #143360 +// CHECK-NEXT: 23dffc: 18 01 00 f0 adrp x24, 0x260000 // CHECK-NEXT: 23e000: 20 80 00 4d st1 { v0.s }[2], [x1] // CHECK-NEXT: 23e004: f6 06 40 f9 ldr x22, [x23, #8] // CHECK-FIX: 23e008: 2d 18 00 14 b 0x2440bc @@ -465,7 +465,7 @@ t4_ffc_st1: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 23FFF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 23fff8: 00 01 00 b0 adrp x0, #135168 +// CHECK-NEXT: 23fff8: 00 01 00 b0 adrp x0, 0x260000 // CHECK-NEXT: 23fffc: 20 70 82 4c st1 { v0.16b }, [x1], x2 // CHECK-FIX: 240000: 31 10 00 14 b 0x2440c4 // CHECK-NOFIX: 240000: 01 08 40 f9 ldr x1, [x0, #16] @@ -485,7 +485,7 @@ t3_ff8_ldr_once: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 241FF8 in unpatched output. // CHECK: : -// CHECK-NEXT: 241ff8: e0 00 00 f0 adrp x0, #126976 +// CHECK-NEXT: 241ff8: e0 00 00 f0 adrp x0, 0x260000 // CHECK-NEXT: 241ffc: 03 7c 5f c8 ldxr x3, [x0] // CHECK-FIX: 242000: 33 08 00 14 b 0x2440cc // CHECK-NOFIX: 242000: 01 08 40 f9 ldr x1, [x0, #16] @@ -505,7 +505,7 @@ t3_ff8_ldxr: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 243FF8 in unpatched output. 
// CHECK: : -// CHECK-NEXT: 243ff8: e0 00 00 b0 adrp x0, #118784 +// CHECK-NEXT: 243ff8: e0 00 00 b0 adrp x0, 0x260000 // CHECK-NEXT: 243ffc: 03 7c 04 c8 stxr w4, x3, [x0] // CHECK-FIX: 244000: 35 00 00 14 b 0x2440d4 // CHECK-NOFIX: 244000: 01 08 40 f9 ldr x1, [x0, #16] diff --git a/lld/test/ELF/aarch64-cortex-a53-843419-thunk.s b/lld/test/ELF/aarch64-cortex-a53-843419-thunk.s index 998175e397198..290fc712e0707 100644 --- a/lld/test/ELF/aarch64-cortex-a53-843419-thunk.s +++ b/lld/test/ELF/aarch64-cortex-a53-843419-thunk.s @@ -39,7 +39,7 @@ t3_ff8_ldr: // CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 10FF8 in unpatched output. // CHECK: 0000000000010ff8 : -// CHECK-NEXT: adrp x0, #134217728 +// CHECK-NEXT: adrp x0, 0x8010000 // CHECK-NEXT: ldr x1, [x1] // CHECK-NEXT: b 0x11008 // CHECK-NEXT: ret diff --git a/lld/test/ELF/aarch64-feature-bti.s b/lld/test/ELF/aarch64-feature-bti.s index f4fb701cc77c3..a66aced645eda 100644 --- a/lld/test/ELF/aarch64-feature-bti.s +++ b/lld/test/ELF/aarch64-feature-bti.s @@ -22,7 +22,7 @@ # NOBTI: Disassembly of section .plt: # NOBTI: 00000000000102d0 <.plt>: # NOBTI-NEXT: 102d0: stp x16, x30, [sp, #-16]! -# NOBTI-NEXT: 102d4: adrp x16, #131072 +# NOBTI-NEXT: 102d4: adrp x16, 0x30000 # NOBTI-NEXT: 102d8: ldr x17, [x16, #960] # NOBTI-NEXT: 102dc: add x16, x16, #960 # NOBTI-NEXT: 102e0: br x17 @@ -30,7 +30,7 @@ # NOBTI-NEXT: 102e8: nop # NOBTI-NEXT: 102ec: nop # NOBTI: 00000000000102f0 : -# NOBTI-NEXT: 102f0: adrp x16, #131072 +# NOBTI-NEXT: 102f0: adrp x16, 0x30000 # NOBTI-NEXT: 102f4: ldr x17, [x16, #968] # NOBTI-NEXT: 102f8: add x16, x16, #968 # NOBTI-NEXT: 102fc: br x17 @@ -64,14 +64,14 @@ # BTISO: 0000000000010360 <.plt>: # BTISO-NEXT: 10360: bti c # BTISO-NEXT: stp x16, x30, [sp, #-16]! -# BTISO-NEXT: adrp x16, #131072 +# BTISO-NEXT: adrp x16, 0x30000 # BTISO-NEXT: ldr x17, [x16, #1136] # BTISO-NEXT: add x16, x16, #1136 # BTISO-NEXT: br x17 # BTISO-NEXT: nop # BTISO-NEXT: nop # BTISO: 0000000000010380 : -# BTISO-NEXT: 10380: adrp x16, #131072 +# BTISO-NEXT: 10380: adrp x16, 0x30000 # BTISO-NEXT: ldr x17, [x16, #1144] # BTISO-NEXT: add x16, x16, #1144 # BTISO-NEXT: br x17 @@ -98,7 +98,7 @@ # EXECBTI: 0000000000210350 <.plt>: # EXECBTI-NEXT: 210350: bti c # EXECBTI-NEXT: stp x16, x30, [sp, #-16]! -# EXECBTI-NEXT: adrp x16, #131072 +# EXECBTI-NEXT: adrp x16, 0x230000 # EXECBTI-NEXT: ldr x17, [x16, #1160] # EXECBTI-NEXT: add x16, x16, #1160 # EXECBTI-NEXT: br x17 @@ -106,7 +106,7 @@ # EXECBTI-NEXT: nop # EXECBTI: 0000000000210370 : # EXECBTI-NEXT: 210370: bti c -# EXECBTI-NEXT: adrp x16, #131072 +# EXECBTI-NEXT: adrp x16, 0x230000 # EXECBTI-NEXT: ldr x17, [x16, #1168] # EXECBTI-NEXT: add x16, x16, #1168 # EXECBTI-NEXT: br x17 @@ -126,7 +126,7 @@ # PIE: 0000000000010350 <.plt>: # PIE-NEXT: 10350: bti c # PIE-NEXT: stp x16, x30, [sp, #-16]! -# PIE-NEXT: adrp x16, #131072 +# PIE-NEXT: adrp x16, 0x30000 # PIE-NEXT: ldr x17, [x16, #1176] # PIE-NEXT: add x16, x16, #1176 # PIE-NEXT: br x17 @@ -134,7 +134,7 @@ # PIE-NEXT: nop # PIE: 0000000000010370 : # PIE-NEXT: 10370: bti c -# PIE-NEXT: adrp x16, #131072 +# PIE-NEXT: adrp x16, 0x30000 # PIE-NEXT: ldr x17, [x16, #1184] # PIE-NEXT: add x16, x16, #1184 # PIE-NEXT: br x17 @@ -156,7 +156,7 @@ # NOEX: Disassembly of section .plt: # NOEX: 00000000002102f0 <.plt>: # NOEX-NEXT: 2102f0: stp x16, x30, [sp, #-16]! 
-# NOEX-NEXT: adrp x16, #131072 +# NOEX-NEXT: adrp x16, 0x230000 # NOEX-NEXT: ldr x17, [x16, #1024] # NOEX-NEXT: add x16, x16, #1024 # NOEX-NEXT: br x17 @@ -164,7 +164,7 @@ # NOEX-NEXT: nop # NOEX-NEXT: nop # NOEX: 0000000000210310 : -# NOEX-NEXT: 210310: adrp x16, #131072 +# NOEX-NEXT: 210310: adrp x16, 0x230000 # NOEX-NEXT: ldr x17, [x16, #1032] # NOEX-NEXT: add x16, x16, #1032 # NOEX-NEXT: br x17 @@ -191,7 +191,7 @@ # FORCE: 0000000000210380 <.plt>: # FORCE-NEXT: 210380: bti c # FORCE-NEXT: stp x16, x30, [sp, #-16]! -# FORCE-NEXT: adrp x16, #131072 +# FORCE-NEXT: adrp x16, 0x230000 # FORCE-NEXT: ldr x17, [x16, #1192] # FORCE-NEXT: add x16, x16, #1192 # FORCE-NEXT: br x17 @@ -199,7 +199,7 @@ # FORCE-NEXT: nop # FORCE: 00000000002103a0 : # FORCE-NEXT: 2103a0: bti c -# FORCE-NEXT: adrp x16, #131072 +# FORCE-NEXT: adrp x16, 0x230000 # FORCE-NEXT: ldr x17, [x16, #1200] # FORCE-NEXT: add x16, x16, #1200 # FORCE-NEXT: br x17 diff --git a/lld/test/ELF/aarch64-feature-btipac.s b/lld/test/ELF/aarch64-feature-btipac.s index e5d8abeaaf286..653bb05dc9682 100644 --- a/lld/test/ELF/aarch64-feature-btipac.s +++ b/lld/test/ELF/aarch64-feature-btipac.s @@ -24,14 +24,14 @@ # BTIPACSO: 0000000000010360 <.plt>: # BTIPACSO-NEXT: 10360: bti c # BTIPACSO-NEXT: stp x16, x30, [sp, #-16]! -# BTIPACSO-NEXT: adrp x16, #131072 +# BTIPACSO-NEXT: adrp x16, 0x30000 # BTIPACSO-NEXT: ldr x17, [x16, #1136] # BTIPACSO-NEXT: add x16, x16, #1136 # BTIPACSO-NEXT: br x17 # BTIPACSO-NEXT: nop # BTIPACSO-NEXT: nop # BTIPACSO: 0000000000010380 : -# BTIPACSO-NEXT: 10380: adrp x16, #131072 +# BTIPACSO-NEXT: 10380: adrp x16, 0x30000 # BTIPACSO-NEXT: ldr x17, [x16, #1144] # BTIPACSO-NEXT: add x16, x16, #1144 # BTIPACSO-NEXT: br x17 @@ -61,7 +61,7 @@ # BTIPACEX: 0000000000210380 <.plt>: # BTIPACEX-NEXT: 210380: bti c # BTIPACEX-NEXT: stp x16, x30, [sp, #-16]! -# BTIPACEX-NEXT: adrp x16, #131072 +# BTIPACEX-NEXT: adrp x16, 0x230000 # BTIPACEX-NEXT: ldr x17, [x16, #1192] # BTIPACEX-NEXT: add x16, x16, #1192 # BTIPACEX-NEXT: br x17 @@ -69,7 +69,7 @@ # BTIPACEX-NEXT: nop # BTIPACEX: 00000000002103a0 : # BTIPACEX-NEXT: 2103a0: bti c -# BTIPACEX-NEXT: adrp x16, #131072 +# BTIPACEX-NEXT: adrp x16, 0x230000 # BTIPACEX-NEXT: ldr x17, [x16, #1200] # BTIPACEX-NEXT: add x16, x16, #1200 # BTIPACEX-NEXT: br x17 @@ -93,7 +93,7 @@ # EX: Disassembly of section .plt: # EX: 00000000002102f0 <.plt>: # EX-NEXT: 2102f0: stp x16, x30, [sp, #-16]! -# EX-NEXT: adrp x16, #131072 +# EX-NEXT: adrp x16, 0x230000 # EX-NEXT: ldr x17, [x16, #1024] # EX-NEXT: add x16, x16, #1024 # EX-NEXT: br x17 @@ -101,7 +101,7 @@ # EX-NEXT: nop # EX-NEXT: nop # EX: 0000000000210310 : -# EX: 210310: adrp x16, #131072 +# EX: 210310: adrp x16, 0x230000 # EX-NEXT: ldr x17, [x16, #1032] # EX-NEXT: add x16, x16, #1032 # EX-NEXT: br x17 @@ -155,7 +155,7 @@ func1: # BTIPACEX2: 0000000000210380 <.plt>: # BTIPACEX2-NEXT: 210380: bti c # BTIPACEX2-NEXT: stp x16, x30, [sp, #-16]! 
-# BTIPACEX2-NEXT: adrp x16, #131072 +# BTIPACEX2-NEXT: adrp x16, 0x230000 # BTIPACEX2-NEXT: ldr x17, [x16, #1208] # BTIPACEX2-NEXT: add x16, x16, #1208 # BTIPACEX2-NEXT: br x17 @@ -163,11 +163,11 @@ func1: # BTIPACEX2-NEXT: nop # BTIPACEX2: 00000000002103a0 : # BTIPACEX2-NEXT: 2103a0: bti c -# BTIPACEX2-NEXT: adrp x16, #131072 +# BTIPACEX2-NEXT: adrp x16, 0x230000 # BTIPACEX2-NEXT: ldr x17, [x16, #1216] # BTIPACEX2-NEXT: add x16, x16, #1216 # BTIPACEX2-NEXT: autia1716 # BTIPACEX2-NEXT: br x17 # BTIPACDYN2: 0x0000000070000001 (AARCH64_BTI_PLT) -# BTIPACDYN2-NEXT: 0x0000000070000003 (AARCH64_PAC_PLT) \ No newline at end of file +# BTIPACDYN2-NEXT: 0x0000000070000003 (AARCH64_PAC_PLT) diff --git a/lld/test/ELF/aarch64-feature-pac.s b/lld/test/ELF/aarch64-feature-pac.s index 8b5182f53ee56..4bdba6947f95e 100644 --- a/lld/test/ELF/aarch64-feature-pac.s +++ b/lld/test/ELF/aarch64-feature-pac.s @@ -19,7 +19,7 @@ # NOPAC: Disassembly of section .plt: # NOPAC: 00000000000102d0 <.plt>: # NOPAC-NEXT: 102d0: stp x16, x30, [sp, #-16]! -# NOPAC-NEXT: adrp x16, #131072 +# NOPAC-NEXT: adrp x16, 0x30000 # NOPAC-NEXT: ldr x17, [x16, #960] # NOPAC-NEXT: add x16, x16, #960 # NOPAC-NEXT: br x17 @@ -27,7 +27,7 @@ # NOPAC-NEXT: nop # NOPAC-NEXT: nop # NOPAC: 00000000000102f0 : -# NOPAC-NEXT: 102f0: adrp x16, #131072 +# NOPAC-NEXT: 102f0: adrp x16, 0x30000 # NOPAC-NEXT: ldr x17, [x16, #968] # NOPAC-NEXT: add x16, x16, #968 # NOPAC-NEXT: br x17 @@ -54,7 +54,7 @@ # PACSO: Disassembly of section .plt: # PACSO: 0000000000010360 <.plt>: # PACSO-NEXT: 10360: stp x16, x30, [sp, #-16]! -# PACSO-NEXT: adrp x16, #131072 +# PACSO-NEXT: adrp x16, 0x30000 # PACSO-NEXT: ldr x17, [x16, #1120] # PACSO-NEXT: add x16, x16, #1120 # PACSO-NEXT: br x17 @@ -62,7 +62,7 @@ # PACSO-NEXT: nop # PACSO-NEXT: nop # PACSO: 0000000000010380 : -# PACSO-NEXT: 10380: adrp x16, #131072 +# PACSO-NEXT: 10380: adrp x16, 0x30000 # PACSO-NEXT: ldr x17, [x16, #1128] # PACSO-NEXT: add x16, x16, #1128 # PACSO-NEXT: br x17 @@ -95,7 +95,7 @@ # PACPLT: Disassembly of section .plt: # PACPLT: 0000000000210380 <.plt>: # PACPLT-NEXT: 210380: stp x16, x30, [sp, #-16]! -# PACPLT-NEXT: adrp x16, #131072 +# PACPLT-NEXT: adrp x16, 0x230000 # PACPLT-NEXT: ldr x17, [x16, #1192] # PACPLT-NEXT: add x16, x16, #1192 # PACPLT-NEXT: br x17 @@ -103,7 +103,7 @@ # PACPLT-NEXT: nop # PACPLT-NEXT: nop # PACPLT: 00000000002103a0 : -# PACPLT-NEXT: 2103a0: adrp x16, #131072 +# PACPLT-NEXT: 2103a0: adrp x16, 0x230000 # PACPLT-NEXT: ldr x17, [x16, #1200] # PACPLT-NEXT: add x16, x16, #1200 # PACPLT-NEXT: autia1716 diff --git a/lld/test/ELF/aarch64-fpic-got.s b/lld/test/ELF/aarch64-fpic-got.s index 4f46cea9c6ee2..091fb84e2b6ea 100644 --- a/lld/test/ELF/aarch64-fpic-got.s +++ b/lld/test/ELF/aarch64-fpic-got.s @@ -17,7 +17,7 @@ ## page(0x220320) - page(0x210000) = 65536 ## page(0x220320) & 0xff8 = 800 # DIS: <_start>: -# DIS-NEXT: 210258: adrp x0, #65536 +# DIS-NEXT: 210258: adrp x0, 0x220000 # DIS-NEXT: 21025c: ldr x0, [x0, #800] .globl _start diff --git a/lld/test/ELF/aarch64-gnu-ifunc-address.s b/lld/test/ELF/aarch64-gnu-ifunc-address.s index 36c3f5a3bd289..46a914fe82fa7 100644 --- a/lld/test/ELF/aarch64-gnu-ifunc-address.s +++ b/lld/test/ELF/aarch64-gnu-ifunc-address.s @@ -22,7 +22,7 @@ main: ret # CHECK: 0000000000010284
: ## myfunc's got entry = page(0x20330)-page(0x10284) + 0x330 = 65536 + 816 -# CHECK-NEXT: 10284: adrp x8, #65536 +# CHECK-NEXT: 10284: adrp x8, 0x20000 # CHECK-NEXT: 10288: ldr x8, [x8, #816] # CHECK-NEXT: 1028c: ret diff --git a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s index 9f8a59367b0ef..284d9a8d7edce 100644 --- a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s +++ b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s @@ -33,7 +33,7 @@ main: # PDE: : # PDE-NEXT: 210170: ret # PDE:
: -# PDE-NEXT: 210174: adrp x8, #0 +# PDE-NEXT: 210174: adrp x8, 0x210000 # PDE-NEXT: 210178: add x8, x8, #384 # PDE-NEXT: 21017c: ret # PDE-EMPTY: @@ -41,7 +41,7 @@ main: # PDE-EMPTY: # PDE-NEXT: : ## page(.got.plt) - page(0x210010) = 65536 -# PDE-NEXT: 210180: adrp x16, #65536 +# PDE-NEXT: 210180: adrp x16, 0x220000 # PDE-NEXT: 210184: ldr x17, [x16, #400] # PDE-NEXT: 210188: add x16, x16, #400 # PDE-NEXT: 21018c: br x17 @@ -55,14 +55,14 @@ main: # PIE: : # PIE-NEXT: 10260: ret # PIE:
: -# PIE-NEXT: 10264: adrp x8, #0 +# PIE-NEXT: 10264: adrp x8, 0x10000 # PIE-NEXT: 10268: add x8, x8, #624 # PIE-NEXT: 1026c: ret # PIE-EMPTY: # PIE-NEXT: Disassembly of section .iplt: # PIE-EMPTY: # PIE-NEXT: : -# PIE-NEXT: 10270: adrp x16, #131072 +# PIE-NEXT: 10270: adrp x16, 0x30000 # PIE-NEXT: 10274: ldr x17, [x16, #896] # PIE-NEXT: 10278: add x16, x16, #896 # PIE-NEXT: 1027c: br x17 diff --git a/lld/test/ELF/aarch64-gnu-ifunc-plt.s b/lld/test/ELF/aarch64-gnu-ifunc-plt.s index a74965a7b364e..fdd8e6ee09aa6 100644 --- a/lld/test/ELF/aarch64-gnu-ifunc-plt.s +++ b/lld/test/ELF/aarch64-gnu-ifunc-plt.s @@ -48,7 +48,7 @@ // DISASM-EMPTY: // DISASM-NEXT: <.plt>: // DISASM-NEXT: 2102f0: stp x16, x30, [sp, #-16]! -// DISASM-NEXT: 2102f4: adrp x16, #131072 +// DISASM-NEXT: 2102f4: adrp x16, 0x230000 // DISASM-NEXT: 2102f8: ldr x17, [x16, #1104] // DISASM-NEXT: 2102fc: add x16, x16, #1104 // DISASM-NEXT: 210300: br x17 @@ -57,13 +57,13 @@ // DISASM-NEXT: 21030c: nop // DISASM-EMPTY: // DISASM-NEXT: : -// DISASM-NEXT: 210310: adrp x16, #131072 +// DISASM-NEXT: 210310: adrp x16, 0x230000 // DISASM-NEXT: 210314: ldr x17, [x16, #1112] // DISASM-NEXT: 210318: add x16, x16, #1112 // DISASM-NEXT: 21031c: br x17 // DISASM-EMPTY: // DISASM-NEXT: : -// DISASM-NEXT: 210320: adrp x16, #131072 +// DISASM-NEXT: 210320: adrp x16, 0x230000 // DISASM-NEXT: 210324: ldr x17, [x16, #1120] // DISASM-NEXT: 210328: add x16, x16, #1120 // DISASM-NEXT: 21032c: br x17 @@ -71,11 +71,11 @@ // DISASM-NEXT: Disassembly of section .iplt: // DISASM-EMPTY: // DISASM-NEXT: <.iplt>: -// DISASM-NEXT: 210330: adrp x16, #131072 +// DISASM-NEXT: 210330: adrp x16, 0x230000 // DISASM-NEXT: 210334: ldr x17, [x16, #1128] // DISASM-NEXT: 210338: add x16, x16, #1128 // DISASM-NEXT: 21033c: br x17 -// DISASM-NEXT: 210340: adrp x16, #131072 +// DISASM-NEXT: 210340: adrp x16, 0x230000 // DISASM-NEXT: 210344: ldr x17, [x16, #1136] // DISASM-NEXT: 210348: add x16, x16, #1136 // DISASM-NEXT: 21034c: br x17 diff --git a/lld/test/ELF/aarch64-gnu-ifunc.s b/lld/test/ELF/aarch64-gnu-ifunc.s index e03651788b6a7..e0a51aa1c3c6f 100644 --- a/lld/test/ELF/aarch64-gnu-ifunc.s +++ b/lld/test/ELF/aarch64-gnu-ifunc.s @@ -115,11 +115,11 @@ // DISASM-NEXT: Disassembly of section .iplt: // DISASM-EMPTY: // DISASM-NEXT: <.iplt>: -// DISASM-NEXT: 2101a0: adrp x16, #65536 +// DISASM-NEXT: 2101a0: adrp x16, 0x220000 // DISASM-NEXT: 2101a4: ldr x17, [x16, #448] // DISASM-NEXT: 2101a8: add x16, x16, #448 // DISASM-NEXT: 2101ac: br x17 -// DISASM-NEXT: 2101b0: adrp x16, #65536 +// DISASM-NEXT: 2101b0: adrp x16, 0x220000 // DISASM-NEXT: 2101b4: ldr x17, [x16, #456] // DISASM-NEXT: 2101b8: add x16, x16, #456 // DISASM-NEXT: 2101bc: br x17 diff --git a/lld/test/ELF/aarch64-gnu-ifunc2.s b/lld/test/ELF/aarch64-gnu-ifunc2.s index 89b7625858c18..b70c38c299e09 100644 --- a/lld/test/ELF/aarch64-gnu-ifunc2.s +++ b/lld/test/ELF/aarch64-gnu-ifunc2.s @@ -12,7 +12,7 @@ # CHECK:
: # .got.plt - page(0x210174) = 0x220190 - 0x210000 = 0x10190 -# CHECK-NEXT: 210174: adrp x8, #0x10000 +# CHECK-NEXT: 210174: adrp x8, 0x220000 # CHECK-NEXT: 210178: ldr x8, [x8, #0x190] # CHECK-NEXT: 21017c: ret @@ -20,7 +20,7 @@ # CHECK-EMPTY: # CHECK-NEXT: <.iplt>: # .got.plt - page(0x210180) = 0x220190 - 0x210000 = 0x10190 -# CHECK-NEXT: 210180: adrp x16, #0x10000 +# CHECK-NEXT: 210180: adrp x16, 0x220000 # CHECK-NEXT: 210184: ldr x17, [x16, #0x190] # CHECK-NEXT: 210188: add x16, x16, #0x190 # CHECK-NEXT: 21018c: br x17 diff --git a/lld/test/ELF/aarch64-hi21-nc.s b/lld/test/ELF/aarch64-hi21-nc.s index 154f98c792b90..a70e1482b4629 100644 --- a/lld/test/ELF/aarch64-hi21-nc.s +++ b/lld/test/ELF/aarch64-hi21-nc.s @@ -4,5 +4,5 @@ // RUN: llvm-objdump -d %t | FileCheck %s foo = . + 0x1100000000000000 -// CHECK: adrp x0, #0 +// CHECK: adrp x0, 0x210000 adrp x0, :pg_hi21_nc:foo diff --git a/lld/test/ELF/aarch64-ifunc-bti.s b/lld/test/ELF/aarch64-ifunc-bti.s index ab6c6236d2a00..d05be1eab6a69 100644 --- a/lld/test/ELF/aarch64-ifunc-bti.s +++ b/lld/test/ELF/aarch64-ifunc-bti.s @@ -14,7 +14,7 @@ # CHECK: 0000000000010380 <.plt>: # CHECK-NEXT: 10380: bti c # CHECK-NEXT: stp x16, x30, [sp, #-16]! -# CHECK-NEXT: adrp x16, #131072 +# CHECK-NEXT: adrp x16, 0x30000 # CHECK-NEXT: ldr x17, [x16, #1288] # CHECK-NEXT: add x16, x16, #1288 # CHECK-NEXT: br x17 @@ -22,7 +22,7 @@ # CHECK-NEXT: nop # CHECK: 00000000000103a0 : # CHECK-NEXT: 103a0: bti c -# CHECK-NEXT: adrp x16, #131072 +# CHECK-NEXT: adrp x16, 0x30000 # CHECK-NEXT: ldr x17, [x16, #1296] # CHECK-NEXT: add x16, x16, #1296 # CHECK-NEXT: br x17 @@ -32,7 +32,7 @@ # CHECK-EMPTY: # CHECK-NEXT: 00000000000103c0 : # CHECK-NEXT: 103c0: bti c -# CHECK-NEXT: adrp x16, #131072 +# CHECK-NEXT: adrp x16, 0x30000 # CHECK-NEXT: ldr x17, [x16, #1304] # CHECK-NEXT: add x16, x16, #1304 # CHECK-NEXT: br x17 diff --git a/lld/test/ELF/aarch64-plt.s b/lld/test/ELF/aarch64-plt.s index a049fbef6eb5f..7d83a40dfc278 100644 --- a/lld/test/ELF/aarch64-plt.s +++ b/lld/test/ELF/aarch64-plt.s @@ -71,7 +71,7 @@ // DISASMDSO-NEXT: <.plt>: // DISASMDSO-NEXT: 10340: stp x16, x30, [sp, #-0x10]! // &(.got.plt[2]) = 0x30450 + 2 * 8 = 0x30460 -// DISASMDSO-NEXT: 10344: adrp x16, #0x20000 +// DISASMDSO-NEXT: 10344: adrp x16, 0x30000 // DISASMDSO-NEXT: 10348: ldr x17, [x16, #0x460] // DISASMDSO-NEXT: 1034c: add x16, x16, #0x460 // DISASMDSO-NEXT: 10350: br x17 @@ -83,7 +83,7 @@ // &.got.plt[foo] = 0x30468 // DISASMDSO-EMPTY: // DISASMDSO-NEXT: : -// DISASMDSO-NEXT: 10360: adrp x16, #0x20000 +// DISASMDSO-NEXT: 10360: adrp x16, 0x30000 // DISASMDSO-NEXT: 10364: ldr x17, [x16, #0x468] // DISASMDSO-NEXT: 10368: add x16, x16, #0x468 // DISASMDSO-NEXT: 1036c: br x17 @@ -92,7 +92,7 @@ // &.got.plt[foo] = 0x30470 // DISASMDSO-EMPTY: // DISASMDSO-NEXT: : -// DISASMDSO-NEXT: 10370: adrp x16, #0x20000 +// DISASMDSO-NEXT: 10370: adrp x16, 0x30000 // DISASMDSO-NEXT: 10374: ldr x17, [x16, #0x470] // DISASMDSO-NEXT: 10378: add x16, x16, #0x470 // DISASMDSO-NEXT: 1037c: br x17 @@ -101,7 +101,7 @@ // 0x30468 = 0x10000 + 131072 + 1128 // DISASMDSO-EMPTY: // DISASMDSO-NEXT: : -// DISASMDSO-NEXT: 10380: adrp x16, #0x20000 +// DISASMDSO-NEXT: 10380: adrp x16, 0x30000 // DISASMDSO-NEXT: 10384: ldr x17, [x16, #0x478] // DISASMDSO-NEXT: 10388: add x16, x16, #0x478 // DISASMDSO-NEXT: 1038c: br x17 @@ -163,7 +163,7 @@ // DISASMEXE-NEXT: <.plt>: // DISASMEXE-NEXT: 2102e0: stp x16, x30, [sp, #-0x10]! 
// &(.got.plt[2]) = 0x2303f0 + 2 * 8 = 0x230400 -// DISASMEXE-NEXT: 2102e4: adrp x16, #0x20000 +// DISASMEXE-NEXT: 2102e4: adrp x16, 0x230000 // DISASMEXE-NEXT: 2102e8: ldr x17, [x16, #0x400] // DISASMEXE-NEXT: 2102ec: add x16, x16, #0x400 // DISASMEXE-NEXT: 2102f0: br x17 @@ -174,7 +174,7 @@ // bar@plt // DISASMEXE-EMPTY: // DISASMEXE-NEXT: : -// DISASMEXE-NEXT: 210300: adrp x16, #0x20000 +// DISASMEXE-NEXT: 210300: adrp x16, 0x230000 // DISASMEXE-NEXT: 210304: ldr x17, [x16, #0x408] // DISASMEXE-NEXT: 210308: add x16, x16, #0x408 // DISASMEXE-NEXT: 21030c: br x17 @@ -182,7 +182,7 @@ // weak@plt // DISASMEXE-EMPTY: // DISASMEXE-NEXT: : -// DISASMEXE-NEXT: 210310: adrp x16, #0x20000 +// DISASMEXE-NEXT: 210310: adrp x16, 0x230000 // DISASMEXE-NEXT: 210314: ldr x17, [x16, #0x410] // DISASMEXE-NEXT: 210318: add x16, x16, #0x410 // DISASMEXE-NEXT: 21031c: br x17 diff --git a/lld/test/ELF/aarch64-relocs.s b/lld/test/ELF/aarch64-relocs.s index 9025a2413237b..089da40a60bcb 100644 --- a/lld/test/ELF/aarch64-relocs.s +++ b/lld/test/ELF/aarch64-relocs.s @@ -30,7 +30,7 @@ mystr: # CHECK: Disassembly of section .R_AARCH64_ADR_PREL_PG_H121: # CHECK-EMPTY: # CHECK-NEXT: <$x.2>: -# CHECK-NEXT: 210132: 01 00 00 90 adrp x1, #0 +# CHECK-NEXT: 210132: 01 00 00 90 adrp x1, 0x210000 .section .R_AARCH64_ADD_ABS_LO12_NC,"ax",@progbits add x0, x0, :lo12:.L.str diff --git a/lld/test/ELF/aarch64-thunk-pi.s b/lld/test/ELF/aarch64-thunk-pi.s index 50322c524046a..12a56bc287631 100644 --- a/lld/test/ELF/aarch64-thunk-pi.s +++ b/lld/test/ELF/aarch64-thunk-pi.s @@ -35,16 +35,16 @@ low_target2: // Expect range extension thunks for .text_low // adrp calculation is (PC + signed immediate) & (!0xfff) // CHECK: <__AArch64ADRPThunk_high_target>: -// CHECK-NEXT: ec: adrp x16, #0x10000000 +// CHECK-NEXT: ec: adrp x16, 0x10000000 // CHECK-NEXT: add x16, x16, #0x40 // CHECK-NEXT: br x16 // CHECK: <__AArch64ADRPThunk_high_target2>: -// CHECK-NEXT: f8: adrp x16, #0x10000000 +// CHECK-NEXT: f8: adrp x16, 0x10000000 // CHECK-NEXT: add x16, x16, #0x8 // CHECK-NEXT: br x16 /// Identical to the previous one, but for the target .text_high+8. // CHECK: <__AArch64ADRPThunk_>: -// CHECK-NEXT: 104: adrp x16, #0x10000000 +// CHECK-NEXT: 104: adrp x16, 0x10000000 // CHECK-NEXT: add x16, x16, #0x8 // CHECK-NEXT: br x16 @@ -74,7 +74,7 @@ high_target2: // Expect Thunk for .text.high // CHECK: <__AArch64ADRPThunk_low_target2>: -// CHECK-NEXT: 10000010: adrp x16, #-0x10000000 +// CHECK-NEXT: 10000010: adrp x16, 0x0 // CHECK-NEXT: add x16, x16, #0xe0 // CHECK-NEXT: br x16 @@ -82,7 +82,7 @@ high_target2: // CHECK-EMPTY: // CHECK-NEXT: <.plt>: // CHECK-NEXT: 10000020: stp x16, x30, [sp, #-0x10]! 
-// CHECK-NEXT: adrp x16, #0 +// CHECK-NEXT: adrp x16, 0x10000000 // CHECK-NEXT: ldr x17, [x16, #0x120] // CHECK-NEXT: add x16, x16, #0x120 // CHECK-NEXT: br x17 @@ -91,13 +91,13 @@ high_target2: // CHECK-NEXT: nop // CHECK-EMPTY: // CHECK-NEXT: : -// CHECK-NEXT: 10000040: adrp x16, #0x0 +// CHECK-NEXT: 10000040: adrp x16, 0x10000000 // CHECK-NEXT: ldr x17, [x16, #0x128] // CHECK-NEXT: add x16, x16, #0x128 // CHECK-NEXT: br x17 // CHECK-EMPTY: // CHECK-NEXT: : -// CHECK-NEXT: 10000050: adrp x16, #0x0 +// CHECK-NEXT: 10000050: adrp x16, 0x10000000 // CHECK-NEXT: ldr x17, [x16, #0x130] // CHECK-NEXT: add x16, x16, #0x130 // CHECK-NEXT: br x17 diff --git a/lld/test/ELF/aarch64-tls-gdie.s b/lld/test/ELF/aarch64-tls-gdie.s index 52fb563fc099f..ff64bf347516f 100644 --- a/lld/test/ELF/aarch64-tls-gdie.s +++ b/lld/test/ELF/aarch64-tls-gdie.s @@ -28,7 +28,7 @@ _start: // CHECK: <_start>: // CHECK-NEXT: 210238: nop -// CHECK-NEXT: 21023c: adrp x0, #65536 +// CHECK-NEXT: 21023c: adrp x0, 0x220000 // CHECK-NEXT: 210240: ldr x0, [x0, #768] // CHECK-NEXT: 210244: nop // CHECK-NEXT: 210248: nop diff --git a/lld/test/ELF/aarch64-tls-ie.s b/lld/test/ELF/aarch64-tls-ie.s index 24443cebe20ca..6d3e4e2b15373 100644 --- a/lld/test/ELF/aarch64-tls-ie.s +++ b/lld/test/ELF/aarch64-tls-ie.s @@ -34,9 +34,9 @@ ## Page(0x2200B8) - Page(0x210000) = 0x10000 = 65536 ## 0x2200B8 & 0xff8 = 0xB8 = 184 # CHECK: <_start>: -# CHECK-NEXT: 210278: adrp x0, #65536 +# CHECK-NEXT: 210278: adrp x0, 0x220000 # CHECK-NEXT: 21027c: ldr x0, [x0, #824] -# CHECK-NEXT: 210280: adrp x0, #65536 +# CHECK-NEXT: 210280: adrp x0, 0x220000 # CHECK-NEXT: 210284: ldr x0, [x0, #832] .globl _start diff --git a/lld/test/ELF/aarch64-tlsdesc.s b/lld/test/ELF/aarch64-tlsdesc.s index 1ae9152364b03..b7d3072d9eefb 100644 --- a/lld/test/ELF/aarch64-tlsdesc.s +++ b/lld/test/ELF/aarch64-tlsdesc.s @@ -15,7 +15,7 @@ // create target specific dynamic TLSDESC relocation where addend is // the symbol VMA in tls block. -// CHECK: 10298: adrp x0, #65536 +// CHECK: 10298: adrp x0, 0x20000 // CHECK-NEXT: 1029c: ldr x1, [x0, #856] // CHECK-NEXT: 102a0: add x0, x0, #856 // CHECK-NEXT: 102a4: blr x1 @@ -26,7 +26,7 @@ .tlsdesccall a blr x1 -// CHECK: 102a8: adrp x0, #65536 +// CHECK: 102a8: adrp x0, 0x20000 // CHECK-NEXT: 102ac: ldr x1, [x0, #872] // CHECK-NEXT: 102b0: add x0, x0, #872 // CHECK-NEXT: 102b4: blr x1 @@ -37,7 +37,7 @@ .tlsdesccall a blr x1 -// CHECK: 102b8: adrp x0, #65536 +// CHECK: 102b8: adrp x0, 0x20000 // CHECK-NEXT: 102bc: ldr x1, [x0, #888] // CHECK-NEXT: 102c0: add x0, x0, #888 // CHECK-NEXT: 102c4: blr x1 diff --git a/lld/test/ELF/aarch64-tstbr14-reloc.s b/lld/test/ELF/aarch64-tstbr14-reloc.s index a6fc4f9f0d0f1..5e259755beb86 100644 --- a/lld/test/ELF/aarch64-tstbr14-reloc.s +++ b/lld/test/ELF/aarch64-tstbr14-reloc.s @@ -66,7 +66,7 @@ #DSO-EMPTY: #DSO-NEXT: <.plt>: #DSO-NEXT: 10330: stp x16, x30, [sp, #-16]! 
-#DSO-NEXT: 10334: adrp x16, #131072 +#DSO-NEXT: 10334: adrp x16, 0x30000 #DSO-NEXT: 10338: ldr x17, [x16, #1072] #DSO-NEXT: 1033c: add x16, x16, #1072 #DSO-NEXT: 10340: br x17 @@ -75,13 +75,13 @@ #DSO-NEXT: 1034c: nop #DSO-EMPTY: #DSO-NEXT: <_foo@plt>: -#DSO-NEXT: 10350: adrp x16, #131072 +#DSO-NEXT: 10350: adrp x16, 0x30000 #DSO-NEXT: 10354: ldr x17, [x16, #1080] #DSO-NEXT: 10358: add x16, x16, #1080 #DSO-NEXT: 1035c: br x17 #DSO-EMPTY: #DSO-NEXT: <_bar@plt>: -#DSO-NEXT: 10360: adrp x16, #131072 +#DSO-NEXT: 10360: adrp x16, 0x30000 #DSO-NEXT: 10364: ldr x17, [x16, #1088] #DSO-NEXT: 10368: add x16, x16, #1088 #DSO-NEXT: 1036c: br x17 diff --git a/lld/test/ELF/aarch64-undefined-weak.s b/lld/test/ELF/aarch64-undefined-weak.s index 9a4b7c23aa7a1..cd5abf3a22ea6 100644 --- a/lld/test/ELF/aarch64-undefined-weak.s +++ b/lld/test/ELF/aarch64-undefined-weak.s @@ -44,7 +44,7 @@ _start: // CHECK-NEXT: 10010128: b.eq 0x1001012c // CHECK-NEXT: 1001012c: cbz x1, 0x10010130 // CHECK-NEXT: 10010130: adr x0, #0 -// CHECK-NEXT: 10010134: adrp x0, #0 +// CHECK-NEXT: 10010134: adrp x0, 0x10010000 // CHECK-NEXT: 10010138: ldr x8, 0x10010138 // CHECK: 1001013c: 00 00 00 00 .word 0x00000000 // CHECK-NEXT: 10010140: 00 00 00 00 .word 0x00000000 diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 8e01a8cf7beb9..2756e4dc8aa43 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -263,6 +263,7 @@ def adrplabel : Operand { let EncoderMethod = "getAdrLabelOpValue"; let PrintMethod = "printAdrpLabel"; let ParserMatchClass = AdrpOperand; + let OperandType = "OPERAND_PCREL"; } def AdrOperand : AsmOperandClass { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 38474d31460dd..a372298740ea8 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1377,7 +1377,8 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address, } } -void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, +void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address, + unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1385,7 +1386,11 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, // If the label has already been resolved to an immediate offset (say, when // we're running the disassembler), just print the immediate. 
if (Op.isImm()) { - O << "#" << formatImm(Op.getImm() * (1 << 12)); + const int64_t Offset = Op.getImm() << 12; + if (PrintBranchImmAsAddress) + O << formatHex((Address & -4096) + Offset); + else + O << "#" << Offset; return; } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index 20c4e4c750ec6..1a0342b783a2b 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -156,7 +156,7 @@ class AArch64InstPrinter : public MCInstPrinter { void printVectorIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, unsigned OpNum, + void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printBarrierOption(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/test/MC/AArch64/adr.s b/llvm/test/MC/AArch64/adr.s index 57b3a5b03096b..c6abd5d539e5e 100644 --- a/llvm/test/MC/AArch64/adr.s +++ b/llvm/test/MC/AArch64/adr.s @@ -19,15 +19,15 @@ adr x5, (0xffffffff000f1000 - 0xffffffff00000000 + Symbol) adr x6, Symbol + (0xffffffff000f1000 - 0xffffffff00000000) -// CHECK-NEXT: adrp x0, #0 +// CHECK-NEXT: adrp x0, 0x0 // CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 Symbol -// CHECK-NEXT: adrp x2, #0 +// CHECK-NEXT: adrp x2, 0x0 // CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 Symbol -// CHECK-NEXT: adrp x3, #0 +// CHECK-NEXT: adrp x3, 0x0 // CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 Symbol+0xf1000 -// CHECK-NEXT: adrp x4, #0 +// CHECK-NEXT: adrp x4, 0x0 // CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 Symbol+0xf1000 -// CHECK-NEXT: adrp x5, #0 +// CHECK-NEXT: adrp x5, 0x0 // CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 Symbol+0xf1000 adrp x0, Symbol diff --git a/llvm/test/MC/AArch64/coff-relocations.s b/llvm/test/MC/AArch64/coff-relocations.s index c37b2bb7c3cc0..54706fd897e63 100644 --- a/llvm/test/MC/AArch64/coff-relocations.s +++ b/llvm/test/MC/AArch64/coff-relocations.s @@ -89,7 +89,7 @@ tbz x0, #0, target ; CHECK: } ; CHECK: ] -; DISASM: 30: 20 1a 09 b0 adrp x0, #305418240 +; DISASM: 30: 20 1a 09 b0 adrp x0, 0x12345000 ; DISASM: 34: 00 14 0d 91 add x0, x0, #837 ; DISASM: 38: 00 14 4d 39 ldrb w0, [x0, #837] ; DISASM: 3c: 00 a4 41 f9 ldr x0, [x0, #840] diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/elf-aarch64-mapping-symbols.test b/llvm/test/tools/llvm-objdump/ELF/AArch64/elf-aarch64-mapping-symbols.test index 1d988c6ebe5f0..9b628d7b0ed5f 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AArch64/elf-aarch64-mapping-symbols.test +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/elf-aarch64-mapping-symbols.test @@ -24,7 +24,7 @@ mystr: # CHECK: 10: 0a 00 .short 0x000a # CHECK: Disassembly of section .myothersection: # CHECK: <$x.2>: -# CHECK: 0: 01 00 00 90 adrp x1, #0 +# CHECK: 0: 01 00 00 90 adrp x1, 0x0 # CHECK: : # CHECK: 4: 62 6c 61 68 .word # CHECK: 8: 00 .byte 0x01 diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/pcrel-address.yaml b/llvm/test/tools/llvm-objdump/ELF/AArch64/pcrel-address.yaml new file mode 100644 index 0000000000000..48b2f2c208890 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/pcrel-address.yaml @@ -0,0 +1,26 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --no-show-raw-insn --no-leading-addr | FileCheck %s + +# CHECK-LABEL: <_start>: +# CHECK-NEXT: adrp x2, 0x220000 <_start+0x80> + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: 
ET_EXEC + Machine: EM_AARCH64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Address: 0x200100 + Flags: [SHF_ALLOC, SHF_EXECINSTR] + Content: '02010090' + - Name: .data + Type: SHT_PROGBITS + Flags: [SHF_ALLOC, SHF_WRITE] + Address: 0x220000 +Symbols: + - Name: _start + Section: .text + Value: 0x200100 From aaaf0ec72b062dea09a277e5b9e6bda0a3da55c9 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 16 Dec 2020 12:11:57 -0500 Subject: [PATCH 27/39] [VectorCombine] loosen alignment constraint for load transform As discussed in D93229, we only need a minimal alignment constraint when querying whether a hypothetical vector load is safe. We still pass/use the potentially stronger alignment attribute when checking costs and creating the new load. There's already a test that changes with the minimum code change, so splitting this off as a preliminary commit independent of any gep/offset enhancements. Differential Revision: https://reviews.llvm.org/D93397 --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 7 +++++-- llvm/test/Transforms/VectorCombine/X86/load.ll | 8 +++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 89b60045ce910..086169c55c8df 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -134,13 +134,16 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { return false; // Check safety of replacing the scalar load with a larger vector load. + // We use minimal alignment (maximum flexibility) because we only care about + // the dereferenceable region. When calculating cost and creating a new op, + // we may use a larger value based on alignment attributes. unsigned MinVecNumElts = MinVectorSize / ScalarSize; auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); - Align Alignment = Load->getAlign(); - if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Alignment, DL, Load, &DT)) + if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) return false; // Original pattern: insertelt undef, load [free casts of] PtrOp, 0 + Align Alignment = Load->getAlign(); Type *LoadTy = Load->getType(); int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS); APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index d28d287616329..f5a962dd7cfec 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -403,12 +403,14 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceab ret <4 x float> %r } -; Negative test? - pointer is not as aligned as load. +; Pointer is not as aligned as load, but that's ok. +; The new load uses the larger alignment value. 
define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v4f32_align( -; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, float* %p, align 4 From 4bd9e62422d1e3c63e01ce9f3523d5dcc59d7215 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 16 Dec 2020 17:31:22 +0000 Subject: [PATCH 28/39] Remove spurious MLIRLLVMConversionsIncGen dependency from LLVM Dialect (NFC) Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D93335 --- mlir/lib/Dialect/LLVMIR/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt index cd73e7dcfc69b..91fb02db96019 100644 --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -10,7 +10,6 @@ add_mlir_dialect_library(MLIRLLVMIR DEPENDS MLIRLLVMOpsIncGen - MLIRLLVMConversionsIncGen MLIROpenMPOpsIncGen intrinsics_gen From 6eff12788ee8d3f85f6e57809e757ca3250813d8 Mon Sep 17 00:00:00 2001 From: Bardia Mahjour Date: Wed, 16 Dec 2020 12:34:38 -0500 Subject: [PATCH 29/39] [DDG] Data Dependence Graph - DOT printer - recommit This is being recommitted to try and address the MSVC complaint. This patch implements a DDG printer pass that generates a graph in the DOT description language, providing a more visually appealing representation of the DDG. Similar to the CFG DOT printer, this functionality is provided under an option called -dot-ddg and can be generated in a less verbose mode under -dot-ddg-only option. 
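
A rough usage sketch (the input file and loop names here are placeholders, and the exact
new-pass-manager pipeline spelling may differ):

  opt -passes='loop(dot-ddg)' -disable-output input.ll
  opt -passes='loop(dot-ddg)' -dot-ddg-only -disable-output input.ll

Each invocation is expected to write one dot file per loop, named
<prefix>.<graph-name>.dot, where the prefix defaults to "ddg" and can be changed with
-dot-ddg-filename-prefix.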
Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D90159 --- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 4 +- llvm/include/llvm/Analysis/CFGPrinter.h | 2 +- llvm/include/llvm/Analysis/DDG.h | 26 ++++ llvm/include/llvm/Analysis/DDGPrinter.h | 91 +++++++++++ llvm/include/llvm/Support/DOTGraphTraits.h | 3 +- llvm/include/llvm/Support/GraphWriter.h | 8 +- llvm/lib/Analysis/CFGPrinter.cpp | 3 +- llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/CallPrinter.cpp | 3 +- llvm/lib/Analysis/DDGPrinter.cpp | 150 +++++++++++++++++++ llvm/lib/CodeGen/MachineScheduler.cpp | 2 +- llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 2 +- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + 14 files changed, 284 insertions(+), 13 deletions(-) create mode 100644 llvm/include/llvm/Analysis/DDGPrinter.h create mode 100644 llvm/lib/Analysis/DDGPrinter.cpp diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 409741cdb6e41..f285b652c1754 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3149,7 +3149,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { if (Stop(N)) return true; - if (N->succ_size() != 1 || !isNodeHidden(N->getFirstSucc())) + if (N->succ_size() != 1 || !isNodeHidden(N->getFirstSucc(), nullptr)) break; PostCallback(N); @@ -3158,7 +3158,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { return false; } - static bool isNodeHidden(const ExplodedNode *N) { + static bool isNodeHidden(const ExplodedNode *N, const ExplodedGraph *G) { return N->isTrivial(); } diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index bc6a19f2e2b94..53700798b6b30 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -295,7 +295,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { " fillcolor=\"" + Color + "70\""; return Attrs; } - bool isNodeHidden(const BasicBlock *Node); + bool isNodeHidden(const BasicBlock *Node, const DOTFuncInfo *CFGInfo); void computeHiddenNodes(const Function *F); }; } // End llvm namespace diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index 9e2b7907eaec8..8d225c155cd45 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -290,6 +290,12 @@ template class DependenceGraphInfo { bool getDependencies(const NodeType &Src, const NodeType &Dst, DependenceList &Deps) const; + /// Return a string representing the type of dependence that the dependence + /// analysis identified between the two given nodes. This function assumes + /// that there is a memory dependence between the given two nodes. + const std::string getDependenceString(const NodeType &Src, + const NodeType &Dst) const; + protected: // Name of the graph. 
std::string Name; @@ -463,6 +469,26 @@ bool DependenceGraphInfo::getDependencies( return !Deps.empty(); } +template +const std::string +DependenceGraphInfo::getDependenceString(const NodeType &Src, + const NodeType &Dst) const { + std::string Str; + raw_string_ostream OS(Str); + DependenceList Deps; + if (!getDependencies(Src, Dst, Deps)) + return OS.str(); + interleaveComma(Deps, OS, [&](const std::unique_ptr &D) { + D->dump(OS); + // Remove the extra new-line character printed by the dump + // method + if (OS.str().back() == '\n') + OS.str().pop_back(); + }); + + return OS.str(); +} + //===--------------------------------------------------------------------===// // GraphTraits specializations for the DDG //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Analysis/DDGPrinter.h b/llvm/include/llvm/Analysis/DDGPrinter.h new file mode 100644 index 0000000000000..4477b387fe507 --- /dev/null +++ b/llvm/include/llvm/Analysis/DDGPrinter.h @@ -0,0 +1,91 @@ +//===- llvm/Analysis/DDGPrinter.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// This file defines the DOT printer for the Data-Dependence Graph (DDG). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DDGPRINTER_H +#define LLVM_ANALYSIS_DDGPRINTER_H + +#include "llvm/Analysis/DDG.h" +#include "llvm/Pass.h" +#include "llvm/Support/DOTGraphTraits.h" + +namespace llvm { + +//===--------------------------------------------------------------------===// +// Implementation of DDG DOT Printer for a loop. +//===--------------------------------------------------------------------===// +class DDGDotPrinterPass : public PassInfoMixin { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +//===--------------------------------------------------------------------===// +// Specialization of DOTGraphTraits. +//===--------------------------------------------------------------------===// +template <> +struct DOTGraphTraits + : public DefaultDOTGraphTraits { + + DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} + + /// Generate a title for the graph in DOT format + std::string getGraphName(const DataDependenceGraph *G) { + assert(G && "expected a valid pointer to the graph."); + return "DDG for '" + std::string(G->getName()) + "'"; + } + + /// Print a DDG node either in concise form (-ddg-dot-only) or + /// verbose mode (-ddg-dot). + std::string getNodeLabel(const DDGNode *Node, + const DataDependenceGraph *Graph); + + /// Print attributes of an edge in the DDG graph. If the edge + /// is a MemoryDependence edge, then detailed dependence info + /// available from DependenceAnalysis is displayed. + std::string + getEdgeAttributes(const DDGNode *Node, + GraphTraits::ChildIteratorType I, + const DataDependenceGraph *G); + + /// Do not print nodes that are part of a pi-block separately. They + /// will be printed when their containing pi-block is being printed. 
+ bool isNodeHidden(const DDGNode *Node, const DataDependenceGraph *G); + +private: + /// Print a DDG node in concise form. + static std::string getSimpleNodeLabel(const DDGNode *Node, + const DataDependenceGraph *G); + + /// Print a DDG node with more information including containing instructions + /// and detailed information about the dependence edges. + static std::string getVerboseNodeLabel(const DDGNode *Node, + const DataDependenceGraph *G); + + /// Print a DDG edge in concise form. + static std::string getSimpleEdgeAttributes(const DDGNode *Src, + const DDGEdge *Edge, + const DataDependenceGraph *G); + + /// Print a DDG edge with more information including detailed information + /// about the dependence edges. + static std::string getVerboseEdgeAttributes(const DDGNode *Src, + const DDGEdge *Edge, + const DataDependenceGraph *G); +}; + +using DDGDotGraphTraits = DOTGraphTraits; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DDGPRINTER_H diff --git a/llvm/include/llvm/Support/DOTGraphTraits.h b/llvm/include/llvm/Support/DOTGraphTraits.h index ec01b7d9576ae..a73538fa14624 100644 --- a/llvm/include/llvm/Support/DOTGraphTraits.h +++ b/llvm/include/llvm/Support/DOTGraphTraits.h @@ -60,7 +60,8 @@ struct DefaultDOTGraphTraits { /// isNodeHidden - If the function returns true, the given node is not /// displayed in the graph. - static bool isNodeHidden(const void *) { + template + static bool isNodeHidden(const void *, const GraphType &) { return false; } diff --git a/llvm/include/llvm/Support/GraphWriter.h b/llvm/include/llvm/Support/GraphWriter.h index f9241b1e8081b..1f60fbc351265 100644 --- a/llvm/include/llvm/Support/GraphWriter.h +++ b/llvm/include/llvm/Support/GraphWriter.h @@ -158,9 +158,7 @@ class GraphWriter { writeNode(Node); } - bool isNodeHidden(NodeRef Node) { - return DTraits.isNodeHidden(Node); - } + bool isNodeHidden(NodeRef Node) { return DTraits.isNodeHidden(Node, G); } void writeNode(NodeRef Node) { std::string NodeAttributes = DTraits.getNodeAttributes(Node, G); @@ -228,10 +226,10 @@ class GraphWriter { child_iterator EI = GTraits::child_begin(Node); child_iterator EE = GTraits::child_end(Node); for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) - if (!DTraits.isNodeHidden(*EI)) + if (!DTraits.isNodeHidden(*EI, G)) writeEdge(Node, i, EI); for (; EI != EE; ++EI) - if (!DTraits.isNodeHidden(*EI)) + if (!DTraits.isNodeHidden(*EI, G)) writeEdge(Node, 64, EI); } diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp index cf4afc8cfd9cb..582e61b33f49d 100644 --- a/llvm/lib/Analysis/CFGPrinter.cpp +++ b/llvm/lib/Analysis/CFGPrinter.cpp @@ -289,7 +289,8 @@ void DOTGraphTraits::computeHiddenNodes(const Function *F) { evaluateBB); } -bool DOTGraphTraits::isNodeHidden(const BasicBlock *Node) { +bool DOTGraphTraits::isNodeHidden(const BasicBlock *Node, + const DOTFuncInfo *CFGInfo) { // If both restricting flags are false, all nodes are displayed. 
if (!HideUnreachablePaths && !HideDeoptimizePaths) return false; diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index c7e20b2e90dd2..b89b6b3c4c647 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_component_library(LLVMAnalysis CodeMetrics.cpp ConstantFolding.cpp DDG.cpp + DDGPrinter.cpp ConstraintSystem.cpp Delinearization.cpp DemandedBits.cpp diff --git a/llvm/lib/Analysis/CallPrinter.cpp b/llvm/lib/Analysis/CallPrinter.cpp index bb447411ec472..c3922d5560238 100644 --- a/llvm/lib/Analysis/CallPrinter.cpp +++ b/llvm/lib/Analysis/CallPrinter.cpp @@ -143,7 +143,8 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { std::string(CGInfo->getModule()->getModuleIdentifier()); } - static bool isNodeHidden(const CallGraphNode *Node) { + static bool isNodeHidden(const CallGraphNode *Node, + const CallGraphDOTInfo *CGInfo) { if (CallMultiGraph || Node->getFunction()) return false; return true; diff --git a/llvm/lib/Analysis/DDGPrinter.cpp b/llvm/lib/Analysis/DDGPrinter.cpp new file mode 100644 index 0000000000000..51bd54809857d --- /dev/null +++ b/llvm/lib/Analysis/DDGPrinter.cpp @@ -0,0 +1,150 @@ +//===- DDGPrinter.cpp - DOT printer for the data dependence graph ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// This file defines the `-dot-ddg` analysis pass, which emits DDG in DOT format +// in a file named `ddg..dot` for each loop in a function. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DDGPrinter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +static cl::opt DotOnly("dot-ddg-only", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("simple ddg dot graph")); +static cl::opt DDGDotFilenamePrefix( + "dot-ddg-filename-prefix", cl::init("ddg"), cl::Hidden, + cl::desc("The prefix used for the DDG dot file names.")); + +static void writeDDGToDotFile(DataDependenceGraph &G, bool DOnly = false); + +//===--------------------------------------------------------------------===// +// Implementation of DDG DOT Printer for a loop +//===--------------------------------------------------------------------===// +PreservedAnalyses DDGDotPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + writeDDGToDotFile(*AM.getResult(L, AR), DotOnly); + return PreservedAnalyses::all(); +} + +static void writeDDGToDotFile(DataDependenceGraph &G, bool DOnly) { + std::string Filename = + Twine(DDGDotFilenamePrefix + "." 
+ G.getName() + ".dot").str(); + errs() << "Writing '" << Filename << "'..."; + + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + + if (!EC) + // We only provide the constant verson of the DOTGraphTrait specialization, + // hence the conversion to const pointer + WriteGraph(File, (const DataDependenceGraph *)&G, DOnly); + else + errs() << " error opening file for writing!"; + errs() << "\n"; +} + +//===--------------------------------------------------------------------===// +// DDG DOT Printer Implementation +//===--------------------------------------------------------------------===// +std::string DDGDotGraphTraits::getNodeLabel(const DDGNode *Node, + const DataDependenceGraph *Graph) { + if (isSimple()) + return getSimpleNodeLabel(Node, Graph); + else + return getVerboseNodeLabel(Node, Graph); +} + +std::string DDGDotGraphTraits::getEdgeAttributes( + const DDGNode *Node, GraphTraits::ChildIteratorType I, + const DataDependenceGraph *G) { + const DDGEdge *E = static_cast(*I.getCurrent()); + if (isSimple()) + return getSimpleEdgeAttributes(Node, E, G); + else + return getVerboseEdgeAttributes(Node, E, G); +} + +bool DDGDotGraphTraits::isNodeHidden(const DDGNode *Node, + const DataDependenceGraph *Graph) { + if (isSimple() && isa(Node)) + return true; + assert(Graph && "expected a valid graph pointer"); + return Graph->getPiBlock(*Node) != nullptr; +} + +std::string +DDGDotGraphTraits::getSimpleNodeLabel(const DDGNode *Node, + const DataDependenceGraph *G) { + std::string Str; + raw_string_ostream OS(Str); + if (isa(Node)) + for (auto *II : static_cast(Node)->getInstructions()) + OS << *II << "\n"; + else if (isa(Node)) + OS << "pi-block\nwith\n" + << cast(Node)->getNodes().size() << " nodes\n"; + else if (isa(Node)) + OS << "root\n"; + else + llvm_unreachable("Unimplemented type of node"); + return OS.str(); +} + +std::string +DDGDotGraphTraits::getVerboseNodeLabel(const DDGNode *Node, + const DataDependenceGraph *G) { + std::string Str; + raw_string_ostream OS(Str); + OS << "getKind() << ">\n"; + if (isa(Node)) + for (auto *II : static_cast(Node)->getInstructions()) + OS << *II << "\n"; + else if (isa(Node)) { + OS << "--- start of nodes in pi-block ---\n"; + unsigned Count = 0; + const auto &PNodes = cast(Node)->getNodes(); + for (auto *PN : PNodes) { + OS << getVerboseNodeLabel(PN, G); + if (++Count != PNodes.size()) + OS << "\n"; + } + OS << "--- end of nodes in pi-block ---\n"; + } else if (isa(Node)) + OS << "root\n"; + else + llvm_unreachable("Unimplemented type of node"); + return OS.str(); +} + +std::string DDGDotGraphTraits::getSimpleEdgeAttributes( + const DDGNode *Src, const DDGEdge *Edge, const DataDependenceGraph *G) { + std::string Str; + raw_string_ostream OS(Str); + DDGEdge::EdgeKind Kind = Edge->getKind(); + OS << "label=\"[" << Kind << "]\""; + return OS.str(); +} + +std::string DDGDotGraphTraits::getVerboseEdgeAttributes( + const DDGNode *Src, const DDGEdge *Edge, const DataDependenceGraph *G) { + std::string Str; + raw_string_ostream OS(Str); + DDGEdge::EdgeKind Kind = Edge->getKind(); + OS << "label=\"["; + if (Kind == DDGEdge::EdgeKind::MemoryDependence) + OS << G->getDependenceString(*Src, Edge->getTargetNode()); + else + OS << Kind; + OS << "]\""; + return OS.str(); +} diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 5843f84b2a91b..8d51bb26103ae 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3836,7 +3836,7 @@ struct DOTGraphTraits : 
public DefaultDOTGraphTraits { return true; } - static bool isNodeHidden(const SUnit *Node) { + static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { if (ViewMISchedCutoff == 0) return false; return (Node->Preds.size() > ViewMISchedCutoff diff --git a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index a113c30f851bd..05b2a3764ccac 100644 --- a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -35,7 +35,7 @@ namespace llvm { return true; } - static bool isNodeHidden(const SUnit *Node) { + static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { return (Node->NumPreds > 10 || Node->NumSuccs > 10); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d11725d7507c8..a7ef8e3b8add6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -29,6 +29,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DDG.h" +#include "llvm/Analysis/DDGPrinter.h" #include "llvm/Analysis/Delinearization.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DependenceAnalysis.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index ffd91bfdf8acd..f971027299d48 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -384,6 +384,7 @@ LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) #define LOOP_PASS(NAME, CREATE_PASS) #endif LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass()) +LOOP_PASS("dot-ddg", DDGDotPrinterPass()) LOOP_PASS("invalidate", InvalidateAllAnalysesPass()) LOOP_PASS("licm", LICMPass()) LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) From d5700fdf10459dc2cd822fcec0e04d438376f74b Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 16 Dec 2020 17:38:06 +0000 Subject: [PATCH 30/39] [gn build] Port 6eff12788ee --- llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 8f86e7fdddcc3..3711c0c1803b6 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -38,6 +38,7 @@ static_library("Analysis") { "ConstraintSystem.cpp", "CostModel.cpp", "DDG.cpp", + "DDGPrinter.cpp", "Delinearization.cpp", "DemandedBits.cpp", "DependenceAnalysis.cpp", From a79b26db0e96b6f6dd7888053ea300cfc2feb5a8 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 15 Dec 2020 21:15:28 +0100 Subject: [PATCH 31/39] [mlir] Fix for gpu-async-region pass. - the !gpu.async.token is the second result of 'gpu.alloc async', not the first. - async.execute construction takes operand types not yet wrapped in !async.value. 
- fix typo Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D93156 --- .../GPU/Transforms/AsyncRegionRewriter.cpp | 18 ++++++++---- mlir/test/Dialect/GPU/async-region.mlir | 29 ++++++++++++++++++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp index eaa777c380604..c8378ae8977a3 100644 --- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp @@ -85,18 +85,19 @@ struct GpuAsyncRegionPass::ThreadTokenCallback { asyncOp.addAsyncDependency(currentToken); // Clone the op to return a token in addition to the other results. - SmallVector resultTypes = {tokenType}; + SmallVector resultTypes; resultTypes.reserve(1 + op->getNumResults()); copy(op->getResultTypes(), std::back_inserter(resultTypes)); + resultTypes.push_back(tokenType); auto *newOp = Operation::create(op->getLoc(), op->getName(), resultTypes, op->getOperands(), op->getMutableAttrDict(), op->getSuccessors()); // Replace the op with the async clone. auto results = newOp->getResults(); - currentToken = results.front(); + currentToken = results.back(); builder.insert(newOp); - op->replaceAllUsesWith(results.drop_front()); + op->replaceAllUsesWith(results.drop_back()); op->erase(); return success(); @@ -165,7 +166,14 @@ struct GpuAsyncRegionPass::DeferWaitCallback { // Construct new result type list with `count` additional types. SmallVector resultTypes; resultTypes.reserve(numResults); - copy(executeOp.getResultTypes(), std::back_inserter(resultTypes)); + transform(executeOp.getResultTypes(), std::back_inserter(resultTypes), + [](Type type) { + // Extract value type from !async.value. + if (auto valueType = type.dyn_cast()) + return valueType.getValueType(); + assert(type.isa() && "expected token type"); + return type; + }); OpBuilder builder(executeOp); auto tokenType = builder.getType(); resultTypes.resize(numResults, tokenType); @@ -266,7 +274,7 @@ void GpuAsyncRegionPass::runOnFunction() { .wasInterrupted()) return signalPassFailure(); - // Collect gpu.wait ops that we can move out of gpu.execute regions. + // Collect gpu.wait ops that we can move out of async.execute regions. 
getFunction().getRegion().walk(DeferWaitCallback()); } diff --git a/mlir/test/Dialect/GPU/async-region.mlir b/mlir/test/Dialect/GPU/async-region.mlir index 2fc58cf02a09a..216ccceda1f0e 100644 --- a/mlir/test/Dialect/GPU/async-region.mlir +++ b/mlir/test/Dialect/GPU/async-region.mlir @@ -18,7 +18,11 @@ module attributes {gpu.container_module} { // CHECK: %[[t2:.*]] = gpu.launch_func async [%[[t1]]] gpu.launch_func @kernels::@kernel blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) - // CHECK: gpu.wait [%[[t2]]] + // CHECK: %[[m:.*]], %[[t3:.*]] = gpu.alloc async [%[[t2]]] () + %0 = gpu.alloc() : memref<7xf32> + // CHECK: %[[t4:.*]] = gpu.dealloc async [%[[t3]]] %[[m]] + gpu.dealloc %0 : memref<7xf32> + // CHECK: gpu.wait [%[[t4]]] // CHECK: call @foo call @foo() : () -> () return @@ -98,4 +102,27 @@ module attributes {gpu.container_module} { async.await %a1 : !async.token return } + + // CHECK-LABEL:func @async_execute_with_result(%{{.*}}: index) + func @async_execute_with_result(%sz : index) -> index { + // CHECK: %[[a0:.*]], %[[f0:.*]]:2 = async.execute + // CHECK-SAME: -> (!async.value, !async.value) + %a0, %f0 = async.execute -> !async.value { + // CHECK: %[[t:.*]] = gpu.launch_func async + gpu.launch_func @kernels::@kernel + blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) + // CHECK-NOT: gpu.wait + // CHECK: async.yield {{.*}}, %[[t]] : index, !gpu.async.token + async.yield %sz : index + } + + // CHECK: async.await %[[a0]] : !async.token + // CHECK: %[[t:.*]] = async.await %[[f0]]#1 : !async.value + // CHECK: gpu.wait [%[[t]]] + async.await %a0 : !async.token + // CHECK: %[[x:.*]] = async.await %[[f0]]#0 : !async.value + %x = async.await %f0 : !async.value + // CHECK: return %[[x]] : index + return %x : index + } } From 0f10a26dd4c7542a6e46e1d44fd8d816cb634e26 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 16 Dec 2020 10:09:58 -0800 Subject: [PATCH 32/39] [Go] Fix bindings/go/llvm/IRBindings.cpp --- llvm/bindings/go/llvm/IRBindings.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/bindings/go/llvm/IRBindings.cpp b/llvm/bindings/go/llvm/IRBindings.cpp index 5ee841c5fa9b2..1831d33540ca2 100644 --- a/llvm/bindings/go/llvm/IRBindings.cpp +++ b/llvm/bindings/go/llvm/IRBindings.cpp @@ -56,9 +56,9 @@ void LLVMGoSetCurrentDebugLocation(LLVMBuilderRef Bref, unsigned Line, if (!Scope) unwrap(Bref)->SetCurrentDebugLocation(DebugLoc()); else - unwrap(Bref)->SetCurrentDebugLocation( - DILocation::get(Scope->getContext(), Line, Col, unwrap(Scope), - InlinedAt ? unwrap(InlinedAt) : nullptr)); + unwrap(Bref)->SetCurrentDebugLocation(DILocation::get( + unwrap(Scope)->getContext(), Line, Col, unwrap(Scope), + InlinedAt ? unwrap(InlinedAt) : nullptr)); } LLVMDebugLocMetadata LLVMGoGetCurrentDebugLocation(LLVMBuilderRef Bref) { From 70bd75426e5c06b36ab19c3650ed6bc157f8ce13 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 16 Dec 2020 16:51:14 +0000 Subject: [PATCH 33/39] [SimplifyCFG] Precommit test for preserving !annotation. 
--- .../Transforms/SimplifyCFG/annotations.ll | 145 ++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/annotations.ll diff --git a/llvm/test/Transforms/SimplifyCFG/annotations.ll b/llvm/test/Transforms/SimplifyCFG/annotations.ll new file mode 100644 index 0000000000000..5e39107e1c89a --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/annotations.ll @@ -0,0 +1,145 @@ +; RUN: opt -simplifycfg -S %s | FileCheck --match-full-lines %s + +; The branch in %cont has !annotation metadata. Make sure generated AND +; has !annotation metadata. +define i32 @test_preserve_and(i8* %a, i8* %b, i8* %c, i8* %d) { +; CHECK-LABEL: define {{.*}} @test_preserve_and({{.*}} +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ult i8* [[A:%.*]], [[B:%.*]], !annotation !0 +; CHECK-NEXT: [[C_2:%.*]] = icmp uge i8* [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[C_1]], [[C_2]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[CONT1:%.*]], label [[TRAP:%.*]], !annotation !0 +; CHECK: trap: ; preds = %entry +; CHECK-NEXT: call void @fn1() +; CHECK-NEXT: unreachable +; CHECK: cont1: ; preds = %entry +; CHECK-NEXT: call void @fn2() +; CHECK-NEXT: ret i32 0 +; +entry: + %c.1 = icmp ult i8* %a, %b, !annotation !0 + br i1 %c.1, label %cont, label %trap, !annotation !0 + +cont: ; preds = %entry + %c.2 = icmp uge i8* %c, %d, !annotation !0 + br i1 %c.2, label %cont1, label %trap, !annotation !0 + +trap: ; preds = %cont, %entry + call void @fn1() + unreachable + +cont1: ; preds = %cont + call void @fn2() + ret i32 0 +} + +; The branch in %cont has !annotation metadata. Make sure generated OR +; has !annotation metadata. +define i32 @test_preserve_or(i8* %a, i8* %b, i8* %c, i8* %d) { +; CHECK-LABEL: define {{.*}} @test_preserve_or({{.*}} +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp uge i8* [[A:%.*]], [[B:%.*]], !annotation !0 +; CHECK-NEXT: [[C_2:%.*]] = icmp uge i8* [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[C_1]], [[C_2]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[TRAP:%.*]], label [[CONT1:%.*]], !annotation !0 +; CHECK: trap: ; preds = %entry +; CHECK-NEXT: call void @fn1() +; CHECK-NEXT: unreachable +; CHECK: cont1: ; preds = %entry +; CHECK-NEXT: call void @fn2() +; CHECK-NEXT: ret i32 0 +; +entry: + %c.1 = icmp ult i8* %a, %b, !annotation !0 + br i1 %c.1, label %cont, label %trap, !annotation !0 + +cont: ; preds = %entry + %c.2 = icmp uge i8* %c, %d, !annotation !0 + br i1 %c.2, label %trap, label %cont1, !annotation !0 + +trap: ; preds = %cont, %entry + call void @fn1() + unreachable + +cont1: ; preds = %cont + call void @fn2() + ret i32 0 +} + +; The branch in %cont has !annotation metadata. Make sure generated negation +; and OR have !annotation metadata. 
+define i32 @test_preserve_or_not(i8* %a, i8* %b, i8* %c, i8* %d) { +; CHECK-LABEL: define {{.*}} @test_preserve_or_not({{.*}} +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ult i8* [[A:%.*]], [[B:%.*]], !annotation !0 +; CHECK-NEXT: [[C_2:%.*]] = xor i1 [[C_1]], true +; CHECK-NEXT: [[C_2_NOT:%.*]] = xor i1 [[C_2]], true +; CHECK-NEXT: [[C_3:%.*]] = icmp uge i8* [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[C_2_NOT]], [[C_3]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[TRAP:%.*]], label [[CONT1:%.*]], !annotation !0 +; CHECK: trap: ; preds = %entry +; CHECK-NEXT: call void @fn1() +; CHECK-NEXT: unreachable +; CHECK: cont1: ; preds = %entry +; CHECK-NEXT: call void @fn2() +; CHECK-NEXT: ret i32 0 +; +entry: + %c.1 = icmp ult i8* %a, %b, !annotation !0 + %c.2 = xor i1 %c.1, true + br i1 %c.2, label %cont, label %trap, !annotation !0 + +cont: ; preds = %entry + %c.3 = icmp uge i8* %c, %d, !annotation !0 + br i1 %c.3, label %trap, label %cont1, !annotation !0 + +trap: ; preds = %cont, %entry + call void @fn1() + unreachable + +cont1: ; preds = %cont + call void @fn2() + ret i32 0 +} + + +; The branch in %cont has no !annotation metadata. Make sure generated negation +; and OR do not have !annotation metadata. +define i32 @test_or_not_no_annotation(i8* %a, i8* %b, i8* %c, i8* %d) { +; CHECK-LABEL: define {{.*}} @test_or_not_no_annotation({{.*}} +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_1:%.*]] = icmp ult i8* [[A:%.*]], [[B:%.*]], !annotation !0 +; CHECK-NEXT: [[C_2:%.*]] = xor i1 [[C_1]], true +; CHECK-NEXT: [[C_2_NOT:%.*]] = xor i1 [[C_2]], true +; CHECK-NEXT: [[C_3:%.*]] = icmp uge i8* [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[C_2_NOT]], [[C_3]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[TRAP:%.*]], label [[CONT1:%.*]], !annotation !0 +; CHECK: trap: ; preds = %entry +; CHECK-NEXT: call void @fn1() +; CHECK-NEXT: unreachable +; CHECK: cont1: ; preds = %entry +; CHECK-NEXT: call void @fn2() +; CHECK-NEXT: ret i32 0 +; +entry: + %c.1 = icmp ult i8* %a, %b, !annotation !0 + %c.2 = xor i1 %c.1, true + br i1 %c.2, label %cont, label %trap, !annotation !0 + +cont: ; preds = %entry + %c.3 = icmp uge i8* %c, %d, !annotation !0 + br i1 %c.3, label %trap, label %cont1 + +trap: ; preds = %cont, %entry + call void @fn1() + unreachable + +cont1: ; preds = %cont + call void @fn2() + ret i32 0 +} + +declare void @fn1() +declare void @fn2() + +!0 = !{!"foo"} From 869f8363c424592e5f8c258492f46d5fcbc90c83 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Wed, 16 Dec 2020 10:53:18 -0800 Subject: [PATCH 34/39] [lldb][NFC] Apply performance-faster-string-find (`str.find("X")` -> `str.find('x')`) --- .../InstrumentationRuntimeMainThreadChecker.cpp | 2 +- lldb/tools/lldb-vscode/JSONUtils.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp b/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp index 72d28c3474576..99784bd3dbd19 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp @@ -114,7 +114,7 @@ InstrumentationRuntimeMainThreadChecker::RetrieveReportData( std::string className = ""; std::string selector = ""; if (apiName.substr(0, 2) == "-[") { - size_t spacePos = apiName.find(" "); + size_t spacePos = apiName.find(' '); 
if (spacePos != std::string::npos) { className = apiName.substr(2, spacePos - 2); selector = apiName.substr(spacePos + 1, apiName.length() - spacePos - 2); diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 044bfd13ec463..831f3285d31f1 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -1027,7 +1027,7 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request) { std::vector envs = GetStrings(launch_request_arguments, "env"); llvm::json::Object environment; for (const std::string &env : envs) { - size_t index = env.find("="); + size_t index = env.find('='); environment.try_emplace(env.substr(0, index), env.substr(index + 1)); } run_in_terminal_args.try_emplace("env", From 4e90cad6a6b5504f11b7876e26e80c2a079e04b0 Mon Sep 17 00:00:00 2001 From: Peter Steinfeld Date: Tue, 15 Dec 2020 14:44:22 -0800 Subject: [PATCH 35/39] [flang] Handle undeclared names in EQUIVALENCE statements Names in EQUIVALENCE statements are only allowed to indicate local objects as per 19.5.1.4, paragraph 2, item (10). Thus, a name appearing in an EQUIVALENCE statement with no corresponding declaration in the same scope is an implicit declaration of the name. If that scope contains an IMPLICIT NONE, it's an error. I implemented this by adding a state variable to ScopeHandler to indicate if we're resolving the names in an EQUIVALENCE statement and then checked this state when resolving names. I also added a test to the existing tests for EQUIVALENCE statements. Differential Revision: https://reviews.llvm.org/D93345 --- flang/lib/Semantics/resolve-names.cpp | 16 ++++++++++++---- flang/test/Semantics/equivalence01.f90 | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 1288b11a7727a..495d7d0f8584f 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -597,6 +597,7 @@ class ScopeHandler : public ImplicitRulesVisitor { bool inExecutionPart_{false}; bool inSpecificationPart_{false}; + bool inEquivalenceStmt_{false}; std::set specPartForwardRefs_; private: @@ -2021,7 +2022,11 @@ Symbol *ScopeHandler::FindSymbol(const Scope &scope, const parser::Name &name) { } return FindSymbol(scope.parent(), name); } else { - return Resolve(name, scope.FindSymbol(name.source)); + // In EQUIVALENCE statements only resolve names in the local scope, see + // 19.5.1.4, paragraph 2, item (10) + return Resolve(name, + inEquivalenceStmt_ ? 
FindInScope(scope, name)
+                               : scope.FindSymbol(name.source));
   }
 }
 
@@ -4347,15 +4352,17 @@ void DeclarationVisitor::Post(const parser::CommonBlockObject &x) {
 
 bool DeclarationVisitor::Pre(const parser::EquivalenceStmt &x) {
   // save equivalence sets to be processed after specification part
-  CheckNotInBlock("EQUIVALENCE"); // C1107
-  for (const std::list &set : x.v) {
-    equivalenceSets_.push_back(&set);
+  if (CheckNotInBlock("EQUIVALENCE")) { // C1107
+    for (const std::list &set : x.v) {
+      equivalenceSets_.push_back(&set);
+    }
   }
   return false; // don't implicitly declare names yet
 }
 
 void DeclarationVisitor::CheckEquivalenceSets() {
   EquivalenceSets equivSets{context()};
+  inEquivalenceStmt_ = true;
   for (const auto *set : equivalenceSets_) {
     const auto &source{set->front().v.value().source};
     if (set->size() <= 1) { // R871
@@ -4372,6 +4379,7 @@
     }
     equivSets.FinishSet(source);
   }
+  inEquivalenceStmt_ = false;
   for (auto &set : equivSets.sets()) {
     if (!set.empty()) {
       currScope().add_equivalenceSet(std::move(set));
diff --git a/flang/test/Semantics/equivalence01.f90 b/flang/test/Semantics/equivalence01.f90
index 234c42744ee9a..e75d954001d76 100644
--- a/flang/test/Semantics/equivalence01.f90
+++ b/flang/test/Semantics/equivalence01.f90
@@ -197,3 +197,20 @@
 end subroutine interfaceSub
   end interface
 end subroutine s16
+
+module m17
+  real :: dupName
+contains
+  real function f17a()
+    implicit none
+    real :: y
+    !ERROR: No explicit type declared for 'dupname'
+    equivalence (dupName, y)
+  end function f17a
+  real function f17b()
+    real :: y
+    ! The following implicitly declares an object called "dupName" local to
+    ! the function f17b(). OK since there's no "implicit none
+    equivalence (dupName, y)
+  end function f17b
+end module m17
From 11f1027b4d8d851c94497330bb901bd5753188f3 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev
Date: Wed, 16 Dec 2020 05:14:12 -0800
Subject: [PATCH 36/39] [mlir] AsyncRuntime: move runtime declarations to mlir::runtime namespace

Define Async runtime related typedefs in the `mlir::runtime` namespace.

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D93391
---
 .../mlir/ExecutionEngine/AsyncRuntime.h   |  6 ++++++
 mlir/lib/ExecutionEngine/AsyncRuntime.cpp | 19 +++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/AsyncRuntime.h b/mlir/include/mlir/ExecutionEngine/AsyncRuntime.h
index 54724af4133d8..e3d90198f36c3 100644
--- a/mlir/include/mlir/ExecutionEngine/AsyncRuntime.h
+++ b/mlir/include/mlir/ExecutionEngine/AsyncRuntime.h
@@ -32,6 +32,9 @@
 #define MLIR_ASYNCRUNTIME_DEFINE_FUNCTIONS
 #endif // _WIN32
 
+namespace mlir {
+namespace runtime {
+
 //===----------------------------------------------------------------------===//
 // Async runtime API.
//===----------------------------------------------------------------------===//
@@ -102,4 +105,7 @@ mlirAsyncRuntimeAwaitAllInGroupAndExecute(AsyncGroup *, CoroHandle, CoroResume);
 extern "C" MLIR_ASYNCRUNTIME_EXPORT void
 mlirAsyncRuntimePrintCurrentThreadId();
 
+} // namespace runtime
+} // namespace mlir
+
 #endif // MLIR_EXECUTIONENGINE_ASYNCRUNTIME_H_
diff --git a/mlir/lib/ExecutionEngine/AsyncRuntime.cpp b/mlir/lib/ExecutionEngine/AsyncRuntime.cpp
index 3b90b9c694f3f..3bfed86aa9960 100644
--- a/mlir/lib/ExecutionEngine/AsyncRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/AsyncRuntime.cpp
@@ -24,10 +24,14 @@
 #include 
 #include 
 
+using namespace mlir::runtime;
+
 //===----------------------------------------------------------------------===//
 // Async runtime API.
 //===----------------------------------------------------------------------===//
 
+namespace mlir {
+namespace runtime {
 namespace {
 
 // Forward declare class defined below.
@@ -66,12 +70,6 @@ class AsyncRuntime {
   std::atomic numRefCountedObjects;
 };
 
-// Returns the default per-process instance of an async runtime.
-AsyncRuntime *getDefaultAsyncRuntimeInstance() {
-  static auto runtime = std::make_unique();
-  return runtime.get();
-}
-
 // -------------------------------------------------------------------------- //
 // A base class for all reference counted objects created by the async runtime.
 // -------------------------------------------------------------------------- //
@@ -110,6 +108,12 @@ class RefCounted {
 
 } // namespace
 
+// Returns the default per-process instance of an async runtime.
+static AsyncRuntime *getDefaultAsyncRuntimeInstance() {
+  static auto runtime = std::make_unique();
+  return runtime.get();
+}
+
 struct AsyncToken : public RefCounted {
   // AsyncToken created with a reference count of 2 because it will be returned
   // to the `async.execute` caller and also will be later on emplaced by the
@@ -140,6 +144,9 @@ struct AsyncGroup : public RefCounted {
   std::vector> awaiters;
 };
 
+} // namespace runtime
+} // namespace mlir
+
 // Adds references to reference counted runtime object.
 extern "C" void mlirAsyncRuntimeAddRef(RefCountedObjPtr ptr, int32_t count) {
   RefCounted *refCounted = static_cast(ptr);
From 900d71a851db5d50a52906c0146451b4b71cd85f Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev
Date: Wed, 16 Dec 2020 02:17:53 -0800
Subject: [PATCH 37/39] [mlir] Async: re-enable tests after fixing flakiness

Test flakiness was fixed by:
https://github.com/llvm/llvm-project/commit/9edcedf7f222ce7c893d1e3bf19b3a7a1f0f2218

Run these tests to verify that all parts of the lowering work correctly.

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D93384
---
 mlir/test/mlir-cpu-runner/async-group.mlir | 20 ++++++++----------
 mlir/test/mlir-cpu-runner/async.mlir       | 24 ++++++++++------------
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/mlir/test/mlir-cpu-runner/async-group.mlir b/mlir/test/mlir-cpu-runner/async-group.mlir
index e7a07dc4f1ab1..50f85ff546093 100644
--- a/mlir/test/mlir-cpu-runner/async-group.mlir
+++ b/mlir/test/mlir-cpu-runner/async-group.mlir
@@ -1,14 +1,12 @@
-// RUN: true
-// TODO: re-enable when not flaky.
-// _UN: mlir-opt %s -async-ref-counting \ -// _UN: -convert-async-to-llvm \ -// _UN: -convert-std-to-llvm \ -// _UN: | mlir-cpu-runner \ -// _UN: -e main -entry-point-result=void -O0 \ -// _UN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \ -// _UN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ -// _UN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \ -// _UN: | FileCheck %s +// RUN: mlir-opt %s -async-ref-counting \ +// RUN: -convert-async-to-llvm \ +// RUN: -convert-std-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: -e main -entry-point-result=void -O0 \ +// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \ +// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \ +// RUN: | FileCheck %s func @main() { %group = async.create_group diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir index 4546628e59696..5f06dd17ed618 100644 --- a/mlir/test/mlir-cpu-runner/async.mlir +++ b/mlir/test/mlir-cpu-runner/async.mlir @@ -1,16 +1,14 @@ -// RUN: true -// TODO: re-enable when not flaky. -// _UN: mlir-opt %s -async-ref-counting \ -// _UN: -convert-async-to-llvm \ -// _UN: -convert-linalg-to-loops \ -// _UN: -convert-linalg-to-llvm \ -// _UN: -convert-std-to-llvm \ -// _UN: | mlir-cpu-runner \ -// _UN: -e main -entry-point-result=void -O0 \ -// _UN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \ -// _UN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ -// _UN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \ -// _UN: | FileCheck %s +// RUN: mlir-opt %s -async-ref-counting \ +// RUN: -convert-async-to-llvm \ +// RUN: -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm \ +// RUN: -convert-std-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: -e main -entry-point-result=void -O0 \ +// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \ +// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \ +// RUN: | FileCheck %s func @main() { %i0 = constant 0 : index From 7685d818ef329cd3f6ef121af1208be409eb59db Mon Sep 17 00:00:00 2001 From: Emma Blink Date: Wed, 16 Dec 2020 14:42:07 -0500 Subject: [PATCH 38/39] Mark implicit coroutine variables as being implicit This prevents the clang-tidy readability-identifier-naming check from triggering on implicit __coro_gro and __promise variables in coroutines. 
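As a purely illustrative sketch (not part of this change), the shape of code
involved is roughly the following; it assumes the standard C++20 <coroutine>
header rather than the experimental header exercised by the test below, and
the `task` type is a hypothetical stand-in for the test's own promise
machinery. Any coroutine body makes Sema synthesize a `__promise` local (see
`buildCoroutinePromise`) and a `__coro_gro` local for the get-return-object
(see `makeGroDeclAndReturnStmt`); marking them implicit keeps the naming
check from reporting them.

  #include <coroutine>

  struct task {
    struct promise_type {
      task get_return_object() { return {}; }
      std::suspend_never initial_suspend() noexcept { return {}; }
      std::suspend_never final_suspend() noexcept { return {}; }
      void return_void() {}
      void unhandled_exception() {}
    };
  };

  task demo() {
    // In this body the compiler materializes the implicit locals __promise
    // and __coro_gro; with setImplicit() they are no longer reported by
    // readability-identifier-naming.
    co_return;
  }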
--- .../system/coroutines.h | 34 +++++++++++++++++++ .../readability-identifier-naming.cpp | 16 +++++++-- clang/lib/Sema/SemaCoroutine.cpp | 2 ++ 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/system/coroutines.h diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/system/coroutines.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/system/coroutines.h new file mode 100644 index 0000000000000..b38da9999c52f --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/system/coroutines.h @@ -0,0 +1,34 @@ +#pragma once + +namespace std { +namespace experimental { + +template +struct coroutine_traits { + using promise_type = typename ret_t::promise_type; +}; + +template +struct coroutine_handle { + static constexpr coroutine_handle from_address(void *addr) noexcept { return {}; }; +}; + +} // namespace experimental +} // namespace std + +struct never_suspend { + bool await_ready() noexcept { return false; } + template + void await_suspend(coro_t handle) noexcept {} + void await_resume() noexcept {} +}; + +struct task { + struct promise_type { + task get_return_object() noexcept { return {}; } + never_suspend initial_suspend() noexcept { return {}; } + never_suspend final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception() {} + }; +}; diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming.cpp index 66b6009d62ae5..f66202ecd11b5 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming.cpp @@ -81,13 +81,14 @@ // RUN: {key: readability-identifier-naming.LocalPointerPrefix, value: 'l_'}, \ // RUN: {key: readability-identifier-naming.LocalConstantPointerCase, value: CamelCase}, \ // RUN: {key: readability-identifier-naming.LocalConstantPointerPrefix, value: 'lc_'}, \ -// RUN: ]}' -- -fno-delayed-template-parsing -Dbad_macro \ +// RUN: ]}' -- -fno-delayed-template-parsing -Dbad_macro -std=c++17 -fcoroutines-ts \ // RUN: -I%S/Inputs/readability-identifier-naming \ // RUN: -isystem %S/Inputs/readability-identifier-naming/system // clang-format off #include +#include #include "user-header.h" // NO warnings or fixes expected from declarations within header files without // the -header-filter= option @@ -287,7 +288,7 @@ class COverriding : public AOverridden { // Overriding a badly-named base isn't a new violation. 
void BadBaseMethod() override {}
 // CHECK-FIXES: {{^}}  void v_Bad_Base_Method() override {}
-
+
   void foo() {
     BadBaseMethod();
     // CHECK-FIXES: {{^}}    v_Bad_Base_Method();
@@ -614,3 +615,14 @@
 template auto GetRes(type_t& Param) -> decltype(Param.res());
 // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: invalid case style for parameter 'Param'
 // CHECK-FIXES: auto GetRes(type_t& a_param) -> decltype(a_param.res());
+
+// Check implicit declarations in coroutines
+
+struct async_obj {
+public:
+  never_suspend operator co_await() const noexcept;
+};
+
+task ImplicitDeclTest(async_obj &a_object) {
+  co_await a_object; // CHECK-MESSAGES-NOT: warning: invalid case style for local variable
+}
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index 76820616fb9d2..7a48bfa429e95 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -544,6 +544,7 @@ VarDecl *Sema::buildCoroutinePromise(SourceLocation Loc) {
   auto *VD = VarDecl::Create(Context, FD, FD->getLocation(), FD->getLocation(),
                              &PP.getIdentifierTable().get("__promise"), T,
                              Context.getTrivialTypeSourceInfo(T, Loc), SC_None);
+  VD->setImplicit();
   CheckVariableDeclarationType(VD);
   if (VD->isInvalidDecl())
     return nullptr;
@@ -1577,6 +1578,7 @@ bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() {
       S.Context, &FD, FD.getLocation(), FD.getLocation(),
       &S.PP.getIdentifierTable().get("__coro_gro"), GroType,
       S.Context.getTrivialTypeSourceInfo(GroType, Loc), SC_None);
+  GroDecl->setImplicit();
   S.CheckVariableDeclarationType(GroDecl);
   if (GroDecl->isInvalidDecl())
From b9fb063e63c7959e8bc9b424bd34b266ca826826 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu"
Date: Thu, 10 Dec 2020 16:30:24 -0500
Subject: [PATCH 39/39] [clang-offload-bundler] Add option -allow-missing-bundles

There are out-of-tree tools using clang-offload-bundler to extract bundles
from bundled files. When a bundle is not in the bundled file,
clang-offload-bundler is expected to emit an error message and return a
non-zero value. However, currently clang-offload-bundler silently generates
an empty file for each missing bundle.

Since OpenMP/HIP toolchains expect the current behavior, an option
-allow-missing-bundles is added to let clang-offload-bundler create an
empty file for each missing bundle when unbundling. The unbundling job
action is updated to use this option by default.

By default, clang-offload-bundler itself will emit an error when a bundle
is missing while unbundling.

Changes are also made to check for duplicate targets in the -targets
option and emit an error.

Differential Revision: https://reviews.llvm.org/D93068
---
 clang/lib/Driver/ToolChains/Clang.cpp     |  1 +
 clang/test/Driver/clang-offload-bundler.c | 47 +++++++++++++++----
 .../Driver/hip-toolchain-rdc-separate.hip |  8 ++--
 clang/test/Driver/openmp-offload.c        | 12 ++---
 .../ClangOffloadBundler.cpp               | 34 ++++++++++++++
 5 files changed, 84 insertions(+), 18 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 1c1224f3990b4..6ec6a551fafee 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7392,6 +7392,7 @@ void OffloadBundler::ConstructJobMultipleOutputs(
   }
   CmdArgs.push_back(TCArgs.MakeArgString(UB));
   CmdArgs.push_back("-unbundle");
+  CmdArgs.push_back("-allow-missing-bundles");
 
   // All the inputs are encoded as commands.
C.addCommand(std::make_unique( diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c index 21699e78dda6d..b4bab6bbd1e86 100644 --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -33,6 +33,7 @@ // CK-HELP: {{.*}}one. The resulting file can also be unbundled into different files by // CK-HELP: {{.*}}this tool if -unbundle is provided. // CK-HELP: {{.*}}USAGE: clang-offload-bundler [options] +// CK-HELP: {{.*}}-allow-missing-bundles {{.*}}- Create empty files if bundles are missing when unbundling // CK-HELP: {{.*}}-inputs= - [,...] // CK-HELP: {{.*}}-outputs= - [,...] // CK-HELP: {{.*}}-targets= - [-,...] @@ -88,7 +89,7 @@ // RUN: not clang-offload-bundler -type=i -targets=openmp-powerpc64le-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -inputs=%t.i,%t.tgt1,%t.tgt2 -outputs=%t.bundle.i 2>&1 | FileCheck %s --check-prefix CK-ERR9A // RUN: not clang-offload-bundler -type=i -targets=host-%itanium_abi_triple,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -inputs=%t.i,%t.tgt1,%t.tgt2 -outputs=%t.bundle.i 2>&1 | FileCheck %s --check-prefix CK-ERR9B // CK-ERR9A: error: expecting exactly one host target but got 0 -// CK-ERR9B: error: expecting exactly one host target but got 2 +// CK-ERR9B: error: Duplicate targets are not allowed // // Check text bundle. This is a readable format, so we check for the format we expect to find. @@ -181,17 +182,17 @@ // RUN: diff %t.tgt2 %t.res.tgt2 // Check if we can unbundle a file with no magic strings. -// RUN: clang-offload-bundler -type=s -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.s,%t.res.tgt1,%t.res.tgt2 -inputs=%t.s -unbundle +// RUN: clang-offload-bundler -type=s -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.s,%t.res.tgt1,%t.res.tgt2 -inputs=%t.s -unbundle -allow-missing-bundles // RUN: diff %t.s %t.res.s // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 -// RUN: clang-offload-bundler -type=s -targets=openmp-powerpc64le-ibm-linux-gnu,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -outputs=%t.res.tgt1,%t.res.s,%t.res.tgt2 -inputs=%t.s -unbundle +// RUN: clang-offload-bundler -type=s -targets=openmp-powerpc64le-ibm-linux-gnu,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -outputs=%t.res.tgt1,%t.res.s,%t.res.tgt2 -inputs=%t.s -unbundle -allow-missing-bundles // RUN: diff %t.s %t.res.s // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 // Check that bindler prints an error if given host bundle does not exist in the fat binary. -// RUN: not clang-offload-bundler -type=s -targets=host-x86_64-xxx-linux-gnu,openmp-powerpc64le-ibm-linux-gnu -outputs=%t.res.s,%t.res.tgt1 -inputs=%t.bundle3.s -unbundle 2>&1 | FileCheck %s --check-prefix CK-NO-HOST-BUNDLE +// RUN: not clang-offload-bundler -type=s -targets=host-x86_64-xxx-linux-gnu,openmp-powerpc64le-ibm-linux-gnu -outputs=%t.res.s,%t.res.tgt1 -inputs=%t.bundle3.s -unbundle -allow-missing-bundles 2>&1 | FileCheck %s --check-prefix CK-NO-HOST-BUNDLE // CK-NO-HOST-BUNDLE: error: Can't find bundle for the host target // @@ -229,11 +230,11 @@ // RUN: diff %t.tgt1 %t.res.tgt1 // Check if we can unbundle a file with no magic strings. 
-// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.bc,%t.res.tgt1,%t.res.tgt2 -inputs=%t.bc -unbundle +// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.bc,%t.res.tgt1,%t.res.tgt2 -inputs=%t.bc -unbundle -allow-missing-bundles // RUN: diff %t.bc %t.res.bc // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 -// RUN: clang-offload-bundler -type=bc -targets=openmp-powerpc64le-ibm-linux-gnu,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -outputs=%t.res.tgt1,%t.res.bc,%t.res.tgt2 -inputs=%t.bc -unbundle +// RUN: clang-offload-bundler -type=bc -targets=openmp-powerpc64le-ibm-linux-gnu,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -outputs=%t.res.tgt1,%t.res.bc,%t.res.tgt2 -inputs=%t.bc -unbundle -allow-missing-bundles // RUN: diff %t.bc %t.res.bc // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 @@ -269,11 +270,11 @@ // RUN: diff %t.tgt1 %t.res.tgt1 // Check if we can unbundle a file with no magic strings. -// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.o,%t.res.tgt1,%t.res.tgt2 -inputs=%t.o -unbundle +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -outputs=%t.res.o,%t.res.tgt1,%t.res.tgt2 -inputs=%t.o -unbundle -allow-missing-bundles // RUN: diff %t.o %t.res.o // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 -// RUN: clang-offload-bundler -type=o -targets=openmp-powerpc64le-ibm-linux-gnu,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -outputs=%t.res.tgt1,%t.res.o,%t.res.tgt2 -inputs=%t.o -unbundle +// RUN: clang-offload-bundler -type=o -targets=openmp-powerpc64le-ibm-linux-gnu,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -outputs=%t.res.tgt1,%t.res.o,%t.res.tgt2 -inputs=%t.o -unbundle -allow-missing-bundles // RUN: diff %t.o %t.res.o // RUN: diff %t.empty %t.res.tgt1 // RUN: diff %t.empty %t.res.tgt2 @@ -288,6 +289,36 @@ // RUN: diff %t.tgt1 %t.res.tgt1 // RUN: diff %t.tgt2 %t.res.tgt2 +// +// Check error due to missing bundles +// +// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa-gfx900 -inputs=%t.bc,%t.tgt1 -outputs=%t.hip.bundle.bc +// RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc -unbundle \ +// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906 \ +// RUN: 2>&1 | FileCheck -check-prefix=MISS1 %s +// RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc,%t.tmp2.bc -unbundle \ +// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx900 \ +// RUN: 2>&1 | FileCheck -check-prefix=MISS1 %s +// MISS1: error: Can't find bundles for hip-amdgcn-amd-amdhsa-gfx906 +// RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc,%t.tmp2.bc -unbundle \ +// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx803 \ +// RUN: 2>&1 | FileCheck -check-prefix=MISS2 %s +// MISS2: error: Can't find bundles for hip-amdgcn-amd-amdhsa-gfx803 and hip-amdgcn-amd-amdhsa-gfx906 +// RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc,%t.tmp2.bc,%t.tmp3.bc -unbundle \ +// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx1010 \ +// 
RUN: 2>&1 | FileCheck -check-prefix=MISS3 %s +// MISS3: error: Can't find bundles for hip-amdgcn-amd-amdhsa-gfx1010, hip-amdgcn-amd-amdhsa-gfx803, and hip-amdgcn-amd-amdhsa-gfx906 + +// +// Check error due to duplicate targets +// +// RUN: not clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa-gfx900,hip-amdgcn-amd-amdhsa-gfx900 \ +// RUN: -inputs=%t.bc,%t.tgt1,%t.tgt1 -outputs=%t.hip.bundle.bc 2>&1 | FileCheck -check-prefix=DUP %s +// RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc,%t.tmp2.bc -unbundle \ +// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx906 \ +// RUN: 2>&1 | FileCheck -check-prefix=DUP %s +// DUP: error: Duplicate targets are not allowed + // Some code so that we can create a binary out of this file. int A = 0; void test_func(void) { diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip index 5cf1b4f26850b..979b90fee0464 100644 --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -87,22 +87,22 @@ // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" -// LINK: "-unbundle" +// LINK: "-unbundle" "-allow-missing-bundles" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" -// LINK: "-unbundle" +// LINK: "-unbundle" "-allow-missing-bundles" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // LINK-SAME: "-inputs=[[A_O]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]" -// LINK: "-unbundle" +// LINK: "-unbundle" "-allow-missing-bundles" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" // LINK-SAME: "-inputs=[[B_O]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]" -// LINK: "-unbundle" +// LINK: "-unbundle" "-allow-missing-bundles" // LINK-NOT: "*.llvm-link" // LINK-NOT: ".*opt" diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c index d32632059f585..8be535b02c820 100644 --- a/clang/test/Driver/openmp-offload.c +++ b/clang/test/Driver/openmp-offload.c @@ -495,7 +495,7 @@ // CHK-UBJOBS-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs= // CHK-UBJOBS-SAME: [[HOSTPP:[^\\/]+\.i]], // CHK-UBJOBS-SAME: [[T1PP:[^\\/]+\.i]], -// CHK-UBJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle" +// CHK-UBJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle" "-allow-missing-bundles" // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-UBJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " @@ -504,7 +504,7 @@ // CHK-UBJOBS-ST-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs= // CHK-UBJOBS-ST-SAME: [[HOSTPP:[^\\/,]+\.i]], // CHK-UBJOBS-ST-SAME: 
[[T1PP:[^\\/,]+\.i]], -// CHK-UBJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle" +// CHK-UBJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle" "-allow-missing-bundles" // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-UBJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " @@ -563,7 +563,7 @@ // CHK-UBJOBS2-SAME: [[INPUT:[^\\/]+\.o]]" "-outputs= // CHK-UBJOBS2-SAME: [[HOSTOBJ:[^\\/]+\.o]], // CHK-UBJOBS2-SAME: [[T1OBJ:[^\\/]+\.o]], -// CHK-UBJOBS2-SAME: [[T2OBJ:[^\\/]+\.o]]" "-unbundle" +// CHK-UBJOBS2-SAME: [[T2OBJ:[^\\/]+\.o]]" "-unbundle" "-allow-missing-bundles" // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" " // CHK-UBJOBS2-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]" // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" " @@ -579,7 +579,7 @@ // CHK-UBJOBS2-ST-SAME: [[INPUT:[^\\/]+\.o]]" "-outputs= // CHK-UBJOBS2-ST-SAME: [[HOSTOBJ:[^\\/,]+\.o]], // CHK-UBJOBS2-ST-SAME: [[T1OBJ:[^\\/,]+\.o]], -// CHK-UBJOBS2-ST-SAME: [[T2OBJ:[^\\/,]+\.o]]" "-unbundle" +// CHK-UBJOBS2-ST-SAME: [[T2OBJ:[^\\/,]+\.o]]" "-unbundle" "-allow-missing-bundles" // CHK-UBJOBS2-ST-NOT: clang-offload-bundler{{.*}}in.so // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" " // CHK-UBJOBS2-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"{{.*}}[[T1OBJ]]" @@ -609,7 +609,7 @@ // CHK-UBUJOBS-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs= // CHK-UBUJOBS-SAME: [[HOSTPP:[^\\/]+\.i]], // CHK-UBUJOBS-SAME: [[T1PP:[^\\/]+\.i]], -// CHK-UBUJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle" +// CHK-UBUJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle" "-allow-missing-bundles" // CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-UBUJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" @@ -617,7 +617,7 @@ // CHK-UBUJOBS-ST-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs= // CHK-UBUJOBS-ST-SAME: [[HOSTPP:[^\\/,]+\.i]], // CHK-UBUJOBS-ST-SAME: [[T1PP:[^\\/,]+\.i]], -// CHK-UBUJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle" +// CHK-UBUJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle" "-allow-missing-bundles" // CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-disable-llvm-passes" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-UBUJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp index a1b2fecb4a806..1e2a2a67bcce1 100644 --- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp +++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,12 @@ static cl::opt PrintExternalCommands( "instead of actually executing them - for testing purposes.\n"), cl::init(false), cl::cat(ClangOffloadBundlerCategory)); +static cl::opt + AllowMissingBundles("allow-missing-bundles", + cl::desc("Create empty files if bundles are missing " + "when unbundling.\n"), + cl::init(false), 
cl::cat(ClangOffloadBundlerCategory)); + static cl::opt BundleAlignment("bundle-align", cl::desc("Alignment of bundle for binary files"), @@ -883,6 +890,25 @@ static Error UnbundleFiles() { FoundHostBundle = true; } + if (!AllowMissingBundles && !Worklist.empty()) { + std::string ErrMsg = "Can't find bundles for"; + std::set Sorted; + for (auto &E : Worklist) + Sorted.insert(E.first()); + unsigned I = 0; + unsigned Last = Sorted.size() - 1; + for (auto &E : Sorted) { + if (I != 0 && Last > 1) + ErrMsg += ","; + ErrMsg += " "; + if (I == Last && I != 0) + ErrMsg += "and "; + ErrMsg += E.str(); + ++I; + } + return createStringError(inconvertibleErrorCode(), ErrMsg); + } + // If no bundles were found, assume the input file is the host bundle and // create empty files for the remaining targets. if (Worklist.size() == TargetNames.size()) { @@ -974,7 +1000,15 @@ int main(int argc, const char **argv) { // have exactly one host target. unsigned Index = 0u; unsigned HostTargetNum = 0u; + llvm::DenseSet ParsedTargets; for (StringRef Target : TargetNames) { + if (ParsedTargets.contains(Target)) { + reportError(createStringError(errc::invalid_argument, + "Duplicate targets are not allowed")); + return 1; + } + ParsedTargets.insert(Target); + StringRef Kind; StringRef Triple; getOffloadKindAndTriple(Target, Kind, Triple);