diff --git a/clang-tools-extra/clangd/FileDistance.h b/clang-tools-extra/clangd/FileDistance.h index e7174bccb9ddb..88bb30c142702 100644 --- a/clang-tools-extra/clangd/FileDistance.h +++ b/clang-tools-extra/clangd/FileDistance.h @@ -43,6 +43,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Path.h" diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 37dbac30901f6..298d793a0de46 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -56,6 +56,14 @@ Improvements to Clang's diagnostics - -Wtautological-compare for self comparisons and -Wtautological-overlap-compare will now look through member and array access to determine if two operand expressions are the same. +- -Wtautological-bitwise-compare is a new warning group. This group has the + current warning which diagnoses the tautological comparison of a bitwise + operation and a constant. The group also has the new warning which diagnoses + when a bitwise-or with a non-zero value is converted to a bool, since + that bool will always be true. +- -Wbitwise-conditional-parentheses will warn on operator precedence issues + when mixing bitwise-and (&) and bitwise-or (|) operators with the + conditional operator (?:). Non-comprehensive list of changes in this release ------------------------------------------------- diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 0c6c2846c9b8b..66212f72b7875 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -389,9 +389,12 @@ class CXXRecordDecl : public RecordDecl { /// The number of explicit captures in this lambda. unsigned NumExplicitCaptures : 13; + /// Has known `internal` linkage. 
+ unsigned HasKnownInternalLinkage : 1; + /// The number used to indicate this lambda expression for name /// mangling in the Itanium C++ ABI. - unsigned ManglingNumber = 0; + unsigned ManglingNumber : 31; /// The declaration that provides context for this lambda, if the /// actual DeclContext does not suffice. This is used for lambdas that @@ -406,12 +409,12 @@ class CXXRecordDecl : public RecordDecl { /// The type of the call method. TypeSourceInfo *MethodTyInfo; - LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info, - bool Dependent, bool IsGeneric, - LambdaCaptureDefault CaptureDefault) - : DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric), - CaptureDefault(CaptureDefault), NumCaptures(0), NumExplicitCaptures(0), - MethodTyInfo(Info) { + LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info, bool Dependent, + bool IsGeneric, LambdaCaptureDefault CaptureDefault) + : DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric), + CaptureDefault(CaptureDefault), NumCaptures(0), + NumExplicitCaptures(0), HasKnownInternalLinkage(0), ManglingNumber(0), + MethodTyInfo(Info) { IsLambda = true; // C++1z [expr.prim.lambda]p4: @@ -1705,6 +1708,13 @@ class CXXRecordDecl : public RecordDecl { return getLambdaData().ManglingNumber; } + /// The lambda is known to have internal linkage no matter whether it has name + /// mangling number. + bool hasKnownLambdaInternalLinkage() const { + assert(isLambda() && "Not a lambda closure type!"); + return getLambdaData().HasKnownInternalLinkage; + } + /// Retrieve the declaration that provides additional context for a /// lambda, when the normal declaration context is not specific enough. /// @@ -1718,9 +1728,12 @@ class CXXRecordDecl : public RecordDecl { /// Set the mangling number and context declaration for a lambda /// class. 
- void setLambdaMangling(unsigned ManglingNumber, Decl *ContextDecl) { + void setLambdaMangling(unsigned ManglingNumber, Decl *ContextDecl, + bool HasKnownInternalLinkage = false) { + assert(isLambda() && "Not a lambda closure type!"); getLambdaData().ManglingNumber = ManglingNumber; getLambdaData().ContextDecl = ContextDecl; + getLambdaData().HasKnownInternalLinkage = HasKnownInternalLinkage; } /// Returns the inheritance model used for this record. diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 7b7ca9bf8f0f0..2152e108c7cba 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -262,8 +262,8 @@ class CUDAKernelCallExpr final : public CallExpr { /// - a != b -> !(a == b) /// - a != b -> !(b == a) /// - For \c \@ in \c <, \c <=, \c >, \c >=, \c <=>: -/// - a @ b -> (a <=> b) @ 0 -/// - a @ b -> 0 @ (b <=> a) +/// - a @ b -> (a <=> b) @ 0 +/// - a @ b -> 0 @ (b <=> a) /// /// This expression provides access to both the original syntax and the /// rewritten expression. 
diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h index b24e32c966777..a8301a0e0063f 100644 --- a/clang/include/clang/Analysis/CFG.h +++ b/clang/include/clang/Analysis/CFG.h @@ -1213,6 +1213,7 @@ class CFGCallback { virtual void compareAlwaysTrue(const BinaryOperator *B, bool isAlwaysTrue) {} virtual void compareBitwiseEquality(const BinaryOperator *B, bool isAlwaysTrue) {} + virtual void compareBitwiseOr(const BinaryOperator *B) {} }; /// Represents a source-level, intra-procedural CFG that represents the diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index f267f4d92b287..0daad2ba759bc 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -296,6 +296,7 @@ def ExitTimeDestructors : DiagGroup<"exit-time-destructors">; def FlexibleArrayExtensions : DiagGroup<"flexible-array-extensions">; def FourByteMultiChar : DiagGroup<"four-char-constants">; def GlobalConstructors : DiagGroup<"global-constructors">; +def BitwiseConditionalParentheses: DiagGroup<"bitwise-conditional-parentheses">; def BitwiseOpParentheses: DiagGroup<"bitwise-op-parentheses">; def LogicalOpParentheses: DiagGroup<"logical-op-parentheses">; def LogicalNotParentheses: DiagGroup<"logical-not-parentheses">; @@ -516,12 +517,14 @@ def TautologicalConstantCompare : DiagGroup<"tautological-constant-compare", [TautologicalOutOfRangeCompare]>; def TautologicalPointerCompare : DiagGroup<"tautological-pointer-compare">; def TautologicalOverlapCompare : DiagGroup<"tautological-overlap-compare">; +def TautologicalBitwiseCompare : DiagGroup<"tautological-bitwise-compare">; def TautologicalUndefinedCompare : DiagGroup<"tautological-undefined-compare">; def TautologicalObjCBoolCompare : DiagGroup<"tautological-objc-bool-compare">; def TautologicalCompare : DiagGroup<"tautological-compare", [TautologicalConstantCompare, TautologicalPointerCompare, 
TautologicalOverlapCompare, + TautologicalBitwiseCompare, TautologicalUndefinedCompare, TautologicalObjCBoolCompare]>; def HeaderHygiene : DiagGroup<"header-hygiene">; @@ -735,6 +738,7 @@ def ParenthesesOnEquality : DiagGroup<"parentheses-equality">; def Parentheses : DiagGroup<"parentheses", [LogicalOpParentheses, LogicalNotParentheses, + BitwiseConditionalParentheses, BitwiseOpParentheses, ShiftOpParentheses, OverloadedShiftOpParentheses, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 7be237a821311..2a19317b148f7 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5771,6 +5771,9 @@ def note_precedence_silence : Note< def warn_precedence_conditional : Warning< "operator '?:' has lower precedence than '%0'; '%0' will be evaluated first">, InGroup; +def warn_precedence_bitwise_conditional : Warning< + "operator '?:' has lower precedence than '%0'; '%0' will be evaluated first">, + InGroup; def note_precedence_conditional_first : Note< "place parentheses around the '?:' expression to evaluate it first">; @@ -8358,7 +8361,10 @@ def warn_comparison_always : Warning< InGroup; def warn_comparison_bitwise_always : Warning< "bitwise comparison always evaluates to %select{false|true}0">, - InGroup; + InGroup, DefaultIgnore; +def warn_comparison_bitwise_or : Warning< + "bitwise or with non-zero value always evaluates to true">, + InGroup, DefaultIgnore; def warn_tautological_overlap_comparison : Warning< "overlapping comparisons always evaluate to %select{false|true}0">, InGroup, DefaultIgnore; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b9874158686ba..49d3789822fd1 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -6056,12 +6056,17 @@ class Sema { LambdaCaptureDefault CaptureDefault); /// Start the definition of a lambda expression. 
- CXXMethodDecl * - startLambdaDefinition(CXXRecordDecl *Class, SourceRange IntroducerRange, - TypeSourceInfo *MethodType, SourceLocation EndLoc, - ArrayRef Params, - ConstexprSpecKind ConstexprKind, - Optional> Mangling = None); + CXXMethodDecl *startLambdaDefinition(CXXRecordDecl *Class, + SourceRange IntroducerRange, + TypeSourceInfo *MethodType, + SourceLocation EndLoc, + ArrayRef Params, + ConstexprSpecKind ConstexprKind); + + /// Number lambda for linkage purposes if necessary. + void handleLambdaNumbering( + CXXRecordDecl *Class, CXXMethodDecl *Method, + Optional> Mangling = None); /// Endow the lambda scope info with the relevant properties. void buildLambdaScope(sema::LambdaScopeInfo *LSI, @@ -10553,11 +10558,11 @@ class Sema { Ref_Compatible }; - ReferenceCompareResult CompareReferenceRelationship(SourceLocation Loc, - QualType T1, QualType T2, - bool &DerivedToBase, - bool &ObjCConversion, - bool &ObjCLifetimeConversion); + ReferenceCompareResult + CompareReferenceRelationship(SourceLocation Loc, QualType T1, QualType T2, + bool &DerivedToBase, bool &ObjCConversion, + bool &ObjCLifetimeConversion, + bool &FunctionConversion); ExprResult checkUnknownAnyCast(SourceRange TypeRange, QualType CastType, Expr *CastExpr, CastKind &CastKind, diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index a549cf9f899ee..54acca7dc62cc 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -2694,7 +2694,8 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) { ExpectedDecl CDeclOrErr = import(DCXX->getLambdaContextDecl()); if (!CDeclOrErr) return CDeclOrErr.takeError(); - D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr); + D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr, + DCXX->hasKnownLambdaInternalLinkage()); } else if (DCXX->isInjectedClassName()) { // We have to be careful to do a similar dance to the one in // Sema::ActOnStartCXXMemberDeclarations diff --git 
a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 9ebf1c32629fe..80235d8496d22 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1385,7 +1385,8 @@ LinkageInfo LinkageComputer::computeLVForDecl(const NamedDecl *D, case Decl::CXXRecord: { const auto *Record = cast(D); if (Record->isLambda()) { - if (!Record->getLambdaManglingNumber()) { + if (Record->hasKnownLambdaInternalLinkage() || + !Record->getLambdaManglingNumber()) { // This lambda has no mangling number, so it's internal. return getInternalLinkageFor(D); } @@ -1402,7 +1403,8 @@ LinkageInfo LinkageComputer::computeLVForDecl(const NamedDecl *D, // }; const CXXRecordDecl *OuterMostLambda = getOutermostEnclosingLambda(Record); - if (!OuterMostLambda->getLambdaManglingNumber()) + if (OuterMostLambda->hasKnownLambdaInternalLinkage() || + !OuterMostLambda->getLambdaManglingNumber()) return getInternalLinkageFor(D); return getLVForClosure( diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index 54fb388b0c6c1..a533a8d97b848 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1139,6 +1139,31 @@ class CFGBuilder { return {}; } + /// A bitwise-or with a non-zero constant always evaluates to true. + TryResult checkIncorrectBitwiseOrOperator(const BinaryOperator *B) { + const Expr *LHSConstant = + tryTransformToIntOrEnumConstant(B->getLHS()->IgnoreParenImpCasts()); + const Expr *RHSConstant = + tryTransformToIntOrEnumConstant(B->getRHS()->IgnoreParenImpCasts()); + + if ((LHSConstant && RHSConstant) || (!LHSConstant && !RHSConstant)) + return {}; + + const Expr *Constant = LHSConstant ? LHSConstant : RHSConstant; + + Expr::EvalResult Result; + if (!Constant->EvaluateAsInt(Result, *Context)) + return {}; + + if (Result.Val.getInt() == 0) + return {}; + + if (BuildOpts.Observer) + BuildOpts.Observer->compareBitwiseOr(B); + + return TryResult(true); + } + /// Try and evaluate an expression to an integer constant. 
bool tryEvaluate(Expr *S, Expr::EvalResult &outResult) { if (!BuildOpts.PruneTriviallyFalseEdges) @@ -1156,7 +1181,7 @@ class CFGBuilder { return {}; if (BinaryOperator *Bop = dyn_cast(S)) { - if (Bop->isLogicalOp()) { + if (Bop->isLogicalOp() || Bop->isEqualityOp()) { // Check the cache first. CachedBoolEvalsTy::iterator I = CachedBoolEvals.find(S); if (I != CachedBoolEvals.end()) @@ -1240,6 +1265,10 @@ class CFGBuilder { TryResult BopRes = checkIncorrectRelationalOperator(Bop); if (BopRes.isKnown()) return BopRes.isTrue(); + } else if (Bop->getOpcode() == BO_Or) { + TryResult BopRes = checkIncorrectBitwiseOrOperator(Bop); + if (BopRes.isKnown()) + return BopRes.isTrue(); } } @@ -2340,6 +2369,9 @@ CFGBlock *CFGBuilder::VisitUnaryOperator(UnaryOperator *U, appendStmt(Block, U); } + if (U->getOpcode() == UO_LNot) + tryEvaluateBool(U->getSubExpr()->IgnoreParens()); + return Visit(U->getSubExpr(), AddStmtChoice()); } @@ -2474,6 +2506,9 @@ CFGBlock *CFGBuilder::VisitBinaryOperator(BinaryOperator *B, appendStmt(Block, B); } + if (B->isEqualityOp() || B->isRelationalOp()) + tryEvaluateBool(B); + CFGBlock *RBlock = Visit(B->getRHS()); CFGBlock *LBlock = Visit(B->getLHS()); // If visiting RHS causes us to finish 'Block', e.g. the RHS is a StmtExpr @@ -4527,6 +4562,10 @@ CFGBlock *CFGBuilder::VisitImplicitCastExpr(ImplicitCastExpr *E, autoCreateBlock(); appendStmt(Block, E); } + + if (E->getCastKind() == CK_IntegralToBoolean) + tryEvaluateBool(E->getSubExpr()->IgnoreParens()); + return Visit(E->getSubExpr(), AddStmtChoice()); } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 294d54e37883f..7c63743f3b43d 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -539,11 +539,11 @@ void CGDebugInfo::CreateCompileUnit() { // file to determine the real absolute path for the file. 
std::string MainFileDir; if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - MainFileDir = remapDIPath(MainFile->getDir()->getName()); - if (MainFileDir != ".") { + MainFileDir = MainFile->getDir()->getName(); + if (!llvm::sys::path::is_absolute(MainFileName)) { llvm::SmallString<1024> MainFileDirSS(MainFileDir); llvm::sys::path::append(MainFileDirSS, MainFileName); - MainFileName = MainFileDirSS.str(); + MainFileName = llvm::sys::path::remove_leading_dotslash(MainFileDirSS); } // If the main file name provided is identical to the input file name, and // if the input file is a preprocessed source, use the module name for diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 5d177d028f32d..7eff7787fa29b 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3586,7 +3586,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, // Make a new global with the correct type, this is now guaranteed // to work. auto *NewGV = cast( - GetAddrOfGlobalVar(D, InitType, IsForDefinition)); + GetAddrOfGlobalVar(D, InitType, IsForDefinition) + ->stripPointerCasts()); // Erase the old global, since it is no longer used. GV->eraseFromParent(); @@ -4076,14 +4077,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)); - // Strip off a bitcast if we got one back. - if (auto *CE = dyn_cast(Entry)) { - assert(CE->getOpcode() == llvm::Instruction::BitCast || - CE->getOpcode() == llvm::Instruction::AddrSpaceCast || - // All zero index gep. - CE->getOpcode() == llvm::Instruction::GetElementPtr); - Entry = CE->getOperand(0); - } + // Strip off pointer casts if we got them. + Entry = Entry->stripPointerCasts(); // Entry is now either a Function or GlobalVariable. 
auto *GV = dyn_cast(Entry); @@ -4106,7 +4101,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Make a new global with the correct type, this is now guaranteed to work. GV = cast( - GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative))); + GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)) + ->stripPointerCasts()); // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 253bc95e5a6d3..3b4a26830fdc5 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2036,23 +2036,36 @@ bool Driver::HandleImmediateArgs(const Compilation &C) { return true; } +enum { + TopLevelAction = 0, + HeadSibAction = 1, + OtherSibAction = 2, +}; + // Display an action graph human-readably. Action A is the "sink" node // and latest-occuring action. Traversal is in pre-order, visiting the // inputs to each action before printing the action itself. static unsigned PrintActions1(const Compilation &C, Action *A, - std::map &Ids) { + std::map &Ids, + Twine Indent = {}, int Kind = TopLevelAction) { if (Ids.count(A)) // A was already visited. return Ids[A]; std::string str; llvm::raw_string_ostream os(str); + auto getSibIndent = [](int K) -> Twine { + return (K == HeadSibAction) ? " " : (K == OtherSibAction) ? 
"| " : ""; + }; + + Twine SibIndent = Indent + getSibIndent(Kind); + int SibKind = HeadSibAction; os << Action::getClassName(A->getKind()) << ", "; if (InputAction *IA = dyn_cast(A)) { os << "\"" << IA->getInputArg().getValue() << "\""; } else if (BindArchAction *BIA = dyn_cast(A)) { os << '"' << BIA->getArchName() << '"' << ", {" - << PrintActions1(C, *BIA->input_begin(), Ids) << "}"; + << PrintActions1(C, *BIA->input_begin(), Ids, SibIndent, SibKind) << "}"; } else if (OffloadAction *OA = dyn_cast(A)) { bool IsFirst = true; OA->doOnEachDependence( @@ -2075,8 +2088,9 @@ static unsigned PrintActions1(const Compilation &C, Action *A, os << ":" << BoundArch; os << ")"; os << '"'; - os << " {" << PrintActions1(C, A, Ids) << "}"; + os << " {" << PrintActions1(C, A, Ids, SibIndent, SibKind) << "}"; IsFirst = false; + SibKind = OtherSibAction; }); } else { const ActionList *AL = &A->getInputs(); @@ -2084,8 +2098,9 @@ static unsigned PrintActions1(const Compilation &C, Action *A, if (AL->size()) { const char *Prefix = "{"; for (Action *PreRequisite : *AL) { - os << Prefix << PrintActions1(C, PreRequisite, Ids); + os << Prefix << PrintActions1(C, PreRequisite, Ids, SibIndent, SibKind); Prefix = ", "; + SibKind = OtherSibAction; } os << "}"; } else @@ -2106,9 +2121,13 @@ static unsigned PrintActions1(const Compilation &C, Action *A, } } + auto getSelfIndent = [](int K) -> Twine { + return (K == HeadSibAction) ? "+- " : (K == OtherSibAction) ? 
"|- " : ""; + }; + unsigned Id = Ids.size(); Ids[A] = Id; - llvm::errs() << Id << ": " << os.str() << ", " + llvm::errs() << Indent + getSelfIndent(Kind) << Id << ": " << os.str() << ", " << types::getTypeName(A->getType()) << offload_os.str() << "\n"; return Id; diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 35d11f4e2d3b3..3a5fe6ddeaed5 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -12,6 +12,7 @@ #include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Support/Host.h" using namespace clang::driver; using namespace clang::driver::tools; diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index b99a1b4d36949..68a57310ad402 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Support/Host.h" using namespace clang::driver; using namespace clang::driver::tools; diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp index 59ff7cbc787c7..3e02e57e0f6c7 100644 --- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp @@ -13,6 +13,7 @@ #include "clang/Driver/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/Host.h" using namespace clang::driver; using namespace clang::driver::tools; diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 34be226b69e98..d2b97bf6ad719 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -13,6 +13,7 @@ #include "clang/Driver/Options.h" #include "llvm/ADT/StringSwitch.h" #include 
"llvm/Option/ArgList.h" +#include "llvm/Support/Host.h" using namespace clang::driver; using namespace clang::driver::tools; diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 01648fea8dd8e..2c70c0599ecfa 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -159,6 +159,20 @@ class LogicalErrorHandler : public CFGCallback { S.Diag(B->getExprLoc(), diag::warn_comparison_bitwise_always) << DiagRange << isAlwaysTrue; } + + void compareBitwiseOr(const BinaryOperator *B) override { + if (HasMacroID(B)) + return; + + SourceRange DiagRange = B->getSourceRange(); + S.Diag(B->getExprLoc(), diag::warn_comparison_bitwise_or) << DiagRange; + } + + static bool hasActiveDiagnostics(DiagnosticsEngine &Diags, + SourceLocation Loc) { + return !Diags.isIgnored(diag::warn_tautological_overlap_comparison, Loc) || + !Diags.isIgnored(diag::warn_comparison_bitwise_or, Loc); + } }; } // anonymous namespace @@ -2070,10 +2084,9 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P, .setAlwaysAdd(Stmt::AttributedStmtClass); } - // Install the logical handler for -Wtautological-overlap-compare + // Install the logical handler. llvm::Optional LEH; - if (!Diags.isIgnored(diag::warn_tautological_overlap_comparison, - D->getBeginLoc())) { + if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) { LEH.emplace(S); AC.getCFGBuildOptions().Observer = &*LEH; } @@ -2222,9 +2235,8 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P, checkThrowInNonThrowingFunc(S, FD, AC); // If none of the previous checks caused a CFG build, trigger one here - // for -Wtautological-overlap-compare - if (!Diags.isIgnored(diag::warn_tautological_overlap_comparison, - D->getBeginLoc())) { + // for the logical error handler. 
+ if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) { AC.getCFG(); } diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 8c6abc448d977..0ebb5c68f7c23 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -1304,6 +1304,7 @@ TryCastResult TryLValueToRValueCast(Sema &Self, Expr *SrcExpr, bool DerivedToBase; bool ObjCConversion; bool ObjCLifetimeConversion; + bool FunctionConversion; QualType FromType = SrcExpr->getType(); QualType ToType = R->getPointeeType(); if (CStyle) { @@ -1313,7 +1314,7 @@ TryCastResult TryLValueToRValueCast(Sema &Self, Expr *SrcExpr, Sema::ReferenceCompareResult RefResult = Self.CompareReferenceRelationship( SrcExpr->getBeginLoc(), ToType, FromType, DerivedToBase, ObjCConversion, - ObjCLifetimeConversion); + ObjCLifetimeConversion, FunctionConversion); if (RefResult != Sema::Ref_Compatible) { if (CStyle || RefResult == Sema::Ref_Incompatible) return TC_NotApplicable; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index b0d700cb660b0..74dcac7ecb774 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -7686,7 +7686,12 @@ static void SuggestParentheses(Sema &Self, SourceLocation Loc, static bool IsArithmeticOp(BinaryOperatorKind Opc) { return BinaryOperator::isAdditiveOp(Opc) || BinaryOperator::isMultiplicativeOp(Opc) || - BinaryOperator::isShiftOp(Opc); + BinaryOperator::isShiftOp(Opc) || Opc == BO_And || Opc == BO_Or; + // This only checks for bitwise-or and bitwise-and, but not bitwise-xor and + // not any of the logical operators. Bitwise-xor is commonly used as a + // logical-xor because there is no logical-xor operator. The logical + // operators, including uses of xor, have a high false positive rate for + // precedence warnings. 
} /// IsArithmeticBinaryExpr - Returns true if E is an arithmetic binary @@ -7776,7 +7781,11 @@ static void DiagnoseConditionalPrecedence(Sema &Self, // The condition is an arithmetic binary expression, with a right- // hand side that looks boolean, so warn. - Self.Diag(OpLoc, diag::warn_precedence_conditional) + unsigned DiagID = BinaryOperator::isBitwiseOp(CondOpcode) + ? diag::warn_precedence_bitwise_conditional + : diag::warn_precedence_conditional; + + Self.Diag(OpLoc, DiagID) << Condition->getSourceRange() << BinaryOperator::getOpcodeStr(CondOpcode); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index bb2b445bf58f2..e6491e4f67249 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -5862,20 +5862,21 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, LVK == RVK && LVK != VK_RValue) { // DerivedToBase was already handled by the class-specific case above. // FIXME: Should we allow ObjC conversions here? - bool DerivedToBase, ObjCConversion, ObjCLifetimeConversion; - if (CompareReferenceRelationship( - QuestionLoc, LTy, RTy, DerivedToBase, - ObjCConversion, ObjCLifetimeConversion) == Ref_Compatible && + bool DerivedToBase, ObjCConversion, ObjCLifetimeConversion, + FunctionConversion; + if (CompareReferenceRelationship(QuestionLoc, LTy, RTy, DerivedToBase, + ObjCConversion, ObjCLifetimeConversion, + FunctionConversion) == Ref_Compatible && !DerivedToBase && !ObjCConversion && !ObjCLifetimeConversion && // [...] subject to the constraint that the reference must bind // directly [...] 
- !RHS.get()->refersToBitField() && - !RHS.get()->refersToVectorElement()) { + !RHS.get()->refersToBitField() && !RHS.get()->refersToVectorElement()) { RHS = ImpCastExprToType(RHS.get(), LTy, CK_NoOp, RVK); RTy = RHS.get()->getType(); } else if (CompareReferenceRelationship( - QuestionLoc, RTy, LTy, DerivedToBase, - ObjCConversion, ObjCLifetimeConversion) == Ref_Compatible && + QuestionLoc, RTy, LTy, DerivedToBase, ObjCConversion, + ObjCLifetimeConversion, + FunctionConversion) == Ref_Compatible && !DerivedToBase && !ObjCConversion && !ObjCLifetimeConversion && !LHS.get()->refersToBitField() && !LHS.get()->refersToVectorElement()) { diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 38de136503bf6..43c47c38e6f6f 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -4229,10 +4229,10 @@ static void TryReferenceListInitialization(Sema &S, return; SourceLocation DeclLoc = Initializer->getBeginLoc(); - bool dummy1, dummy2, dummy3; + bool dummy1, dummy2, dummy3, dummy4; Sema::ReferenceCompareResult RefRelationship = S.CompareReferenceRelationship(DeclLoc, cv1T1, cv2T2, dummy1, - dummy2, dummy3); + dummy2, dummy3, dummy4); if (RefRelationship >= Sema::Ref_Related) { // Try to bind the reference here. 
TryReferenceInitializationCore(S, Entity, Kind, Initializer, cv1T1, T1, @@ -4472,13 +4472,15 @@ static OverloadingResult TryRefInitWithConversionFunction( bool DerivedToBase; bool ObjCConversion; bool ObjCLifetimeConversion; - assert(!S.CompareReferenceRelationship(Initializer->getBeginLoc(), T1, T2, - DerivedToBase, ObjCConversion, - ObjCLifetimeConversion) && + bool FunctionConversion; + assert(!S.CompareReferenceRelationship( + Initializer->getBeginLoc(), T1, T2, DerivedToBase, ObjCConversion, + ObjCLifetimeConversion, FunctionConversion) && "Must have incompatible references when binding via conversion"); (void)DerivedToBase; (void)ObjCConversion; (void)ObjCLifetimeConversion; + (void)FunctionConversion; // Build the candidate set directly in the initialization sequence // structure, so that it will persist if we fail. @@ -4605,10 +4607,11 @@ static OverloadingResult TryRefInitWithConversionFunction( bool NewDerivedToBase = false; bool NewObjCConversion = false; bool NewObjCLifetimeConversion = false; - Sema::ReferenceCompareResult NewRefRelationship - = S.CompareReferenceRelationship(DeclLoc, T1, cv3T3, - NewDerivedToBase, NewObjCConversion, - NewObjCLifetimeConversion); + bool NewFunctionConversion = false; + Sema::ReferenceCompareResult NewRefRelationship = + S.CompareReferenceRelationship( + DeclLoc, T1, cv3T3, NewDerivedToBase, NewObjCConversion, + NewObjCLifetimeConversion, NewFunctionConversion); // Add the final conversion sequence, if necessary. 
if (NewRefRelationship == Sema::Ref_Incompatible) { @@ -4642,6 +4645,8 @@ static OverloadingResult TryRefInitWithConversionFunction( Sequence.AddDerivedToBaseCastStep(cv1T1, VK); else if (NewObjCConversion) Sequence.AddObjCObjectConversionStep(cv1T1); + else if (NewFunctionConversion) + Sequence.AddQualificationConversionStep(cv1T1, VK); return OR_Success; } @@ -4701,10 +4706,11 @@ static void TryReferenceInitializationCore(Sema &S, bool DerivedToBase = false; bool ObjCConversion = false; bool ObjCLifetimeConversion = false; + bool FunctionConversion = false; Expr::Classification InitCategory = Initializer->Classify(S.Context); - Sema::ReferenceCompareResult RefRelationship - = S.CompareReferenceRelationship(DeclLoc, cv1T1, cv2T2, DerivedToBase, - ObjCConversion, ObjCLifetimeConversion); + Sema::ReferenceCompareResult RefRelationship = S.CompareReferenceRelationship( + DeclLoc, cv1T1, cv2T2, DerivedToBase, ObjCConversion, + ObjCLifetimeConversion, FunctionConversion); // C++0x [dcl.init.ref]p5: // A reference to type "cv1 T1" is initialized by an expression of type @@ -4735,6 +4741,8 @@ static void TryReferenceInitializationCore(Sema &S, Sequence.AddDerivedToBaseCastStep(cv1T1, VK_LValue); else if (ObjCConversion) Sequence.AddObjCObjectConversionStep(cv1T1); + else if (FunctionConversion) + Sequence.AddQualificationConversionStep(cv1T1, VK_LValue); // We only create a temporary here when binding a reference to a // bit-field or vector element. 
Those cases are't supposed to be diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index df4f0ddc6c37d..749b0f2caaa03 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -335,7 +335,7 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC) { case StaticDataMember: // -- the initializers of nonspecialized static members of template classes if (!IsInNonspecializedTemplate) - return std::make_tuple(nullptr, nullptr); + return std::make_tuple(nullptr, ManglingContextDecl); // Fall through to get the current context. LLVM_FALLTHROUGH; @@ -356,14 +356,15 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC) { llvm_unreachable("unexpected context"); } -CXXMethodDecl *Sema::startLambdaDefinition( - CXXRecordDecl *Class, SourceRange IntroducerRange, - TypeSourceInfo *MethodTypeInfo, SourceLocation EndLoc, - ArrayRef Params, ConstexprSpecKind ConstexprKind, - Optional> Mangling) { +CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class, + SourceRange IntroducerRange, + TypeSourceInfo *MethodTypeInfo, + SourceLocation EndLoc, + ArrayRef Params, + ConstexprSpecKind ConstexprKind) { QualType MethodType = MethodTypeInfo->getType(); TemplateParameterList *TemplateParams = - getGenericLambdaTemplateParameterList(getCurLambda(), *this); + getGenericLambdaTemplateParameterList(getCurLambda(), *this); // If a lambda appears in a dependent context or is a generic lambda (has // template parameters) and has an 'auto' return type, deduce it to a // dependent type. 
@@ -425,20 +426,55 @@ CXXMethodDecl *Sema::startLambdaDefinition( P->setOwningFunction(Method); } + return Method; +} + +void Sema::handleLambdaNumbering( + CXXRecordDecl *Class, CXXMethodDecl *Method, + Optional> Mangling) { if (Mangling) { - Class->setLambdaMangling(Mangling->first, Mangling->second); - } else { - MangleNumberingContext *MCtx; + unsigned ManglingNumber; + bool HasKnownInternalLinkage; Decl *ManglingContextDecl; - std::tie(MCtx, ManglingContextDecl) = - getCurrentMangleNumberContext(Class->getDeclContext()); - if (MCtx) { - unsigned ManglingNumber = MCtx->getManglingNumber(Method); - Class->setLambdaMangling(ManglingNumber, ManglingContextDecl); - } + std::tie(ManglingNumber, HasKnownInternalLinkage, ManglingContextDecl) = + Mangling.getValue(); + Class->setLambdaMangling(ManglingNumber, ManglingContextDecl, + HasKnownInternalLinkage); + return; } - return Method; + auto getMangleNumberingContext = + [this](CXXRecordDecl *Class, Decl *ManglingContextDecl) -> MangleNumberingContext * { + // Get mangle numbering context if there's any extra decl context. + if (ManglingContextDecl) + return &Context.getManglingNumberContext( + ASTContext::NeedExtraManglingDecl, ManglingContextDecl); + // Otherwise, from that lambda's decl context. + auto DC = Class->getDeclContext(); + while (auto *CD = dyn_cast(DC)) + DC = CD->getParent(); + return &Context.getManglingNumberContext(DC); + }; + + MangleNumberingContext *MCtx; + Decl *ManglingContextDecl; + std::tie(MCtx, ManglingContextDecl) = + getCurrentMangleNumberContext(Class->getDeclContext()); + bool HasKnownInternalLinkage = false; + if (!MCtx && getLangOpts().CUDA) { + // Force lambda numbering in CUDA/HIP as we need to name lambdas following + // ODR. Both device- and host-compilation need to have a consistent naming + // on kernel functions. As lambdas are potentially part of these `__global__` + // function names, they need numbering following ODR. 
+ MCtx = getMangleNumberingContext(Class, ManglingContextDecl); + assert(MCtx && "Retrieving mangle numbering context failed!"); + HasKnownInternalLinkage = true; + } + if (MCtx) { + unsigned ManglingNumber = MCtx->getManglingNumber(Method); + Class->setLambdaMangling(ManglingNumber, ManglingContextDecl, + HasKnownInternalLinkage); + } } void Sema::buildLambdaScope(LambdaScopeInfo *LSI, @@ -951,6 +987,9 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, if (getLangOpts().CUDA) CUDASetLambdaAttrs(Method); + // Number the lambda for linkage purposes if necessary. + handleLambdaNumbering(Class, Method); + // Introduce the function call operator as the current declaration context. PushDeclContext(CurScope, Method); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index af70bcaa28874..cfd891ba6a6c0 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -4372,7 +4372,8 @@ Sema::CompareReferenceRelationship(SourceLocation Loc, QualType OrigT1, QualType OrigT2, bool &DerivedToBase, bool &ObjCConversion, - bool &ObjCLifetimeConversion) { + bool &ObjCLifetimeConversion, + bool &FunctionConversion) { assert(!OrigT1->isReferenceType() && "T1 must be the pointee type of the reference type"); assert(!OrigT2->isReferenceType() && "T2 cannot be a reference type"); @@ -4402,15 +4403,16 @@ Sema::CompareReferenceRelationship(SourceLocation Loc, Context.canBindObjCObjectType(UnqualT1, UnqualT2)) ObjCConversion = true; else if (UnqualT2->isFunctionType() && - IsFunctionConversion(UnqualT2, UnqualT1, ConvertedT2)) + IsFunctionConversion(UnqualT2, UnqualT1, ConvertedT2)) { // C++1z [dcl.init.ref]p4: // cv1 T1" is reference-compatible with "cv2 T2" if [...] T2 is "noexcept // function" and T1 is "function" // // We extend this to also apply to 'noreturn', so allow any function // conversion between function types. 
+ FunctionConversion = true; return Ref_Compatible; - else + } else return Ref_Incompatible; // At this point, we know that T1 and T2 are reference-related (at @@ -4491,6 +4493,7 @@ FindConversionForRefInit(Sema &S, ImplicitConversionSequence &ICS, bool DerivedToBase = false; bool ObjCConversion = false; bool ObjCLifetimeConversion = false; + bool FunctionConversion = false; // If we are initializing an rvalue reference, don't permit conversion // functions that return lvalues. @@ -4503,12 +4506,13 @@ FindConversionForRefInit(Sema &S, ImplicitConversionSequence &ICS, if (!ConvTemplate && S.CompareReferenceRelationship( - DeclLoc, - Conv->getConversionType().getNonReferenceType() - .getUnqualifiedType(), - DeclType.getNonReferenceType().getUnqualifiedType(), - DerivedToBase, ObjCConversion, ObjCLifetimeConversion) == - Sema::Ref_Incompatible) + DeclLoc, + Conv->getConversionType() + .getNonReferenceType() + .getUnqualifiedType(), + DeclType.getNonReferenceType().getUnqualifiedType(), + DerivedToBase, ObjCConversion, ObjCLifetimeConversion, + FunctionConversion) == Sema::Ref_Incompatible) continue; } else { // If the conversion function doesn't return a reference type, @@ -4612,11 +4616,11 @@ TryReferenceInit(Sema &S, Expr *Init, QualType DeclType, bool DerivedToBase = false; bool ObjCConversion = false; bool ObjCLifetimeConversion = false; + bool FunctionConversion = false; Expr::Classification InitCategory = Init->Classify(S.Context); - Sema::ReferenceCompareResult RefRelationship - = S.CompareReferenceRelationship(DeclLoc, T1, T2, DerivedToBase, - ObjCConversion, ObjCLifetimeConversion); - + Sema::ReferenceCompareResult RefRelationship = S.CompareReferenceRelationship( + DeclLoc, T1, T2, DerivedToBase, ObjCConversion, ObjCLifetimeConversion, + FunctionConversion); // C++0x [dcl.init.ref]p5: // A reference to type "cv1 T1" is initialized by an expression @@ -5041,9 +5045,10 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType, bool dummy1 = false; bool 
dummy2 = false; bool dummy3 = false; + bool dummy4 = false; Sema::ReferenceCompareResult RefRelationship = S.CompareReferenceRelationship(From->getBeginLoc(), T1, T2, dummy1, - dummy2, dummy3); + dummy2, dummy3, dummy4); if (RefRelationship >= Sema::Ref_Related) { return TryReferenceInit(S, Init, ToType, /*FIXME*/ From->getBeginLoc(), diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 89a7b8cc845e6..4b3a6708717c2 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -11497,17 +11497,18 @@ TreeTransform::TransformLambdaExpr(LambdaExpr *E) { E->getCaptureDefault()); getDerived().transformedLocalDecl(OldClass, {Class}); - Optional> Mangling; + Optional> Mangling; if (getDerived().ReplacingOriginal()) - Mangling = std::make_pair(OldClass->getLambdaManglingNumber(), - OldClass->getLambdaContextDecl()); + Mangling = std::make_tuple(OldClass->getLambdaManglingNumber(), + OldClass->hasKnownLambdaInternalLinkage(), + OldClass->getLambdaContextDecl()); // Build the call operator. CXXMethodDecl *NewCallOperator = getSema().startLambdaDefinition( Class, E->getIntroducerRange(), NewCallOpTSI, E->getCallOperator()->getEndLoc(), NewCallOpTSI->getTypeLoc().castAs().getParams(), - E->getCallOperator()->getConstexprKind(), Mangling); + E->getCallOperator()->getConstexprKind()); LSI->CallOperator = NewCallOperator; @@ -11527,6 +11528,9 @@ TreeTransform::TransformLambdaExpr(LambdaExpr *E) { getDerived().transformAttrs(E->getCallOperator(), NewCallOperator); getDerived().transformedLocalDecl(E->getCallOperator(), {NewCallOperator}); + // Number the lambda for linkage purposes if necessary. + getSema().handleLambdaNumbering(Class, NewCallOperator, Mangling); + // Introduce the context of the call operator. 
Sema::ContextRAII SavedContext(getSema(), NewCallOperator, /*NewThisContext*/false); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 65d62524e22f7..9aa8c77c62319 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1690,6 +1690,7 @@ void ASTDeclReader::ReadCXXDefinitionData( Lambda.CaptureDefault = Record.readInt(); Lambda.NumCaptures = Record.readInt(); Lambda.NumExplicitCaptures = Record.readInt(); + Lambda.HasKnownInternalLinkage = Record.readInt(); Lambda.ManglingNumber = Record.readInt(); Lambda.ContextDecl = ReadDeclID(); Lambda.Captures = (Capture *)Reader.getContext().Allocate( diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c6b05c9a17987..28affedbbb30f 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6224,6 +6224,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { Record->push_back(Lambda.CaptureDefault); Record->push_back(Lambda.NumCaptures); Record->push_back(Lambda.NumExplicitCaptures); + Record->push_back(Lambda.HasKnownInternalLinkage); Record->push_back(Lambda.ManglingNumber); AddDeclRef(D->getLambdaContextDecl()); AddTypeSourceInfo(Lambda.MethodTyInfo); diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index f1592ebff669d..7ba93b858baf5 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -1418,14 +1418,19 @@ FindLastStoreBRVisitor::VisitNode(const ExplodedNode *Succ, if (Optional CE = Succ->getLocationAs()) { if (const auto *VR = dyn_cast(R)) { - const auto *Param = cast(VR->getDecl()); + if (const auto *Param = dyn_cast(VR->getDecl())) { + ProgramStateManager &StateMgr = BRC.getStateManager(); + CallEventManager &CallMgr = StateMgr.getCallEventManager(); - 
ProgramStateManager &StateMgr = BRC.getStateManager(); - CallEventManager &CallMgr = StateMgr.getCallEventManager(); - - CallEventRef<> Call = CallMgr.getCaller(CE->getCalleeContext(), - Succ->getState()); - InitE = Call->getArgExpr(Param->getFunctionScopeIndex()); + CallEventRef<> Call = CallMgr.getCaller(CE->getCalleeContext(), + Succ->getState()); + InitE = Call->getArgExpr(Param->getFunctionScopeIndex()); + } else { + // Handle Objective-C 'self'. + assert(isa(VR->getDecl())); + InitE = cast(CE->getCalleeContext()->getCallSite()) + ->getInstanceReceiver()->IgnoreParenCasts(); + } IsParam = true; } } @@ -2029,8 +2034,6 @@ bool bugreporter::trackExpressionValue(const ExplodedNode *InputNode, // Is it a symbolic value? if (auto L = V.getAs()) { - report.addVisitor(std::make_unique(L->getRegion())); - // FIXME: this is a hack for fixing a later crash when attempting to // dereference a void* pointer. // We should not try to dereference pointers at all when we don't care @@ -2051,10 +2054,14 @@ bool bugreporter::trackExpressionValue(const ExplodedNode *InputNode, else if (CanDereference) RVal = LVState->getSVal(L->getRegion()); - if (CanDereference) + if (CanDereference) { + report.addVisitor( + std::make_unique(L->getRegion())); + if (auto KV = RVal.getAs()) report.addVisitor(std::make_unique( *KV, L->getRegion(), EnableNullFPSuppression, TKind, SFC)); + } const MemRegion *RegionRVal = RVal.getAsRegion(); if (RegionRVal && isa(RegionRVal)) { diff --git a/clang/lib/Tooling/AllTUsExecution.cpp b/clang/lib/Tooling/AllTUsExecution.cpp index 267f945f567b7..d85075f596079 100644 --- a/clang/lib/Tooling/AllTUsExecution.cpp +++ b/clang/lib/Tooling/AllTUsExecution.cpp @@ -8,6 +8,7 @@ #include "clang/Tooling/AllTUsExecution.h" #include "clang/Tooling/ToolExecutorPluginRegistry.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/VirtualFileSystem.h" diff --git a/clang/test/Analysis/cast-value-logic.cpp 
b/clang/test/Analysis/cast-value-logic.cpp index 221ae7f9ae38b..1411ede92e366 100644 --- a/clang/test/Analysis/cast-value-logic.cpp +++ b/clang/test/Analysis/cast-value-logic.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 \ +// RUN: %clang_analyze_cc1 -std=c++14 \ // RUN: -analyzer-checker=core,apiModeling.llvm.CastValue,debug.ExprInspection\ // RUN: -verify %s diff --git a/clang/test/Analysis/cast-value-notes.cpp b/clang/test/Analysis/cast-value-notes.cpp index a0eaeae8ba483..eb5d1b3d3fe27 100644 --- a/clang/test/Analysis/cast-value-notes.cpp +++ b/clang/test/Analysis/cast-value-notes.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 \ +// RUN: %clang_analyze_cc1 -std=c++14 \ // RUN: -analyzer-checker=core,apiModeling.llvm.CastValue,debug.ExprInspection\ // RUN: -analyzer-output=text -verify %s diff --git a/clang/test/Analysis/cast-value-state-dump.cpp b/clang/test/Analysis/cast-value-state-dump.cpp index b8152d46da47d..9abdaae0d4592 100644 --- a/clang/test/Analysis/cast-value-state-dump.cpp +++ b/clang/test/Analysis/cast-value-state-dump.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 \ +// RUN: %clang_analyze_cc1 -std=c++14 \ // RUN: -analyzer-checker=core,apiModeling.llvm.CastValue,debug.ExprInspection\ // RUN: -analyzer-output=text -verify %s 2>&1 | FileCheck %s diff --git a/clang/test/Analysis/ctu-different-triples.cpp b/clang/test/Analysis/ctu-different-triples.cpp index dbfa82fb483d9..20acc318e2e72 100644 --- a/clang/test/Analysis/ctu-different-triples.cpp +++ b/clang/test/Analysis/ctu-different-triples.cpp @@ -1,9 +1,9 @@ // RUN: rm -rf %t && mkdir %t // RUN: mkdir -p %t/ctudir -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu \ +// RUN: %clang_cc1 -std=c++14 -triple x86_64-pc-linux-gnu \ // RUN: -emit-pch -o %t/ctudir/ctu-other.cpp.ast %S/Inputs/ctu-other.cpp // RUN: cp %S/Inputs/ctu-other.cpp.externalDefMap.txt %t/ctudir/externalDefMap.txt -// RUN: %clang_analyze_cc1 -triple powerpc64-montavista-linux-gnu \ +// RUN: %clang_analyze_cc1 -std=c++14 -triple 
powerpc64-montavista-linux-gnu \ // RUN: -analyzer-checker=core,debug.ExprInspection \ // RUN: -analyzer-config experimental-enable-naive-ctu-analysis=true \ // RUN: -analyzer-config ctu-dir=%t/ctudir \ diff --git a/clang/test/Analysis/ctu-main.cpp b/clang/test/Analysis/ctu-main.cpp index abfacfbdae5d7..3f095a0aabc30 100644 --- a/clang/test/Analysis/ctu-main.cpp +++ b/clang/test/Analysis/ctu-main.cpp @@ -1,16 +1,16 @@ // RUN: rm -rf %t && mkdir %t // RUN: mkdir -p %t/ctudir -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu \ +// RUN: %clang_cc1 -std=c++14 -triple x86_64-pc-linux-gnu \ // RUN: -emit-pch -o %t/ctudir/ctu-other.cpp.ast %S/Inputs/ctu-other.cpp -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu \ +// RUN: %clang_cc1 -std=c++14 -triple x86_64-pc-linux-gnu \ // RUN: -emit-pch -o %t/ctudir/ctu-chain.cpp.ast %S/Inputs/ctu-chain.cpp // RUN: cp %S/Inputs/ctu-other.cpp.externalDefMap.txt %t/ctudir/externalDefMap.txt -// RUN: %clang_analyze_cc1 -triple x86_64-pc-linux-gnu \ +// RUN: %clang_analyze_cc1 -std=c++14 -triple x86_64-pc-linux-gnu \ // RUN: -analyzer-checker=core,debug.ExprInspection \ // RUN: -analyzer-config experimental-enable-naive-ctu-analysis=true \ // RUN: -analyzer-config ctu-dir=%t/ctudir \ // RUN: -verify %s -// RUN: %clang_analyze_cc1 -triple x86_64-pc-linux-gnu \ +// RUN: %clang_analyze_cc1 -std=c++14 -triple x86_64-pc-linux-gnu \ // RUN: -analyzer-checker=core,debug.ExprInspection \ // RUN: -analyzer-config experimental-enable-naive-ctu-analysis=true \ // RUN: -analyzer-config ctu-dir=%t/ctudir \ diff --git a/clang/test/Analysis/ctu-unknown-parts-in-triples.cpp b/clang/test/Analysis/ctu-unknown-parts-in-triples.cpp index 5e643c164dd7d..6bcbd709b5ef7 100644 --- a/clang/test/Analysis/ctu-unknown-parts-in-triples.cpp +++ b/clang/test/Analysis/ctu-unknown-parts-in-triples.cpp @@ -3,10 +3,10 @@ // RUN: rm -rf %t && mkdir %t // RUN: mkdir -p %t/ctudir -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu \ +// RUN: %clang_cc1 -std=c++14 -triple 
x86_64-pc-linux-gnu \ // RUN: -emit-pch -o %t/ctudir/ctu-other.cpp.ast %S/Inputs/ctu-other.cpp // RUN: cp %S/Inputs/ctu-other.cpp.externalDefMap.txt %t/ctudir/externalDefMap.txt -// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: %clang_analyze_cc1 -std=c++14 -triple x86_64-unknown-linux-gnu \ // RUN: -analyzer-checker=core,debug.ExprInspection \ // RUN: -analyzer-config experimental-enable-naive-ctu-analysis=true \ // RUN: -analyzer-config ctu-dir=%t/ctudir \ diff --git a/clang/test/Analysis/deadstores-driverkit.cpp b/clang/test/Analysis/deadstores-driverkit.cpp index 0885367b1b9bf..9c423fc6ff218 100644 --- a/clang/test/Analysis/deadstores-driverkit.cpp +++ b/clang/test/Analysis/deadstores-driverkit.cpp @@ -5,7 +5,7 @@ // It needs to be on the top. // Run-lines can wait. -// RUN: %clang_analyze_cc1 -w -triple x86_64-apple-driverkit19.0 \ +// RUN: %clang_analyze_cc1 -std=c++17 -w -triple x86_64-apple-driverkit19.0 \ // RUN: -analyzer-checker=deadcode -verify %s // expected-no-diagnostics diff --git a/clang/test/Analysis/diagnostics/dtors.cpp b/clang/test/Analysis/diagnostics/dtors.cpp index 18bedc61f98e8..6a8349da9d78c 100644 --- a/clang/test/Analysis/diagnostics/dtors.cpp +++ b/clang/test/Analysis/diagnostics/dtors.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -w -analyzer-checker=core,cplusplus -analyzer-output=text -verify %s +// RUN: %clang_analyze_cc1 -std=c++14 -w -analyzer-checker=core,cplusplus -analyzer-output=text -verify %s namespace no_crash_on_delete_dtor { // We were crashing when producing diagnostics for this code, but not for the diff --git a/clang/test/Analysis/domtest.cpp b/clang/test/Analysis/domtest.cpp index 078117ef85dc1..2a2caed130d79 100644 --- a/clang/test/Analysis/domtest.cpp +++ b/clang/test/Analysis/domtest.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 %s \ +// RUN: %clang_analyze_cc1 -std=c++14 %s \ // RUN: -analyzer-checker=debug.DumpDominators \ // RUN: -analyzer-checker=debug.DumpPostDominators \ // RUN: 
-analyzer-checker=debug.DumpControlDependencies \ diff --git a/clang/test/Analysis/explain-svals.cpp b/clang/test/Analysis/explain-svals.cpp index c1b5200eb8e92..9c37642758bb9 100644 --- a/clang/test/Analysis/explain-svals.cpp +++ b/clang/test/Analysis/explain-svals.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -analyzer-checker=core.builtin,debug.ExprInspection,unix.cstring -verify %s +// RUN: %clang_analyze_cc1 -std=c++14 -triple i386-apple-darwin10 -analyzer-checker=core.builtin,debug.ExprInspection,unix.cstring -verify %s typedef unsigned long size_t; diff --git a/clang/test/Analysis/initialization.cpp b/clang/test/Analysis/initialization.cpp index db765930b6e5e..dd622e077e934 100644 --- a/clang/test/Analysis/initialization.cpp +++ b/clang/test/Analysis/initialization.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple i386-apple-darwin10 -analyze -analyzer-checker=core.builtin,debug.ExprInspection -verify %s +// RUN: %clang_cc1 -std=c++14 -triple i386-apple-darwin10 -analyze -analyzer-checker=core.builtin,debug.ExprInspection -verify %s void clang_analyzer_eval(int); diff --git a/clang/test/Analysis/inlining/placement-new-fp-suppression.cpp b/clang/test/Analysis/inlining/placement-new-fp-suppression.cpp index 5f75411716836..5a99ad11cc17e 100644 --- a/clang/test/Analysis/inlining/placement-new-fp-suppression.cpp +++ b/clang/test/Analysis/inlining/placement-new-fp-suppression.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_analyze_cc1 \ +// RUN: %clang_analyze_cc1 -std=c++14 \ // RUN: -analyzer-checker=core.CallAndMessage \ // RUN: -analyzer-config suppress-null-return-paths=false \ // RUN: -verify %s -// RUN: %clang_analyze_cc1 \ +// RUN: %clang_analyze_cc1 -std=c++14 \ // RUN: -analyzer-checker=core.CallAndMessage \ // RUN: -DSUPPRESSED \ // RUN: -verify %s diff --git a/clang/test/Analysis/inner-pointer.cpp b/clang/test/Analysis/inner-pointer.cpp index 5cee0bfbcca60..d8b011a7aa64e 100644 --- a/clang/test/Analysis/inner-pointer.cpp +++ 
b/clang/test/Analysis/inner-pointer.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.InnerPointer \ +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=cplusplus.InnerPointer \ // RUN: -Wno-dangling -Wno-dangling-field -Wno-return-stack-address \ // RUN: %s -analyzer-output=text -verify diff --git a/clang/test/Analysis/malloc.mm b/clang/test/Analysis/malloc.mm index d7bfbf3f34f33..e84644b9dd732 100644 --- a/clang/test/Analysis/malloc.mm +++ b/clang/test/Analysis/malloc.mm @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -analyzer-store=region -verify -fblocks %s +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core,unix.Malloc -analyzer-store=region -verify -fblocks %s #import "Inputs/system-header-simulator-objc.h" #import "Inputs/system-header-simulator-for-malloc.h" diff --git a/clang/test/Analysis/mig.mm b/clang/test/Analysis/mig.mm index 0c7d729e9375d..e8d08f355d3ea 100644 --- a/clang/test/Analysis/mig.mm +++ b/clang/test/Analysis/mig.mm @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -w -analyzer-checker=core,osx.MIG\ +// RUN: %clang_analyze_cc1 -w -analyzer-checker=core,osx.MIG -std=c++14 \ // RUN: -analyzer-output=text -fblocks -verify %s typedef unsigned uint32_t; diff --git a/clang/test/Analysis/new-ctor-null-throw.cpp b/clang/test/Analysis/new-ctor-null-throw.cpp index dfa7cba763fcf..28922c0fad4b7 100644 --- a/clang/test/Analysis/new-ctor-null-throw.cpp +++ b/clang/test/Analysis/new-ctor-null-throw.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core \ +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core \ // RUN: -analyzer-config suppress-null-return-paths=false \ // RUN: -verify %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core \ +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core \ // RUN: -DSUPPRESSED \ // RUN: -verify %s diff --git a/clang/test/Analysis/new-ctor-null.cpp b/clang/test/Analysis/new-ctor-null.cpp index 32f2f9500cce3..f3c07e2123731 
100644 --- a/clang/test/Analysis/new-ctor-null.cpp +++ b/clang/test/Analysis/new-ctor-null.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 \ +// RUN: %clang_analyze_cc1 -std=c++14 \ // RUN: -analyzer-checker=core,debug.ExprInspection \ // RUN: -verify %s diff --git a/clang/test/Analysis/novoidtypecrash.c b/clang/test/Analysis/novoidtypecrash.c index c04cfca29b4f1..b19990a2791b0 100644 --- a/clang/test/Analysis/novoidtypecrash.c +++ b/clang/test/Analysis/novoidtypecrash.c @@ -1,8 +1,27 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=core %s +x; +y(void **z) { // no-crash + *z = x; + int *w; + y(&w); + *w; +} + a; -b(void **c) { // no-crash - *c = a; - int *d; - b(&d); - *d; +b(*c) {} +e(*c) { + void *d = f(); + b(d); + *c = d; +} +void *g() { + e(&a); + return a; +} +j() { + int h; + char i = g(); + if (i) + for (; h;) + ; } diff --git a/clang/test/Analysis/osobject-retain-release.cpp b/clang/test/Analysis/osobject-retain-release.cpp index afcc242583588..42675fc70e785 100644 --- a/clang/test/Analysis/osobject-retain-release.cpp +++ b/clang/test/Analysis/osobject-retain-release.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -fblocks -analyze -analyzer-output=text\ +// RUN: %clang_analyze_cc1 -std=c++14 -fblocks -analyze -analyzer-output=text\ // RUN: -analyzer-checker=core,osx,debug.ExprInspection -verify %s #include "os_object_base.h" diff --git a/clang/test/Analysis/osobjectcstylecastchecker_test.cpp b/clang/test/Analysis/osobjectcstylecastchecker_test.cpp index 07f878cd39d55..fabed7ee34b1b 100644 --- a/clang/test/Analysis/osobjectcstylecastchecker_test.cpp +++ b/clang/test/Analysis/osobjectcstylecastchecker_test.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=optin.osx.OSObjectCStyleCast %s -verify +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=optin.osx.OSObjectCStyleCast %s -verify #include "os_object_base.h" struct OSArray : public OSObject { diff --git a/clang/test/Analysis/plist-macros-with-expansion.cpp 
b/clang/test/Analysis/plist-macros-with-expansion.cpp index e836c78b4bb35..e07747eaec74d 100644 --- a/clang/test/Analysis/plist-macros-with-expansion.cpp +++ b/clang/test/Analysis/plist-macros-with-expansion.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core -verify %s // -// RUN: %clang_analyze_cc1 -analyzer-checker=core %s \ +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core %s \ // RUN: -analyzer-output=plist -o %t.plist \ // RUN: -analyzer-config expand-macros=true // diff --git a/clang/test/Analysis/ptr-iter.cpp b/clang/test/Analysis/ptr-iter.cpp index a35fae470a7ef..a94288cd1c8cc 100644 --- a/clang/test/Analysis/ptr-iter.cpp +++ b/clang/test/Analysis/ptr-iter.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 %s -analyzer-output=text -verify \ +// RUN: %clang_analyze_cc1 %s -std=c++14 -analyzer-output=text -verify \ // RUN: -analyzer-checker=core,alpha.nondeterminism.PointerIteration #include "Inputs/system-header-simulator-cxx.h" diff --git a/clang/test/Analysis/ptr-sort.cpp b/clang/test/Analysis/ptr-sort.cpp index a4f94817f13b3..d238b390bdc23 100644 --- a/clang/test/Analysis/ptr-sort.cpp +++ b/clang/test/Analysis/ptr-sort.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 %s -analyzer-output=text -verify \ +// RUN: %clang_analyze_cc1 %s -std=c++14 -analyzer-output=text -verify \ // RUN: -analyzer-checker=core,alpha.nondeterminism.PointerSorting #include "Inputs/system-header-simulator-cxx.h" diff --git a/clang/test/Analysis/rvo.cpp b/clang/test/Analysis/rvo.cpp index cf06a9570b368..7215fbbded461 100644 --- a/clang/test/Analysis/rvo.cpp +++ b/clang/test/Analysis/rvo.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker core,cplusplus \ +// RUN: %clang_analyze_cc1 -analyzer-checker core,cplusplus -std=c++14 \ // RUN: -analyzer-checker debug.ExprInspection -verify %s void clang_analyzer_eval(bool); diff --git a/clang/test/Analysis/sizeofpack.cpp 
b/clang/test/Analysis/sizeofpack.cpp index 44c3bba3a8ae4..8c0ca02b0710a 100644 --- a/clang/test/Analysis/sizeofpack.cpp +++ b/clang/test/Analysis/sizeofpack.cpp @@ -1,5 +1,5 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \ -// RUN: -verify %s +// RUN: -std=c++14 -verify %s typedef __typeof(sizeof(int)) size_t; diff --git a/clang/test/Analysis/stack-frame-context-revision.cpp b/clang/test/Analysis/stack-frame-context-revision.cpp index 8c119f50c141e..51f86defe3154 100644 --- a/clang/test/Analysis/stack-frame-context-revision.cpp +++ b/clang/test/Analysis/stack-frame-context-revision.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,cplusplus.NewDelete -verify %s +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core,cplusplus.NewDelete -verify %s // expected-no-diagnostics: // From now the profile of the 'StackFrameContext' also contains the diff --git a/clang/test/Analysis/temporaries.mm b/clang/test/Analysis/temporaries.mm index 43546ae3441d8..44d30d5d7d535 100644 --- a/clang/test/Analysis/temporaries.mm +++ b/clang/test/Analysis/temporaries.mm @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker core,cplusplus -verify %s +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker core,cplusplus -verify %s // expected-no-diagnostics diff --git a/clang/test/Analysis/test-separate-retaincount.cpp b/clang/test/Analysis/test-separate-retaincount.cpp index 5fda2b2e22112..621e1d120bbb2 100644 --- a/clang/test/Analysis/test-separate-retaincount.cpp +++ b/clang/test/Analysis/test-separate-retaincount.cpp @@ -1,12 +1,12 @@ -// RUN: %clang_analyze_cc1 -DNO_CF_OBJECT -verify %s \ +// RUN: %clang_analyze_cc1 -std=c++14 -DNO_CF_OBJECT -verify %s \ // RUN: -analyzer-checker=core,osx \ // RUN: -analyzer-disable-checker osx.cocoa.RetainCount // -// RUN: %clang_analyze_cc1 -DNO_OS_OBJECT -verify %s \ +// RUN: %clang_analyze_cc1 -std=c++14 -DNO_OS_OBJECT -verify %s \ // RUN: -analyzer-checker=core,osx \ // RUN: 
-analyzer-disable-checker osx.OSObjectRetainCount // -// RUN: %clang_analyze_cc1 -DNO_OS_OBJECT -verify %s \ +// RUN: %clang_analyze_cc1 -std=c++14 -DNO_OS_OBJECT -verify %s \ // RUN: -analyzer-checker=core,osx \ // RUN: -analyzer-config "osx.cocoa.RetainCount:CheckOSObject=false" diff --git a/clang/test/Analysis/track-control-dependency-conditions.cpp b/clang/test/Analysis/track-control-dependency-conditions.cpp index 737620f4c31e4..11eb1c56a0388 100644 --- a/clang/test/Analysis/track-control-dependency-conditions.cpp +++ b/clang/test/Analysis/track-control-dependency-conditions.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_analyze_cc1 %s \ +// RUN: %clang_analyze_cc1 %s -std=c++14 \ // RUN: -verify=expected,tracking \ // RUN: -analyzer-config track-conditions=true \ // RUN: -analyzer-output=text \ // RUN: -analyzer-checker=core -// RUN: not %clang_analyze_cc1 -verify %s \ +// RUN: not %clang_analyze_cc1 -std=c++14 -verify %s \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-config track-conditions=false \ // RUN: -analyzer-config track-conditions-debug=true \ @@ -14,14 +14,14 @@ // CHECK-INVALID-DEBUG-SAME: 'track-conditions-debug', that expects // CHECK-INVALID-DEBUG-SAME: 'track-conditions' to also be enabled // -// RUN: %clang_analyze_cc1 %s \ +// RUN: %clang_analyze_cc1 %s -std=c++14 \ // RUN: -verify=expected,tracking,debug \ // RUN: -analyzer-config track-conditions=true \ // RUN: -analyzer-config track-conditions-debug=true \ // RUN: -analyzer-output=text \ // RUN: -analyzer-checker=core -// RUN: %clang_analyze_cc1 %s -verify \ +// RUN: %clang_analyze_cc1 %s -std=c++14 -verify \ // RUN: -analyzer-output=text \ // RUN: -analyzer-config track-conditions=false \ // RUN: -analyzer-checker=core diff --git a/clang/test/Analysis/track-control-dependency-conditions.m b/clang/test/Analysis/track-control-dependency-conditions.m new file mode 100644 index 0000000000000..05b0638a5eb56 --- /dev/null +++ b/clang/test/Analysis/track-control-dependency-conditions.m @@ -0,0 
+1,32 @@ +// RUN: %clang_analyze_cc1 -w -analyzer-checker=core,nullability -verify %s + +// expected-no-diagnostics + +@class C; + +#pragma clang assume_nonnull begin +@interface I +- foo:(C *)c; +@end +#pragma clang assume_nonnull end + +@interface J +@property C *c; +@end + +J *conjure_J(); + +@implementation I +- (void)bar { + if (self) { // no-crash + J *j = conjure_J(); + if (j.c) + [self bar]; + // FIXME: Should warn. + [self foo:j.c]; // no-warning + } +} +@end + +@implementation J +@end diff --git a/clang/test/Analysis/unions.cpp b/clang/test/Analysis/unions.cpp index 6fd35d1a43f4a..76eb20550fdb4 100644 --- a/clang/test/Analysis/unions.cpp +++ b/clang/test/Analysis/unions.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc,debug.ExprInspection %s -analyzer-config eagerly-assume=false -verify +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core,unix.Malloc,debug.ExprInspection %s -analyzer-config eagerly-assume=false -verify extern void clang_analyzer_eval(bool); extern void clang_analyzer_warnIfReached(); diff --git a/clang/test/CodeGenCUDA/unnamed-types.cu b/clang/test/CodeGenCUDA/unnamed-types.cu new file mode 100644 index 0000000000000..81557817e42eb --- /dev/null +++ b/clang/test/CodeGenCUDA/unnamed-types.cu @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST +// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE + +#include "Inputs/cuda.h" + +// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1 + +__device__ float d0(float x) { + return [](float x) { return x + 2.f; }(x); +} + +__device__ float d1(float x) { + return [](float x) { return x * 2.f; }(x); +} + +// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_( +template +__global__ void 
k0(float *p, F f) { + p[0] = f(p[0]) + d0(p[1]) + d1(p[2]); +} + +void f0(float *p) { + [](float *p) { + *p = 1.f; + }(p); +} + +// The inner/outer lambdas are required to be mangled following ODR but their +// linkages are still required to keep the original `internal` linkage. + +// HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_( +// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf( +void f1(float *p) { + [](float *p) { + k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; }); + }(p); +} +// HOST: @__hip_register_globals +// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0 diff --git a/clang/test/CodeGenCXX/cxx11-extern-constexpr.cpp b/clang/test/CodeGenCXX/cxx11-extern-constexpr.cpp index 6c520038e9415..0688d7bead087 100644 --- a/clang/test/CodeGenCXX/cxx11-extern-constexpr.cpp +++ b/clang/test/CodeGenCXX/cxx11-extern-constexpr.cpp @@ -1,10 +1,13 @@ -// RUN: %clang_cc1 -std=c++11 %s -emit-llvm -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CXX11 -// RUN: %clang_cc1 -std=c++1z %s -emit-llvm -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CXX17 +// RUN: %clang_cc1 -std=c++11 %s -emit-llvm -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=X86,CXX11X86 +// RUN: %clang_cc1 -std=c++1z %s -emit-llvm -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=X86,CXX17X86 +// RUN: %clang_cc1 -std=c++11 %s -emit-llvm -o - -triple amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=AMD,CXX11AMD +// RUN: %clang_cc1 -std=c++1z %s -emit-llvm -o - -triple amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=AMD,CXX17AMD struct A { static const int Foo = 123; }; -// CHECK: @_ZN1A3FooE = constant i32 123, align 4 +// X86: @_ZN1A3FooE = constant i32 123, align 4 +// AMD: @_ZN1A3FooE = addrspace(4) constant i32 123, align 4 const int *p = &A::Foo; // emit available_externally const int A::Foo; // convert to full definition @@ -16,7 +19,8 @@ 
struct CreatePOD { // Deferred initialization of the structure here requires changing // the type of the global variable: the initializer list does not include // the tail padding. - // CXX11: @_ZN9CreatePOD3podE = available_externally constant { i32, i8 } { i32 42, i8 43 }, + // CXX11X86: @_ZN9CreatePOD3podE = available_externally constant { i32, i8 } { i32 42, i8 43 }, + // CXX11AMD: @_ZN9CreatePOD3podE = available_externally addrspace(1) constant { i32, i8 } { i32 42, i8 43 }, static constexpr PODWithInit pod{}; }; const int *p_pod = &CreatePOD::pod.g; @@ -30,29 +34,40 @@ struct MutableBar { }; struct Foo { - // CXX11: @_ZN3Foo21ConstexprStaticMemberE = available_externally constant i32 42, - // CXX17: @_ZN3Foo21ConstexprStaticMemberE = linkonce_odr constant i32 42, + // CXX11X86: @_ZN3Foo21ConstexprStaticMemberE = available_externally constant i32 42, + // CXX17X86: @_ZN3Foo21ConstexprStaticMemberE = linkonce_odr constant i32 42, + // CXX11AMD: @_ZN3Foo21ConstexprStaticMemberE = available_externally addrspace(4) constant i32 42, + // CXX17AMD: @_ZN3Foo21ConstexprStaticMemberE = linkonce_odr addrspace(4) constant i32 42, static constexpr int ConstexprStaticMember = 42; - // CHECK: @_ZN3Foo17ConstStaticMemberE = available_externally constant i32 43, + // X86: @_ZN3Foo17ConstStaticMemberE = available_externally constant i32 43, + // AMD: @_ZN3Foo17ConstStaticMemberE = available_externally addrspace(4) constant i32 43, static const int ConstStaticMember = 43; - // CXX11: @_ZN3Foo23ConstStaticStructMemberE = available_externally constant %struct.Bar { i32 44 }, - // CXX17: @_ZN3Foo23ConstStaticStructMemberE = linkonce_odr constant %struct.Bar { i32 44 }, + // CXX11X86: @_ZN3Foo23ConstStaticStructMemberE = available_externally constant %struct.Bar { i32 44 }, + // CXX17X86: @_ZN3Foo23ConstStaticStructMemberE = linkonce_odr constant %struct.Bar { i32 44 }, + // CXX11AMD: @_ZN3Foo23ConstStaticStructMemberE = available_externally addrspace(1) constant %struct.Bar { i32 
44 }, + // CXX17AMD: @_ZN3Foo23ConstStaticStructMemberE = linkonce_odr addrspace(1) constant %struct.Bar { i32 44 }, static constexpr Bar ConstStaticStructMember = {44}; - // CXX11: @_ZN3Foo34ConstexprStaticMutableStructMemberE = external global %struct.MutableBar, - // CXX17: @_ZN3Foo34ConstexprStaticMutableStructMemberE = linkonce_odr global %struct.MutableBar { i32 45 }, + // CXX11X86: @_ZN3Foo34ConstexprStaticMutableStructMemberE = external global %struct.MutableBar, + // CXX17X86: @_ZN3Foo34ConstexprStaticMutableStructMemberE = linkonce_odr global %struct.MutableBar { i32 45 }, + // CXX11AMD: @_ZN3Foo34ConstexprStaticMutableStructMemberE = external addrspace(1) global %struct.MutableBar, + // CXX17AMD: @_ZN3Foo34ConstexprStaticMutableStructMemberE = linkonce_odr addrspace(1) global %struct.MutableBar { i32 45 }, static constexpr MutableBar ConstexprStaticMutableStructMember = {45}; }; -// CHECK: @_ZL15ConstStaticexpr = internal constant i32 46, +// X86: @_ZL15ConstStaticexpr = internal constant i32 46, +// AMD: @_ZL15ConstStaticexpr = internal addrspace(4) constant i32 46, static constexpr int ConstStaticexpr = 46; -// CHECK: @_ZL9ConstExpr = internal constant i32 46, align 4 +// X86: @_ZL9ConstExpr = internal constant i32 46, align 4 +// AMD: @_ZL9ConstExpr = internal addrspace(4) constant i32 46, align 4 static const int ConstExpr = 46; -// CHECK: @_ZL21ConstexprStaticStruct = internal constant %struct.Bar { i32 47 }, +// X86: @_ZL21ConstexprStaticStruct = internal constant %struct.Bar { i32 47 }, +// AMD: @_ZL21ConstexprStaticStruct = internal addrspace(1) constant %struct.Bar { i32 47 }, static constexpr Bar ConstexprStaticStruct = {47}; -// CHECK: @_ZL28ConstexprStaticMutableStruct = internal global %struct.MutableBar { i32 48 }, +// X86: @_ZL28ConstexprStaticMutableStruct = internal global %struct.MutableBar { i32 48 }, +// AMD: @_ZL28ConstexprStaticMutableStruct = internal addrspace(1) global %struct.MutableBar { i32 48 }, static constexpr MutableBar 
ConstexprStaticMutableStruct = {48}; void use(const int &); diff --git a/clang/test/CodeGenCXX/implicit-function-conversion.cpp b/clang/test/CodeGenCXX/implicit-function-conversion.cpp new file mode 100644 index 0000000000000..2d14c6ae519f5 --- /dev/null +++ b/clang/test/CodeGenCXX/implicit-function-conversion.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-unknown-linux -std=c++17 | FileCheck %s + +double a(double) noexcept; +int b(double (&)(double)); + +// CHECK: call i32 @_Z1bRFddE(double (double)* @_Z1ad) +int c = b(a); diff --git a/clang/test/PCH/debug-info-pch-container-path.c b/clang/test/PCH/debug-info-pch-container-path.c new file mode 100644 index 0000000000000..efe49e48e2dc5 --- /dev/null +++ b/clang/test/PCH/debug-info-pch-container-path.c @@ -0,0 +1,22 @@ +// REQUIRES: asserts + +// Modules: +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: cd %t + +// RUN: %clang_cc1 -fmodule-format=obj -emit-pch \ +// RUN: -triple %itanium_abi_triple \ +// RUN: -fdebug-prefix-map=%t=BUILD \ +// RUN: -fdebug-prefix-map=%S=SOURCE \ +// RUN: -o %t/prefix.ll %S/debug-info-limited-struct.h \ +// RUN: -mllvm -debug-only=pchcontainer &>%t-container.ll +// RUN: cat %t-container.ll | FileCheck %s + +// CHECK: distinct !DICompileUnit( +// CHECK-SAME: language: DW_LANG_C99, +// CHECK-SAME: file: ![[FILE:[0-9]+]], +// CHECK: ![[FILE]] = !DIFile( +// CHECK-SAME: filename: "SOURCE/debug-info-limited-struct.h", +// CHECK-SAME: directory: "BUILD" + diff --git a/clang/test/Sema/parentheses.c b/clang/test/Sema/parentheses.c index 0dabc11d39136..047bcbfe6cafd 100644 --- a/clang/test/Sema/parentheses.c +++ b/clang/test/Sema/parentheses.c @@ -93,6 +93,28 @@ void conditional_op(int x, int y, _Bool b, void* p) { (void)(x + y > 0 ? 1 : 2); // no warning (void)(x + (y > 0) ? 1 : 2); // expected-warning {{operator '?:' has lower precedence than '+'}} expected-note 2{{place parentheses}} + + (void)(b ? 0xf0 : 0x10 | b ? 
0x5 : 0x2); // expected-warning {{operator '?:' has lower precedence than '|'}} expected-note 2{{place parentheses}} + + (void)((b ? 0xf0 : 0x10) | (b ? 0x5 : 0x2)); // no warning, has parentheses + (void)(b ? 0xf0 : (0x10 | b) ? 0x5 : 0x2); // no warning, has parentheses + + (void)(x | b ? 1 : 2); // expected-warning {{operator '?:' has lower precedence than '|'}} expected-note 2{{place parentheses}} + (void)(x & b ? 1 : 2); // expected-warning {{operator '?:' has lower precedence than '&'}} expected-note 2{{place parentheses}} + + (void)((x | b) ? 1 : 2); // no warning, has parentheses + (void)(x | (b ? 1 : 2)); // no warning, has parentheses + (void)((x & b) ? 1 : 2); // no warning, has parentheses + (void)(x & (b ? 1 : 2)); // no warning, has parentheses + + // Only warn on uses of the bitwise operators, and not the logical operators. + // The bitwise operators are more likely to be bugs while the logical + // operators are more likely to be used correctly. Since there is no + // explicit logical-xor operator, the bitwise-xor is commonly used instead. + // For this warning, treat the bitwise-xor as if it were a logical operator. + (void)(x ^ b ? 1 : 2); // no warning, ^ is often used as logical xor + (void)(x || b ? 1 : 2); // no warning, logical operator + (void)(x && b ? 
1 : 2); // no warning, logical operator } // RUN: not %clang_cc1 -fsyntax-only -Wparentheses -Werror -fdiagnostics-show-option %s 2>&1 | FileCheck %s -check-prefix=CHECK-FLAG diff --git a/clang/test/Sema/warn-bitwise-compare.c b/clang/test/Sema/warn-bitwise-compare.c index 175f8f5367f33..d08f1bf13f20b 100644 --- a/clang/test/Sema/warn-bitwise-compare.c +++ b/clang/test/Sema/warn-bitwise-compare.c @@ -1,7 +1,12 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -Wtautological-compare %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wtautological-bitwise-compare %s #define mydefine 2 +enum { + ZERO, + ONE, +}; + void f(int x) { if ((8 & x) == 3) {} // expected-warning {{bitwise comparison always evaluates to false}} if ((x & 8) == 4) {} // expected-warning {{bitwise comparison always evaluates to false}} @@ -13,6 +18,9 @@ void f(int x) { if ((x & 0x15) == 0x13) {} // expected-warning {{bitwise comparison always evaluates to false}} if ((0x23 | x) == 0x155){} // expected-warning {{bitwise comparison always evaluates to false}} + if (!!((8 & x) == 3)) {} // expected-warning {{bitwise comparison always evaluates to false}} + int y = ((8 & x) == 3) ? 
1 : 2; // expected-warning {{bitwise comparison always evaluates to false}} + if ((x & 8) == 8) {} if ((x & 8) != 8) {} if ((x | 4) == 4) {} @@ -26,3 +34,14 @@ void f(int x) { if ((x & mydefine) == 8) {} if ((x | mydefine) == 4) {} } + +void g(int x) { + if (x | 5) {} // expected-warning {{bitwise or with non-zero value always evaluates to true}} + if (5 | x) {} // expected-warning {{bitwise or with non-zero value always evaluates to true}} + if (!((x | 5))) {} // expected-warning {{bitwise or with non-zero value always evaluates to true}} + + if (x | -1) {} // expected-warning {{bitwise or with non-zero value always evaluates to true}} + if (x | ONE) {} // expected-warning {{bitwise or with non-zero value always evaluates to true}} + + if (x | ZERO) {} +} diff --git a/clang/test/SemaCXX/warn-bitwise-compare.cpp b/clang/test/SemaCXX/warn-bitwise-compare.cpp new file mode 100644 index 0000000000000..894d4c581e6f0 --- /dev/null +++ b/clang/test/SemaCXX/warn-bitwise-compare.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Wtautological-bitwise-compare %s + +void test(int x) { + bool b1 = (8 & x) == 3; + // expected-warning@-1 {{bitwise comparison always evaluates to false}} + bool b2 = x | 5; + // expected-warning@-1 {{bitwise or with non-zero value always evaluates to true}} + bool b3 = (x | 5); + // expected-warning@-1 {{bitwise or with non-zero value always evaluates to true}} + bool b4 = !!(x | 5); + // expected-warning@-1 {{bitwise or with non-zero value always evaluates to true}} +} diff --git a/compiler-rt/test/profile/instrprof-get-filename-merge-mode.c b/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c similarity index 86% rename from compiler-rt/test/profile/instrprof-get-filename-merge-mode.c rename to compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c index c6e2fca22d194..7e26e3e6b5dd3 100644 --- a/compiler-rt/test/profile/instrprof-get-filename-merge-mode.c +++ 
b/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c @@ -1,6 +1,6 @@ // Test __llvm_profile_get_filename when the on-line merging mode is enabled. // -// RUN: %clang_pgogen -dynamiclib -o %t.dso %p/Inputs/instrprof-get-filename-dso.c +// RUN: %clang_pgogen -fPIC -shared -o %t.dso %p/../Inputs/instrprof-get-filename-dso.c // RUN: %clang_pgogen -o %t %s %t.dso // RUN: env LLVM_PROFILE_FILE="%t-%m.profraw" %run %t diff --git a/libcxx/include/__config b/libcxx/include/__config index ee7351e9313d5..044cd0ceb0073 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -242,6 +242,7 @@ #ifdef __FreeBSD__ # include +# include # if _BYTE_ORDER == _LITTLE_ENDIAN # define _LIBCPP_LITTLE_ENDIAN # else // _BYTE_ORDER == _LITTLE_ENDIAN @@ -963,6 +964,20 @@ typedef unsigned int char32_t; # define _LIBCPP_DEPRECATED_IN_CXX17 #endif +// Macros to enter and leave a state where deprecation warnings are suppressed. +#if !defined(_LIBCPP_SUPPRESS_DEPRECATED_PUSH) && \ + (defined(_LIBCPP_COMPILER_CLANG) || defined(_LIBCPP_COMPILER_GCC)) +# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated\"") +# define _LIBCPP_SUPPRESS_DEPRECATED_POP \ + _Pragma("GCC diagnostic pop") +#endif +#if !defined(_LIBCPP_SUPPRESS_DEPRECATED_PUSH) +# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH +# define _LIBCPP_SUPPRESS_DEPRECATED_POP +#endif + #if _LIBCPP_STD_VER <= 11 # define _LIBCPP_EXPLICIT_AFTER_CXX11 #else @@ -1164,7 +1179,8 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( // Some systems do not provide gets() in their C library, for security reasons. 
#ifndef _LIBCPP_C_HAS_NO_GETS -# if defined(_LIBCPP_MSVCRT) || (defined(__FreeBSD__) && __FreeBSD__ >= 13) +# if defined(_LIBCPP_MSVCRT) || \ + (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) # define _LIBCPP_C_HAS_NO_GETS # endif #endif diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 77dcc2321d991..77b57a41e418a 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -1119,8 +1119,12 @@ template struct _LIBCPP_TEMPLATE_VIS add_rvalue_reference template using add_rvalue_reference_t = typename add_rvalue_reference<_Tp>::type; #endif +// Suppress deprecation notice for volatile-qualified return type resulting +// from volatile-qualified types _Tp. +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template _Tp&& __declval(int); template _Tp __declval(long); +_LIBCPP_SUPPRESS_DEPRECATED_POP template decltype(_VSTD::__declval<_Tp>(0)) diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp index 2128590047d67..595989c4f3fa4 100644 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp @@ -55,6 +55,9 @@ void test_result_of_imp() #endif } +// Do not warn on deprecated uses of 'volatile' below. 
+_LIBCPP_SUPPRESS_DEPRECATED_PUSH + int main(int, char**) { { @@ -171,3 +174,5 @@ int main(int, char**) return 0; } + +_LIBCPP_SUPPRESS_DEPRECATED_POP diff --git a/libcxxabi/test/unwind_06.pass.cpp b/libcxxabi/test/unwind_06.pass.cpp index e4c04e837451e..7d67f52f8e02d 100644 --- a/libcxxabi/test/unwind_06.pass.cpp +++ b/libcxxabi/test/unwind_06.pass.cpp @@ -24,7 +24,7 @@ volatile int counter; double try1(bool v) { double a = get(0); double b = get(1); - for (counter = 100; counter; --counter) + for (counter = 100; counter; counter = counter - 1) a += get(1) + b; if (v) throw 10; return get(0)+a+b; @@ -34,7 +34,7 @@ double try2(bool v) { double a = get(0); double b = get(1); double c = get(2); - for (counter = 100; counter; --counter) + for (counter = 100; counter; counter = counter - 1) a += get(1) + b + c; if (v) throw 10; return get(0)+a+b+c; @@ -45,7 +45,7 @@ double try3(bool v) { double b = get(1); double c = get(2); double d = get(3); - for (counter = 100; counter; --counter) + for (counter = 100; counter; counter = counter - 1) a += get(1) + b + c + d; if (v) throw 10; return get(0)+a+b+c+d; @@ -57,7 +57,7 @@ double try4(bool v) { double c = get(0); double d = get(0); double e = get(0); - for (counter = 100; counter; --counter) + for (counter = 100; counter; counter = counter - 1) a += get(1) + b+c+d+e; if (v) throw 10; return get(0)+a+b+c+d+e; @@ -70,7 +70,7 @@ double try5(bool v) { double d = get(0); double e = get(0); double f = get(0); - for (counter = 100; counter; --counter) + for (counter = 100; counter; counter = counter - 1) a += get(1) + b+c+d+e+f; if (v) throw 10; return get(0)+a+b+c+d+e+f; @@ -84,7 +84,7 @@ double try6(bool v) { double e = get(0); double f = get(0); double g = get(0); - for (counter = 100; counter; --counter) + for (counter = 100; counter; counter = counter - 1) a += get(1) + b+c+d+e+f+g; if (v) throw 10; return get(0)+a+b+c+d+e+f+g; @@ -99,7 +99,7 @@ double try7(bool v) { double f = get(0); double g = get(0); double h = get(0); - 
for (counter = 100; counter; --counter) + for (counter = 100; counter; counter = counter - 1) a += get(1) + b+c+d+e+f+g+h; if (v) throw 10; return get(0)+a+b+c+d+e+f+g+h; @@ -115,7 +115,7 @@ double try8(bool v) { double g = get(0); double h = get(0); double i = get(0); - for (counter = 100; counter; --counter) + for (counter = 100; counter; counter = counter - 1) a += get(1) + b+c+d+e+f+g+h+i; if (v) throw 10; return get(0)+a+b+c+d+e+f+g+h+i; diff --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt index a30df7c4faac7..7c5e8b79b7fe8 100644 --- a/lld/COFF/CMakeLists.txt +++ b/lld/COFF/CMakeLists.txt @@ -38,7 +38,6 @@ add_lld_library(lldCOFF Object Option Support - Symbolize WindowsManifest LINK_LIBS diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index cd597ddca4fa4..309e1fbf99e3b 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -18,12 +18,6 @@ #include #include -namespace llvm { -namespace symbolize { -class LLVMSymbolizer; -} -} // namespace llvm - namespace lld { namespace coff { @@ -232,8 +226,6 @@ struct Configuration { bool swaprunNet = false; bool thinLTOEmitImportsFiles; bool thinLTOIndexOnly; - - llvm::symbolize::LLVMSymbolizer *symbolizer = nullptr; }; extern Configuration *config; diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 8770a870ddb8f..faec3ba160a56 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -795,81 +795,29 @@ void ObjFile::initializeDependencies() { Optional> ObjFile::getVariableLocation(StringRef var) { if (!dwarf) { - dwarf = DWARFContext::create(*getCOFFObj()); + dwarf = make(DWARFContext::create(*getCOFFObj())); if (!dwarf) return None; - initializeDwarf(); } if (config->machine == I386) var.consume_front("_"); - auto it = variableLoc.find(var); - if (it == variableLoc.end()) + Optional> ret = dwarf->getVariableLoc(var); + if (!ret) return None; - - // Take file name string from line table. 
- std::string fileName; - if (!it->second.lt->getFileNameByIndex( - it->second.file, {}, - DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, fileName)) - return None; - - return std::make_pair(saver.save(fileName), it->second.line); + return std::make_pair(saver.save(ret->first), ret->second); } // Used only for DWARF debug info, which is not common (except in MinGW -// environments). This initializes the dwarf, lineTables and variableLoc -// members. -void ObjFile::initializeDwarf() { - for (std::unique_ptr &cu : dwarf->compile_units()) { - auto report = [](Error err) { - handleAllErrors(std::move(err), - [](ErrorInfoBase &info) { warn(info.message()); }); - }; - Expected expectedLT = - dwarf->getLineTableForUnit(cu.get(), report); - const DWARFDebugLine::LineTable *lt = nullptr; - if (expectedLT) - lt = *expectedLT; - else - report(expectedLT.takeError()); - if (!lt) - continue; - lineTables.push_back(lt); - - // Loop over variable records and insert them to variableLoc. - for (const auto &entry : cu->dies()) { - DWARFDie die(cu.get(), &entry); - // Skip all tags that are not variables. - if (die.getTag() != dwarf::DW_TAG_variable) - continue; - - // Skip if a local variable because we don't need them for generating - // error messages. In general, only non-local symbols can fail to be - // linked. - if (!dwarf::toUnsigned(die.find(dwarf::DW_AT_external), 0)) - continue; - - // Get the source filename index for the variable. - unsigned file = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_file), 0); - if (!lt->hasFileAtIndex(file)) - continue; - - // Get the line number on which the variable is declared. - unsigned line = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_line), 0); - - // Here we want to take the variable name to add it into variableLoc. - // Variable can have regular and linkage name associated. 
At first, we try - // to get linkage name as it can be different, for example when we have - // two variables in different namespaces of the same object. Use common - // name otherwise, but handle the case when it also absent in case if the - // input object file lacks some debug info. - StringRef name = - dwarf::toString(die.find(dwarf::DW_AT_linkage_name), - dwarf::toString(die.find(dwarf::DW_AT_name), "")); - if (!name.empty()) - variableLoc.insert({name, {lt, file, line}}); - } +// environments). +Optional ObjFile::getDILineInfo(uint32_t offset, + uint32_t sectionIndex) { + if (!dwarf) { + dwarf = make(DWARFContext::create(*getCOFFObj())); + if (!dwarf) + return None; } + + return dwarf->getDILineInfo(offset, sectionIndex); } StringRef ltrim1(StringRef s, const char *chars) { diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index d3f4cd7bb2f56..672461cd84baf 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -10,13 +10,13 @@ #define LLD_COFF_INPUT_FILES_H #include "Config.h" +#include "lld/Common/DWARF.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" @@ -26,6 +26,7 @@ #include namespace llvm { +struct DILineInfo; namespace pdb { class DbiModuleDescriptorBuilder; } @@ -206,6 +207,9 @@ class ObjFile : public InputFile { llvm::Optional> getVariableLocation(StringRef var); + llvm::Optional getDILineInfo(uint32_t offset, + uint32_t sectionIndex); + private: const coff_section* getSection(uint32_t i); const coff_section *getSection(COFFSymbolRef sym) { @@ -216,7 +220,6 @@ class ObjFile : public InputFile { void initializeSymbols(); void initializeFlags(); void initializeDependencies(); - void initializeDwarf(); SectionChunk * readSection(uint32_t 
sectionNumber, @@ -291,14 +294,7 @@ class ObjFile : public InputFile { // symbols in the real symbol table) are filled with null pointers. std::vector symbols; - std::unique_ptr dwarf; - std::vector lineTables; - struct VarLoc { - const llvm::DWARFDebugLine::LineTable *lt; - unsigned file; - unsigned line; - }; - llvm::DenseMap variableLoc; + DWARFCache *dwarf = nullptr; }; // This type represents import library members that contain DLL names diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 183761f1c1983..869dfc7a2ee5f 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -110,13 +110,11 @@ static std::vector getSymbolLocations(BitcodeFile *file) { static Optional> getFileLineDwarf(const SectionChunk *c, uint32_t addr) { - if (!config->symbolizer) - config->symbolizer = make(); - Expected expectedLineInfo = config->symbolizer->symbolizeCode( - *c->file->getCOFFObj(), {addr, c->getSectionNumber() - 1}); - if (!expectedLineInfo) + Optional optionalLineInfo = + c->file->getDILineInfo(addr, c->getSectionNumber() - 1); + if (!optionalLineInfo) return None; - const DILineInfo &lineInfo = *expectedLineInfo; + const DILineInfo &lineInfo = *optionalLineInfo; if (lineInfo.FileName == DILineInfo::BadString) return None; return std::make_pair(saver.save(lineInfo.FileName), lineInfo.Line); diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 70849cc7b94b2..1a04a8074bed0 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -29,6 +29,7 @@ set_property(SOURCE Version.cpp APPEND PROPERTY add_lld_library(lldCommon Args.cpp + DWARF.cpp ErrorHandler.cpp Filesystem.cpp Memory.cpp @@ -46,6 +47,7 @@ add_lld_library(lldCommon LINK_COMPONENTS Codegen Core + DebugInfoDWARF Demangle MC Option diff --git a/lld/Common/DWARF.cpp b/lld/Common/DWARF.cpp new file mode 100644 index 0000000000000..077adbcaf8581 --- /dev/null +++ b/lld/Common/DWARF.cpp @@ -0,0 +1,103 @@ +//===- DWARF.cpp 
----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lld/Common/DWARF.h" +#include "lld/Common/ErrorHandler.h" + +using namespace llvm; + +namespace lld { + +DWARFCache::DWARFCache(std::unique_ptr d) + : dwarf(std::move(d)) { + for (std::unique_ptr &cu : dwarf->compile_units()) { + auto report = [](Error err) { + handleAllErrors(std::move(err), + [](ErrorInfoBase &info) { warn(info.message()); }); + }; + Expected expectedLT = + dwarf->getLineTableForUnit(cu.get(), report); + const DWARFDebugLine::LineTable *lt = nullptr; + if (expectedLT) + lt = *expectedLT; + else + report(expectedLT.takeError()); + if (!lt) + continue; + lineTables.push_back(lt); + + // Loop over variable records and insert them to variableLoc. + for (const auto &entry : cu->dies()) { + DWARFDie die(cu.get(), &entry); + // Skip all tags that are not variables. + if (die.getTag() != dwarf::DW_TAG_variable) + continue; + + // Skip if a local variable because we don't need them for generating + // error messages. In general, only non-local symbols can fail to be + // linked. + if (!dwarf::toUnsigned(die.find(dwarf::DW_AT_external), 0)) + continue; + + // Get the source filename index for the variable. + unsigned file = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_file), 0); + if (!lt->hasFileAtIndex(file)) + continue; + + // Get the line number on which the variable is declared. + unsigned line = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_line), 0); + + // Here we want to take the variable name to add it into variableLoc. + // Variable can have regular and linkage name associated. 
At first, we try + // to get linkage name as it can be different, for example when we have + // two variables in different namespaces of the same object. Use common + // name otherwise, but handle the case when it also absent in case if the + // input object file lacks some debug info. + StringRef name = + dwarf::toString(die.find(dwarf::DW_AT_linkage_name), + dwarf::toString(die.find(dwarf::DW_AT_name), "")); + if (!name.empty()) + variableLoc.insert({name, {lt, file, line}}); + } + } +} + +// Returns the pair of file name and line number describing location of data +// object (variable, array, etc) definition. +Optional> +DWARFCache::getVariableLoc(StringRef name) { + // Return if we have no debug information about data object. + auto it = variableLoc.find(name); + if (it == variableLoc.end()) + return None; + + // Take file name string from line table. + std::string fileName; + if (!it->second.lt->getFileNameByIndex( + it->second.file, {}, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, fileName)) + return None; + + return std::make_pair(fileName, it->second.line); +} + +// Returns source line information for a given offset +// using DWARF debug info. 
+Optional DWARFCache::getDILineInfo(uint64_t offset, + uint64_t sectionIndex) { + DILineInfo info; + for (const llvm::DWARFDebugLine::LineTable *lt : lineTables) { + if (lt->getFileLineInfoForAddress( + {offset, sectionIndex}, nullptr, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, info)) + return info; + } + return None; +} + +} // namespace lld diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index b0389ccf100af..fdf935a30856a 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -17,7 +17,6 @@ #include "lld/Common/Memory.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/LTO/LTO.h" @@ -265,57 +264,8 @@ std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, } template void ObjFile::initializeDwarf() { - dwarf = std::make_unique(std::make_unique>(this)); - for (std::unique_ptr &cu : dwarf->compile_units()) { - auto report = [](Error err) { - handleAllErrors(std::move(err), - [](ErrorInfoBase &info) { warn(info.message()); }); - }; - Expected expectedLT = - dwarf->getLineTableForUnit(cu.get(), report); - const DWARFDebugLine::LineTable *lt = nullptr; - if (expectedLT) - lt = *expectedLT; - else - report(expectedLT.takeError()); - if (!lt) - continue; - lineTables.push_back(lt); - - // Loop over variable records and insert them to variableLoc. - for (const auto &entry : cu->dies()) { - DWARFDie die(cu.get(), &entry); - // Skip all tags that are not variables. - if (die.getTag() != dwarf::DW_TAG_variable) - continue; - - // Skip if a local variable because we don't need them for generating - // error messages. In general, only non-local symbols can fail to be - // linked. - if (!dwarf::toUnsigned(die.find(dwarf::DW_AT_external), 0)) - continue; - - // Get the source filename index for the variable. 
- unsigned file = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_file), 0); - if (!lt->hasFileAtIndex(file)) - continue; - - // Get the line number on which the variable is declared. - unsigned line = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_line), 0); - - // Here we want to take the variable name to add it into variableLoc. - // Variable can have regular and linkage name associated. At first, we try - // to get linkage name as it can be different, for example when we have - // two variables in different namespaces of the same object. Use common - // name otherwise, but handle the case when it also absent in case if the - // input object file lacks some debug info. - StringRef name = - dwarf::toString(die.find(dwarf::DW_AT_linkage_name), - dwarf::toString(die.find(dwarf::DW_AT_name), "")); - if (!name.empty()) - variableLoc.insert({name, {lt, file, line}}); - } - } + dwarf = make(std::make_unique( + std::make_unique>(this))); } // Returns the pair of file name and line number describing location of data @@ -325,19 +275,7 @@ Optional> ObjFile::getVariableLoc(StringRef name) { llvm::call_once(initDwarfLine, [this]() { initializeDwarf(); }); - // Return if we have no debug information about data object. - auto it = variableLoc.find(name); - if (it == variableLoc.end()) - return None; - - // Take file name string from line table. - std::string fileName; - if (!it->second.lt->getFileNameByIndex( - it->second.file, {}, - DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, fileName)) - return None; - - return std::make_pair(fileName, it->second.line); + return dwarf->getVariableLoc(name); } // Returns source line information for a given offset @@ -359,14 +297,7 @@ Optional ObjFile::getDILineInfo(InputSectionBase *s, // Use fake address calcuated by adding section file offset and offset in // section. See comments for ObjectInfo class. 
- DILineInfo info; - for (const llvm::DWARFDebugLine::LineTable *lt : lineTables) { - if (lt->getFileLineInfoForAddress( - {s->getOffsetInFile() + offset, sectionIndex}, nullptr, - DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, info)) - return info; - } - return None; + return dwarf->getDILineInfo(s->getOffsetInFile() + offset, sectionIndex); } ELFFileBase::ELFFileBase(Kind k, MemoryBufferRef mb) : InputFile(k, mb) { diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 1c78654d00fc6..cde6bc617764d 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -10,13 +10,13 @@ #define LLD_ELF_INPUT_FILES_H #include "Config.h" +#include "lld/Common/DWARF.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/LLVM.h" #include "lld/Common/Reproduce.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/IR/Comdat.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELF.h" @@ -26,7 +26,6 @@ namespace llvm { class TarWriter; -struct DILineInfo; namespace lto { class InputFile; } @@ -282,14 +281,7 @@ template class ObjFile : public ELFFileBase { // reporting. Linker may find reasonable number of errors in a // single object file, so we cache debugging information in order to // parse it only once for each object file we link. - std::unique_ptr dwarf; - std::vector lineTables; - struct VarLoc { - const llvm::DWARFDebugLine::LineTable *lt; - unsigned file; - unsigned line; - }; - llvm::DenseMap variableLoc; + DWARFCache *dwarf; llvm::once_flag initDwarfLine; }; diff --git a/lld/include/lld/Common/DWARF.h b/lld/include/lld/Common/DWARF.h new file mode 100644 index 0000000000000..f0d3d2fbda775 --- /dev/null +++ b/lld/include/lld/Common/DWARF.h @@ -0,0 +1,47 @@ +//===- DWARF.h --------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_DWARF_H +#define LLD_DWARF_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include +#include + +namespace llvm { +struct DILineInfo; +} // namespace llvm + +namespace lld { + +class DWARFCache { +public: + DWARFCache(std::unique_ptr dwarf); + llvm::Optional getDILineInfo(uint64_t offset, + uint64_t sectionIndex); + llvm::Optional> + getVariableLoc(StringRef name); + +private: + std::unique_ptr dwarf; + std::vector lineTables; + struct VarLoc { + const llvm::DWARFDebugLine::LineTable *lt; + unsigned file; + unsigned line; + }; + llvm::DenseMap variableLoc; +}; + +} // namespace lld + +#endif diff --git a/lld/include/lld/Common/LLVM.h b/lld/include/lld/Common/LLVM.h index f7ed1d793ca7b..34b7b0d194ab4 100644 --- a/lld/include/lld/Common/LLVM.h +++ b/lld/include/lld/Common/LLVM.h @@ -17,6 +17,7 @@ // This should be the only #include, force #includes of all the others on // clients. 
#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include diff --git a/lld/include/lld/Core/File.h b/lld/include/lld/Core/File.h index 492f35989f166..df014669eb628 100644 --- a/lld/include/lld/Core/File.h +++ b/lld/include/lld/Core/File.h @@ -16,6 +16,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/lld/lib/ReaderWriter/MachO/DebugInfo.h b/lld/lib/ReaderWriter/MachO/DebugInfo.h index 959e10f9a0735..591dd1ebad86c 100644 --- a/lld/lib/ReaderWriter/MachO/DebugInfo.h +++ b/lld/lib/ReaderWriter/MachO/DebugInfo.h @@ -12,6 +12,7 @@ #include "lld/Core/Atom.h" #include +#include "llvm/Support/Allocator.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/lld/test/ELF/invalid/bad-reloc-target.test b/lld/test/ELF/invalid/bad-reloc-target.test index 05aa2e5d6fae7..9ab5d9c22580f 100644 --- a/lld/test/ELF/invalid/bad-reloc-target.test +++ b/lld/test/ELF/invalid/bad-reloc-target.test @@ -68,7 +68,6 @@ Sections: Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - Name: .rela.text Type: SHT_RELA - Link: .symtab Info: .text Relocations: - Offset: 0x0000000000000000 diff --git a/lld/test/ELF/invalid/sht-group.test b/lld/test/ELF/invalid/sht-group.test index c91e649e26434..ac155fa08ba6c 100644 --- a/lld/test/ELF/invalid/sht-group.test +++ b/lld/test/ELF/invalid/sht-group.test @@ -12,7 +12,6 @@ FileHeader: Sections: - Name: .group Type: SHT_GROUP - Link: .symtab Info: 12345 Members: - SectionOrType: GRP_COMDAT diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index 37ef260495e9b..14dcc95899e99 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Host.h" #include "llvm/Support/InitLLVM.h" #include 
"llvm/Support/Path.h" #include diff --git a/lldb/include/lldb/Utility/UUID.h b/lldb/include/lldb/Utility/UUID.h index cb2e051a4569c..0284357be44a2 100644 --- a/lldb/include/lldb/Utility/UUID.h +++ b/lldb/include/lldb/Utility/UUID.h @@ -9,14 +9,11 @@ #ifndef LLDB_UTILITY_UUID_H #define LLDB_UTILITY_UUID_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include #include #include -#include "llvm/ADT/ArrayRef.h" - -namespace llvm { - class StringRef; -} namespace lldb_private { diff --git a/lldb/packages/Python/lldbsuite/test/commands/command/script/TestCommandScript.py b/lldb/packages/Python/lldbsuite/test/commands/command/script/TestCommandScript.py index 6531cd672792e..9542d0264a6bd 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/command/script/TestCommandScript.py +++ b/lldb/packages/Python/lldbsuite/test/commands/command/script/TestCommandScript.py @@ -4,7 +4,7 @@ from __future__ import print_function - +import sys import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -22,6 +22,21 @@ def test(self): def pycmd_tests(self): self.runCmd("command source py_import") + # Test a bunch of different kinds of python callables with + # both 4 and 5 positional arguments. + self.expect("foobar", substrs=["All good"]) + self.expect("foobar4", substrs=["All good"]) + self.expect("vfoobar", substrs=["All good"]) + self.expect("v5foobar", substrs=["All good"]) + self.expect("sfoobar", substrs=["All good"]) + self.expect("cfoobar", substrs=["All good"]) + self.expect("ifoobar", substrs=["All good"]) + self.expect("sfoobar4", substrs=["All good"]) + self.expect("cfoobar4", substrs=["All good"]) + self.expect("ifoobar4", substrs=["All good"]) + self.expect("ofoobar", substrs=["All good"]) + self.expect("ofoobar4", substrs=["All good"]) + # Verify command that specifies eCommandRequiresTarget returns failure # without a target. 
self.expect('targetname', diff --git a/lldb/packages/Python/lldbsuite/test/commands/command/script/callables.py b/lldb/packages/Python/lldbsuite/test/commands/command/script/callables.py new file mode 100644 index 0000000000000..21e599b82e5be --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/command/script/callables.py @@ -0,0 +1,63 @@ + +from __future__ import print_function + +import lldb + +# bunch of different kinds of python callables that should +# all work as commands. + +def check(debugger, command, context, result, internal_dict): + if (not isinstance(debugger, lldb.SBDebugger) or + not isinstance(command, str) or + not isinstance(result, lldb.SBCommandReturnObject) or + not isinstance(internal_dict, dict) or + (not context is None and + not isinstance(context, lldb.SBExecutionContext))): + raise Exception() + result.AppendMessage("All good.") + +def vfoobar(*args): + check(*args) + +def v5foobar(debugger, command, context, result, internal_dict, *args): + check(debugger, command, context, result, internal_dict) + +def foobar(debugger, command, context, result, internal_dict): + check(debugger, command, context, result, internal_dict) + +def foobar4(debugger, command, result, internal_dict): + check(debugger, command, None, result, internal_dict) + +class FooBar: + @staticmethod + def sfoobar(debugger, command, context, result, internal_dict): + check(debugger, command, context, result, internal_dict) + + @classmethod + def cfoobar(cls, debugger, command, context, result, internal_dict): + check(debugger, command, context, result, internal_dict) + + def ifoobar(self, debugger, command, context, result, internal_dict): + check(debugger, command, context, result, internal_dict) + + def __call__(self, debugger, command, context, result, internal_dict): + check(debugger, command, context, result, internal_dict) + + @staticmethod + def sfoobar4(debugger, command, result, internal_dict): + check(debugger, command, None, result, internal_dict) + + 
@classmethod + def cfoobar4(cls, debugger, command, result, internal_dict): + check(debugger, command, None, result, internal_dict) + + def ifoobar4(self, debugger, command, result, internal_dict): + check(debugger, command, None, result, internal_dict) + +class FooBar4: + def __call__(self, debugger, command, result, internal_dict): + check(debugger, command, None, result, internal_dict) + +FooBarObj = FooBar() + +FooBar4Obj = FooBar4() \ No newline at end of file diff --git a/lldb/packages/Python/lldbsuite/test/commands/command/script/py_import b/lldb/packages/Python/lldbsuite/test/commands/command/script/py_import index 6c1f7b8185f6d..4372d32b0ad11 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/command/script/py_import +++ b/lldb/packages/Python/lldbsuite/test/commands/command/script/py_import @@ -11,3 +11,22 @@ command script add tell_async --function welcome.check_for_synchro --synchronici command script add tell_curr --function welcome.check_for_synchro --synchronicity curr command script add takes_exe_ctx --function welcome.takes_exe_ctx command script import decorated.py + + +command script import callables.py + +command script add -f callables.foobar foobar +command script add -f callables.foobar4 foobar4 +command script add -f callables.vfoobar vfoobar +command script add -f callables.v5foobar v5foobar + +command script add -f callables.FooBar.sfoobar sfoobar +command script add -f callables.FooBar.cfoobar cfoobar +command script add -f callables.FooBarObj.ifoobar ifoobar + +command script add -f callables.FooBar.sfoobar4 sfoobar4 +command script add -f callables.FooBar.cfoobar4 cfoobar4 +command script add -f callables.FooBarObj.ifoobar4 ifoobar4 + +command script add -f callables.FooBarObj ofoobar +command script add -f callables.FooBar4Obj ofoobar4 diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/formatters/TestFormatters.py b/lldb/packages/Python/lldbsuite/test/commands/expression/formatters/TestFormatters.py index 
b13d6555f33bf..011dabce6e9dd 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/expression/formatters/TestFormatters.py +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/formatters/TestFormatters.py @@ -74,6 +74,21 @@ def cleanup(): # EXPR-TYPES-NEW-FOO-NEXT: } # EXPR-TYPES-NEW-FOO-NEXT: } + + self.runCmd("type summary add -F formatters.foo_SummaryProvider3 foo") + self.filecheck("expression foo1", __file__, '-check-prefix=EXPR-FOO1opts') + # EXPR-FOO1opts: (foo) $ + # EXPR-FOO1opts-SAME: a = 12 + # EXPR-FOO1opts-SAME: a_ptr = {{[0-9]+}} -> 13 + # EXPR-FOO1opts-SAME: i = 24 + # EXPR-FOO1opts-SAME: i_ptr = {{[0-9]+}} -> 25 + # EXPR-FOO1opts-SAME: b_ref = {{[0-9]+}} + # EXPR-FOO1opts-SAME: h = 27 + # EXPR-FOO1opts-SAME: k = 29 + # EXPR-FOO1opts-SAME: WITH_OPTS + + self.runCmd("type summary delete foo") + self.runCmd("type summary add -F formatters.foo_SummaryProvider foo") self.expect("expression new int(12)", diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/formatters/formatters.py b/lldb/packages/Python/lldbsuite/test/commands/expression/formatters/formatters.py index dae84988af9e2..ac2888bd203f7 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/expression/formatters/formatters.py +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/formatters/formatters.py @@ -1,3 +1,5 @@ +import lldb + def foo_SummaryProvider(valobj, dict): a = valobj.GetChildMemberWithName('a') a_ptr = valobj.GetChildMemberWithName('a_ptr') @@ -15,3 +17,8 @@ def foo_SummaryProvider(valobj, dict): ', i_ptr = ' + str(i_ptr.GetValueAsUnsigned(0)) + ' -> ' + str(i_ptr.Dereference().GetValueAsUnsigned(0)) + \ ', b_ref = ' + str(b_ref.GetValueAsUnsigned(0)) + \ ', h = ' + str(h.GetValueAsUnsigned(0)) + ' , k = ' + str(k.GetValueAsUnsigned(0)) + +def foo_SummaryProvider3(valobj, dict, options): + if not isinstance(options, lldb.SBTypeSummaryOptions): + raise Exception() + return foo_SummaryProvider(valobj, dict) + ", WITH_OPTS" \ No newline at end 
of file diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/Makefile b/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/TestStaticInitializers.py b/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/TestStaticInitializers.py new file mode 100644 index 0000000000000..e350e6ef930f9 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/TestStaticInitializers.py @@ -0,0 +1,31 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class StaticInitializers(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def test(self): + """ Test a static initializer. """ + self.build() + + lldbutil.run_to_source_breakpoint(self, '// break here', + lldb.SBFileSpec("main.cpp", False)) + + # We use counter to observe if the initializer was called. + self.expect("expr counter", substrs=["(int) $", " = 0"]) + self.expect("expr -p -- struct Foo { Foo() { inc_counter(); } }; Foo f;") + self.expect("expr counter", substrs=["(int) $", " = 1"]) + + def test_failing_init(self): + """ Test a static initializer that fails to execute. """ + self.build() + + lldbutil.run_to_source_breakpoint(self, '// break here', + lldb.SBFileSpec("main.cpp", False)) + + # FIXME: This error message is not even remotely helpful. 
+ self.expect("expr -p -- struct Foo2 { Foo2() { do_abort(); } }; Foo2 f;", error=True, + substrs=["error: couldn't run static initializers: couldn't run static initializer:"]) diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/main.cpp b/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/main.cpp new file mode 100644 index 0000000000000..0bcf1eb3edafb --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/static-initializers/main.cpp @@ -0,0 +1,11 @@ +#include + +int counter = 0; + +void inc_counter() { ++counter; } + +void do_abort() { abort(); } + +int main() { + return 0; // break here +} diff --git a/lldb/packages/Python/lldbsuite/test/iohandler/completion/TestIOHandlerCompletion.py b/lldb/packages/Python/lldbsuite/test/iohandler/completion/TestIOHandlerCompletion.py index d202887902e90..b485851463743 100644 --- a/lldb/packages/Python/lldbsuite/test/iohandler/completion/TestIOHandlerCompletion.py +++ b/lldb/packages/Python/lldbsuite/test/iohandler/completion/TestIOHandlerCompletion.py @@ -13,6 +13,9 @@ class IOHandlerCompletionTest(PExpectTest): mydir = TestBase.compute_mydir(__file__) + # PExpect uses many timeouts internally and doesn't play well + # under ASAN on a loaded machine.. 
+ @skipIfAsan def test_completion(self): self.launch(dimensions=(100,500)) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemoteProcessInfo.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemoteProcessInfo.py index 5a3ae926896a9..70cc25520608b 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemoteProcessInfo.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemoteProcessInfo.py @@ -53,7 +53,7 @@ def attach_commandline_qProcessInfo_reports_correct_pid(self): self.add_process_info_collection_packets() # Run the stream - context = self.expect_gdbremote_sequence(timeout_seconds=8) + context = self.expect_gdbremote_sequence(timeout_seconds=self._DEFAULT_TIMEOUT) self.assertIsNotNone(context) # Gather process info response diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemote_qThreadStopInfo.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemote_qThreadStopInfo.py index 1d3a63d27b76d..0944ba5d0510a 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemote_qThreadStopInfo.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemote_qThreadStopInfo.py @@ -52,7 +52,7 @@ def gather_stop_replies_via_qThreadStopInfo(self, thread_count): self.assertIsNotNone(context) # Wait until all threads have started. - threads = self.wait_for_thread_count(thread_count, timeout_seconds=3) + threads = self.wait_for_thread_count(thread_count, timeout_seconds=self._WAIT_TIMEOUT) self.assertIsNotNone(threads) # On Windows, there could be more threads spawned. 
For example, DebugBreakProcess will diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py index a3abe203a9874..1b30718f74815 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py @@ -639,7 +639,7 @@ def Hg_switches_to_3_threads(self): self.run_process_then_stop(run_seconds=1) # Wait at most x seconds for 3 threads to be present. - threads = self.wait_for_thread_count(3, timeout_seconds=5) + threads = self.wait_for_thread_count(3, timeout_seconds=self._WAIT_TIMEOUT) self.assertEqual(len(threads), 3) # verify we can $H to each thead, and $qC matches the thread we set. @@ -735,7 +735,7 @@ def Hc_then_Csignal_signals_correct_thread(self, segfault_signo): 2: "thread_id"}}], True) - context = self.expect_gdbremote_sequence(timeout_seconds=10) + context = self.expect_gdbremote_sequence(timeout_seconds=self._DEFAULT_TIMEOUT) self.assertIsNotNone(context) signo = context.get("signo") self.assertEqual(int(signo, 16), segfault_signo) @@ -771,7 +771,8 @@ def Hc_then_Csignal_signals_correct_thread(self, segfault_signo): True) # Run the sequence. - context = self.expect_gdbremote_sequence(timeout_seconds=10) + context = self.expect_gdbremote_sequence( + timeout_seconds=self._DEFAULT_TIMEOUT) self.assertIsNotNone(context) # Ensure the stop signal is the signal we delivered. @@ -1485,7 +1486,7 @@ def P_and_p_thread_suffix_work(self): self.assertIsNotNone(context) # Wait for 3 threads to be present. 
- threads = self.wait_for_thread_count(3, timeout_seconds=5) + threads = self.wait_for_thread_count(3, timeout_seconds=self._WAIT_TIMEOUT) self.assertEqual(len(threads), 3) expected_reg_values = [] diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/commandline/TestStubReverseConnect.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/commandline/TestStubReverseConnect.py index 18efbf9d650f9..664b6001d8dae 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/commandline/TestStubReverseConnect.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/commandline/TestStubReverseConnect.py @@ -14,7 +14,7 @@ class TestStubReverseConnect(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) - _DEFAULT_TIMEOUT = 20 + _DEFAULT_TIMEOUT = 20 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) def setUp(self): # Set up the test. diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py index 670d770f88194..ac611bcca1695 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py @@ -32,8 +32,9 @@ class GdbRemoteTestCaseBase(TestBase): NO_DEBUG_INFO_TESTCASE = True _TIMEOUT_SECONDS = 120 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) + _DEFAULT_TIMEOUT = 10 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) _READ_TIMEOUT = 5 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) - _WAIT_TIMEOUT = 3 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) + _WAIT_TIMEOUT = 5 * (10 if ('ASAN_OPTIONS' in os.environ) else 1) _GDBREMOTE_KILL_PACKET = "$k#6b" diff --git a/lldb/scripts/Python/python-wrapper.swig b/lldb/scripts/Python/python-wrapper.swig index 7d507b31c5cd8..277b8657d3441 100644 --- a/lldb/scripts/Python/python-wrapper.swig +++ b/lldb/scripts/Python/python-wrapper.swig @@ -163,14 +163,19 @@ 
LLDBSwigPythonCallTypeScript } PythonObject result; - auto argc = pfunc.GetNumArguments(); - // if the third argument is supported, or varargs are allowed + auto argc = pfunc.GetArgInfo(); + if (!argc) { + llvm::consumeError(argc.takeError()); + return false; + } + PythonObject value_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_value)); PythonObject options_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_options)); - if (argc.count == 3 || argc.has_varargs) - result = pfunc(value_arg,dict,options_arg); - else + + if (argc.get().max_positional_args < 3) result = pfunc(value_arg,dict); + else + result = pfunc(value_arg,dict,options_arg); retval = result.Str().GetString().str(); @@ -696,15 +701,19 @@ LLDBSwigPythonCallCommand // pass the pointer-to cmd_retobj_sb or watch the underlying object disappear from under you // see comment above for SBCommandReturnObjectReleaser for further details - auto argc = pfunc.GetNumArguments(); + auto argc = pfunc.GetArgInfo(); + if (!argc) { + llvm::consumeError(argc.takeError()); + return false; + } PythonObject debugger_arg(PyRefType::Owned, SBTypeToSWIGWrapper(debugger_sb)); PythonObject exe_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(exe_ctx_sb)); PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(&cmd_retobj_sb)); - if (argc.count == 5 || argc.is_bound_method || argc.has_varargs) - pfunc(debugger_arg, PythonString(args), exe_ctx_arg, cmd_retobj_arg, dict); - else + if (argc.get().max_positional_args < 5u) pfunc(debugger_arg, PythonString(args), cmd_retobj_arg, dict); + else + pfunc(debugger_arg, PythonString(args), exe_ctx_arg, cmd_retobj_arg, dict); return true; } diff --git a/lldb/source/Plugins/Process/Windows/Common/CMakeLists.txt b/lldb/source/Plugins/Process/Windows/Common/CMakeLists.txt index d0d3fcbee6f4d..876bc8cab966a 100644 --- a/lldb/source/Plugins/Process/Windows/Common/CMakeLists.txt +++ b/lldb/source/Plugins/Process/Windows/Common/CMakeLists.txt @@ -4,6 +4,7 @@ 
add_lldb_library(lldbPluginProcessWindowsCommon PLUGIN LocalDebugDelegate.cpp NativeProcessWindows.cpp NativeRegisterContextWindows.cpp + NativeRegisterContextWindows_arm.cpp NativeRegisterContextWindows_arm64.cpp NativeRegisterContextWindows_i386.cpp NativeRegisterContextWindows_WoW64.cpp @@ -14,10 +15,10 @@ add_lldb_library(lldbPluginProcessWindowsCommon PLUGIN ProcessWindowsLog.cpp RegisterContextWindows.cpp TargetThreadWindows.cpp + arm/RegisterContextWindows_arm.cpp arm64/RegisterContextWindows_arm64.cpp x64/RegisterContextWindows_x64.cpp x86/RegisterContextWindows_x86.cpp - # TODO add support for ARM (NT) LINK_LIBS lldbCore diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp new file mode 100644 index 0000000000000..d25b08f7ecbac --- /dev/null +++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp @@ -0,0 +1,644 @@ +//===-- NativeRegisterContextWindows_arm.cpp --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__arm__) || defined(_M_ARM) + +#include "NativeRegisterContextWindows_arm.h" +#include "NativeThreadWindows.h" +#include "Plugins/Process/Utility/RegisterInfoPOSIX_arm.h" +#include "ProcessWindowsLog.h" +#include "lldb/Host/HostInfo.h" +#include "lldb/Host/HostThread.h" +#include "lldb/Host/windows/HostThreadWindows.h" +#include "lldb/Host/windows/windows.h" + +#include "lldb/Utility/Log.h" +#include "lldb/Utility/RegisterValue.h" +#include "llvm/ADT/STLExtras.h" + +using namespace lldb; +using namespace lldb_private; + +#define REG_CONTEXT_SIZE sizeof(::CONTEXT) + +namespace { +static const uint32_t g_gpr_regnums_arm[] = { + gpr_r0_arm, gpr_r1_arm, gpr_r2_arm, gpr_r3_arm, gpr_r4_arm, + gpr_r5_arm, gpr_r6_arm, gpr_r7_arm, gpr_r8_arm, gpr_r9_arm, + gpr_r10_arm, gpr_r11_arm, gpr_r12_arm, gpr_sp_arm, gpr_lr_arm, + gpr_pc_arm, gpr_cpsr_arm, + LLDB_INVALID_REGNUM // Register set must be terminated with this flag +}; +static_assert(((sizeof g_gpr_regnums_arm / sizeof g_gpr_regnums_arm[0]) - 1) == + k_num_gpr_registers_arm, + "g_gpr_regnums_arm has wrong number of register infos"); + +static const uint32_t g_fpr_regnums_arm[] = { + fpu_s0_arm, fpu_s1_arm, fpu_s2_arm, fpu_s3_arm, fpu_s4_arm, + fpu_s5_arm, fpu_s6_arm, fpu_s7_arm, fpu_s8_arm, fpu_s9_arm, + fpu_s10_arm, fpu_s11_arm, fpu_s12_arm, fpu_s13_arm, fpu_s14_arm, + fpu_s15_arm, fpu_s16_arm, fpu_s17_arm, fpu_s18_arm, fpu_s19_arm, + fpu_s20_arm, fpu_s21_arm, fpu_s22_arm, fpu_s23_arm, fpu_s24_arm, + fpu_s25_arm, fpu_s26_arm, fpu_s27_arm, fpu_s28_arm, fpu_s29_arm, + fpu_s30_arm, fpu_s31_arm, + + fpu_d0_arm, fpu_d1_arm, fpu_d2_arm, fpu_d3_arm, fpu_d4_arm, + fpu_d5_arm, fpu_d6_arm, fpu_d7_arm, fpu_d8_arm, fpu_d9_arm, + fpu_d10_arm, fpu_d11_arm, fpu_d12_arm, fpu_d13_arm, fpu_d14_arm, + fpu_d15_arm, fpu_d16_arm, fpu_d17_arm, fpu_d18_arm, fpu_d19_arm, + fpu_d20_arm, 
fpu_d21_arm, fpu_d22_arm, fpu_d23_arm, fpu_d24_arm, + fpu_d25_arm, fpu_d26_arm, fpu_d27_arm, fpu_d28_arm, fpu_d29_arm, + fpu_d30_arm, fpu_d31_arm, + + fpu_q0_arm, fpu_q1_arm, fpu_q2_arm, fpu_q3_arm, fpu_q4_arm, + fpu_q5_arm, fpu_q6_arm, fpu_q7_arm, fpu_q8_arm, fpu_q9_arm, + fpu_q10_arm, fpu_q11_arm, fpu_q12_arm, fpu_q13_arm, fpu_q14_arm, + fpu_q15_arm, + + fpu_fpscr_arm, + LLDB_INVALID_REGNUM // Register set must be terminated with this flag +}; +static_assert(((sizeof g_fpr_regnums_arm / sizeof g_fpr_regnums_arm[0]) - 1) == + k_num_fpr_registers_arm, + "g_fpu_regnums_arm has wrong number of register infos"); + +static const RegisterSet g_reg_sets_arm[] = { + {"General Purpose Registers", "gpr", + llvm::array_lengthof(g_gpr_regnums_arm) - 1, g_gpr_regnums_arm}, + {"Floating Point Registers", "fpr", + llvm::array_lengthof(g_fpr_regnums_arm) - 1, g_fpr_regnums_arm}, +}; + +enum { k_num_register_sets = 2 }; + +} // namespace + +static RegisterInfoInterface * +CreateRegisterInfoInterface(const ArchSpec &target_arch) { + assert((HostInfo::GetArchitecture().GetAddressByteSize() == 8) && + "Register setting path assumes this is a 64-bit host"); + return new RegisterInfoPOSIX_arm(target_arch); +} + +static Status GetThreadContextHelper(lldb::thread_t thread_handle, + PCONTEXT context_ptr, + const DWORD control_flag) { + Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS); + Status error; + + memset(context_ptr, 0, sizeof(::CONTEXT)); + context_ptr->ContextFlags = control_flag; + if (!::GetThreadContext(thread_handle, context_ptr)) { + error.SetError(GetLastError(), eErrorTypeWin32); + LLDB_LOG(log, "{0} GetThreadContext failed with error {1}", __FUNCTION__, + error); + return error; + } + return Status(); +} + +static Status SetThreadContextHelper(lldb::thread_t thread_handle, + PCONTEXT context_ptr) { + Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS); + Status error; + // It's assumed that the thread has stopped. 
+ if (!::SetThreadContext(thread_handle, context_ptr)) { + error.SetError(GetLastError(), eErrorTypeWin32); + LLDB_LOG(log, "{0} SetThreadContext failed with error {1}", __FUNCTION__, + error); + return error; + } + return Status(); +} + +std::unique_ptr +NativeRegisterContextWindows::CreateHostNativeRegisterContextWindows( + const ArchSpec &target_arch, NativeThreadProtocol &native_thread) { + // TODO: Register context for a WoW64 application? + + // Register context for a native 64-bit application. + return std::make_unique(target_arch, + native_thread); +} + +NativeRegisterContextWindows_arm::NativeRegisterContextWindows_arm( + const ArchSpec &target_arch, NativeThreadProtocol &native_thread) + : NativeRegisterContextWindows(native_thread, + CreateRegisterInfoInterface(target_arch)) {} + +bool NativeRegisterContextWindows_arm::IsGPR(uint32_t reg_index) const { + return (reg_index >= k_first_gpr_arm && reg_index <= k_last_gpr_arm); +} + +bool NativeRegisterContextWindows_arm::IsFPR(uint32_t reg_index) const { + return (reg_index >= k_first_fpr_arm && reg_index <= k_last_fpr_arm); +} + +uint32_t NativeRegisterContextWindows_arm::GetRegisterSetCount() const { + return k_num_register_sets; +} + +const RegisterSet * +NativeRegisterContextWindows_arm::GetRegisterSet(uint32_t set_index) const { + if (set_index >= k_num_register_sets) + return nullptr; + return &g_reg_sets_arm[set_index]; +} + +Status NativeRegisterContextWindows_arm::GPRRead(const uint32_t reg, + RegisterValue ®_value) { + ::CONTEXT tls_context; + DWORD context_flag = CONTEXT_CONTROL | CONTEXT_INTEGER; + Status error = + GetThreadContextHelper(GetThreadHandle(), &tls_context, context_flag); + if (error.Fail()) + return error; + + switch (reg) { + case gpr_r0_arm: + reg_value.SetUInt32(tls_context.R0); + break; + case gpr_r1_arm: + reg_value.SetUInt32(tls_context.R1); + break; + case gpr_r2_arm: + reg_value.SetUInt32(tls_context.R2); + break; + case gpr_r3_arm: + reg_value.SetUInt32(tls_context.R3); + 
break; + case gpr_r4_arm: + reg_value.SetUInt32(tls_context.R4); + break; + case gpr_r5_arm: + reg_value.SetUInt32(tls_context.R5); + break; + case gpr_r6_arm: + reg_value.SetUInt32(tls_context.R6); + break; + case gpr_r7_arm: + reg_value.SetUInt32(tls_context.R7); + break; + case gpr_r8_arm: + reg_value.SetUInt32(tls_context.R8); + break; + case gpr_r9_arm: + reg_value.SetUInt32(tls_context.R9); + break; + case gpr_r10_arm: + reg_value.SetUInt32(tls_context.R10); + break; + case gpr_r11_arm: + reg_value.SetUInt32(tls_context.R11); + break; + case gpr_r12_arm: + reg_value.SetUInt32(tls_context.R12); + break; + case gpr_sp_arm: + reg_value.SetUInt32(tls_context.Sp); + break; + case gpr_lr_arm: + reg_value.SetUInt32(tls_context.Lr); + break; + case gpr_pc_arm: + reg_value.SetUInt32(tls_context.Pc); + break; + case gpr_cpsr_arm: + reg_value.SetUInt32(tls_context.Cpsr); + break; + } + + return error; +} + +Status +NativeRegisterContextWindows_arm::GPRWrite(const uint32_t reg, + const RegisterValue ®_value) { + ::CONTEXT tls_context; + DWORD context_flag = CONTEXT_CONTROL | CONTEXT_INTEGER; + auto thread_handle = GetThreadHandle(); + Status error = + GetThreadContextHelper(thread_handle, &tls_context, context_flag); + if (error.Fail()) + return error; + + switch (reg) { + case gpr_r0_arm: + tls_context.R0 = reg_value.GetAsUInt32(); + break; + case gpr_r1_arm: + tls_context.R1 = reg_value.GetAsUInt32(); + break; + case gpr_r2_arm: + tls_context.R2 = reg_value.GetAsUInt32(); + break; + case gpr_r3_arm: + tls_context.R3 = reg_value.GetAsUInt32(); + break; + case gpr_r4_arm: + tls_context.R4 = reg_value.GetAsUInt32(); + break; + case gpr_r5_arm: + tls_context.R5 = reg_value.GetAsUInt32(); + break; + case gpr_r6_arm: + tls_context.R6 = reg_value.GetAsUInt32(); + break; + case gpr_r7_arm: + tls_context.R7 = reg_value.GetAsUInt32(); + break; + case gpr_r8_arm: + tls_context.R8 = reg_value.GetAsUInt32(); + break; + case gpr_r9_arm: + tls_context.R9 = reg_value.GetAsUInt32(); + 
break; + case gpr_r10_arm: + tls_context.R10 = reg_value.GetAsUInt32(); + break; + case gpr_r11_arm: + tls_context.R11 = reg_value.GetAsUInt32(); + break; + case gpr_r12_arm: + tls_context.R12 = reg_value.GetAsUInt32(); + break; + case gpr_sp_arm: + tls_context.Sp = reg_value.GetAsUInt32(); + break; + case gpr_lr_arm: + tls_context.Lr = reg_value.GetAsUInt32(); + break; + case gpr_pc_arm: + tls_context.Pc = reg_value.GetAsUInt32(); + break; + case gpr_cpsr_arm: + tls_context.Cpsr = reg_value.GetAsUInt32(); + break; + } + + return SetThreadContextHelper(thread_handle, &tls_context); +} + +Status NativeRegisterContextWindows_arm::FPRRead(const uint32_t reg, + RegisterValue ®_value) { + ::CONTEXT tls_context; + DWORD context_flag = CONTEXT_CONTROL | CONTEXT_FLOATING_POINT; + Status error = + GetThreadContextHelper(GetThreadHandle(), &tls_context, context_flag); + if (error.Fail()) + return error; + + switch (reg) { + case fpu_s0_arm: + case fpu_s1_arm: + case fpu_s2_arm: + case fpu_s3_arm: + case fpu_s4_arm: + case fpu_s5_arm: + case fpu_s6_arm: + case fpu_s7_arm: + case fpu_s8_arm: + case fpu_s9_arm: + case fpu_s10_arm: + case fpu_s11_arm: + case fpu_s12_arm: + case fpu_s13_arm: + case fpu_s14_arm: + case fpu_s15_arm: + case fpu_s16_arm: + case fpu_s17_arm: + case fpu_s18_arm: + case fpu_s19_arm: + case fpu_s20_arm: + case fpu_s21_arm: + case fpu_s22_arm: + case fpu_s23_arm: + case fpu_s24_arm: + case fpu_s25_arm: + case fpu_s26_arm: + case fpu_s27_arm: + case fpu_s28_arm: + case fpu_s29_arm: + case fpu_s30_arm: + case fpu_s31_arm: + reg_value.SetUInt32(tls_context.S[reg - fpu_s0_arm], + RegisterValue::eTypeFloat); + break; + + case fpu_d0_arm: + case fpu_d1_arm: + case fpu_d2_arm: + case fpu_d3_arm: + case fpu_d4_arm: + case fpu_d5_arm: + case fpu_d6_arm: + case fpu_d7_arm: + case fpu_d8_arm: + case fpu_d9_arm: + case fpu_d10_arm: + case fpu_d11_arm: + case fpu_d12_arm: + case fpu_d13_arm: + case fpu_d14_arm: + case fpu_d15_arm: + case fpu_d16_arm: + case 
fpu_d17_arm:
+  case fpu_d18_arm:
+  case fpu_d19_arm:
+  case fpu_d20_arm:
+  case fpu_d21_arm:
+  case fpu_d22_arm:
+  case fpu_d23_arm:
+  case fpu_d24_arm:
+  case fpu_d25_arm:
+  case fpu_d26_arm:
+  case fpu_d27_arm:
+  case fpu_d28_arm:
+  case fpu_d29_arm:
+  case fpu_d30_arm:
+  case fpu_d31_arm:
+    reg_value.SetUInt64(tls_context.D[reg - fpu_d0_arm],
+                        RegisterValue::eTypeDouble);
+    break;
+
+  case fpu_q0_arm:
+  case fpu_q1_arm:
+  case fpu_q2_arm:
+  case fpu_q3_arm:
+  case fpu_q4_arm:
+  case fpu_q5_arm:
+  case fpu_q6_arm:
+  case fpu_q7_arm:
+  case fpu_q8_arm:
+  case fpu_q9_arm:
+  case fpu_q10_arm:
+  case fpu_q11_arm:
+  case fpu_q12_arm:
+  case fpu_q13_arm:
+  case fpu_q14_arm:
+  case fpu_q15_arm:
+    reg_value.SetBytes(&tls_context.Q[reg - fpu_q0_arm], 16,
+                       endian::InlHostByteOrder());
+    break;
+
+  case fpu_fpscr_arm:
+    reg_value.SetUInt32(tls_context.Fpscr);
+    break;
+  }
+
+  return error;
+}
+
+Status
+NativeRegisterContextWindows_arm::FPRWrite(const uint32_t reg,
+                                           const RegisterValue &reg_value) {
+  ::CONTEXT tls_context;
+  DWORD context_flag = CONTEXT_CONTROL | CONTEXT_FLOATING_POINT;
+  auto thread_handle = GetThreadHandle();
+  Status error =
+      GetThreadContextHelper(thread_handle, &tls_context, context_flag);
+  if (error.Fail())
+    return error;
+
+  switch (reg) {
+  case fpu_s0_arm:
+  case fpu_s1_arm:
+  case fpu_s2_arm:
+  case fpu_s3_arm:
+  case fpu_s4_arm:
+  case fpu_s5_arm:
+  case fpu_s6_arm:
+  case fpu_s7_arm:
+  case fpu_s8_arm:
+  case fpu_s9_arm:
+  case fpu_s10_arm:
+  case fpu_s11_arm:
+  case fpu_s12_arm:
+  case fpu_s13_arm:
+  case fpu_s14_arm:
+  case fpu_s15_arm:
+  case fpu_s16_arm:
+  case fpu_s17_arm:
+  case fpu_s18_arm:
+  case fpu_s19_arm:
+  case fpu_s20_arm:
+  case fpu_s21_arm:
+  case fpu_s22_arm:
+  case fpu_s23_arm:
+  case fpu_s24_arm:
+  case fpu_s25_arm:
+  case fpu_s26_arm:
+  case fpu_s27_arm:
+  case fpu_s28_arm:
+  case fpu_s29_arm:
+  case fpu_s30_arm:
+  case fpu_s31_arm:
+    tls_context.S[reg - fpu_s0_arm] = reg_value.GetAsUInt32();
+    break;
+
+  case fpu_d0_arm:
+  case fpu_d1_arm:
+  case fpu_d2_arm:
+  case fpu_d3_arm:
+  case fpu_d4_arm:
+  case fpu_d5_arm:
+  case fpu_d6_arm:
+  case fpu_d7_arm:
+  case fpu_d8_arm:
+  case fpu_d9_arm:
+  case fpu_d10_arm:
+  case fpu_d11_arm:
+  case fpu_d12_arm:
+  case fpu_d13_arm:
+  case fpu_d14_arm:
+  case fpu_d15_arm:
+  case fpu_d16_arm:
+  case fpu_d17_arm:
+  case fpu_d18_arm:
+  case fpu_d19_arm:
+  case fpu_d20_arm:
+  case fpu_d21_arm:
+  case fpu_d22_arm:
+  case fpu_d23_arm:
+  case fpu_d24_arm:
+  case fpu_d25_arm:
+  case fpu_d26_arm:
+  case fpu_d27_arm:
+  case fpu_d28_arm:
+  case fpu_d29_arm:
+  case fpu_d30_arm:
+  case fpu_d31_arm:
+    tls_context.D[reg - fpu_d0_arm] = reg_value.GetAsUInt64();
+    break;
+
+  case fpu_q0_arm:
+  case fpu_q1_arm:
+  case fpu_q2_arm:
+  case fpu_q3_arm:
+  case fpu_q4_arm:
+  case fpu_q5_arm:
+  case fpu_q6_arm:
+  case fpu_q7_arm:
+  case fpu_q8_arm:
+  case fpu_q9_arm:
+  case fpu_q10_arm:
+  case fpu_q11_arm:
+  case fpu_q12_arm:
+  case fpu_q13_arm:
+  case fpu_q14_arm:
+  case fpu_q15_arm:
+    memcpy(&tls_context.Q[reg - fpu_q0_arm], reg_value.GetBytes(), 16);
+    break;
+
+  case fpu_fpscr_arm:
+    tls_context.Fpscr = reg_value.GetAsUInt32();
+    break;
+  }
+
+  return SetThreadContextHelper(thread_handle, &tls_context);
+}
+
+Status
+NativeRegisterContextWindows_arm::ReadRegister(const RegisterInfo *reg_info,
+                                               RegisterValue &reg_value) {
+  Status error;
+  if (!reg_info) {
+    error.SetErrorString("reg_info NULL");
+    return error;
+  }
+
+  const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB];
+  if (reg == LLDB_INVALID_REGNUM) {
+    // This is likely an internal register for lldb use only and should not be
+    // directly queried.
+    error.SetErrorStringWithFormat("register \"%s\" is an internal-only lldb "
+                                   "register, cannot read directly",
+                                   reg_info->name);
+    return error;
+  }
+
+  if (IsGPR(reg))
+    return GPRRead(reg, reg_value);
+
+  if (IsFPR(reg))
+    return FPRRead(reg, reg_value);
+
+  return Status("unimplemented");
+}
+
+Status NativeRegisterContextWindows_arm::WriteRegister(
+    const RegisterInfo *reg_info, const RegisterValue &reg_value) {
+  Status error;
+
+  if (!reg_info) {
+    error.SetErrorString("reg_info NULL");
+    return error;
+  }
+
+  const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB];
+  if (reg == LLDB_INVALID_REGNUM) {
+    // This is likely an internal register for lldb use only and should not be
+    // directly written.
+    error.SetErrorStringWithFormat("register \"%s\" is an internal-only lldb "
+                                   "register, cannot write directly",
+                                   reg_info->name);
+    return error;
+  }
+
+  if (IsGPR(reg))
+    return GPRWrite(reg, reg_value);
+
+  if (IsFPR(reg))
+    return FPRWrite(reg, reg_value);
+
+  return Status("unimplemented");
+}
+
+Status NativeRegisterContextWindows_arm::ReadAllRegisterValues(
+    lldb::DataBufferSP &data_sp) {
+  const size_t data_size = REG_CONTEXT_SIZE;
+  data_sp = std::make_shared<DataBufferHeap>(data_size, 0);
+  ::CONTEXT tls_context;
+  Status error =
+      GetThreadContextHelper(GetThreadHandle(), &tls_context, CONTEXT_ALL);
+  if (error.Fail())
+    return error;
+
+  uint8_t *dst = data_sp->GetBytes();
+  ::memcpy(dst, &tls_context, data_size);
+  return error;
+}
+
+Status NativeRegisterContextWindows_arm::WriteAllRegisterValues(
+    const lldb::DataBufferSP &data_sp) {
+  Status error;
+  const size_t data_size = REG_CONTEXT_SIZE;
+  if (!data_sp) {
+    error.SetErrorStringWithFormat(
+        "NativeRegisterContextWindows_arm::%s invalid data_sp provided",
+        __FUNCTION__);
+    return error;
+  }
+
+  if (data_sp->GetByteSize() != data_size) {
+    error.SetErrorStringWithFormatv(
+        "data_sp contained mismatched data size, expected {0}, actual {1}",
+        data_size, data_sp->GetByteSize());
+    return error;
+  }
+
+  ::CONTEXT tls_context;
+  memcpy(&tls_context, data_sp->GetBytes(), data_size);
+  return SetThreadContextHelper(GetThreadHandle(), &tls_context);
+}
+
+Status NativeRegisterContextWindows_arm::IsWatchpointHit(uint32_t wp_index,
+                                                         bool &is_hit) {
+  return Status("unimplemented");
+}
+
+Status NativeRegisterContextWindows_arm::GetWatchpointHitIndex(
+    uint32_t &wp_index, lldb::addr_t trap_addr) {
+  return Status("unimplemented");
+}
+
+Status NativeRegisterContextWindows_arm::IsWatchpointVacant(uint32_t wp_index,
+                                                            bool &is_vacant) {
+  return Status("unimplemented");
+}
+
+Status NativeRegisterContextWindows_arm::SetHardwareWatchpointWithIndex(
+    lldb::addr_t addr, size_t size, uint32_t watch_flags, uint32_t wp_index) {
+  return Status("unimplemented");
+}
+
+bool NativeRegisterContextWindows_arm::ClearHardwareWatchpoint(
+    uint32_t wp_index) {
+  return false;
+}
+
+Status NativeRegisterContextWindows_arm::ClearAllHardwareWatchpoints() {
+  return Status("unimplemented");
+}
+
+uint32_t NativeRegisterContextWindows_arm::SetHardwareWatchpoint(
+    lldb::addr_t addr, size_t size, uint32_t watch_flags) {
+  return LLDB_INVALID_INDEX32;
+}
+
+lldb::addr_t
+NativeRegisterContextWindows_arm::GetWatchpointAddress(uint32_t wp_index) {
+  return LLDB_INVALID_ADDRESS;
+}
+
+uint32_t NativeRegisterContextWindows_arm::NumSupportedHardwareWatchpoints() {
+  // Not implemented
+  return 0;
+}
+
+#endif // defined(__arm__) || defined(_M_ARM)
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.h b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.h
new file mode 100644
index 0000000000000..2778bed9a78de
--- /dev/null
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.h
@@ -0,0 +1,80 @@
+//===-- NativeRegisterContextWindows_arm.h ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__arm__) || defined(_M_ARM)
+#ifndef liblldb_NativeRegisterContextWindows_arm_h_
+#define liblldb_NativeRegisterContextWindows_arm_h_
+
+#include "Plugins/Process/Utility/lldb-arm-register-enums.h"
+
+#include "NativeRegisterContextWindows.h"
+
+namespace lldb_private {
+
+class NativeThreadWindows;
+
+class NativeRegisterContextWindows_arm : public NativeRegisterContextWindows {
+public:
+  NativeRegisterContextWindows_arm(const ArchSpec &target_arch,
+                                   NativeThreadProtocol &native_thread);
+
+  uint32_t GetRegisterSetCount() const override;
+
+  const RegisterSet *GetRegisterSet(uint32_t set_index) const override;
+
+  Status ReadRegister(const RegisterInfo *reg_info,
+                      RegisterValue &reg_value) override;
+
+  Status WriteRegister(const RegisterInfo *reg_info,
+                       const RegisterValue &reg_value) override;
+
+  Status ReadAllRegisterValues(lldb::DataBufferSP &data_sp) override;
+
+  Status WriteAllRegisterValues(const lldb::DataBufferSP &data_sp) override;
+
+  Status IsWatchpointHit(uint32_t wp_index, bool &is_hit) override;
+
+  Status GetWatchpointHitIndex(uint32_t &wp_index,
+                               lldb::addr_t trap_addr) override;
+
+  Status IsWatchpointVacant(uint32_t wp_index, bool &is_vacant) override;
+
+  bool ClearHardwareWatchpoint(uint32_t wp_index) override;
+
+  Status ClearAllHardwareWatchpoints() override;
+
+  Status SetHardwareWatchpointWithIndex(lldb::addr_t addr, size_t size,
+                                        uint32_t watch_flags,
+                                        uint32_t wp_index);
+
+  uint32_t SetHardwareWatchpoint(lldb::addr_t addr, size_t size,
+                                 uint32_t watch_flags) override;
+
+  lldb::addr_t GetWatchpointAddress(uint32_t wp_index) override;
+
+  uint32_t NumSupportedHardwareWatchpoints() override;
+
+protected:
+  Status GPRRead(const uint32_t reg, RegisterValue &reg_value);
+
+  Status GPRWrite(const uint32_t reg, const RegisterValue &reg_value);
+
+  Status FPRRead(const uint32_t reg, RegisterValue &reg_value);
+
+  Status FPRWrite(const uint32_t reg, const RegisterValue &reg_value);
+
+private:
+  bool IsGPR(uint32_t reg_index) const;
+
+  bool IsFPR(uint32_t reg_index) const;
+};
+
+} // namespace lldb_private
+
+#endif // liblldb_NativeRegisterContextWindows_arm_h_
+#endif // defined(__arm__) || defined(_M_ARM)
diff --git a/lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp
index 416abed882309..86a302a87b86d 100644
--- a/lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp
@@ -20,13 +20,14 @@
 #include "ProcessWindowsLog.h"
 #include "TargetThreadWindows.h"
 
-// TODO support _M_ARM
 #if defined(__x86_64__) || defined(_M_AMD64)
 #include "x64/RegisterContextWindows_x64.h"
 #elif defined(__i386__) || defined(_M_IX86)
 #include "x86/RegisterContextWindows_x86.h"
 #elif defined(__aarch64__) || defined(_M_ARM64)
 #include "arm64/RegisterContextWindows_arm64.h"
+#elif defined(__arm__) || defined(_M_ARM)
+#include "arm/RegisterContextWindows_arm.h"
 #endif
 
 using namespace lldb;
@@ -71,7 +72,12 @@ TargetThreadWindows::CreateRegisterContextForFrame(StackFrame *frame) {
       switch (arch.GetMachine()) {
       case llvm::Triple::arm:
       case llvm::Triple::thumb:
-        LLDB_LOG(log, "debugging ARM (NT) targets is currently unsupported");
+#if defined(__arm__) || defined(_M_ARM)
+        m_thread_reg_ctx_sp.reset(
+            new RegisterContextWindows_arm(*this, concrete_frame_idx));
+#else
+        LLDB_LOG(log, "debugging foreign targets is currently unsupported");
+#endif
         break;
 
       case llvm::Triple::aarch64:
diff --git a/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp b/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp
new file mode 100644
index 0000000000000..c8bcf71c7f6dd
--- /dev/null
+++ 
b/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp
@@ -0,0 +1,445 @@
+//===-- RegisterContextWindows_arm.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__arm__) || defined(_M_ARM)
+
+#include "lldb/Host/windows/HostThreadWindows.h"
+#include "lldb/Host/windows/windows.h"
+#include "lldb/Utility/RegisterValue.h"
+#include "lldb/Utility/Status.h"
+#include "lldb/lldb-private-types.h"
+
+#include "RegisterContextWindows_arm.h"
+#include "TargetThreadWindows.h"
+
+#include "llvm/ADT/STLExtras.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+#define GPR_OFFSET(idx) 0
+#define FPU_OFFSET(idx) 0
+#define FPSCR_OFFSET 0
+#define EXC_OFFSET(reg) 0
+#define DBG_OFFSET_NAME(reg) 0
+
+#define DEFINE_DBG(reg, i)                                                     \
+  #reg, NULL,                                                                  \
+      0, DBG_OFFSET_NAME(reg[i]), eEncodingUint, eFormatHex,                   \
+      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,                               \
+       LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,                               \
+       LLDB_INVALID_REGNUM },                                                  \
+       NULL, NULL, NULL, 0
+
+// Include RegisterInfos_arm to declare our g_register_infos_arm structure.
+#define DECLARE_REGISTER_INFOS_ARM_STRUCT
+#include "Plugins/Process/Utility/RegisterInfos_arm.h"
+#undef DECLARE_REGISTER_INFOS_ARM_STRUCT
+
+static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos_arm);
+
+// Array of lldb register numbers used to define the set of all General Purpose
+// Registers
+uint32_t g_gpr_reg_indices[] = {
+    gpr_r0, gpr_r1, gpr_r2, gpr_r3, gpr_r4, gpr_r5, gpr_r6, gpr_r7, gpr_r8,
+    gpr_r9, gpr_r10, gpr_r11, gpr_r12, gpr_sp, gpr_lr, gpr_pc, gpr_cpsr,
+};
+
+uint32_t g_fpu_reg_indices[] = {
+    fpu_s0, fpu_s1, fpu_s2, fpu_s3, fpu_s4, fpu_s5, fpu_s6, fpu_s7,
+    fpu_s8, fpu_s9, fpu_s10, fpu_s11, fpu_s12, fpu_s13, fpu_s14, fpu_s15,
+    fpu_s16, fpu_s17, fpu_s18, fpu_s19, fpu_s20, fpu_s21, fpu_s22, fpu_s23,
+    fpu_s24, fpu_s25, fpu_s26, fpu_s27, fpu_s28, fpu_s29, fpu_s30, fpu_s31,
+
+    fpu_d0, fpu_d1, fpu_d2, fpu_d3, fpu_d4, fpu_d5, fpu_d6, fpu_d7,
+    fpu_d8, fpu_d9, fpu_d10, fpu_d11, fpu_d12, fpu_d13, fpu_d14, fpu_d15,
+    fpu_d16, fpu_d17, fpu_d18, fpu_d19, fpu_d20, fpu_d21, fpu_d22, fpu_d23,
+    fpu_d24, fpu_d25, fpu_d26, fpu_d27, fpu_d28, fpu_d29, fpu_d30, fpu_d31,
+
+    fpu_q0, fpu_q1, fpu_q2, fpu_q3, fpu_q4, fpu_q5, fpu_q6, fpu_q7,
+    fpu_q8, fpu_q9, fpu_q10, fpu_q11, fpu_q12, fpu_q13, fpu_q14, fpu_q15,
+
+    fpu_fpscr,
+};
+
+RegisterSet g_register_sets[] = {
+    {"General Purpose Registers", "gpr",
+     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
+    {"Floating Point Registers", "fpu", llvm::array_lengthof(g_fpu_reg_indices),
+     g_fpu_reg_indices},
+};
+
+// Constructors and Destructors
+RegisterContextWindows_arm::RegisterContextWindows_arm(
+    Thread &thread, uint32_t concrete_frame_idx)
+    : RegisterContextWindows(thread, concrete_frame_idx) {}
+
+RegisterContextWindows_arm::~RegisterContextWindows_arm() {}
+
+size_t RegisterContextWindows_arm::GetRegisterCount() {
+  return llvm::array_lengthof(g_register_infos_arm);
+}
+
+const RegisterInfo *
+RegisterContextWindows_arm::GetRegisterInfoAtIndex(size_t reg) {
+  if (reg < k_num_register_infos)
+    return &g_register_infos_arm[reg];
+  return nullptr;
+}
+
+size_t RegisterContextWindows_arm::GetRegisterSetCount() {
+  return llvm::array_lengthof(g_register_sets);
+}
+
+// Returns the register set at index reg_set, or nullptr when the index is
+// outside g_register_sets.
+const RegisterSet *RegisterContextWindows_arm::GetRegisterSet(size_t reg_set) {
+  if (reg_set >= llvm::array_lengthof(g_register_sets))
+    return nullptr;
+  return &g_register_sets[reg_set];
+}
+
+// Calls CacheAllRegisterValues() and then copies the requested register out of
+// m_context into reg_value. Returns false if that call fails, reg_info is
+// null, or the register number is not handled by the switch below.
+bool RegisterContextWindows_arm::ReadRegister(const RegisterInfo *reg_info,
+                                              RegisterValue &reg_value) {
+  if (!CacheAllRegisterValues())
+    return false;
+
+  if (reg_info == nullptr)
+    return false;
+
+  const uint32_t reg = reg_info->kinds[eRegisterKindLLDB];
+
+  switch (reg) {
+  case gpr_r0:
+    reg_value.SetUInt32(m_context.R0);
+    break;
+  case gpr_r1:
+    reg_value.SetUInt32(m_context.R1);
+    break;
+  case gpr_r2:
+    reg_value.SetUInt32(m_context.R2);
+    break;
+  case gpr_r3:
+    reg_value.SetUInt32(m_context.R3);
+    break;
+  case gpr_r4:
+    reg_value.SetUInt32(m_context.R4);
+    break;
+  case gpr_r5:
+    reg_value.SetUInt32(m_context.R5);
+    break;
+  case gpr_r6:
+    reg_value.SetUInt32(m_context.R6);
+    break;
+  case gpr_r7:
+    reg_value.SetUInt32(m_context.R7);
+    break;
+  case gpr_r8:
+    reg_value.SetUInt32(m_context.R8);
+    break;
+  case gpr_r9:
+    reg_value.SetUInt32(m_context.R9);
+    break;
+  case gpr_r10:
+    reg_value.SetUInt32(m_context.R10);
+    break;
+  case gpr_r11:
+    reg_value.SetUInt32(m_context.R11);
+    break;
+  case gpr_r12:
+    reg_value.SetUInt32(m_context.R12);
+    break;
+  case gpr_sp:
+    reg_value.SetUInt32(m_context.Sp);
+    break;
+  case gpr_lr:
+    reg_value.SetUInt32(m_context.Lr);
+    break;
+  case gpr_pc:
+    reg_value.SetUInt32(m_context.Pc);
+    break;
+  case gpr_cpsr:
+    reg_value.SetUInt32(m_context.Cpsr);
+    break;
+
+  case fpu_s0:
+  case fpu_s1:
+  case fpu_s2:
+  case fpu_s3:
+  case fpu_s4:
+  case fpu_s5:
+  case fpu_s6:
+  case fpu_s7:
+  case fpu_s8:
+  case fpu_s9:
+  case fpu_s10:
+  case fpu_s11:
+  case fpu_s12:
+  case fpu_s13:
+  case fpu_s14:
+  case fpu_s15:
+  case fpu_s16:
+  case fpu_s17:
+  case fpu_s18:
+  case fpu_s19:
+  case fpu_s20:
+  case fpu_s21:
+  case fpu_s22:
+  case fpu_s23:
+  case fpu_s24:
+  case fpu_s25:
+  case fpu_s26:
+  case fpu_s27:
+  case fpu_s28:
+  case fpu_s29:
+  case fpu_s30:
+  case fpu_s31:
+    reg_value.SetUInt32(m_context.S[reg - fpu_s0], RegisterValue::eTypeFloat);
+    break;
+
+  case fpu_d0:
+  case fpu_d1:
+  case fpu_d2:
+  case fpu_d3:
+  case fpu_d4:
+  case fpu_d5:
+  case fpu_d6:
+  case fpu_d7:
+  case fpu_d8:
+  case fpu_d9:
+  case fpu_d10:
+  case fpu_d11:
+  case fpu_d12:
+  case fpu_d13:
+  case fpu_d14:
+  case fpu_d15:
+  case fpu_d16:
+  case fpu_d17:
+  case fpu_d18:
+  case fpu_d19:
+  case fpu_d20:
+  case fpu_d21:
+  case fpu_d22:
+  case fpu_d23:
+  case fpu_d24:
+  case fpu_d25:
+  case fpu_d26:
+  case fpu_d27:
+  case fpu_d28:
+  case fpu_d29:
+  case fpu_d30:
+  case fpu_d31:
+    reg_value.SetUInt64(m_context.D[reg - fpu_d0], RegisterValue::eTypeDouble);
+    break;
+
+  case fpu_q0:
+  case fpu_q1:
+  case fpu_q2:
+  case fpu_q3:
+  case fpu_q4:
+  case fpu_q5:
+  case fpu_q6:
+  case fpu_q7:
+  case fpu_q8:
+  case fpu_q9:
+  case fpu_q10:
+  case fpu_q11:
+  case fpu_q12:
+  case fpu_q13:
+  case fpu_q14:
+  case fpu_q15:
+    reg_value.SetBytes(&m_context.Q[reg - fpu_q0], reg_info->byte_size,
+                       endian::InlHostByteOrder());
+    break;
+
+  case fpu_fpscr:
+    reg_value.SetUInt32(m_context.Fpscr);
+    break;
+
+  default:
+    reg_value.SetValueToInvalid();
+    return false;
+  }
+  return true;
+}
+
+// Stores reg_value into the matching m_context field and returns the result of
+// ApplyAllRegisterValues(). Returns false if reg_info is null, the cache
+// refresh fails, or the register number is not handled by the switch below.
+bool RegisterContextWindows_arm::WriteRegister(const RegisterInfo *reg_info,
+                                               const RegisterValue &reg_value) {
+  if (reg_info == nullptr)
+    return false;
+
+  // Since we cannot write only a single register value to the inferior, we
+  // need to make sure our cached copy of the register values is fresh.
+  // Otherwise when writing r0, for example, we may also overwrite some other
+  // register with a stale value.
+  if (!CacheAllRegisterValues())
+    return false;
+
+  const uint32_t reg = reg_info->kinds[eRegisterKindLLDB];
+
+  switch (reg) {
+  case gpr_r0:
+    m_context.R0 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r1:
+    m_context.R1 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r2:
+    m_context.R2 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r3:
+    m_context.R3 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r4:
+    m_context.R4 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r5:
+    m_context.R5 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r6:
+    m_context.R6 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r7:
+    m_context.R7 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r8:
+    m_context.R8 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r9:
+    m_context.R9 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r10:
+    m_context.R10 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r11:
+    m_context.R11 = reg_value.GetAsUInt32();
+    break;
+  case gpr_r12:
+    m_context.R12 = reg_value.GetAsUInt32();
+    break;
+  case gpr_sp:
+    m_context.Sp = reg_value.GetAsUInt32();
+    break;
+  case gpr_lr:
+    m_context.Lr = reg_value.GetAsUInt32();
+    break;
+  case gpr_pc:
+    m_context.Pc = reg_value.GetAsUInt32();
+    break;
+  case gpr_cpsr:
+    m_context.Cpsr = reg_value.GetAsUInt32();
+    break;
+
+  case fpu_s0:
+  case fpu_s1:
+  case fpu_s2:
+  case fpu_s3:
+  case fpu_s4:
+  case fpu_s5:
+  case fpu_s6:
+  case fpu_s7:
+  case fpu_s8:
+  case fpu_s9:
+  case fpu_s10:
+  case fpu_s11:
+  case fpu_s12:
+  case fpu_s13:
+  case fpu_s14:
+  case fpu_s15:
+  case fpu_s16:
+  case fpu_s17:
+  case fpu_s18:
+  case fpu_s19:
+  case fpu_s20:
+  case fpu_s21:
+  case fpu_s22:
+  case fpu_s23:
+  case fpu_s24:
+  case fpu_s25:
+  case fpu_s26:
+  case fpu_s27:
+  case fpu_s28:
+  case fpu_s29:
+  case fpu_s30:
+  case fpu_s31:
+    m_context.S[reg - fpu_s0] = reg_value.GetAsUInt32();
+    break;
+
+  case fpu_d0:
+  case fpu_d1:
+  case fpu_d2:
+  case fpu_d3:
+  case fpu_d4:
+  case fpu_d5:
+  case fpu_d6:
+  case fpu_d7:
+  case fpu_d8:
+  case fpu_d9:
+  case fpu_d10:
+  case fpu_d11:
+  case fpu_d12:
+  case fpu_d13:
+  case fpu_d14:
+  case fpu_d15:
+  case fpu_d16:
+  case fpu_d17:
+  case fpu_d18:
+  case fpu_d19:
+  case fpu_d20:
+  case fpu_d21:
+  case fpu_d22:
+  case fpu_d23:
+  case fpu_d24:
+  case fpu_d25:
+  case fpu_d26:
+  case fpu_d27:
+  case fpu_d28:
+  case fpu_d29:
+  case fpu_d30:
+  case fpu_d31:
+    m_context.D[reg - fpu_d0] = reg_value.GetAsUInt64();
+    break;
+
+  case fpu_q0:
+  case fpu_q1:
+  case fpu_q2:
+  case fpu_q3:
+  case fpu_q4:
+  case fpu_q5:
+  case fpu_q6:
+  case fpu_q7:
+  case fpu_q8:
+  case fpu_q9:
+  case fpu_q10:
+  case fpu_q11:
+  case fpu_q12:
+  case fpu_q13:
+  case fpu_q14:
+  case fpu_q15:
+    memcpy(&m_context.Q[reg - fpu_q0], reg_value.GetBytes(), 16);
+    break;
+
+  case fpu_fpscr:
+    m_context.Fpscr = reg_value.GetAsUInt32();
+    break;
+
+  default:
+    return false;
+  }
+
+  // Physically update the registers in the target process.
+  return ApplyAllRegisterValues();
+}
+
+#endif // defined(__arm__) || defined(_M_ARM)
diff --git a/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.h b/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.h
new file mode 100644
index 0000000000000..57050671d40c2
--- /dev/null
+++ b/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.h
@@ -0,0 +1,47 @@
+//===-- RegisterContextWindows_arm.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef liblldb_RegisterContextWindows_arm_H_
+#define liblldb_RegisterContextWindows_arm_H_
+
+#if defined(__arm__) || defined(_M_ARM)
+
+#include "RegisterContextWindows.h"
+#include "lldb/lldb-forward.h"
+
+namespace lldb_private {
+
+class Thread;
+
+class RegisterContextWindows_arm : public RegisterContextWindows {
+public:
+  // Constructors and Destructors
+  RegisterContextWindows_arm(Thread &thread, uint32_t concrete_frame_idx);
+
+  virtual ~RegisterContextWindows_arm();
+
+  // Subclasses must override these functions
+  size_t GetRegisterCount() override;
+
+  const RegisterInfo *GetRegisterInfoAtIndex(size_t reg) override;
+
+  size_t GetRegisterSetCount() override;
+
+  const RegisterSet *GetRegisterSet(size_t reg_set) override;
+
+  bool ReadRegister(const RegisterInfo *reg_info,
+                    RegisterValue &reg_value) override;
+
+  bool WriteRegister(const RegisterInfo *reg_info,
+                     const RegisterValue &reg_value) override;
+};
+} // namespace lldb_private
+
+#endif // defined(__arm__) || defined(_M_ARM)
+
+#endif // #ifndef liblldb_RegisterContextWindows_arm_H_
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 1fb9355b9ee32..d0d593656efd2 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -34,6 +34,7 @@ using namespace lldb_private::python;
 using llvm::cantFail;
 using llvm::Error;
 using llvm::Expected;
+using llvm::Twine;
 
 template <> Expected<bool> python::As<bool>(Expected<PythonObject> &&obj) {
   if (!obj)
@@ -278,7 +279,7 @@ PythonByteArray::PythonByteArray(llvm::ArrayRef<uint8_t> bytes)
 
 PythonByteArray::PythonByteArray(const uint8_t *bytes, size_t length) {
   const char *str = reinterpret_cast<const char *>(bytes);
-  Reset(PyRefType::Owned, 
PyByteArray_FromStringAndSize(str, length)); + *this = Take(PyByteArray_FromStringAndSize(str, length)); } bool PythonByteArray::Check(PyObject *py_obj) { @@ -522,11 +523,11 @@ StructuredData::BooleanSP PythonBoolean::CreateStructuredBoolean() const { PythonList::PythonList(PyInitialValue value) { if (value == PyInitialValue::Empty) - Reset(PyRefType::Owned, PyList_New(0)); + *this = Take(PyList_New(0)); } PythonList::PythonList(int list_size) { - Reset(PyRefType::Owned, PyList_New(list_size)); + *this = Take(PyList_New(list_size)); } bool PythonList::Check(PyObject *py_obj) { @@ -578,11 +579,11 @@ StructuredData::ArraySP PythonList::CreateStructuredArray() const { PythonTuple::PythonTuple(PyInitialValue value) { if (value == PyInitialValue::Empty) - Reset(PyRefType::Owned, PyTuple_New(0)); + *this = Take(PyTuple_New(0)); } PythonTuple::PythonTuple(int tuple_size) { - Reset(PyRefType::Owned, PyTuple_New(tuple_size)); + *this = Take(PyTuple_New(tuple_size)); } PythonTuple::PythonTuple(std::initializer_list objects) { @@ -649,7 +650,7 @@ StructuredData::ArraySP PythonTuple::CreateStructuredArray() const { PythonDictionary::PythonDictionary(PyInitialValue value) { if (value == PyInitialValue::Empty) - Reset(PyRefType::Owned, PyDict_New()); + *this = Take(PyDict_New()); } bool PythonDictionary::Check(PyObject *py_obj) { @@ -696,10 +697,10 @@ PythonDictionary::GetItem(const PythonObject &key) const { return Retain(o); } -Expected PythonDictionary::GetItem(const char *key) const { +Expected PythonDictionary::GetItem(const Twine &key) const { if (!IsValid()) return nullDeref(); - PyObject *o = PyDict_GetItemString(m_py_obj, key); + PyObject *o = PyDict_GetItemString(m_py_obj, NullTerminated(key)); if (PyErr_Occurred()) return exception(); if (!o) @@ -717,11 +718,11 @@ Error PythonDictionary::SetItem(const PythonObject &key, return Error::success(); } -Error PythonDictionary::SetItem(const char *key, +Error PythonDictionary::SetItem(const Twine &key, const PythonObject 
&value) const { if (!IsValid() || !value.IsValid()) return nullDeref(); - int r = PyDict_SetItemString(m_py_obj, key, value.get()); + int r = PyDict_SetItemString(m_py_obj, NullTerminated(key), value.get()); if (r < 0) return exception(); return Error::success(); @@ -763,20 +764,20 @@ PythonModule PythonModule::AddModule(llvm::StringRef module) { return PythonModule(PyRefType::Borrowed, PyImport_AddModule(str.c_str())); } -Expected PythonModule::Import(const char *name) { - PyObject *mod = PyImport_ImportModule(name); +Expected PythonModule::Import(const Twine &name) { + PyObject *mod = PyImport_ImportModule(NullTerminated(name)); if (!mod) return exception(); return Take(mod); } -Expected PythonModule::Get(const char *name) { +Expected PythonModule::Get(const Twine &name) { if (!IsValid()) return nullDeref(); PyObject *dict = PyModule_GetDict(m_py_obj); if (!dict) return exception(); - PyObject *item = PyDict_GetItemString(dict, name); + PyObject *item = PyDict_GetItemString(dict, NullTerminated(name)); if (!item) return exception(); return Retain(item); @@ -790,7 +791,9 @@ bool PythonModule::Check(PyObject *py_obj) { } PythonDictionary PythonModule::GetDictionary() const { - return PythonDictionary(PyRefType::Borrowed, PyModule_GetDict(m_py_obj)); + if (!IsValid()) + return PythonDictionary(); + return Retain(PyModule_GetDict(m_py_obj)); } bool PythonCallable::Check(PyObject *py_obj) { @@ -876,21 +879,23 @@ Expected PythonCallable::GetArgInfo() const { result.count = cantFail(As(pyarginfo.get().GetAttribute("count"))); result.has_varargs = cantFail(As(pyarginfo.get().GetAttribute("has_varargs"))); - result.is_bound_method = + bool is_method = cantFail(As(pyarginfo.get().GetAttribute("is_bound_method"))); + result.max_positional_args = + result.has_varargs ? 
ArgInfo::UNBOUNDED : result.count; // FIXME emulate old broken behavior - if (result.is_bound_method) + if (is_method) result.count++; #else - + bool is_bound_method = false; PyObject *py_func_obj = m_py_obj; if (PyMethod_Check(py_func_obj)) { py_func_obj = PyMethod_GET_FUNCTION(py_func_obj); PythonObject im_self = GetAttributeValue("im_self"); if (im_self.IsValid() && !im_self.IsNone()) - result.is_bound_method = true; + is_bound_method = true; } else { // see if this is a callable object with an __call__ method if (!PyFunction_Check(py_func_obj)) { @@ -899,9 +904,9 @@ Expected PythonCallable::GetArgInfo() const { auto __callable__ = __call__.AsType(); if (__callable__.IsValid()) { py_func_obj = PyMethod_GET_FUNCTION(__callable__.get()); - PythonObject im_self = GetAttributeValue("im_self"); + PythonObject im_self = __callable__.GetAttributeValue("im_self"); if (im_self.IsValid() && !im_self.IsNone()) - result.is_bound_method = true; + is_bound_method = true; } } } @@ -916,12 +921,18 @@ Expected PythonCallable::GetArgInfo() const { result.count = code->co_argcount; result.has_varargs = !!(code->co_flags & CO_VARARGS); + result.max_positional_args = result.has_varargs + ? 
ArgInfo::UNBOUNDED + : (result.count - (int)is_bound_method); #endif return result; } +constexpr unsigned + PythonCallable::ArgInfo::UNBOUNDED; // FIXME delete after c++17 + PythonCallable::ArgInfo PythonCallable::GetNumArguments() const { auto arginfo = GetArgInfo(); if (!arginfo) { @@ -1084,6 +1095,8 @@ template class OwnedPythonFile : public Base { assert(m_py_obj); GIL takeGIL; Close(); + // we need to ensure the python object is released while we still + // hold the GIL m_py_obj.Reset(); } diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h index 5823f740a5302..634d6e896015f 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h @@ -151,6 +151,30 @@ template T Retain(PyObject *obj) { return std::move(thing); } +// This class can be used like a utility function to convert from +// a llvm-friendly Twine into a null-terminated const char *, +// which is the form python C APIs want their strings in. +// +// Example: +// const llvm::Twine &some_twine; +// PyFoo_Bar(x, y, z, NullTerminated(some_twine)); +// +// Why a class instead of a function? If the twine isn't already null +// terminated, it will need a temporary buffer to copy the string +// into. We need that buffer to stick around for the lifetime of the +// statement. 
+class NullTerminated { + const char *str; + llvm::SmallString<32> storage; + +public: + NullTerminated(const llvm::Twine &twine) { + llvm::StringRef ref = twine.toNullTerminatedStringRef(storage); + str = ref.begin(); + } + operator const char *() { return str; } +}; + } // namespace python enum class PyInitialValue { Invalid, Empty }; @@ -323,10 +347,11 @@ class PythonObject { return python::Take(obj); } - llvm::Expected GetAttribute(const char *name) const { + llvm::Expected GetAttribute(const llvm::Twine &name) const { + using namespace python; if (!m_py_obj) return nullDeref(); - PyObject *obj = PyObject_GetAttrString(m_py_obj, name); + PyObject *obj = PyObject_GetAttrString(m_py_obj, NullTerminated(name)); if (!obj) return exception(); return python::Take(obj); @@ -392,10 +417,11 @@ template class TypedPythonObject : public PythonObject { // This can be eliminated once we drop python 2 support. static void Convert(PyRefType &type, PyObject *&py_obj) {} - using PythonObject::Reset; + void Reset() { PythonObject::Reset(); } - void Reset(PyRefType type, PyObject *py_obj) { - Reset(); + void Reset(PyRefType type, PyObject *py_obj) = delete; + + TypedPythonObject(PyRefType type, PyObject *py_obj) { if (!py_obj) return; T::Convert(type, py_obj); @@ -405,8 +431,6 @@ template class TypedPythonObject : public PythonObject { Py_DECREF(py_obj); } - TypedPythonObject(PyRefType type, PyObject *py_obj) { Reset(type, py_obj); } - TypedPythonObject() {} }; @@ -562,9 +586,9 @@ class PythonDictionary : public TypedPythonObject { const PythonObject &value); // DEPRECATED llvm::Expected GetItem(const PythonObject &key) const; - llvm::Expected GetItem(const char *key) const; + llvm::Expected GetItem(const llvm::Twine &key) const; llvm::Error SetItem(const PythonObject &key, const PythonObject &value) const; - llvm::Error SetItem(const char *key, const PythonObject &value) const; + llvm::Error SetItem(const llvm::Twine &key, const PythonObject &value) const; 
StructuredData::DictionarySP CreateStructuredDictionary() const; }; @@ -592,9 +616,9 @@ class PythonModule : public TypedPythonObject { return std::move(mod.get()); } - static llvm::Expected Import(const char *name); + static llvm::Expected Import(const llvm::Twine &name); - llvm::Expected Get(const char *name); + llvm::Expected Get(const llvm::Twine &name); PythonDictionary GetDictionary() const; }; @@ -604,6 +628,11 @@ class PythonCallable : public TypedPythonObject { using TypedPythonObject::TypedPythonObject; struct ArgInfo { + /* the largest number of positional arguments this callable + * can accept, or UNBOUNDED, ie UINT_MAX if it's a varargs + * function and can accept an arbitrary number */ + unsigned max_positional_args; + static constexpr unsigned UNBOUNDED = UINT_MAX; // FIXME c++17 inline /* the number of positional arguments, including optional ones, * and excluding varargs. If this is a bound method, then the * count will still include a +1 for self. @@ -614,8 +643,6 @@ class PythonCallable : public TypedPythonObject { int count; /* does the callable have positional varargs? */ bool has_varargs : 1; // FIXME delete this - /* is the callable a bound method written in python? */ - bool is_bound_method : 1; // FIXME delete this }; static bool Check(PyObject *py_obj); @@ -705,6 +732,17 @@ template T unwrapOrSetPythonException(llvm::Expected expected) { return T(); } +namespace python { +// This is only here to help incrementally migrate old, exception-unsafe +// code. 
+template T unwrapIgnoringErrors(llvm::Expected expected) { + if (expected) + return std::move(expected.get()); + llvm::consumeError(expected.takeError()); + return T(); +} +} // namespace python + } // namespace lldb_private #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 70654a423848c..8f71335241639 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -55,6 +55,7 @@ using namespace lldb; using namespace lldb_private; +using namespace lldb_private::python; // Defined in the SWIG source file #if PY_MAJOR_VERSION >= 3 @@ -765,19 +766,16 @@ PythonDictionary &ScriptInterpreterPythonImpl::GetSessionDictionary() { if (!main_dict.IsValid()) return m_session_dict; - PythonObject item = main_dict.GetItemForKey(PythonString(m_dictionary_name)); - m_session_dict.Reset(PyRefType::Borrowed, item.get()); + m_session_dict = unwrapIgnoringErrors( + As(main_dict.GetItem(m_dictionary_name))); return m_session_dict; } PythonDictionary &ScriptInterpreterPythonImpl::GetSysModuleDictionary() { if (m_sys_module_dict.IsValid()) return m_sys_module_dict; - - PythonObject sys_module(PyRefType::Borrowed, PyImport_AddModule("sys")); - if (sys_module.IsValid()) - m_sys_module_dict.Reset(PyRefType::Borrowed, - PyModule_GetDict(sys_module.get())); + PythonModule sys_module = unwrapIgnoringErrors(PythonModule::Import("sys")); + m_sys_module_dict = sys_module.GetDictionary(); return m_sys_module_dict; } @@ -1053,9 +1051,8 @@ bool ScriptInterpreterPythonImpl::ExecuteOneLineWithReturn( PythonDictionary locals = GetSessionDictionary(); if (!locals.IsValid()) { - locals.Reset( - PyRefType::Owned, - PyObject_GetAttrString(globals.get(), m_dictionary_name.c_str())); + locals = unwrapIgnoringErrors( + As(globals.GetAttribute(m_dictionary_name))); } if 
(!locals.IsValid()) @@ -1204,9 +1201,8 @@ Status ScriptInterpreterPythonImpl::ExecuteMultipleLines( PythonDictionary locals = GetSessionDictionary(); if (!locals.IsValid()) - locals.Reset( - PyRefType::Owned, - PyObject_GetAttrString(globals.get(), m_dictionary_name.c_str())); + locals = unwrapIgnoringErrors( + As(globals.GetAttribute(m_dictionary_name))); if (!locals.IsValid()) locals = globals; diff --git a/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-corrupt-xz.yaml b/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-corrupt-xz.yaml index cec34b9c62332..938688cdfe617 100644 --- a/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-corrupt-xz.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-corrupt-xz.yaml @@ -5,11 +5,6 @@ # RUN: yaml2obj %s > %t.obj -# TODO(kwk): once yaml2obj doesn't auto-generate a .symtab section -# when there's none in YAML, remove the following line: - -# RUN: llvm-objcopy --remove-section=.symtab %t.obj - # RUN: %lldb -b -o 'image dump symtab' %t.obj 2>&1 | FileCheck %s # CHECK: warning: (x86_64) {{.*}}.obj An error occurred while decompression the section .gnu_debugdata: lzma_stream_buffer_decode()=lzma error: LZMA_DATA_ERROR diff --git a/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-find-symbols.yaml b/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-find-symbols.yaml index 230ce8bb1c338..e6ebb03814396 100644 --- a/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-find-symbols.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-find-symbols.yaml @@ -2,11 +2,6 @@ # RUN: yaml2obj %s > %t.obj -# TODO(kwk): once yaml2obj doesn't auto-generate a .symtab section -# when there's none in YAML, remove the following line: - -# RUN: llvm-objcopy --remove-section=.symtab %t.obj - # RUN: %lldb -b -o 'image dump symtab' %t.obj | FileCheck %s # CHECK: [ 0] 1 X Code 0x00000000004005b0 0x000000000000000f 0x00000012 multiplyByFour diff --git a/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-no-lzma.yaml b/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-no-lzma.yaml index 
a127109e991ab..63c82baf07e2a 100644 --- a/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-no-lzma.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-no-lzma.yaml @@ -5,11 +5,6 @@ # RUN: yaml2obj %s > %t.obj -# TODO(kwk): once yaml2obj doesn't auto-generate a .symtab section -# when there's none in YAML, remove the following line: - -# RUN: llvm-objcopy --remove-section=.symtab %t.obj - # RUN: %lldb -b -o 'image dump symtab' %t.obj 2>&1 | FileCheck %s # CHECK: warning: (x86_64) {{.*}}.obj No LZMA support found for reading .gnu_debugdata section diff --git a/lldb/test/Shell/Register/Inputs/arm-fp-read.cpp b/lldb/test/Shell/Register/Inputs/arm-fp-read.cpp new file mode 100644 index 0000000000000..2dce2ed2d7532 --- /dev/null +++ b/lldb/test/Shell/Register/Inputs/arm-fp-read.cpp @@ -0,0 +1,19 @@ +int main() { + asm volatile( + "vmov.f64 d0, #0.5\n\t" + "vmov.f64 d1, #1.5\n\t" + "vmov.f64 d2, #2.5\n\t" + "vmov.f64 d3, #3.5\n\t" + "vmov.f32 s8, #4.5\n\t" + "vmov.f32 s9, #5.5\n\t" + "vmov.f32 s10, #6.5\n\t" + "vmov.f32 s11, #7.5\n\t" + "\n\t" + "bkpt #0\n\t" + : + : + : "d0", "d1", "d2", "d3", "s8", "s9", "s10", "s11" + ); + + return 0; +} diff --git a/lldb/test/Shell/Register/Inputs/arm-gp-read.cpp b/lldb/test/Shell/Register/Inputs/arm-gp-read.cpp new file mode 100644 index 0000000000000..fd891c49c09aa --- /dev/null +++ b/lldb/test/Shell/Register/Inputs/arm-gp-read.cpp @@ -0,0 +1,44 @@ +#include + +struct alignas(16) vec_t { + uint64_t a, b; +}; + +int main() { + constexpr uint32_t gprs[] = { + 0x00010203, + 0x10111213, + 0x20212223, + 0x30313233, + 0x40414243, + 0x50515253, + 0x60616263, + 0x70717273, + }; + + constexpr vec_t vecs[] = { + { 0x0F0E0D0C0B0A0908, 0x1716151413121110, }, + { 0x100F0E0D0C0B0A09, 0x1817161514131211, }, + { 0x11100F0E0D0C0B0A, 0x1918171615141312, }, + { 0x1211100F0E0D0C0B, 0x1A19181716151413, }, + }; + const vec_t *vec_ptr = vecs; + + asm volatile( + "ldrd r0, r1, [%1]\n\t" + "ldrd r2, r3, [%1, #8]\n\t" + "ldrd r4, r5, [%1, #16]\n\t" + "ldrd 
r6, r7, [%1, #24]\n\t" + "\n\t" + "vld1.64 {q0, q1}, [%0]!\n\t" + "vld1.64 {q2, q3}, [%0]!\n\t" + "\n\t" + "bkpt #0\n\t" + : "+r"(vec_ptr) + : "r"(gprs) + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "q0", "q1", "q2", "q3" + ); + + return 0; +} diff --git a/lldb/test/Shell/Register/arm-fp-read.test b/lldb/test/Shell/Register/arm-fp-read.test new file mode 100644 index 0000000000000..21af9074e3b31 --- /dev/null +++ b/lldb/test/Shell/Register/arm-fp-read.test @@ -0,0 +1,21 @@ +# REQUIRES: native && target-arm +# RUN: %clangxx -fomit-frame-pointer %p/Inputs/arm-fp-read.cpp -o %t +# RUN: %lldb -b -s %s %t | FileCheck %s +process launch + +register read d0 +register read d1 +register read d2 +register read d3 +register read s8 +register read s9 +register read s10 +register read s11 +# CHECK-DAG: d0 = 0.5 +# CHECK-DAG: d1 = 1.5 +# CHECK-DAG: d2 = 2.5 +# CHECK-DAG: d3 = 3.5 +# CHECK-DAG: s8 = 4.5 +# CHECK-DAG: s9 = 5.5 +# CHECK-DAG: s10 = 6.5 +# CHECK-DAG: s11 = 7.5 diff --git a/lldb/test/Shell/Register/arm-gp-read.test b/lldb/test/Shell/Register/arm-gp-read.test new file mode 100644 index 0000000000000..73c1034b6e23b --- /dev/null +++ b/lldb/test/Shell/Register/arm-gp-read.test @@ -0,0 +1,19 @@ +# REQUIRES: native && target-arm +# RUN: %clangxx -fomit-frame-pointer %p/Inputs/arm-gp-read.cpp -o %t +# RUN: %lldb -b -s %s %t | FileCheck %s +process launch + +register read --all +# CHECK-DAG: r0 = 0x00010203 +# CHECK-DAG: r1 = 0x10111213 +# CHECK-DAG: r2 = 0x20212223 +# CHECK-DAG: r3 = 0x30313233 +# CHECK-DAG: r4 = 0x40414243 +# CHECK-DAG: r5 = 0x50515253 +# CHECK-DAG: r6 = 0x60616263 +# CHECK-DAG: r7 = 0x70717273 + +# CHECK-DAG: q0 = {0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f 0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17} +# CHECK-DAG: q1 = {0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f 0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18} +# CHECK-DAG: q2 = {0x0a 0x0b 0x0c 0x0d 0x0e 0x0f 0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19} +# CHECK-DAG: q3 = {0x0b 0x0c 0x0d 0x0e 0x0f 0x10 0x11 
0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a} diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp index d9e4435bf93e9..c01dade444081 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp @@ -27,8 +27,7 @@ class PythonDataObjectsTest : public PythonTestSuite { void SetUp() override { PythonTestSuite::SetUp(); - PythonString sys_module("sys"); - m_sys_module.Reset(PyRefType::Owned, PyImport_Import(sys_module.get())); + m_sys_module = unwrapIgnoringErrors(PythonModule::Import("sys")); m_main_module = PythonModule::MainModule(); m_builtins_module = PythonModule::BuiltinsModule(); } @@ -70,13 +69,10 @@ TEST_F(PythonDataObjectsTest, TestResetting) { PythonDictionary dict(PyInitialValue::Empty); PyObject *new_dict = PyDict_New(); - dict.Reset(PyRefType::Owned, new_dict); + dict = Take(new_dict); EXPECT_EQ(new_dict, dict.get()); - dict.Reset(PyRefType::Owned, nullptr); - EXPECT_EQ(nullptr, dict.get()); - - dict.Reset(PyRefType::Owned, PyDict_New()); + dict = Take(PyDict_New()); EXPECT_NE(nullptr, dict.get()); dict.Reset(); EXPECT_EQ(nullptr, dict.get()); @@ -643,8 +639,8 @@ TEST_F(PythonDataObjectsTest, TestCallable) { auto arginfo = lambda.GetArgInfo(); ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); EXPECT_EQ(arginfo.get().count, 1); + EXPECT_EQ(arginfo.get().max_positional_args, 1u); EXPECT_EQ(arginfo.get().has_varargs, false); - EXPECT_EQ(arginfo.get().is_bound_method, false); } { @@ -655,8 +651,8 @@ TEST_F(PythonDataObjectsTest, TestCallable) { auto arginfo = lambda.GetArgInfo(); ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); EXPECT_EQ(arginfo.get().count, 2); + EXPECT_EQ(arginfo.get().max_positional_args, 2u); EXPECT_EQ(arginfo.get().has_varargs, false); - EXPECT_EQ(arginfo.get().is_bound_method, false); } { @@ -667,6 +663,7 @@ TEST_F(PythonDataObjectsTest, 
TestCallable) { auto arginfo = lambda.GetArgInfo(); ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); EXPECT_EQ(arginfo.get().count, 2); + EXPECT_EQ(arginfo.get().max_positional_args, 2u); EXPECT_EQ(arginfo.get().has_varargs, false); } @@ -678,8 +675,9 @@ TEST_F(PythonDataObjectsTest, TestCallable) { auto arginfo = lambda.GetArgInfo(); ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); EXPECT_EQ(arginfo.get().count, 2); + EXPECT_EQ(arginfo.get().max_positional_args, + PythonCallable::ArgInfo::UNBOUNDED); EXPECT_EQ(arginfo.get().has_varargs, true); - EXPECT_EQ(arginfo.get().is_bound_method, false); } { @@ -690,6 +688,8 @@ TEST_F(PythonDataObjectsTest, TestCallable) { auto arginfo = lambda.GetArgInfo(); ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); EXPECT_EQ(arginfo.get().count, 2); + EXPECT_EQ(arginfo.get().max_positional_args, + PythonCallable::ArgInfo::UNBOUNDED); EXPECT_EQ(arginfo.get().has_varargs, true); } @@ -698,7 +698,18 @@ TEST_F(PythonDataObjectsTest, TestCallable) { class Foo: def bar(self, x): return x + @classmethod + def classbar(cls, x): + return x + @staticmethod + def staticbar(x): + return x + def __call__(self, x): + return x +obj = Foo() bar_bound = Foo().bar +bar_class = Foo().classbar +bar_static = Foo().staticbar bar_unbound = Foo.bar )"; PyObject *o = @@ -711,16 +722,37 @@ bar_unbound = Foo.bar auto arginfo = bar_bound.get().GetArgInfo(); ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); EXPECT_EQ(arginfo.get().count, 2); // FIXME, wrong + EXPECT_EQ(arginfo.get().max_positional_args, 1u); EXPECT_EQ(arginfo.get().has_varargs, false); - EXPECT_EQ(arginfo.get().is_bound_method, true); auto bar_unbound = As(globals.GetItem("bar_unbound")); ASSERT_THAT_EXPECTED(bar_unbound, llvm::Succeeded()); arginfo = bar_unbound.get().GetArgInfo(); ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); EXPECT_EQ(arginfo.get().count, 2); + EXPECT_EQ(arginfo.get().max_positional_args, 2u); + EXPECT_EQ(arginfo.get().has_varargs, false); + + auto bar_class = 
As(globals.GetItem("bar_class")); + ASSERT_THAT_EXPECTED(bar_class, llvm::Succeeded()); + arginfo = bar_class.get().GetArgInfo(); + ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); + EXPECT_EQ(arginfo.get().max_positional_args, 1u); + EXPECT_EQ(arginfo.get().has_varargs, false); + + auto bar_static = As(globals.GetItem("bar_static")); + ASSERT_THAT_EXPECTED(bar_static, llvm::Succeeded()); + arginfo = bar_static.get().GetArgInfo(); + ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); + EXPECT_EQ(arginfo.get().max_positional_args, 1u); + EXPECT_EQ(arginfo.get().has_varargs, false); + + auto obj = As(globals.GetItem("obj")); + ASSERT_THAT_EXPECTED(obj, llvm::Succeeded()); + arginfo = obj.get().GetArgInfo(); + ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); + EXPECT_EQ(arginfo.get().max_positional_args, 1u); EXPECT_EQ(arginfo.get().has_varargs, false); - EXPECT_EQ(arginfo.get().is_bound_method, false); } #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 3 @@ -734,8 +766,8 @@ bar_unbound = Foo.bar auto arginfo = hex.get().GetArgInfo(); ASSERT_THAT_EXPECTED(arginfo, llvm::Succeeded()); EXPECT_EQ(arginfo.get().count, 1); + EXPECT_EQ(arginfo.get().max_positional_args, 1u); EXPECT_EQ(arginfo.get().has_varargs, false); - EXPECT_EQ(arginfo.get().is_bound_method, false); } #endif diff --git a/llvm-spirv/include/LLVMSPIRVLib.h b/llvm-spirv/include/LLVMSPIRVLib.h index 0c775c1f98e57..d71eaed382e78 100644 --- a/llvm-spirv/include/LLVMSPIRVLib.h +++ b/llvm-spirv/include/LLVMSPIRVLib.h @@ -50,6 +50,7 @@ namespace llvm { // Pass initialization functions need to be declared before inclusion of // PassSupport.h. 
class PassRegistry; +class ModulePass; void initializeLLVMToSPIRVPass(PassRegistry &); void initializeOCL20ToSPIRVPass(PassRegistry &); void initializeOCL21ToSPIRVPass(PassRegistry &); diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index 9cc5f63bec91a..3fd3ee3130740 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -369,11 +369,13 @@ LLVM-specific variables **LLVM_ENABLE_PROJECTS**:STRING Semicolon-separated list of projects to build, or *all* for building all - (clang, libcxx, libcxxabi, lldb, compiler-rt, lld, polly) projects. + (clang, libcxx, libcxxabi, lldb, compiler-rt, lld, polly, etc) projects. This flag assumes that projects are checked out side-by-side and not nested, i.e. clang needs to be in parallel of llvm instead of nested in `llvm/tools`. This feature allows to have one build for only LLVM and another for clang+llvm using the same source checkout. + The full list is: + ``clang;clang-tools-extra;compiler-rt;debuginfo-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;llgo;openmp;parallel-libs;polly;pstl`` **LLVM_EXTERNAL_PROJECTS**:STRING Semicolon-separated list of additional external projects to build as part of diff --git a/llvm/docs/CommandGuide/llvm-ar.rst b/llvm/docs/CommandGuide/llvm-ar.rst index 8a36a407e39a9..60187d8e44423 100644 --- a/llvm/docs/CommandGuide/llvm-ar.rst +++ b/llvm/docs/CommandGuide/llvm-ar.rst @@ -6,297 +6,342 @@ llvm-ar - LLVM archiver SYNOPSIS -------- -**llvm-ar** [-]{dmpqrtx}[Rabfikou] [relpos] [count] [files...] +:program:`llvm-ar` [-]{dmpqrstx}[abcDilLNoOPsSTuUvV] [relpos] [count] archive [files...] DESCRIPTION ----------- -The **llvm-ar** command is similar to the common Unix utility, ``ar``. It -archives several files together into a single file. The intent for this is -to produce archive libraries by LLVM bitcode that can be linked into an -LLVM program. However, the archive can contain any kind of file. 
By default, -**llvm-ar** generates a symbol table that makes linking faster because -only the symbol table needs to be consulted, not each individual file member -of the archive. +The :program:`llvm-ar` command is similar to the common Unix utility, +:program:`ar`. It archives several files, such as objects and LLVM bitcode +files into a single archive library that can be linked into a program. However, +the archive can contain any kind of file. By default, :program:`llvm-ar` +generates a symbol table that makes linking faster because only the symbol +table needs to be consulted, not each individual file member of the archive. -The **llvm-ar** command can be used to *read* SVR4, GNU and BSD style archive -files. However, right now it can only write in the GNU format. If an -SVR4 or BSD style archive is used with the ``r`` (replace) or ``q`` (quick -update) operations, the archive will be reconstructed in GNU format. +The :program:`llvm-ar` command can be used to *read* archive files in SVR4, +GNU, BSD and Darwin format, and *write* in the GNU, BSD, and Darwin style +archive files. If an SVR4 format archive is used with the :option:`r` +(replace), :option:`d` (delete), :option:`m` (move) or :option:`q` +(quick update) operations, the archive will be reconstructed in the format +defined by :option:`--format`. -Here's where **llvm-ar** departs from previous ``ar`` implementations: +Here's where :program:`llvm-ar` departs from previous :program:`ar` +implementations: -*Symbol Table* +*The following option is not supported* + + [f] - truncate inserted filenames + +*The following options are ignored for compatibility* - Since **llvm-ar** supports bitcode files. The symbol table it creates - is in GNU format and includes both native and bitcode files. 
+ --plugin=<string> - load a plugin which adds support for other file formats + + [l] - ignored in :program:`ar` -*Long Paths* +*Symbol Table* - Currently **llvm-ar** can read GNU and BSD long file names, but only writes - archives with the GNU format. + Since :program:`llvm-ar` supports bitcode files, the symbol table it creates + includes both native and bitcode symbols. + +*Deterministic Archives* + + By default, :program:`llvm-ar` always uses zero for timestamps and UIDs/GIDs + to write archives in a deterministic mode. This is equivalent to the + :option:`D` modifier being enabled by default. If you wish to maintain + compatibility with other :program:`ar` implementations, you can pass the + :option:`U` modifier to write actual timestamps and UIDs/GIDs. *Windows Paths* - When on Windows **llvm-ar** treats the names of archived *files* in the same + When on Windows :program:`llvm-ar` treats the names of archived *files* in the same case sensitive manner as the operating system. When on a non-Windows machine - **llvm-ar** does not consider character case. + :program:`llvm-ar` does not consider character case. OPTIONS ------- -The options to **llvm-ar** are compatible with other ``ar`` implementations. -However, there are a few modifiers (*R*) that are not found in other ``ar`` -implementations. The options to **llvm-ar** specify a single basic operation to -perform on the archive, a variety of modifiers for that operation, the name of -the archive file, and an optional list of file names. These options are used to -determine how **llvm-ar** should process the archive file. +:program:`llvm-ar` operations are compatible with other :program:`ar` +implementations. However, there are a few modifiers (:option:`L`) that are not +found in other :program:`ar` implementations. The options for +:program:`llvm-ar` specify a single basic Operation to perform on the archive, +a variety of Modifiers for that Operation, the name of the archive file, and an +optional list of file names.
If the *files* option is not specified, it +generally means either "none" or "all" members, depending on the operation. The +Options, Operations and Modifiers are explained in the sections below. -The Operations and Modifiers are explained in the sections below. The minimal -set of options is at least one operator and the name of the archive. Typically -archive files end with a ``.a`` suffix, but this is not required. Following -the *archive-name* comes a list of *files* that indicate the specific members -of the archive to operate on. If the *files* option is not specified, it -generally means either "none" or "all" members, depending on the operation. +The minimal set of options is at least one operator and the name of the +archive. Operations ~~~~~~~~~~ -d +.. option:: d [NT] + + Delete files from the ``archive``. The :option:`N` and :option:`T` modifiers + apply to this operation. The *files* options specify which members should be + removed from the archive. It is not an error if a specified file does not + appear in the archive. If no *files* are specified, the archive is not + modified. + +.. option:: m [abi] - Delete files from the archive. No modifiers are applicable to this operation. - The *files* options specify which members should be removed from the - archive. It is not an error if a specified file does not appear in the archive. - If no *files* are specified, the archive is not modified. + Move files from one location in the ``archive`` to another. The :option:`a`, + :option:`b`, and :option:`i` modifiers apply to this operation. The *files* + will all be moved to the location given by the modifiers. If no modifiers are + used, the files will be moved to the end of the archive. If no *files* are + specified, the archive is not modified. -m[abi] +.. option:: p [v] - Move files from one location in the archive to another. The *a*, *b*, and - *i* modifiers apply to this operation. The *files* will all be moved - to the location given by the modifiers. 
If no modifiers are used, the files - will be moved to the end of the archive. If no *files* are specified, the - archive is not modified. + Print *files* to the standard output stream. If no *files* are specified, the + entire ``archive`` is printed. With the :option:`v` modifier, + :program:`llvm-ar` also prints out the name of the file being output. Printing + binary files is ill-advised as they might confuse your terminal settings. The + :option:`p` operation never modifies the archive. -p +.. option:: q [LT] - Print files to the standard output. This operation simply prints the - *files* indicated to the standard output. If no *files* are - specified, the entire archive is printed. Printing bitcode files is - ill-advised as they might confuse your terminal settings. The *p* - operation never modifies the archive. + Quickly append files to the end of the ``archive`` without removing + duplicates. If no *files* are specified, the archive is not modified. The + behavior when appending one archive to another depends upon whether the + :option:`L` and :option:`T` modifiers are used: -q + * Appending a regular archive to a regular archive will append the archive + file. If the :option:`L` modifier is specified the members will be appended + instead. - Quickly append files to the end of the archive. This operation quickly adds the - *files* to the archive without checking for duplicates that should be - removed first. If no *files* are specified, the archive is not modified. - Because of the way that **llvm-ar** constructs the archive file, its dubious - whether the *q* operation is any faster than the *r* operation. + * Appending a regular archive to a thin archive requires the :option:`T` + modifier and will append the archive file. The :option:`L` modifier is not + supported. -r[abu] + * Appending a thin archive to a regular archive will append the archive file. + If the :option:`L` modifier is specified the members will be appended + instead. 
- Replace or insert file members. The *a*, *b*, and *u* - modifiers apply to this operation. This operation will replace existing - *files* or insert them at the end of the archive if they do not exist. If no - *files* are specified, the archive is not modified. + * Appending a thin archive to a thin archive will always quick append its + members. -t[vO] +.. option:: r [abTu] + + Replace existing *files* or insert them at the end of the ``archive`` if + they do not exist. The :option:`a`, :option:`b`, :option:`T` and :option:`u` + modifiers apply to this operation. If no *files* are specified, the archive + is not modified. + +t[v] +.. option:: t [vO] Print the table of contents. Without any modifiers, this operation just prints - the names of the members to the standard output. With the *v* modifier, - **llvm-ar** also prints out the file type (B=bitcode, S=symbol - table, blank=regular file), the permission mode, the owner and group, the - size, and the date. With the :option:`O` modifier, display member offsets. - If any *files* are specified, the listing is only for those files. If no - *files* are specified, the table of contents for the whole archive is printed. + the names of the members to the standard output stream. With the :option:`v` + modifier, :program:`llvm-ar` also prints out the file type (B=bitcode, + S=symbol table, blank=regular file), the permission mode, the owner and group, + the size, and the date. With the :option:`O` modifier, display member offsets. If + any *files* are specified, the listing is only for those files. If no *files* + are specified, the table of contents for the whole archive is printed. + +.. option:: V -x[oP] + A synonym for the :option:`--version` option. - Extract archive members back to files. The *o* modifier applies to this - operation.
This operation retrieves the indicated *files* from the archive - and writes them back to the operating system's file system. If no - *files* are specified, the entire archive is extract. +.. option:: x [oP] + + Extract ``archive`` members back to files. The :option:`o` modifier applies + to this operation. This operation retrieves the indicated *files* from the + archive and writes them back to the operating system's file system. If no + *files* are specified, the entire archive is extracted. Modifiers (operation specific) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The modifiers below are specific to certain operations. See the Operations -section (above) to determine which modifiers are applicable to which operations. +section to determine which modifiers are applicable to which operations. + +.. option:: a -[a] + When inserting or moving member files, this option specifies the destination + of the new files as being after the *relpos* member. If *relpos* is not found, + the files are placed at the end of the ``archive``. *relpos* cannot be + consumed without either :option:`a`, :option:`b` or :option:`i`. - When inserting or moving member files, this option specifies the destination of - the new files as being after the *relpos* member. If *relpos* is not found, - the files are placed at the end of the archive. +.. option:: b -[b] + When inserting or moving member files, this option specifies the destination + of the new files as being before the *relpos* member. If *relpos* is not + found, the files are placed at the end of the ``archive``. *relpos* cannot + be consumed without either :option:`a`, :option:`b` or :option:`i`. This + modifier is identical to the :option:`i` modifier. - When inserting or moving member files, this option specifies the destination of - the new files as being before the *relpos* member. If *relpos* is not - found, the files are placed at the end of the archive. This modifier is - identical to the *i* modifier. +.. 
option:: i -[i] + A synonym for the :option:`b` option. - A synonym for the *b* option. +.. option:: L -[o] + When quick appending an ``archive``, instead quick append its members. This + is a feature for :program:`llvm-ar` that is not found in gnu-ar. - When extracting files, this option will cause **llvm-ar** to preserve the - original modification times of the files it writes. +.. option:: N + When extracting or deleting a member that shares its name with another member, + the *count* parameter allows you to supply a positive whole number that + selects the instance of the given name, with "1" indicating the first + instance. If :option:`N` is not specified the first member of that name will + be selected. If *count* is not supplied, the operation fails. *count* cannot + be consumed without :option:`N`. + +.. option:: o + + When extracting files, use the modification times of any *files* as they + appear in the ``archive``. By default *files* extracted from the archive + use the time of extraction. + .. option:: O Display member offsets inside the archive. -[u] +.. option:: T + + When creating or modifying an archive, this option specifies that the + ``archive`` will be thin. By default, archives are not created as thin + archives and when modifying a thin archive, it will be converted to a regular + archive. - When replacing existing files in the archive, only replace those files that have - a time stamp than the time stamp of the member in the archive. +.. option:: v + + When printing *files* or the ``archive`` table of contents, this modifier + instructs :program:`llvm-ar` to include additional information in the output. Modifiers (generic) ~~~~~~~~~~~~~~~~~~~ The modifiers below may be applied to any operation. -[c] +.. option:: c + + For the :option:`r` (replace) and :option:`q` (quick update) operations, + :program:`llvm-ar` will always create the archive if it doesn't exist. + Normally, :program:`llvm-ar` will print a warning message indicating that the + ``archive`` is being created.
Using this modifier turns off + that warning. + +.. option:: D - For all operations, **llvm-ar** will always create the archive if it doesn't - exist. Normally, **llvm-ar** will print a warning message indicating that the - archive is being created. Using this modifier turns off that warning. + Use zero for timestamps and UIDs/GIDs. This is set by default. +.. option:: P -[s] + Use full paths when matching member names rather than just the file name. + This can be useful when manipulating an ``archive`` generated by another + archiver, as some allow paths as member names. This is the default behavior + for thin archives. + +.. option:: s This modifier requests that an archive index (or symbol table) be added to the - archive. This is the default mode of operation. The symbol table will contain - all the externally visible functions and global variables defined by all the - bitcode files in the archive. + ``archive``, as if using ranlib. The symbol table will contain all the + externally visible functions and global variables defined by all the bitcode + files in the archive. By default :program:`llvm-ar` generates symbol tables in + archives. This can also be used as an operation. -[S] +.. option:: S - This modifier is the opposite of the *s* modifier. It instructs **llvm-ar** to - not build the symbol table. If both *s* and *S* are used, the last modifier to - occur in the options will prevail. + This modifier is the opposite of the :option:`s` modifier. It instructs + :program:`llvm-ar` to not build the symbol table. If both :option:`s` and + :option:`S` are used, the last modifier to occur in the options will prevail. + +.. option:: u -[v] + Only update ``archive`` members with *files* that have more recent + timestamps. + +.. option:: U - This modifier instructs **llvm-ar** to be verbose about what it is doing. Each - editing operation taken against the archive will produce a line of output saying - what is being done. + Use actual timestamps and UIDs/GIDs. 
-STANDARDS ---------- +Other +~~~~~ -The **llvm-ar** utility is intended to provide a superset of the IEEE Std 1003.2 -(POSIX.2) functionality for ``ar``. **llvm-ar** can read both SVR4 and BSD4.4 (or -macOS) archives. If the ``f`` modifier is given to the ``x`` or ``r`` operations -then **llvm-ar** will write SVR4 compatible archives. Without this modifier, -**llvm-ar** will write BSD4.4 compatible archives that have long names -immediately after the header and indicated using the "#1/ddd" notation for the -name in the header. +.. option:: --format=<type> -FILE FORMAT ------------ + This option allows for default, gnu, darwin or bsd ``<type>`` to be selected. + When creating an ``archive``, ``<type>`` will default to that of the host + machine. -The file format for LLVM Archive files is similar to that of BSD 4.4 or macOS -archive files. In fact, except for the symbol table, the ``ar`` commands on those -operating systems should be able to read LLVM archive files. The details of the -file format follow. +.. option:: -h, --help -Each archive begins with the archive magic number which is the eight printable -characters "!<arch>\n" where \n represents the newline character (0x0A). -Following the magic number, the file is composed of even length members that -begin with an archive header and end with a \n padding character if necessary -(to make the length even). Each file member is composed of a header (defined -below), an optional newline-terminated "long file name" and the contents of -the file. + Print a summary of command-line options and their meanings. -The fields of the header are described in the items below. All fields of the -header contain only ASCII characters, are left justified and are right padded -with space characters. +.. option:: -M -name - char[16] + This option allows for MRI scripts to be read through the standard input + stream. No other options are compatible with this option. - This field of the header provides the name of the archive member.
If the name is - longer than 15 characters or contains a slash (/) character, then this field - contains ``#1/nnn`` where ``nnn`` provides the length of the name and the ``#1/`` - is literal. In this case, the actual name of the file is provided in the ``nnn`` - bytes immediately following the header. If the name is 15 characters or less, it - is contained directly in this field and terminated with a slash (/) character. +.. option:: --version -date - char[12] + Display the version of the :program:`llvm-ar` executable. - This field provides the date of modification of the file in the form of a - decimal encoded number that provides the number of seconds since the epoch - (since 00:00:00 Jan 1, 1970) per Posix specifications. +.. option:: @ -uid - char[6] + Read command-line options and commands from response file ````. - This field provides the user id of the file encoded as a decimal ASCII string. - This field might not make much sense on non-Unix systems. On Unix, it is the - same value as the st_uid field of the stat structure returned by the stat(2) - operating system call. +MRI SCRIPTS +----------- -gid - char[6] +:program:`llvm-ar` understands a subset of the MRI scripting interface commonly +supported by archivers following in the ar tradition. An MRI script contains a +sequence of commands to be executed by the archiver. The :option:`-M` option +allows for an MRI script to be passed to :program:`llvm-ar` through the +standard input stream. + +Note that :program:`llvm-ar` has known limitations regarding the use of MRI +scripts: + +* Each script can only create one archive. +* Existing archives can not be modified. - This field provides the group id of the file encoded as a decimal ASCII string. - This field might not make much sense on non-Unix systems. On Unix, it is the - same value as the st_gid field of the stat structure returned by the stat(2) - operating system call. 
+MRI Script Commands +~~~~~~~~~~~~~~~~~~~ -mode - char[8] +Each command begins with the command's name and must appear on its own line. +Some commands have arguments, which must be separated from the name by +whitespace. An MRI script should begin with either a :option:`CREATE` or +:option:`CREATETHIN` command and will typically end with a :option:`SAVE` +command. Any text after either '*' or ';' is treated as a comment. - This field provides the access mode of the file encoded as an octal ASCII - string. This field might not make much sense on non-Unix systems. On Unix, it - is the same value as the st_mode field of the stat structure returned by the - stat(2) operating system call. +.. option:: CREATE archive -size - char[10] + Begin creation of a regular archive with the specified name. Subsequent + commands act upon this ``archive``. - This field provides the size of the file, in bytes, encoded as a decimal ASCII - string. +.. option:: CREATETHIN archive -fmag - char[2] + Begin creation of a thin archive with the specified name. Subsequent + commands act upon this ``archive``. - This field is the archive file member magic number. Its content is always the - two characters back tick (0x60) and newline (0x0A). This provides some measure - utility in identifying archive files that have been corrupted. +.. option:: ADDLIB archive -offset - vbr encoded 32-bit integer + Append the contents of ``archive`` to the current archive. - The offset item provides the offset into the archive file where the bitcode - member is stored that is associated with the symbol. The offset value is 0 - based at the start of the first "normal" file member. To derive the actual - file offset of the member, you must add the number of bytes occupied by the file - signature (8 bytes) and the symbol tables. The value of this item is encoded - using variable bit rate encoding to reduce the size of the symbol table. 
- Variable bit rate encoding uses the high bit (0x80) of each byte to indicate - if there are more bytes to follow. The remaining 7 bits in each byte carry bits - from the value. The final byte does not have the high bit set. +.. option:: ADDMOD -length - vbr encoded 32-bit integer + Append ```` to the current archive. - The length item provides the length of the symbol that follows. Like this - *offset* item, the length is variable bit rate encoded. +.. option:: DELETE -symbol - character array + Delete the member of the current archive whose file name, excluding directory + components, matches ````. - The symbol item provides the text of the symbol that is associated with the - *offset*. The symbol is not terminated by any character. Its length is provided - by the *length* field. Note that is allowed (but unwise) to use non-printing - characters (even 0x00) in the symbol. This allows for multiple encodings of - symbol names. +.. option:: SAVE -EXIT STATUS ------------ + Write the current archive to the path specified in the previous + :option:`CREATE`/:option:`CREATETHIN` command. -If **llvm-ar** succeeds, it will exit with 0. A usage error, results -in an exit code of 1. A hard (file system typically) error results in an -exit code of 2. Miscellaneous or unknown errors result in an -exit code of 3. +.. option:: END -SEE ALSO --------- + Ends the MRI script (optional). + +EXIT STATUS +----------- -ar(1) +If :program:`llvm-ar` succeeds, it will exit with 0. Otherwise, if an error occurs, it +will exit with a non-zero value. 
diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index 008188bfa2109..b22606bdb518e 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -45,7 +45,6 @@ #define LLVM_ADT_HASHING_H #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/type_traits.h" #include diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index b5884acf3b0cc..9604b2521e895 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -37,7 +37,8 @@ bool isDereferenceablePointer(const Value *V, Type *Ty, /// performs context-sensitive analysis and returns true if the pointer is /// dereferenceable at the specified instruction. bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - unsigned Align, const DataLayout &DL, + MaybeAlign Alignment, + const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -45,7 +46,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, /// greater or equal than requested. If the context instruction is specified /// performs context-sensitive analysis and returns true if the pointer is /// dereferenceable at the specified instruction. -bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -58,7 +59,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, /// If it is not obviously safe to load from the specified pointer, we do a /// quick local scan of the basic block containing ScanFrom, to determine if /// the address is already accessed. 
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, +bool isSafeToLoadUnconditionally(Value *V, MaybeAlign Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); @@ -82,7 +83,7 @@ bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, /// If it is not obviously safe to load from the specified pointer, we do a /// quick local scan of the basic block containing ScanFrom, to determine if /// the address is already accessed. -bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, +bool isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index e2c94a12ad679..f550d880f68a3 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" namespace llvm { namespace wasm { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index f73976dbacf95..2e57b4c9d332f 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1149,9 +1149,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OpPropsBW); // For non-rotates (X != Y) we must add shift-by-zero handling costs. 
if (X != Y) { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, nullptr); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, @@ -1169,7 +1167,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getIntrinsicInstrCost( Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = std::numeric_limits::max()) { - unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); auto *ConcreteTTI = static_cast(this); SmallVector ISDs; @@ -1326,9 +1323,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /*IsUnsigned=*/false); case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Type *OpTy = StructType::create({RetTy, CondTy}); Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat @@ -1348,9 +1343,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } case Intrinsic::uadd_sat: case Intrinsic::usub_sat: { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Type *OpTy = StructType::create({RetTy, CondTy}); Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat @@ -1367,9 +1360,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::smul_fix: case Intrinsic::umul_fix: { unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); - if (RetVF > 1) - ExtTy = VectorType::get(ExtTy, RetVF); + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); unsigned ExtOp = IID == Intrinsic::smul_fix ? 
Instruction::SExt : Instruction::ZExt; @@ -1433,9 +1424,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Type *MulTy = RetTy->getContainedType(0); Type *OverflowTy = RetTy->getContainedType(1); unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); - if (MulTy->isVectorTy()) - ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); unsigned ExtOp = IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; diff --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h index 77ada7fda920b..705465b15c4c8 100644 --- a/llvm/include/llvm/CodeGen/DFAPacketizer.h +++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h @@ -144,7 +144,7 @@ class VLIWPacketizerList { protected: MachineFunction &MF; const TargetInstrInfo *TII; - AliasAnalysis *AA; + AAResults *AA; // The VLIW Scheduler. DefaultVLIWScheduler *VLIWScheduler; @@ -156,9 +156,9 @@ class VLIWPacketizerList { std::map MIToSUnit; public: - // The AliasAnalysis parameter can be nullptr. + // The AAResults parameter can be nullptr. 
VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA); + AAResults *AA); virtual ~VLIWPacketizerList(); diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index fb60191abd3a0..f812a2f6c5852 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -20,7 +20,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -37,6 +36,7 @@ namespace llvm { class Argument; class BasicBlock; class BranchProbabilityInfo; +class LegacyDivergenceAnalysis; class Function; class Instruction; class MachineFunction; diff --git a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index 2a826d0b64c03..a438ecfcc25ed 100644 --- a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -38,9 +38,6 @@ class MachineBlockFrequencyInfo : public MachineFunctionPass { static char ID; MachineBlockFrequencyInfo(); - explicit MachineBlockFrequencyInfo(MachineFunction &F, - MachineBranchProbabilityInfo &MBPI, - MachineLoopInfo &MLI); ~MachineBlockFrequencyInfo() override; void getAnalysisUsage(AnalysisUsage &AU) const override; diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h index 9d31232c9b95b..e4d7a02f8c48e 100644 --- a/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/llvm/include/llvm/CodeGen/MachineDominators.h @@ -81,9 +81,6 @@ class MachineDominatorTree : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid MachineDominatorTree(); - explicit MachineDominatorTree(MachineFunction &MF) : MachineFunctionPass(ID) { - 
calculate(MF); - } DomTreeT &getBase() { if (!DT) DT.reset(new DomTreeT()); @@ -114,8 +111,6 @@ class MachineDominatorTree : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &F) override; - void calculate(MachineFunction &F); - bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const { applySplitCriticalEdges(); diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 0191c779419cc..c94ad292ec964 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -20,11 +20,9 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" @@ -38,6 +36,7 @@ namespace llvm { +class AAResults; template class ArrayRef; class DIExpression; class DILocalVariable; @@ -1043,9 +1042,7 @@ class MachineInstr /// A DBG_VALUE is an entry value iff its debug expression contains the /// DW_OP_LLVM_entry_value operation. - bool isDebugEntryValue() const { - return isDebugValue() && getDebugExpression()->isEntryValue(); - } + bool isDebugEntryValue() const; /// Return true if the instruction is a debug value which describes a part of /// a variable as unavailable. @@ -1414,7 +1411,7 @@ class MachineInstr /// Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. - bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const; + bool isSafeToMove(AAResults *AA, bool &SawStore) const; /// Returns true if this instruction's memory access aliases the memory /// access of Other. 
@@ -1426,7 +1423,7 @@ class MachineInstr /// @param AA Optional alias analysis, used to compare memory operands. /// @param Other MachineInstr to check aliasing against. /// @param UseTBAA Whether to pass TBAA information to alias analysis. - bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const; + bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const; /// Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory @@ -1441,7 +1438,7 @@ class MachineInstr /// argument area of a function (if it does not change). If the instruction /// does multiple loads, this returns true only if all of the loads are /// dereferenceable and invariant. - bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const; + bool isDereferenceableInvariantLoad(AAResults *AA) const; /// If the specified instruction is a PHI that always merges together the /// same virtual register, return the register, otherwise return 0. diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h index 1612c8b86a303..da6df59c739c2 100644 --- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h +++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h @@ -37,7 +37,6 @@ namespace llvm { -class MachineDominatorTree; // Implementation in LoopInfoImpl.h class MachineLoop; extern template class LoopBase; @@ -92,10 +91,6 @@ class MachineLoopInfo : public MachineFunctionPass { MachineLoopInfo() : MachineFunctionPass(ID) { initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); } - explicit MachineLoopInfo(MachineDominatorTree &MDT) - : MachineFunctionPass(ID) { - calculate(MDT); - } MachineLoopInfo(const MachineLoopInfo &) = delete; MachineLoopInfo &operator=(const MachineLoopInfo &) = delete; @@ -138,7 +133,6 @@ class MachineLoopInfo : public MachineFunctionPass { /// Calculate the natural loop information. 
bool runOnMachineFunction(MachineFunction &F) override; - void calculate(MachineDominatorTree &MDT); void releaseMemory() override { LI.releaseMemory(); } diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h index 893725c0f1d55..e9cf7e115bffe 100644 --- a/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -40,6 +40,8 @@ #ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H #define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H +#include "llvm/Analysis/AliasAnalysis.h" + #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" diff --git a/llvm/include/llvm/CodeGen/MachineSizeOpts.h b/llvm/include/llvm/CodeGen/MachineSizeOpts.h deleted file mode 100644 index 75e871d974757..0000000000000 --- a/llvm/include/llvm/CodeGen/MachineSizeOpts.h +++ /dev/null @@ -1,37 +0,0 @@ -//===- MachineSizeOpts.h - machine size optimization ------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains some shared machine IR code size optimization related -// code. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_MACHINE_SIZEOPTS_H -#define LLVM_CODEGEN_MACHINE_SIZEOPTS_H - -#include "llvm/Transforms/Utils/SizeOpts.h" - -namespace llvm { - -class ProfileSummaryInfo; -class MachineBasicBlock; -class MachineBlockFrequencyInfo; -class MachineFunction; - -/// Returns true if machine function \p MF is suggested to be size-optimized -/// base on the profile. 
-bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *BFI); -/// Returns true if machine basic block \p MBB is suggested to be size-optimized -/// base on the profile. -bool shouldOptimizeForSize(const MachineBasicBlock *MBB, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI); - -} // end namespace llvm - -#endif // LLVM_CODEGEN_MACHINE_SIZEOPTS_H diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h index eaa55c911b812..1eb9b9f322ba2 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -34,6 +34,7 @@ namespace llvm { + class AAResults; class LiveIntervals; class MachineFrameInfo; class MachineFunction; @@ -173,7 +174,7 @@ namespace llvm { /// Tracks the last instructions in this region using each virtual register. VReg2SUnitOperIdxMultiMap CurrentVRegUses; - AliasAnalysis *AAForDep = nullptr; + AAResults *AAForDep = nullptr; /// Remember a generic side-effecting instruction as we proceed. /// No other SU ever gets scheduled around it (except in the special @@ -201,7 +202,7 @@ namespace llvm { Value2SUsMap &loads, unsigned N); /// Adds a chain edge between SUa and SUb, but only if both - /// AliasAnalysis and Target fail to deny the dependency. + /// AAResults and Target fail to deny the dependency. void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency = 0); @@ -306,7 +307,7 @@ namespace llvm { /// If \p RPTracker is non-null, compute register pressure as a side effect. /// The DAG builder is an efficient place to do it because it already visits /// operands. 
- void buildSchedGraph(AliasAnalysis *AA, + void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker = nullptr, PressureDiffs *PDiffs = nullptr, LiveIntervals *LIS = nullptr, diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index be6e2bd7d6526..6b8e2dd803ba8 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -26,8 +26,6 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -58,6 +56,7 @@ namespace llvm { +class AAResults; class BlockAddress; class Constant; class ConstantFP; @@ -66,6 +65,7 @@ class DataLayout; struct fltSemantics; class GlobalValue; struct KnownBits; +class LegacyDivergenceAnalysis; class LLVMContext; class MachineBasicBlock; class MachineConstantPoolValue; @@ -499,7 +499,7 @@ class SelectionDAG { /// certain types of nodes together, or eliminating superfluous nodes. The /// Level argument controls whether Combine is allowed to produce nodes and /// types that are illegal on the target. 
- void Combine(CombineLevel Level, AliasAnalysis *AA, + void Combine(CombineLevel Level, AAResults *AA, CodeGenOpt::Level OptLevel); /// This transforms the SelectionDAG into a SelectionDAG that diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index a2011cc4b3959..de71a21d46718 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -22,22 +22,23 @@ #include namespace llvm { - class FastISel; - class SelectionDAGBuilder; - class SDValue; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineFunction; - class MachineInstr; - class OptimizationRemarkEmitter; - class TargetLowering; - class TargetLibraryInfo; - class FunctionLoweringInfo; - class ScheduleHazardRecognizer; - class SwiftErrorValueTracking; - class GCFunctionInfo; - class ScheduleDAGSDNodes; - class LoadInst; +class AAResults; +class FastISel; +class SelectionDAGBuilder; +class SDValue; +class MachineRegisterInfo; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class OptimizationRemarkEmitter; +class TargetLowering; +class TargetLibraryInfo; +class FunctionLoweringInfo; +class ScheduleHazardRecognizer; +class SwiftErrorValueTracking; +class GCFunctionInfo; +class ScheduleDAGSDNodes; +class LoadInst; /// SelectionDAGISel - This is the common base class used for SelectionDAG-based /// pattern-matching instruction selectors. 
@@ -51,7 +52,7 @@ class SelectionDAGISel : public MachineFunctionPass { MachineRegisterInfo *RegInfo; SelectionDAG *CurDAG; SelectionDAGBuilder *SDB; - AliasAnalysis *AA; + AAResults *AA; GCFunctionInfo *GFI; CodeGenOpt::Level OptLevel; const TargetInstrInfo *TII; diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h index 360fc51bd0415..db3d1175afee4 100644 --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -125,18 +125,18 @@ namespace ISD { MaybeAlign A = decodeMaybeAlign(ByValAlign); return A ? A->value() : 0; } - void setByValAlign(unsigned A) { - ByValAlign = encode(Align(A)); - assert(getByValAlign() == A && "bitfield overflow"); + void setByValAlign(Align A) { + ByValAlign = encode(A); + assert(getByValAlign() == A.value() && "bitfield overflow"); } unsigned getOrigAlign() const { MaybeAlign A = decodeMaybeAlign(OrigAlign); return A ? A->value() : 0; } - void setOrigAlign(unsigned A) { - OrigAlign = encode(Align(A)); - assert(getOrigAlign() == A && "bitfield overflow"); + void setOrigAlign(Align A) { + OrigAlign = encode(A); + assert(getOrigAlign() == A.value() && "bitfield overflow"); } unsigned getByValSize() const { return ByValSize; } diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index da00155ed546a..2100a64c8f549 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -55,12 +55,12 @@ class TargetFrameLowering { private: StackDirection StackDir; Align StackAlignment; - unsigned TransientStackAlignment; + Align TransientStackAlignment; int LocalAreaOffset; bool StackRealignable; public: TargetFrameLowering(StackDirection D, Align StackAl, int LAO, - unsigned TransAl = 1, bool StackReal = true) + Align TransAl = Align::None(), bool StackReal = true) : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), 
LocalAreaOffset(LAO), StackRealignable(StackReal) {} @@ -96,7 +96,7 @@ class TargetFrameLowering { /// calls. /// unsigned getTransientStackAlignment() const { - return TransientStackAlignment; + return TransientStackAlignment.value(); } /// isStackRealignable - This method returns whether the stack can be @@ -282,6 +282,11 @@ class TargetFrameLowering { return getFrameIndexReference(MF, FI, FrameReg); } + /// Returns the callee-saved registers as computed by determineCalleeSaves + /// in the BitVector \p SavedRegs. + virtual void getCalleeSaves(const MachineFunction &MF, + BitVector &SavedRegs) const; + /// This method determines which of the registers reported by /// TargetRegisterInfo::getCalleeSavedRegs() should actually get saved. /// The default implementation checks populates the \p SavedRegs bitset with @@ -289,6 +294,9 @@ class TargetFrameLowering { /// this function to save additional registers. /// This method also sets up the register scavenger ensuring there is a free /// register or a frameindex available. + /// This method should not be called by any passes outside of PEI, because + /// it may change state passed in by \p MF and \p RS. The preferred + /// interface outside PEI is getCalleeSaves. virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const; @@ -367,17 +375,7 @@ class TargetFrameLowering { /// Check if given function is safe for not having callee saved registers. /// This is used when interprocedural register allocation is enabled. - static bool isSafeForNoCSROpt(const Function &F) { - if (!F.hasLocalLinkage() || F.hasAddressTaken() || - !F.hasFnAttribute(Attribute::NoRecurse)) - return false; - // Function should not be optimized as tail call. 
- for (const User *U : F.users()) - if (auto CS = ImmutableCallSite(U)) - if (CS.isTailCall()) - return false; - return true; - } + static bool isSafeForNoCSROpt(const Function &F); /// Check if the no-CSR optimisation is profitable for the given function. virtual bool isProfitableForNoCSROpt(const Function &F) const { diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 9c3757828563e..5011cf34c0ee2 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -39,10 +38,12 @@ namespace llvm { +class AAResults; class DFAPacketizer; class InstrItineraryData; class LiveIntervals; class LiveVariables; +class MachineLoop; class MachineMemOperand; class MachineRegisterInfo; class MCAsmInfo; @@ -95,7 +96,7 @@ class TargetInstrInfo : public MCInstrInfo { /// registers so that the instructions result is independent of the place /// in the function. bool isTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA = nullptr) const { + AAResults *AA = nullptr) const { return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || (MI.getDesc().isRematerializable() && (isReallyTriviallyReMaterializable(MI, AA) || @@ -111,7 +112,7 @@ class TargetInstrInfo : public MCInstrInfo { /// not always available. /// Requirements must be check as stated in isTriviallyReMaterializable() . 
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { + AAResults *AA) const { return false; } @@ -154,7 +155,7 @@ class TargetInstrInfo : public MCInstrInfo { /// this function does target-independent tests to determine if the /// instruction is really trivially rematerializable. bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, - AliasAnalysis *AA) const; + AAResults *AA) const; public: /// These methods return the opcode of the frame setup/destroy instructions diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 4ab61edec25fa..a58fca7e73f5b 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -28,7 +28,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -76,6 +75,7 @@ class GlobalValue; class GISelKnownBits; class IntrinsicInst; struct KnownBits; +class LegacyDivergenceAnalysis; class LLVMContext; class MachineBasicBlock; class MachineFunction; @@ -1357,9 +1357,9 @@ class TargetLoweringBase { /// Certain targets have context senstive alignment requirements, where one /// type has the alignment requirement of another type. - virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const { - return DL.getABITypeAlignment(ArgTy); + virtual Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return Align(DL.getABITypeAlignment(ArgTy)); } /// If true, then instruction selection should seek to shrink the FP constant @@ -2608,6 +2608,12 @@ class TargetLoweringBase { // same blocks of its users. 
virtual bool shouldConsiderGEPOffsetSplit() const { return false; } + // Return the shift amount threshold for profitable transforms into shifts. + // Transforms creating shifts above the returned value will be avoided. + virtual unsigned getShiftAmountThreshold(EVT VT) const { + return VT.getScalarSizeInBits(); + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 41062456343a9..1aafa3ca9f1d2 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -469,61 +469,61 @@ class DefRangeSubfieldSym : public SymbolRecord { uint32_t RecordOffset = 0; }; +struct DefRangeRegisterHeader { + ulittle16_t Register; + ulittle16_t MayHaveNoName; +}; + // S_DEFRANGE_REGISTER class DefRangeRegisterSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t MayHaveNoName; - }; - explicit DefRangeRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} explicit DefRangeRegisterSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeRegisterSym), RecordOffset(RecordOffset) {} - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + sizeof(DefRangeRegisterHeader); } - Header Hdr; + DefRangeRegisterHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; uint32_t RecordOffset = 0; }; +struct DefRangeSubfieldRegisterHeader { + ulittle16_t Register; + ulittle16_t MayHaveNoName; + ulittle32_t OffsetInParent; +}; + // S_DEFRANGE_SUBFIELD_REGISTER class DefRangeSubfieldRegisterSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t MayHaveNoName; - ulittle32_t OffsetInParent; - }; - explicit DefRangeSubfieldRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} explicit 
DefRangeSubfieldRegisterSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeSubfieldRegisterSym), RecordOffset(RecordOffset) {} - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + sizeof(DefRangeSubfieldRegisterHeader); } - Header Hdr; + DefRangeSubfieldRegisterHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; uint32_t RecordOffset = 0; }; +struct DefRangeFramePointerRelHeader { + little32_t Offset; +}; + // S_DEFRANGE_FRAMEPOINTER_REL class DefRangeFramePointerRelSym : public SymbolRecord { static constexpr uint32_t RelocationOffset = 8; public: - struct Header { - little32_t Offset; - }; - explicit DefRangeFramePointerRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} explicit DefRangeFramePointerRelSym(uint32_t RecordOffset) @@ -534,22 +534,22 @@ class DefRangeFramePointerRelSym : public SymbolRecord { return RecordOffset + RelocationOffset; } - Header Hdr; + DefRangeFramePointerRelHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; uint32_t RecordOffset = 0; }; +struct DefRangeRegisterRelHeader { + ulittle16_t Register; + ulittle16_t Flags; + little32_t BasePointerOffset; +}; + // S_DEFRANGE_REGISTER_REL class DefRangeRegisterRelSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t Flags; - little32_t BasePointerOffset; - }; - explicit DefRangeRegisterRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} explicit DefRangeRegisterRelSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeRegisterRelSym), @@ -567,9 +567,9 @@ class DefRangeRegisterRelSym : public SymbolRecord { bool hasSpilledUDTMember() const { return Hdr.Flags & IsSubfieldFlag; } uint16_t offsetInParent() const { return Hdr.Flags >> OffsetInParentShift; } - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + 
sizeof(DefRangeRegisterRelHeader); } - Header Hdr; + DefRangeRegisterRelHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 08fd269807a85..85093dd218f8c 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -25,7 +25,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -504,6 +503,13 @@ class DataLayout { /// Returns the minimum ABI-required alignment for the specified type. unsigned getABITypeAlignment(Type *Ty) const; + /// Helper function to return `Alignment` if it's set or the result of + /// `getABITypeAlignment(Ty)`, in any case the result is a valid alignment. + inline Align getValueOrABITypeAlignment(MaybeAlign Alignment, + Type *Ty) const { + return Alignment ? *Alignment : Align(getABITypeAlignment(Ty)); + } + /// Returns the minimum ABI-required alignment for an integer type of /// the specified bitwidth. Align getABIIntegerTypeAlignment(unsigned BitWidth) const; diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index ade63764ebb1f..20097ef3f31a5 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -571,6 +571,10 @@ bool Type::getVectorIsScalable() const { return cast(this)->isScalable(); } +ElementCount Type::getVectorElementCount() const { + return cast(this)->getElementCount(); +} + /// Class to represent pointers. 
class PointerType : public Type { explicit PointerType(Type *ElType, unsigned AddrSpace); @@ -618,6 +622,16 @@ Type *Type::getExtendedType() const { return cast(this)->getExtendedType(); } +Type *Type::getWithNewBitWidth(unsigned NewBitWidth) const { + assert( + isIntOrIntVectorTy() && + "Original type expected to be a vector of integers or a scalar integer."); + Type *NewType = getIntNTy(getContext(), NewBitWidth); + if (isVectorTy()) + NewType = VectorType::get(NewType, getVectorElementCount()); + return NewType; +} + unsigned Type::getPointerAddressSpace() const { return cast(getScalarType())->getAddressSpace(); } diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 25febbfa1e23f..d1ddb75cde9b8 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1461,7 +1461,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { if (Value *V = foldConstant(Opc, LHS, RHS, Name)) return V; Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS); if (isa(BinOp)) - BinOp = setFPAttrs(BinOp, FPMathTag, FMF); + setFPAttrs(BinOp, FPMathTag, FMF); return Insert(BinOp, Name); } @@ -1479,7 +1479,8 @@ class IRBuilder : public IRBuilderBase, public Inserter { CallInst *C = CreateIntrinsic(ID, {L->getType()}, {L, R, RoundingV, ExceptV}, nullptr, Name); - return cast(setFPAttrs(C, FPMathTag, UseFMF)); + setFPAttrs(C, FPMathTag, UseFMF); + return C; } Value *CreateNeg(Value *V, const Twine &Name = "", @@ -1532,7 +1533,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { return Insert(Folder.CreateUnOp(Opc, VC), Name); Instruction *UnOp = UnaryOperator::Create(Opc, V); if (isa(UnOp)) - UnOp = setFPAttrs(UnOp, FPMathTag, FMF); + setFPAttrs(UnOp, FPMathTag, FMF); return Insert(UnOp, Name); } @@ -2084,7 +2085,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { break; } if (isa(C)) - C = cast(setFPAttrs(C, FPMathTag, UseFMF)); + setFPAttrs(C, FPMathTag, UseFMF); return C; } @@ -2231,7 +2232,7 @@ 
class IRBuilder : public IRBuilderBase, public Inserter { const Twine &Name = "") { PHINode *Phi = PHINode::Create(Ty, NumReservedValues); if (isa(Phi)) - Phi = cast(setFPAttrs(Phi, nullptr /* MDNode* */, FMF)); + setFPAttrs(Phi, nullptr /* MDNode* */, FMF); return Insert(Phi, Name); } @@ -2240,7 +2241,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { MDNode *FPMathTag = nullptr) { CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles); if (isa(CI)) - CI = cast(setFPAttrs(CI, FPMathTag, FMF)); + setFPAttrs(CI, FPMathTag, FMF); return Insert(CI, Name); } @@ -2249,7 +2250,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { const Twine &Name = "", MDNode *FPMathTag = nullptr) { CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles); if (isa(CI)) - CI = cast(setFPAttrs(CI, FPMathTag, FMF)); + setFPAttrs(CI, FPMathTag, FMF); return Insert(CI, Name); } @@ -2297,7 +2298,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { Sel = addBranchMetadata(Sel, Prof, Unpred); } if (isa(Sel)) - Sel = cast(setFPAttrs(Sel, nullptr /* MDNode* */, FMF)); + setFPAttrs(Sel, nullptr /* MDNode* */, FMF); return Insert(Sel, Name); } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index eaaf506464620..1e29d2cd361d1 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -5283,12 +5283,12 @@ inline Value *getPointerOperand(Value *V) { } /// A helper function that returns the alignment of load or store instruction. 
-inline unsigned getLoadStoreAlignment(Value *I) { +inline MaybeAlign getLoadStoreAlignment(Value *I) { assert((isa(I) || isa(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast(I)) - return LI->getAlignment(); - return cast(I)->getAlignment(); + return MaybeAlign(LI->getAlignment()); + return MaybeAlign(cast(I)->getAlignment()); } /// A helper function that returns the address space of the pointer operand of diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index f458680cfe15d..59331142766ae 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -46,6 +46,7 @@ class FunctionType; class GVMaterializer; class LLVMContext; class MemoryBuffer; +class Pass; class RandomNumberGenerator; template class SmallPtrSetImpl; class StructType; diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index 965c01b167ccb..1e1f4a92f844c 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -45,6 +45,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManagerInternal.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TypeName.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 63bc884f0b7d4..d0961dac833d6 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -372,6 +372,7 @@ class Type { inline bool getVectorIsScalable() const; inline unsigned getVectorNumElements() const; + inline ElementCount getVectorElementCount() const; Type *getVectorElementType() const { assert(getTypeID() == VectorTyID); return ContainedTys[0]; @@ -382,6 +383,10 @@ class Type { return ContainedTys[0]; } + /// Given an integer or vector type, change the lane bitwidth to NewBitwidth, + /// whilst keeping the old number of lanes. 
+ inline Type *getWithNewBitWidth(unsigned NewBitWidth) const; + /// Given scalar/vector integer type, returns a type with elements twice as /// wide as in the original type. For vectors, preserves element count. inline Type *getExtendedType() const; diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index c703afe223a2d..6b48580ae57cf 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -18,7 +18,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCSymbol.h" @@ -55,6 +54,13 @@ class MCSubtargetInfo; class raw_ostream; class Twine; +namespace codeview { +struct DefRangeRegisterRelHeader; +struct DefRangeSubfieldRegisterHeader; +struct DefRangeRegisterHeader; +struct DefRangeFramePointerRelHeader; +} + using MCSectionSubPair = std::pair; /// Target specific streamer interface. This is used so that targets can @@ -873,19 +879,19 @@ class MCStreamer { virtual void EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeRegisterRelSym::Header DRHdr); + codeview::DefRangeRegisterRelHeader DRHdr); virtual void EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeSubfieldRegisterSym::Header DRHdr); + codeview::DefRangeSubfieldRegisterHeader DRHdr); virtual void EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeRegisterSym::Header DRHdr); + codeview::DefRangeRegisterHeader DRHdr); virtual void EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeFramePointerRelSym::Header DRHdr); + codeview::DefRangeFramePointerRelHeader DRHdr); /// This implements the CodeView '.cv_stringtable' assembler directive. 
virtual void EmitCVStringTableDirective() {} diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 774d8c3ef3c5b..424289a9ccaa7 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -288,7 +288,8 @@ template class ELFObjectFile : public ELFObjectFileBase { relocation_iterator section_rel_begin(DataRefImpl Sec) const override; relocation_iterator section_rel_end(DataRefImpl Sec) const override; std::vector dynamic_relocation_sections() const override; - section_iterator getRelocatedSection(DataRefImpl Sec) const override; + Expected + getRelocatedSection(DataRefImpl Sec) const override; void moveRelocationNext(DataRefImpl &Rel) const override; uint64_t getRelocationOffset(DataRefImpl Rel) const override; @@ -841,7 +842,7 @@ ELFObjectFile::section_rel_end(DataRefImpl Sec) const { } template -section_iterator +Expected ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { if (EF.getHeader()->e_type != ELF::ET_REL) return section_end(); @@ -851,10 +852,10 @@ ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA) return section_end(); - auto R = EF.getSection(EShdr->sh_info); - if (!R) - report_fatal_error(errorToErrorCode(R.takeError()).message()); - return section_iterator(SectionRef(toDRI(*R), this)); + Expected SecOrErr = EF.getSection(EShdr->sh_info); + if (!SecOrErr) + return SecOrErr.takeError(); + return section_iterator(SectionRef(toDRI(*SecOrErr), this)); } // Relocations diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 63c556b7f0d9a..adc9dbc189af7 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -130,7 +130,7 @@ class SectionRef { iterator_range relocations() const { return make_range(relocation_begin(), relocation_end()); } - section_iterator getRelocatedSection() const; + Expected getRelocatedSection() 
const; DataRefImpl getRawDataRefImpl() const; const ObjectFile *getObject() const; @@ -272,7 +272,7 @@ class ObjectFile : public SymbolicFile { virtual bool isBerkeleyData(DataRefImpl Sec) const; virtual relocation_iterator section_rel_begin(DataRefImpl Sec) const = 0; virtual relocation_iterator section_rel_end(DataRefImpl Sec) const = 0; - virtual section_iterator getRelocatedSection(DataRefImpl Sec) const; + virtual Expected getRelocatedSection(DataRefImpl Sec) const; // Same as above for RelocationRef. friend class RelocationRef; @@ -501,7 +501,7 @@ inline relocation_iterator SectionRef::relocation_end() const { return OwningObject->section_rel_end(SectionPimpl); } -inline section_iterator SectionRef::getRelocatedSection() const { +inline Expected SectionRef::getRelocatedSection() const { return OwningObject->getRelocatedSection(SectionPimpl); } diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index ef2b4fba031d1..0898a0e7d5324 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -376,7 +376,7 @@ struct Object { // cleaner and nicer if we read them from the YAML as a separate // top-level key, which automatically ensures that invariants like there // being a single SHT_SYMTAB section are upheld. 
- std::vector Symbols; + Optional> Symbols; std::vector DynamicSymbols; }; diff --git a/llvm/include/llvm/Support/Host.h b/llvm/include/llvm/Support/Host.h index b37cc514c92e2..44f543c363db5 100644 --- a/llvm/include/llvm/Support/Host.h +++ b/llvm/include/llvm/Support/Host.h @@ -15,39 +15,11 @@ #include "llvm/ADT/StringMap.h" -#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) -#include -#elif defined(_AIX) -#include -#elif defined(__sun) -/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */ -#include -#define BIG_ENDIAN 4321 -#define LITTLE_ENDIAN 1234 -#if defined(_BIG_ENDIAN) -#define BYTE_ORDER BIG_ENDIAN -#else -#define BYTE_ORDER LITTLE_ENDIAN -#endif -#else -#if !defined(BYTE_ORDER) && !defined(_WIN32) -#include -#endif -#endif - #include namespace llvm { namespace sys { -#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN -constexpr bool IsBigEndianHost = true; -#else -constexpr bool IsBigEndianHost = false; -#endif - - static const bool IsLittleEndianHost = !IsBigEndianHost; - /// getDefaultTargetTriple() - Return the default target triple the compiler /// has been configured to produce code for. /// diff --git a/llvm/include/llvm/Support/SHA1.h b/llvm/include/llvm/Support/SHA1.h index 87fe94bbd5cd3..2cfbd21793645 100644 --- a/llvm/include/llvm/Support/SHA1.h +++ b/llvm/include/llvm/Support/SHA1.h @@ -16,13 +16,13 @@ #define LLVM_SUPPORT_SHA1_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include #include namespace llvm { template class ArrayRef; -class StringRef; /// A class that wrap the SHA1 algorithm. 
class SHA1 { diff --git a/llvm/include/llvm/Support/SwapByteOrder.h b/llvm/include/llvm/Support/SwapByteOrder.h index ec60a214cd005..6cec87006c02c 100644 --- a/llvm/include/llvm/Support/SwapByteOrder.h +++ b/llvm/include/llvm/Support/SwapByteOrder.h @@ -22,9 +22,37 @@ #include #endif +#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) +#include +#elif defined(_AIX) +#include +#elif defined(__sun) +/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */ +#include +#define BIG_ENDIAN 4321 +#define LITTLE_ENDIAN 1234 +#if defined(_BIG_ENDIAN) +#define BYTE_ORDER BIG_ENDIAN +#else +#define BYTE_ORDER LITTLE_ENDIAN +#endif +#else +#if !defined(BYTE_ORDER) && !defined(_WIN32) +#include +#endif +#endif + namespace llvm { namespace sys { +#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN +constexpr bool IsBigEndianHost = true; +#else +constexpr bool IsBigEndianHost = false; +#endif + +static const bool IsLittleEndianHost = !IsBigEndianHost; + /// SwapByteOrder_16 - This function returns a byte-swapped representation of /// the 16-bit argument. 
inline uint16_t SwapByteOrder_16(uint16_t value) { diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index 656020ec49d79..a3bfa7dc46782 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -649,7 +649,8 @@ inline bool isBool(StringRef S) { inline QuotingType needsQuotes(StringRef S) { if (S.empty()) return QuotingType::Single; - if (isspace(S.front()) || isspace(S.back())) + if (isspace(static_cast(S.front())) || + isspace(static_cast(S.back()))) return QuotingType::Single; if (isNull(S)) return QuotingType::Single; diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h index 1c56da08ef8b8..1a052c694e6d6 100644 --- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h +++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h @@ -13,18 +13,6 @@ #ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H #define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; - -extern cl::opt EnablePGSO; -extern cl::opt PGSOLargeWorkingSetSizeOnly; -extern cl::opt ForcePGSO; -extern cl::opt PgsoCutoffInstrProf; -extern cl::opt PgsoCutoffSampleProf; - namespace llvm { class BasicBlock; @@ -32,52 +20,13 @@ class BlockFrequencyInfo; class Function; class ProfileSummaryInfo; -template -bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI, - BFIT *BFI) { - assert(F); - if (!PSI || !BFI || !PSI->hasProfileSummary()) - return false; - if (ForcePGSO) - return true; - if (!EnablePGSO) - return false; - if (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize()) { - // Even if the working set size isn't large, size-optimize cold code. - return AdapterT::isFunctionColdInCallGraph(F, PSI, *BFI); - } - return !AdapterT::isFunctionHotInCallGraphNthPercentile( - PSI->hasSampleProfile() ? 
PgsoCutoffSampleProf : PgsoCutoffInstrProf, - F, PSI, *BFI); -} - -template -bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI, - BFIT *BFI) { - assert(BB); - if (!PSI || !BFI || !PSI->hasProfileSummary()) - return false; - if (ForcePGSO) - return true; - if (!EnablePGSO) - return false; - if (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize()) { - // Even if the working set size isn't large, size-optimize cold code. - return AdapterT::isColdBlock(BB, PSI, BFI); - } - return !AdapterT::isHotBlockNthPercentile( - PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf, - BB, PSI, BFI); -} - /// Returns true if function \p F is suggested to be size-optimized base on the /// profile. -bool shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI, +bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI); - /// Returns true if basic block \p BB is suggested to be size-optimized base /// on the profile. -bool shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI, +bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI); } // end namespace llvm diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index a4fd49920ad9f..641e92eac781f 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -50,7 +50,7 @@ static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment, /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. static bool isDereferenceableAndAlignedPointer( - const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL, + const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, SmallPtrSetImpl &Visited) { // Already visited? Bail out, we've likely hit unreachable code. 
@@ -62,8 +62,8 @@ static bool isDereferenceableAndAlignedPointer( // bitcast instructions are no-ops as far as dereferenceability is concerned. if (const BitCastOperator *BC = dyn_cast(V)) - return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); bool CheckForNonNull = false; APInt KnownDerefBytes(Size.getBitWidth(), @@ -76,7 +76,7 @@ static bool isDereferenceableAndAlignedPointer( Type *Ty = V->getType(); assert(Ty->isSized() && "must be sized"); APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); - return isAligned(V, Offset, llvm::Align(Align), DL); + return isAligned(V, Offset, Alignment, DL); } // For GEPs, determine if the indexing lands within the allocated object. @@ -85,7 +85,8 @@ static bool isDereferenceableAndAlignedPointer( APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() || - !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue()) + !Offset.urem(APInt(Offset.getBitWidth(), Alignment.value())) + .isMinValue()) return false; // If the base pointer is dereferenceable for Offset+Size bytes, then the @@ -97,72 +98,69 @@ static bool isDereferenceableAndAlignedPointer( // Offset and Size may have different bit widths if we have visited an // addrspacecast, so we can't do arithmetic directly on the APInt values. 
return isDereferenceableAndAlignedPointer( - Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()), - DL, CtxI, DT, Visited); + Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL, + CtxI, DT, Visited); } // For gc.relocate, look through relocations if (const GCRelocateInst *RelocateInst = dyn_cast(V)) return isDereferenceableAndAlignedPointer( - RelocateInst->getDerivedPtr(), Align, Size, DL, CtxI, DT, Visited); + RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited); if (const AddrSpaceCastInst *ASC = dyn_cast(V)) - return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); if (const auto *Call = dyn_cast(V)) if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) - return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT, - Visited); + return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI, + DT, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - assert(Align != 0 && "expected explicitly set alignment"); // Note: At the moment, Size can be zero. This ends up being interpreted as // a query of whether [Base, V] is dereferenceable and V is aligned (since // that's what the implementation happened to do). It's unclear if this is // the desired semantic, but at least SelectionDAG does exercise this case. 
SmallPtrSet Visited; - return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT, + return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT, Visited); } bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - unsigned Align, + MaybeAlign MA, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + if (!Ty->isSized()) + return false; + // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. // Require ABI alignment for loads without alignment specification - if (Align == 0) - Align = DL.getABITypeAlignment(Ty); - - if (!Ty->isSized()) - return false; - + const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty); APInt AccessSize(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); - return isDereferenceableAndAlignedPointer(V, Align, AccessSize, - DL, CtxI, DT); + return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI, + DT); } bool llvm::isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT); + return isDereferenceableAndAlignedPointer(V, Ty, Align::None(), DL, CtxI, DT); } /// Test if A and B will obviously have the same value. @@ -204,17 +202,16 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), DL.getTypeStoreSize(LI->getType())); - unsigned Align = LI->getAlignment(); - if (Align == 0) - Align = DL.getABITypeAlignment(LI->getType()); + const Align Alignment = DL.getValueOrABITypeAlignment( + MaybeAlign(LI->getAlignment()), LI->getType()); Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); // If given a uniform (i.e. 
non-varying) address, see if we can prove the // access is safe within the loop w/o needing predication. if (L->isLoopInvariant(Ptr)) - return isDereferenceableAndAlignedPointer(Ptr, Align, EltSize, DL, - HeaderFirstNonPHI, &DT); + return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, + HeaderFirstNonPHI, &DT); // Otherwise, check to see if we have a repeating access pattern where we can // prove that all accesses are well aligned and dereferenceable. @@ -245,10 +242,10 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, // For the moment, restrict ourselves to the case where the access size is a // multiple of the requested alignment and the base is aligned. // TODO: generalize if a case found which warrants - if (EltSize.urem(Align) != 0) + if (EltSize.urem(Alignment.value()) != 0) return false; - return isDereferenceableAndAlignedPointer(Base, Align, AccessSize, - DL, HeaderFirstNonPHI, &DT); + return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, + HeaderFirstNonPHI, &DT); } /// Check if executing a load of this pointer value cannot trap. @@ -262,18 +259,17 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, /// /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. -bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, +bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { // Zero alignment means that the load has the ABI alignment for the target - if (Align == 0) - Align = DL.getABITypeAlignment(V->getType()->getPointerElementType()); - assert(isPowerOf2_32(Align)); + const Align Alignment = + DL.getValueOrABITypeAlignment(MA, V->getType()->getPointerElementType()); // If DT is not specified we can't make context-sensitive query const Instruction* CtxI = DT ? 
ScanFrom : nullptr; - if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT)) + if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT)) return true; if (!ScanFrom) @@ -305,7 +301,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; Value *AccessedPtr; - unsigned AccessedAlign; + MaybeAlign MaybeAccessedAlign; if (LoadInst *LI = dyn_cast(BBI)) { // Ignore volatile loads. The execution of a volatile load cannot // be used to prove an address is backed by regular memory; it can, @@ -313,20 +309,21 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, if (LI->isVolatile()) continue; AccessedPtr = LI->getPointerOperand(); - AccessedAlign = LI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(LI->getAlignment()); } else if (StoreInst *SI = dyn_cast(BBI)) { // Ignore volatile stores (see comment for loads). if (SI->isVolatile()) continue; AccessedPtr = SI->getPointerOperand(); - AccessedAlign = SI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(SI->getAlignment()); } else continue; Type *AccessedTy = AccessedPtr->getType()->getPointerElementType(); - if (AccessedAlign == 0) - AccessedAlign = DL.getABITypeAlignment(AccessedTy); - if (AccessedAlign < Align) + + const Align AccessedAlign = + DL.getValueOrABITypeAlignment(MaybeAccessedAlign, AccessedTy); + if (AccessedAlign < Alignment) continue; // Handle trivial cases. 
@@ -341,12 +338,12 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; } -bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, +bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); - return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT); + return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT); } /// DefMaxInstsToScan - the default number of maximum instructions diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp index 77ebf89d9a085..5cf516a538b55 100644 --- a/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -55,8 +55,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) { Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, LI->getType(), DL)) Deref.push_back(PO); - if (isDereferenceableAndAlignedPointer(PO, LI->getType(), - LI->getAlignment(), DL)) + if (isDereferenceableAndAlignedPointer( + PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL)) DerefAndAligned.insert(PO); } } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 8d4c7c5a55f16..5ce0a1adeaa0c 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10338,10 +10338,43 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, return false; } +static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // zext x u<= sext x, sext x s<= zext x + switch (Pred) { + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLE: { + // If operand >=s 0 then ZExt == SExt. 
If operand (LHS); + const SCEVZeroExtendExpr *ZExt = dyn_cast(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + case ICmpInst::ICMP_UGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: { + // If operand >=s 0 then ZExt == SExt. If operand (LHS); + const SCEVSignExtendExpr *SExt = dyn_cast(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + default: + break; + }; + return false; +} + bool ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { - return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || + return isKnownPredicateExtendIdiom(Pred, LHS, RHS) || + isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || isKnownPredicateViaNoOverflow(Pred, LHS, RHS); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index c62ec353b83d6..bbf3899918367 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3938,9 +3938,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), - LI->getType(), LI->getAlignment(), - DL, CtxI, DT); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()), + DL, CtxI, DT); } case Instruction::Call: { auto *CI = cast(Inst); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 012da940f8995..c6457f3626d1b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -2654,7 +2654,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, 
(bool(Flags & LocalSymFlags::IsParameter) ? (EncFP == FI.EncodedParamFramePtrReg) : (EncFP == FI.EncodedLocalFramePtrReg))) { - DefRangeFramePointerRelSym::Header DRHdr; + DefRangeFramePointerRelHeader DRHdr; DRHdr.Offset = Offset; OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } else { @@ -2664,7 +2664,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, (DefRange.StructOffset << DefRangeRegisterRelSym::OffsetInParentShift); } - DefRangeRegisterRelSym::Header DRHdr; + DefRangeRegisterRelHeader DRHdr; DRHdr.Register = Reg; DRHdr.Flags = RegRelFlags; DRHdr.BasePointerOffset = Offset; @@ -2673,13 +2673,13 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); if (DefRange.IsSubfield) { - DefRangeSubfieldRegisterSym::Header DRHdr; + DefRangeSubfieldRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; DRHdr.OffsetInParent = DefRange.StructOffset; OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } else { - DefRangeRegisterSym::Header DRHdr; + DefRangeRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 9b6da64e043b0..709164e5f1789 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index f5c18f89fa062..50b469d6d936a 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -92,7 +92,6 @@ add_llvm_library(LLVMCodeGen MachineRegisterInfo.cpp MachineScheduler.cpp MachineSink.cpp - MachineSizeOpts.cpp 
MachineSSAUpdater.cpp MachineTraceMetrics.cpp MachineVerifier.cpp diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp index d31f9f8d174a7..a169c3cb16b2c 100644 --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -149,13 +150,13 @@ namespace llvm { // to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { private: - AliasAnalysis *AA; + AAResults *AA; /// Ordered list of DAG postprocessing steps. std::vector> Mutations; public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA); + AAResults *AA); // Actual scheduling work. void schedule() override; @@ -173,7 +174,7 @@ class DefaultVLIWScheduler : public ScheduleDAGInstrs { DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA) + AAResults *AA) : ScheduleDAGInstrs(MF, &MLI), AA(AA) { CanHandleTerminators = true; } @@ -191,7 +192,7 @@ void DefaultVLIWScheduler::schedule() { } VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, - MachineLoopInfo &mli, AliasAnalysis *aa) + MachineLoopInfo &mli, AAResults *aa) : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); ResourceTracker->setTrackResources(true); diff --git a/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/llvm/lib/CodeGen/ExecutionDomainFix.cpp index a2dd5eee33b73..2cca05ea6f559 100644 --- a/llvm/lib/CodeGen/ExecutionDomainFix.cpp +++ b/llvm/lib/CodeGen/ExecutionDomainFix.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/ExecutionDomainFix.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include 
"llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index ed4d22578f6c3..cdad92f7db4fe 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -106,11 +106,11 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) Flags.setNest(); - Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); } template void @@ -235,7 +235,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, if (Part == 0) { Flags.setSplit(); } else { - Flags.setOrigAlign(1); + Flags.setOrigAlign(Align::None()); if (Part == NumParts - 1) Flags.setSplitEnd(); } @@ -268,7 +268,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo, if (PartIdx == 0) { Flags.setSplit(); } else { - Flags.setOrigAlign(1); + Flags.setOrigAlign(Align::None()); if (PartIdx == NumParts - 1) Flags.setSplitEnd(); } diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp index 4e41f338934db..fc9c802693abd 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "registerbank" diff --git a/llvm/lib/CodeGen/LexicalScopes.cpp b/llvm/lib/CodeGen/LexicalScopes.cpp index 503821537ed92..ac3ef0e709f38 100644 --- a/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/llvm/lib/CodeGen/LexicalScopes.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include 
"llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp index f1b237d83e8cf..ca20b111d3093 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -1439,8 +1439,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); - TFI->determineCalleeSaves(MF, CalleeSavedRegs, - std::make_unique().get()); + TFI->getCalleeSaves(MF, CalleeSavedRegs); LS.initialize(MF); bool Changed = ExtendRanges(MF); diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 955792808d897..9bd55c6f750f1 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -26,6 +26,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 9d218894fdd94..c9bb5461aa3c9 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index bd3e344d1c7a7..6629000f468f5 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -7,6 +7,7 @@ 
//===----------------------------------------------------------------------===// #include "MIRVRegNamerUtils.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 889fde606a48e..53a35b7e89c2d 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -172,13 +172,6 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); } -MachineBlockFrequencyInfo::MachineBlockFrequencyInfo( - MachineFunction &F, - MachineBranchProbabilityInfo &MBPI, - MachineLoopInfo &MLI) : MachineFunctionPass(ID) { - calculate(F, MBPI, MLI); -} - MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default; void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp index 0d57bca37d9c5..706c706d75274 100644 --- a/llvm/lib/CodeGen/MachineDominators.cpp +++ b/llvm/lib/CodeGen/MachineDominators.cpp @@ -49,15 +49,11 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { } bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { - calculate(F); - return false; -} - -void MachineDominatorTree::calculate(MachineFunction &F) { CriticalEdgesToSplit.clear(); NewBBs.clear(); DT.reset(new DomTreeBase()); DT->recalculate(F); + return false; } MachineDominatorTree::MachineDominatorTree() diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 5a7cbc394cc6d..fec20b2b1a054 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -832,6 +832,10 @@ const DIExpression *MachineInstr::getDebugExpression() const { return cast(getOperand(3).getMetadata()); } +bool MachineInstr::isDebugEntryValue() const { + return isDebugValue() && 
getDebugExpression()->isEntryValue(); +} + const TargetRegisterClass* MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, @@ -1164,7 +1168,7 @@ void MachineInstr::substituteRegister(Register FromReg, Register ToReg, /// isSafeToMove - Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. -bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { +bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. // // Treat volatile loads as stores. This is not strictly necessary for @@ -1193,7 +1197,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { return true; } -bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other, +bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const { const MachineFunction *MF = getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -1311,7 +1315,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { /// isDereferenceableInvariantLoad - Return true if this instruction will never /// trap and is loading from a location whose value is invariant across a run of /// this function. -bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const { +bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. 
if (!mayLoad()) return false; diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp index 85822a6714911..3b8b430d1b0f0 100644 --- a/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -36,13 +36,9 @@ INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops", char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { - calculate(getAnalysis()); - return false; -} - -void MachineLoopInfo::calculate(MachineDominatorTree &MDT) { releaseMemory(); - LI.analyze(MDT.getBase()); + LI.analyze(getAnalysis().getBase()); + return false; } void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 8eccfb85a9461..8b19501ec3cf1 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -979,7 +979,8 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, return false; return isDereferenceableAndAlignedPointer( - BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); + BasePtr, Align::None(), APInt(DL.getPointerSizeInBits(), Offset + Size), + DL); } /// getConstantPool - Return a MachinePointerInfo record that refers to the diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp deleted file mode 100644 index 0c2ef3321e0a8..0000000000000 --- a/llvm/lib/CodeGen/MachineSizeOpts.cpp +++ /dev/null @@ -1,120 +0,0 @@ -//===- MachineSizeOpts.cpp - code size optimization related code ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains some shared machine IR code size optimization related -// code. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachineSizeOpts.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" - -using namespace llvm; - -extern cl::opt EnablePGSO; -extern cl::opt PGSOLargeWorkingSetSizeOnly; -extern cl::opt ForcePGSO; -extern cl::opt PgsoCutoffInstrProf; -extern cl::opt PgsoCutoffSampleProf; - -namespace machine_size_opts_detail { - -/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock. -bool isColdBlock(const MachineBasicBlock *MBB, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI) { - auto Count = MBFI->getBlockProfileCount(MBB); - return Count && PSI->isColdCount(*Count); -} - -/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock. -static bool isHotBlockNthPercentile(int PercentileCutoff, - const MachineBasicBlock *MBB, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI) { - auto Count = MBFI->getBlockProfileCount(MBB); - return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); -} - -/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for -/// MachineFunction. -bool isFunctionColdInCallGraph( - const MachineFunction *MF, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo &MBFI) { - if (auto FunctionCount = MF->getFunction().getEntryCount()) - if (!PSI->isColdCount(FunctionCount.getCount())) - return false; - for (const auto &MBB : *MF) - if (!isColdBlock(&MBB, PSI, &MBFI)) - return false; - return true; -} - -/// Like ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile but for -/// MachineFunction. 
-bool isFunctionHotInCallGraphNthPercentile( - int PercentileCutoff, - const MachineFunction *MF, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo &MBFI) { - if (auto FunctionCount = MF->getFunction().getEntryCount()) - if (PSI->isHotCountNthPercentile(PercentileCutoff, - FunctionCount.getCount())) - return true; - for (const auto &MBB : *MF) - if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI)) - return true; - return false; -} -} // namespace machine_size_opts_detail - -namespace { -struct MachineBasicBlockBFIAdapter { - static bool isFunctionColdInCallGraph(const MachineFunction *MF, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo &MBFI) { - return machine_size_opts_detail::isFunctionColdInCallGraph(MF, PSI, MBFI); - } - static bool isFunctionHotInCallGraphNthPercentile( - int CutOff, - const MachineFunction *MF, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo &MBFI) { - return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile( - CutOff, MF, PSI, MBFI); - } - static bool isColdBlock(const MachineBasicBlock *MBB, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI) { - return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI); - } - static bool isHotBlockNthPercentile(int CutOff, - const MachineBasicBlock *MBB, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI) { - return machine_size_opts_detail::isHotBlockNthPercentile( - CutOff, MBB, PSI, MBFI); - } -}; -} // end anonymous namespace - -bool llvm::shouldOptimizeForSize(const MachineFunction *MF, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI) { - return shouldFuncOptimizeForSizeImpl( - MF, PSI, MBFI); -} - -bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, - ProfileSummaryInfo *PSI, - const MachineBlockFrequencyInfo *MBFI) { - return shouldOptimizeForSizeImpl( - MBB, PSI, MBFI); -} diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp 
index f05c97ad621e2..2850033e64196 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 757ff0e44953d..5a79ac44dcf4c 100644 --- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -56,7 +56,7 @@ class RegUsageInfoCollector : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; - // Call determineCalleeSaves and then also set the bits for subregs and + // Call getCalleeSaves and then also set the bits for subregs and // fully saved superregs. static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF); @@ -199,7 +199,7 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { // Target will return the set of registers that it saves/restores as needed. SavedRegs.clear(); - TFI.determineCalleeSaves(MF, SavedRegs); + TFI.getCalleeSaves(MF, SavedRegs); if (SavedRegs.none()) return; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 735af2455d724..96a1f86c3e042 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -532,7 +531,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// Returns true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). 
-static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { +static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); } @@ -719,7 +718,7 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { map.reComputeSize(); } -void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, +void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker, PressureDiffs *PDiffs, LiveIntervals *LIS, diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 23f57f216d2b0..6d7260d7aee5c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (!FrameAlign) FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); Flags.setByValSize(FrameSize); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 02805826ef085..cf6711adad48d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 575046623a1cb..f9fdf525240fa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 7b929704d7a33..15c3a0b6cfadf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7468ba5115175..d4c1fb36475e7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// glued together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { +void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); // Populate the SUnits array. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5163b4fa4fd38..183ce4b0652d0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -26,6 +26,7 @@ namespace llvm { +class AAResults; class InstrItineraryData; /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. @@ -93,7 +94,7 @@ class InstrItineraryData; /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AAResults *AA); /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index ab06b55b49fdd..e7bac73678a76 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -63,14 +63,13 @@ class ScheduleDAGVLIW : public ScheduleDAGSDNodes { /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; - /// AA - AliasAnalysis for making memory reference queries. - AliasAnalysis *AA; + /// AA - AAResults for making memory reference queries. 
+ AAResults *AA; public: - ScheduleDAGVLIW(MachineFunction &mf, - AliasAnalysis *aa, + ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetSubtargetInfo &STI = mf.getSubtarget(); HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 9592bc30a4e19..3a53ab9717a45 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" #include using namespace llvm; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3d263453b09ec..8c15563fcd23d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9109,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. 
- unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); @@ -9164,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { FrameAlign = Args[i].Alignment; else FrameAlign = getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Args[i].IsNest) Flags.setNest(); @@ -9220,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } @@ -9607,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. 
- unsigned OriginalAlignment = - TLI->getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment( + TLI->getABIAlignmentForCallingConv(ArgTy, DL)); if (Arg.getType()->isPointerTy()) { Flags.setPointer(); @@ -9668,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); @@ -9691,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 121de700f6ba4..1f07a241a8242 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0163ec35951e6..9ab1324533f1e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3603,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Back to non-vector simplifications. // TODO: Can we do these for vector splats? 
if (auto *N1C = dyn_cast(N1.getNode())) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const APInt &C1 = N1C->getAPIntValue(); + EVT ShValTy = N0.getValueType(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - (VT == N0.getValueType() || - (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); + EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. - if (AndRHS->getAPIntValue().isPowerOf2()) { + unsigned ShCt = AndRHS->getAPIntValue().logBase2(); + if (AndRHS->getAPIntValue().isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. 
- if (C1.isPowerOf2()) { + unsigned ShCt = C1.logBase2(); + if (C1.isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } } diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index 96821cadb1b6e..c72a04276a4f2 100644 --- a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -13,9 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SwiftErrorValueTracking.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Value.h" diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index c5cd87b1481bf..bc59be890c979 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" @@ -59,6 +60,19 @@ bool TargetFrameLowering::needsFrameIndexResolution( return MF.getFrameInfo().hasStackObjects(); } +void TargetFrameLowering::getCalleeSaves(const MachineFunction &MF, + BitVector &CalleeSaves) const { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + CalleeSaves.resize(TRI.getNumRegs()); + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if 
(!MFI.isCalleeSavedInfoValid()) + return; + + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + CalleeSaves.set(Info.getReg()); +} + void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -120,6 +134,18 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( return 0; } +bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) { + if (!F.hasLocalLinkage() || F.hasAddressTaken() || + !F.hasFnAttribute(Attribute::NoRecurse)) + return false; + // Function should not be optimized as tail call. + for (const User *U : F.users()) + if (auto CS = ImmutableCallSite(U)) + if (CS.isTailCall()) + return false; + return true; +} + int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { llvm_unreachable("getInitialCFAOffset() not implemented!"); } @@ -127,4 +153,4 @@ int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) const { llvm_unreachable("getInitialCFARegister() not implemented!"); -} \ No newline at end of file +} diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index ba01bb79b266b..6cae3b869501b 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -880,7 +880,7 @@ void TargetInstrInfo::genAlternativeCodeSequence( } bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( - const MachineInstr &MI, AliasAnalysis *AA) const { + const MachineInstr &MI, AAResults *AA) const { const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 8cb665967dd35..c06d85d50609b 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1527,10 +1527,19 @@ class DWARFObjInMemory final : public DWARFObject { 
continue; StringRef Data; - section_iterator RelocatedSection = Section.getRelocatedSection(); + Expected SecOrErr = Section.getRelocatedSection(); + if (!SecOrErr) { + ErrorPolicy EP = HandleError(createError( + "failed to get relocated section: ", SecOrErr.takeError())); + if (EP == ErrorPolicy::Halt) + return; + continue; + } + // Try to obtain an already relocated version of this section. // Else use the unrelocated section from the object file. We'll have to // apply relocations ourselves later. + section_iterator RelocatedSection = *SecOrErr; if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) { Expected E = Section.getContents(); if (E) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 0c0086aae927d..bf499b6ee0927 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -34,11 +34,11 @@ DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) { if (Pos != End) { if (Pos->intersects(R)) - return Pos; + return std::move(Pos); if (Pos != Begin) { auto Iter = Pos - 1; if (Iter->intersects(R)) - return Iter; + return std::move(Iter); } } diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp index 359ef99472abc..1d3e6db913e21 100644 --- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp +++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp @@ -8,6 +8,7 @@ #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" namespace llvm { diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 434fecfb49e1a..2df71a5e5e741 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -348,8 +348,12 @@ RuntimeDyldImpl::loadObjectImpl(const 
object::ObjectFile &Obj) { for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { StubMap Stubs; - section_iterator RelocatedSection = SI->getRelocatedSection(); + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + return RelSecOrErr.takeError(); + + section_iterator RelocatedSection = *RelSecOrErr; if (RelocatedSection == SE) continue; @@ -648,7 +652,12 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj, unsigned StubBufSize = 0; for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { - section_iterator RelSecI = SI->getRelocatedSection(); + + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + report_fatal_error(toString(RelSecOrErr.takeError())); + + section_iterator RelSecI = *RelSecOrErr; if (!(RelSecI == Section)) continue; diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 8de3f7ef46740..440ab4174a565 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -606,7 +606,12 @@ Error RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj, // .opd entries for (section_iterator si = Obj.section_begin(), se = Obj.section_end(); si != se; ++si) { - section_iterator RelSecI = si->getRelocatedSection(); + + Expected RelSecOrErr = si->getRelocatedSection(); + if (!RelSecOrErr) + report_fatal_error(toString(RelSecOrErr.takeError())); + + section_iterator RelSecI = *RelSecOrErr; if (RelSecI == Obj.section_end()) continue; @@ -1871,7 +1876,12 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { if (SI->relocation_begin() != SI->relocation_end()) { - section_iterator RelocatedSection = SI->getRelocatedSection(); + Expected RelSecOrErr = SI->getRelocatedSection(); + if 
(!RelSecOrErr) + return make_error( + toString(RelSecOrErr.takeError())); + + section_iterator RelocatedSection = *RelSecOrErr; ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); assert (i != SectionMap.end()); SectionToGOTMap[i->second] = GOTSectionID; diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index 592042bc0c788..642bf0f39342b 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -269,6 +269,27 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, return makeExactMulNSWRegion(Other.getSignedMin()) .intersectWith(makeExactMulNSWRegion(Other.getSignedMax())); + + case Instruction::Shl: { + // For given range of shift amounts, if we ignore all illegal shift amounts + // (that always produce poison), what shift amount range is left? + ConstantRange ShAmt = Other.intersectWith( + ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, (BitWidth - 1) + 1))); + if (ShAmt.isEmptySet()) { + // If the entire range of shift amounts is already poison-producing, + // then we can freely add more poison-producing flags on top of that. + return getFull(BitWidth); + } + // There are some legal shift amounts, we can compute conservatively-correct + // range of no-wrap inputs. Note that by now we have clamped the ShAmtUMax + // to be at most bitwidth-1, which results in most conservative range. 
+ APInt ShAmtUMax = ShAmt.getUnsignedMax(); + if (Unsigned) + return getNonEmpty(APInt::getNullValue(BitWidth), + APInt::getMaxValue(BitWidth).lshr(ShAmtUMax) + 1); + return getNonEmpty(APInt::getSignedMinValue(BitWidth).ashr(ShAmtUMax), + APInt::getSignedMaxValue(BitWidth).ashr(ShAmtUMax) + 1); + } } } diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index ba5629d1662bb..b157c7bb34bfa 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -524,7 +524,7 @@ bool Instruction::mayReadFromMemory() const { case Instruction::Call: case Instruction::Invoke: case Instruction::CallBr: - return !cast(this)->doesNotAccessMemory(); + return !cast(this)->doesNotReadMemory(); case Instruction::Store: return !cast(this)->isUnordered(); } diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index dd27d0bc8a526..2d9c2cb21255b 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -263,19 +264,19 @@ class MCAsmStreamer final : public MCStreamer { void EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeRegisterRelSym::Header DRHdr) override; + codeview::DefRangeRegisterRelHeader DRHdr) override; void EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeSubfieldRegisterSym::Header DRHdr) override; + codeview::DefRangeSubfieldRegisterHeader DRHdr) override; void EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeRegisterSym::Header DRHdr) override; + codeview::DefRangeRegisterHeader DRHdr) override; void EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeFramePointerRelSym::Header DRHdr) override; + codeview::DefRangeFramePointerRelHeader DRHdr) override; void EmitCVStringTableDirective() 
override; void EmitCVFileChecksumsDirective() override; @@ -1438,7 +1439,7 @@ void MCAsmStreamer::PrintCVDefRangePrefix( void MCAsmStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeRegisterRelSym::Header DRHdr) { + codeview::DefRangeRegisterRelHeader DRHdr) { PrintCVDefRangePrefix(Ranges); OS << ", reg_rel, "; OS << DRHdr.Register << ", " << DRHdr.Flags << ", " @@ -1448,7 +1449,7 @@ void MCAsmStreamer::EmitCVDefRangeDirective( void MCAsmStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeSubfieldRegisterSym::Header DRHdr) { + codeview::DefRangeSubfieldRegisterHeader DRHdr) { PrintCVDefRangePrefix(Ranges); OS << ", subfield_reg, "; OS << DRHdr.Register << ", " << DRHdr.OffsetInParent; @@ -1457,7 +1458,7 @@ void MCAsmStreamer::EmitCVDefRangeDirective( void MCAsmStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeRegisterSym::Header DRHdr) { + codeview::DefRangeRegisterHeader DRHdr) { PrintCVDefRangePrefix(Ranges); OS << ", reg, "; OS << DRHdr.Register; @@ -1466,7 +1467,7 @@ void MCAsmStreamer::EmitCVDefRangeDirective( void MCAsmStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeFramePointerRelSym::Header DRHdr) { + codeview::DefRangeFramePointerRelHeader DRHdr) { PrintCVDefRangePrefix(Ranges); OS << ", frame_ptr_rel, "; OS << DRHdr.Offset; diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index ca6bc252a0df7..b59ac08ad6cc4 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -3908,7 +3909,7 @@ bool AsmParser::parseDirectiveCVDefRange() { parseAbsoluteExpression(DRRegister)) return Error(Loc, "expected register number"); - 
codeview::DefRangeRegisterSym::Header DRHdr; + codeview::DefRangeRegisterHeader DRHdr; DRHdr.Register = DRRegister; DRHdr.MayHaveNoName = 0; getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); @@ -3921,7 +3922,7 @@ bool AsmParser::parseDirectiveCVDefRange() { parseAbsoluteExpression(DROffset)) return Error(Loc, "expected offset value"); - codeview::DefRangeFramePointerRelSym::Header DRHdr; + codeview::DefRangeFramePointerRelHeader DRHdr; DRHdr.Offset = DROffset; getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); break; @@ -3938,7 +3939,7 @@ bool AsmParser::parseDirectiveCVDefRange() { parseAbsoluteExpression(DROffsetInParent)) return Error(Loc, "expected offset value"); - codeview::DefRangeSubfieldRegisterSym::Header DRHdr; + codeview::DefRangeSubfieldRegisterHeader DRHdr; DRHdr.Register = DRRegister; DRHdr.MayHaveNoName = 0; DRHdr.OffsetInParent = DROffsetInParent; @@ -3963,7 +3964,7 @@ bool AsmParser::parseDirectiveCVDefRange() { parseAbsoluteExpression(DRBasePointerOffset)) return Error(Loc, "expected base pointer offset value"); - codeview::DefRangeRegisterRelSym::Header DRHdr; + codeview::DefRangeRegisterRelHeader DRHdr; DRHdr.Register = DRRegister; DRHdr.Flags = DRFlags; DRHdr.BasePointerOffset = DRBasePointerOffset; diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 38d4b177e282c..b8278cb110799 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" @@ -347,7 +348,7 @@ void MCStreamer::EmitCVDefRangeDirective( void MCStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeRegisterRelSym::Header DRHdr) { + codeview::DefRangeRegisterRelHeader DRHdr) { SmallString<20> BytePrefix; copyBytesForDefRange(BytePrefix, 
codeview::S_DEFRANGE_REGISTER_REL, DRHdr); EmitCVDefRangeDirective(Ranges, BytePrefix); @@ -355,7 +356,7 @@ void MCStreamer::EmitCVDefRangeDirective( void MCStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeSubfieldRegisterSym::Header DRHdr) { + codeview::DefRangeSubfieldRegisterHeader DRHdr) { SmallString<20> BytePrefix; copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_SUBFIELD_REGISTER, DRHdr); @@ -364,7 +365,7 @@ void MCStreamer::EmitCVDefRangeDirective( void MCStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeRegisterSym::Header DRHdr) { + codeview::DefRangeRegisterHeader DRHdr) { SmallString<20> BytePrefix; copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_REGISTER, DRHdr); EmitCVDefRangeDirective(Ranges, BytePrefix); @@ -372,7 +373,7 @@ void MCStreamer::EmitCVDefRangeDirective( void MCStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, - codeview::DefRangeFramePointerRelSym::Header DRHdr) { + codeview::DefRangeFramePointerRelHeader DRHdr) { SmallString<20> BytePrefix; copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_FRAMEPOINTER_REL, DRHdr); diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp index 75e0952deb686..e0e63a5a7d766 100644 --- a/llvm/lib/Object/ObjectFile.cpp +++ b/llvm/lib/Object/ObjectFile.cpp @@ -84,7 +84,8 @@ bool ObjectFile::isBerkeleyData(DataRefImpl Sec) const { return isSectionData(Sec); } -section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { +Expected +ObjectFile::getRelocatedSection(DataRefImpl Sec) const { return section_iterator(SectionRef(Sec, this)); } diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index f9c31f335f112..e0faed256f6b5 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -200,10 +200,17 @@ template ELFState::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH) : Doc(D), ErrHandler(EH) { StringSet<> DocSections; - for (std::unique_ptr &D : 
Doc.Sections) + for (std::unique_ptr &D : Doc.Sections) { if (!D->Name.empty()) DocSections.insert(D->Name); + // Some sections want to link to .symtab by default. + // That means we want to create the symbol table for them. + if (D->Type == llvm::ELF::SHT_REL || D->Type == llvm::ELF::SHT_RELA) + if (!Doc.Symbols && D->Link.empty()) + Doc.Symbols.emplace(); + } + // Insert SHT_NULL section implicitly when it is not defined in YAML. if (Doc.Sections.empty() || Doc.Sections.front()->Type != ELF::SHT_NULL) Doc.Sections.insert( @@ -211,7 +218,11 @@ ELFState::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH) std::make_unique( ELFYAML::Section::SectionKind::RawContent, /*IsImplicit=*/true)); - std::vector ImplicitSections = {".symtab", ".strtab", ".shstrtab"}; + std::vector ImplicitSections; + if (Doc.Symbols) + ImplicitSections.push_back(".symtab"); + ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"}); + if (!Doc.DynamicSymbols.empty()) ImplicitSections.insert(ImplicitSections.end(), {".dynsym", ".dynstr"}); @@ -508,7 +519,11 @@ void ELFState::initSymtabSectionHeader(Elf_Shdr &SHeader, ELFYAML::Section *YAMLSec) { bool IsStatic = STType == SymtabType::Static; - const auto &Symbols = IsStatic ? Doc.Symbols : Doc.DynamicSymbols; + ArrayRef Symbols; + if (IsStatic && Doc.Symbols) + Symbols = *Doc.Symbols; + else if (!IsStatic) + Symbols = Doc.DynamicSymbols; ELFYAML::RawContentSection *RawSec = dyn_cast_or_null(YAMLSec); @@ -1044,14 +1059,16 @@ template void ELFState::buildSymbolIndexes() { } }; - Build(Doc.Symbols, SymN2I); + if (Doc.Symbols) + Build(*Doc.Symbols, SymN2I); Build(Doc.DynamicSymbols, DynSymN2I); } template void ELFState::finalizeStrings() { // Add the regular symbol names to .strtab section. 
- for (const ELFYAML::Symbol &Sym : Doc.Symbols) - DotStrtab.add(ELFYAML::dropUniqueSuffix(Sym.Name)); + if (Doc.Symbols) + for (const ELFYAML::Symbol &Sym : *Doc.Symbols) + DotStrtab.add(ELFYAML::dropUniqueSuffix(Sym.Name)); DotStrtab.finalize(); // Add the dynamic symbol names to .dynstr section. diff --git a/llvm/lib/Support/Windows/WindowsSupport.h b/llvm/lib/Support/Windows/WindowsSupport.h index fed9b2f462ef7..2e2e97430b76e 100644 --- a/llvm/lib/Support/Windows/WindowsSupport.h +++ b/llvm/lib/Support/Windows/WindowsSupport.h @@ -38,6 +38,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" // Get build system configuration settings +#include "llvm/Support/Allocator.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/VersionTuple.h" diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 68e1e6a302244..042d8fdcc51d0 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1588,7 +1588,8 @@ static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) { bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; - unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize(); + unsigned FPAdjust = isTargetDarwin(MF) + ? 
16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo()); return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; } @@ -1630,7 +1631,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( int FPOffset = getFPOffset(MF, ObjectOffset).getBytes(); int Offset = getStackOffset(MF, ObjectOffset).getBytes(); bool isCSR = - !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize()); + !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); const StackOffset &SVEStackSize = getSVEStackSize(MF); @@ -2304,6 +2305,10 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, << EstimatedStackSize + AlignedCSStackSize << " bytes.\n"); + assert((!MFI.isCalleeSavedInfoValid() || + AFI->getCalleeSavedStackSize() == AlignedCSStackSize) && + "Should not invalidate callee saved info"); + // Round up to register pair alignment to avoid additional SP adjustment // instructions. AFI->setCalleeSavedStackSize(AlignedCSStackSize); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 9111b644b2c83..ac150e86c9eb5 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -21,7 +21,7 @@ namespace llvm { class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() - : TargetFrameLowering(StackGrowsDown, Align(16), 0, 16, + : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16), true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index a7d0a742573d7..32661860934a1 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" +#include 
"llvm/IR/Function.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include @@ -54,6 +55,7 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// Amount of stack frame size used for saving callee-saved registers. unsigned CalleeSavedStackSize; + bool HasCalleeSavedStackSize = false; /// Number of TLS accesses using the special (combinable) /// _TLS_MODULE_BASE_ symbol. @@ -166,8 +168,55 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } unsigned getLocalStackSize() const { return LocalStackSize; } - void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } - unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } + void setCalleeSavedStackSize(unsigned Size) { + CalleeSavedStackSize = Size; + HasCalleeSavedStackSize = true; + } + + // When CalleeSavedStackSize has not been set (for example when + // some MachineIR pass is run in isolation), then recalculate + // the CalleeSavedStackSize directly from the CalleeSavedInfo. + // Note: This information can only be recalculated after PEI + // has assigned offsets to the callee save objects. + unsigned getCalleeSavedStackSize(const MachineFrameInfo &MFI) const { + bool ValidateCalleeSavedStackSize = false; + +#ifndef NDEBUG + // Make sure the calculated size derived from the CalleeSavedInfo + // equals the cached size that was calculated elsewhere (e.g. in + // determineCalleeSaves). 
+ ValidateCalleeSavedStackSize = HasCalleeSavedStackSize; +#endif + + if (!HasCalleeSavedStackSize || ValidateCalleeSavedStackSize) { + assert(MFI.isCalleeSavedInfoValid() && "CalleeSavedInfo not calculated"); + if (MFI.getCalleeSavedInfo().empty()) + return 0; + + int64_t MinOffset = std::numeric_limits::max(); + int64_t MaxOffset = std::numeric_limits::min(); + for (const auto &Info : MFI.getCalleeSavedInfo()) { + int FrameIdx = Info.getFrameIdx(); + int64_t Offset = MFI.getObjectOffset(FrameIdx); + int64_t ObjSize = MFI.getObjectSize(FrameIdx); + MinOffset = std::min(Offset, MinOffset); + MaxOffset = std::max(Offset + ObjSize, MaxOffset); + } + + unsigned Size = alignTo(MaxOffset - MinOffset, 16); + assert((!HasCalleeSavedStackSize || getCalleeSavedStackSize() == Size) && + "Invalid size calculated for callee saves"); + return Size; + } + + return getCalleeSavedStackSize(); + } + + unsigned getCalleeSavedStackSize() const { + assert(HasCalleeSavedStackSize && + "CalleeSavedStackSize has not been calculated"); + return CalleeSavedStackSize; + } void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; } unsigned getNumLocalDynamicTLSAccesses() const { diff --git a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp index 21ad8e48b1f02..7307961ddb5fd 100644 --- a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp +++ b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp @@ -106,6 +106,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" #include diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp index e35b6d00a284a..61ce83b30e004 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp @@ -14,7 +14,7 @@ using namespace llvm; 
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, Align StackAl, - int LAO, unsigned TransAl) + int LAO, Align TransAl) : TargetFrameLowering(D, StackAl, LAO, TransAl) {} AMDGPUFrameLowering::~AMDGPUFrameLowering() = default; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h index 090e2bc302cc7..92e256cf2829f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -26,7 +26,7 @@ namespace llvm { class AMDGPUFrameLowering : public TargetFrameLowering { public: AMDGPUFrameLowering(StackDirection D, Align StackAl, int LAO, - unsigned TransAl = 1); + Align TransAl = Align::None()); ~AMDGPUFrameLowering() override; /// \returns The number of 32-bit sub-registers that are used when storing diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 62904dc2380ab..f768a10a720b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -262,6 +262,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { SDValue getHi16Elt(SDValue In) const; + SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; + void SelectADD_SUB_I64(SDNode *N); void SelectAddcSubb(SDNode *N); void SelectUADDO_USUBO(SDNode *N); @@ -963,6 +965,14 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } +SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val, + const SDLoc &DL) const { + SDNode *Mov = CurDAG->getMachineNode( + AMDGPU::S_MOV_B32, DL, MVT::i32, + CurDAG->getTargetConstant(Val, DL, MVT::i32)); + return SDValue(Mov, 0); +} + // FIXME: Should only handle addcarry/subcarry void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { SDLoc DL(N); @@ -1630,13 +1640,80 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, CurDAG->isBaseWithConstantOffset(Addr)) { SDValue N0 = Addr.getOperand(0); SDValue N1 = 
Addr.getOperand(1); - int64_t COffsetVal = cast(N1)->getSExtValue(); + uint64_t COffsetVal = cast(N1)->getSExtValue(); const SIInstrInfo *TII = Subtarget->getInstrInfo(); - if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(), - IsSigned)) { + unsigned AS = findMemSDNode(N)->getAddressSpace(); + if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) { Addr = N0; OffsetVal = COffsetVal; + } else { + // If the offset doesn't fit, put the low bits into the offset field and + // add the rest. + + SDLoc DL(N); + uint64_t ImmField; + const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned); + if (IsSigned) { + ImmField = SignExtend64(COffsetVal, NumBits); + + // Don't use a negative offset field if the base offset is positive. + // Since the scheduler currently relies on the offset field, doing so + // could result in strange scheduling decisions. + + // TODO: Should we not do this in the opposite direction as well? + if (static_cast(COffsetVal) > 0) { + if (static_cast(ImmField) < 0) { + const uint64_t OffsetMask = maskTrailingOnes(NumBits - 1); + ImmField = COffsetVal & OffsetMask; + } + } + } else { + // TODO: Should we do this for a negative offset? + const uint64_t OffsetMask = maskTrailingOnes(NumBits); + ImmField = COffsetVal & OffsetMask; + } + + uint64_t RemainderOffset = COffsetVal - ImmField; + + assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned)); + assert(RemainderOffset + ImmField == COffsetVal); + + OffsetVal = ImmField; + + // TODO: Should this try to use a scalar add pseudo if the base address is + // uniform and saddr is usable? 
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); + SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); + + SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, N0, Sub0); + SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, N0, Sub1); + + SDValue AddOffsetLo + = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL); + SDValue AddOffsetHi + = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL); + + SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1); + SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + + SDNode *Add = CurDAG->getMachineNode( + AMDGPU::V_ADD_I32_e64, DL, VTs, + {AddOffsetLo, SDValue(N0Lo, 0), Clamp}); + + SDNode *Addc = CurDAG->getMachineNode( + AMDGPU::V_ADDC_U32_e64, DL, VTs, + {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp}); + + SDValue RegSequenceArgs[] = { + CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32), + SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1 + }; + + Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, + MVT::i64, RegSequenceArgs), 0); } } diff --git a/llvm/lib/Target/AMDGPU/GCNILPSched.cpp b/llvm/lib/Target/AMDGPU/GCNILPSched.cpp index 1eb617640c32a..39072af7d8711 100644 --- a/llvm/lib/Target/AMDGPU/GCNILPSched.cpp +++ b/llvm/lib/Target/AMDGPU/GCNILPSched.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600FrameLowering.h b/llvm/lib/Target/AMDGPU/R600FrameLowering.h index 5b24901cf6449..283e4d1935ea1 100644 --- a/llvm/lib/Target/AMDGPU/R600FrameLowering.h +++ b/llvm/lib/Target/AMDGPU/R600FrameLowering.h @@ -16,7 +16,7 @@ namespace llvm { class R600FrameLowering : public AMDGPUFrameLowering { public: R600FrameLowering(StackDirection D, Align StackAl, int LAO, - unsigned TransAl = 1) + Align TransAl 
= Align::None()) : AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} ~R600FrameLowering() override; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 698bcbdb9832c..bdbcc658b88c8 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -239,9 +239,11 @@ static bool updateOperand(FoldCandidate &Fold, if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) { MachineBasicBlock *MBB = MI->getParent(); - auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); - if (Liveness != MachineBasicBlock::LQR_Dead) + auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16); + if (Liveness != MachineBasicBlock::LQR_Dead) { + LLVM_DEBUG(dbgs() << "Not shrinking " << *MI << " due to vcc liveness\n"); return false; + } MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); int Op32 = Fold.getShrinkOpcode(); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 6e07dac32dfcb..d9970fd6b4b87 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -21,7 +21,7 @@ class GCNSubtarget; class SIFrameLowering final : public AMDGPUFrameLowering { public: SIFrameLowering(StackDirection D, Align StackAl, int LAO, - unsigned TransAl = 1) + Align TransAl = Align::None()) : AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} ~SIFrameLowering() override = default; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 8c61f3e9aa473..80eb2be9e6575 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -20,11 +20,11 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" 
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -35,6 +35,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/DAGCombine.h" @@ -44,6 +45,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" @@ -2686,6 +2688,15 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsThisReturn = false; MachineFunction &MF = DAG.getMachineFunction(); + if (Callee.isUndef() || isNullConstant(Callee)) { + if (!CLI.IsTailCall) { + for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I) + InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT)); + } + + return Chain; + } + if (IsVarArg) { return lowerUnhandledCall(CLI, InVals, "unsupported call to variadic function "); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 6bc2a0ca08e32..88dc938e2b88d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6275,6 +6275,20 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); } +unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace, + bool Signed) const { + if (!ST.hasFlatInstOffsets()) + return 0; + + if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS) + return 0; + + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) + return Signed ? 12 : 11; + + return Signed ? 
13 : 12; +} + bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, bool Signed) const { // TODO: Should 0 be special cased? diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 25b7e717c1148..be463442c888b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -173,7 +173,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { } bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, @@ -1004,6 +1004,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return isUInt<12>(Imm); } + unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const; + /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT /// encoded instruction. If \p Signed, this is for an instruction that /// interprets the offset as signed. 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index a3b08c716a420..1eecbf5556133 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -871,6 +871,18 @@ def ExpTgtMatchClass : AsmOperandClass { let RenderMethod = "printExpTgt"; } +def SWaitMatchClass : AsmOperandClass { + let Name = "SWaitCnt"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "parseSWaitCntOps"; +} + +def VReg32OrOffClass : AsmOperandClass { + let Name = "VReg32OrOff"; + let ParserMethod = "parseVReg32OrOff"; +} + +let OperandType = "OPERAND_IMMEDIATE" in { def SendMsgImm : Operand { let PrintMethod = "printSendMsg"; let ParserMatchClass = SendMsgMatchClass; @@ -886,22 +898,11 @@ def EndpgmImm : Operand { let ParserMatchClass = EndpgmMatchClass; } -def SWaitMatchClass : AsmOperandClass { - let Name = "SWaitCnt"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "parseSWaitCntOps"; -} - -def VReg32OrOffClass : AsmOperandClass { - let Name = "VReg32OrOff"; - let ParserMethod = "parseVReg32OrOff"; -} - def WAIT_FLAG : Operand { let ParserMatchClass = SWaitMatchClass; let PrintMethod = "printWaitFlag"; - let OperandType = "OPERAND_IMMEDIATE"; } +} // End OperandType = "OPERAND_IMMEDIATE" include "SIInstrFormats.td" include "VIInstrFormats.td" diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index cb8b055b5da18..f58bc3060c429 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -108,8 +108,6 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { - - const GCNSubtarget &ST = MF.getSubtarget(); unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass); @@ 
-134,7 +132,6 @@ static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { - const GCNSubtarget &ST = MF.getSubtarget(); unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF)); return AMDGPU::SGPR_32RegClass.getRegister(Reg); } @@ -192,8 +189,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::VCC_HI); } - const GCNSubtarget &ST = MF.getSubtarget(); - unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) { @@ -355,8 +350,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, DL = Ins->getDebugLoc(); MachineFunction *MF = MBB->getParent(); - const GCNSubtarget &Subtarget = MF->getSubtarget(); - const SIInstrInfo *TII = Subtarget.getInstrInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); if (Offset == 0) { BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg) @@ -382,11 +376,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const { - - MachineBasicBlock *MBB = MI.getParent(); - MachineFunction *MF = MBB->getParent(); - const GCNSubtarget &Subtarget = MF->getSubtarget(); - const SIInstrInfo *TII = Subtarget.getInstrInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); #ifndef NDEBUG // FIXME: Is it possible to be storing a frame index to itself? 
@@ -402,6 +392,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, #endif MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr); +#ifndef NDEBUG + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); +#endif assert(FIOp && FIOp->isFI() && "frame index must be address operand"); assert(TII->isMUBUF(MI)); assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == @@ -546,7 +540,8 @@ static int getOffsetMUBUFLoad(unsigned Opc) { } } -static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, +static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, + MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, @@ -554,7 +549,6 @@ static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane); @@ -577,11 +571,12 @@ static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not // need to handle the case where an SGPR may need to be spilled while spilling. 
-static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, +static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset) { + const SIInstrInfo *TII = ST.getInstrInfo(); MachineBasicBlock *MBB = MI->getParent(); const DebugLoc &DL = MI->getDebugLoc(); bool IsStore = MI->mayStore(); @@ -593,7 +588,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, return false; const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); - if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr()) + if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr()) return true; MachineInstrBuilder NewMI = @@ -628,7 +623,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, RegScavenger *RS) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -702,7 +696,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, SrcDstRegState |= getKillRegState(IsKill); } - auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill); + auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill); if (!MIB.getInstr()) { unsigned FinalReg = SubReg; @@ -763,7 +757,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, if (OnlyToVGPR && !SpillToVGPR) return false; - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); Register SuperReg = MI->getOperand(0).getReg(); @@ -882,7 +875,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, return false; MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const DebugLoc &DL = MI->getDebugLoc(); @@ -995,7 +987,6 @@ void 
SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -1223,7 +1214,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int64_t NewOffset = OldImm + Offset; if (isUInt<12>(NewOffset) && - buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) { + buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) { MI->eraseFromParent(); return; } @@ -1741,8 +1732,6 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - - const GCNSubtarget &ST = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index db20d5ccf5f9c..207e4232e8298 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -21,6 +21,8 @@ #include "SIDefines.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/EndianStream.h" diff --git a/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp b/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp index 2b34b1d854827..00a2231f59e3c 100644 --- a/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBasicBlockInfo.cpp @@ -6,14 +6,16 @@ // //===----------------------------------------------------------------------===// +#include "ARMBasicBlockInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBasicBlockInfo.h" #include 
"ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/Support/Debug.h" #include #define DEBUG_TYPE "arm-bb-utils" diff --git a/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/llvm/lib/Target/ARM/ARMBasicBlockInfo.h index d0f4a02463b7c..13df399ed9952 100644 --- a/llvm/lib/Target/ARM/ARMBasicBlockInfo.h +++ b/llvm/lib/Target/ARM/ARMBasicBlockInfo.h @@ -21,6 +21,7 @@ namespace llvm { +struct BasicBlockInfo; using BBInfoVector = SmallVectorImpl; /// UnknownPadding - Return the worst case padding that could result from diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index e465b660f233e..d3b595ce8323a 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -203,8 +203,7 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, // Even if there is no splitting to do, we still want to replace the // original type (e.g. pointer type -> integer). 
auto Flags = OrigArg.Flags[0]; - unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(OrigArg.Ty))); SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), Flags, OrigArg.IsFixed); return; @@ -216,8 +215,7 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, Type *SplitTy = SplitVT.getTypeForEVT(Ctx); auto Flags = OrigArg.Flags[0]; - unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(SplitTy))); bool NeedsConsecutiveRegisters = TLI.functionArgumentNeedsConsecutiveRegisters( diff --git a/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index 3bdb0e1ef62d9..72c95f4412655 100644 --- a/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 3e3745f129c32..1fc5ff6921c60 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -2237,8 +2237,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!isTypeLegal(ArgTy, ArgVT)) return false; ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy))); Args.push_back(Op); ArgRegs.push_back(Arg); @@ -2371,8 +2370,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (!Arg.isValid()) return false; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - 
Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy))); Args.push_back(*i); ArgRegs.push_back(Arg); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 03681d5634cc5..5428bd6c94b35 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -76,7 +76,7 @@ skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs); ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)), STI(sti) {} bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const { @@ -2128,10 +2128,16 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setLRIsSpilledForFarJump(true); } AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); +} + +void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, + BitVector &SavedRegs) const { + TargetFrameLowering::getCalleeSaves(MF, SavedRegs); // If we have the "returned" parameter attribute which guarantees that we // return the value which was passed in r0 unmodified (e.g. C++ 'structors), // record that fact for IPRA. 
+ const ARMFunctionInfo *AFI = MF.getInfo(); if (AFI->getPreservesR0()) SavedRegs.set(ARM::R0); } diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 6d8aee5979459..0462b01af707c 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -53,6 +53,8 @@ class ARMFrameLowering : public TargetFrameLowering { int ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const; + void getCalleeSaves(const MachineFunction &MF, + BitVector &SavedRegs) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e9e3c6643505c..db26feb570103 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1021,6 +1021,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); + if (Subtarget->hasDSP()) { + setOperationAction(ISD::SADDSAT, MVT::i8, Custom); + setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); + setOperationAction(ISD::SADDSAT, MVT::i16, Custom); + setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); + } + if (Subtarget->hasBaseDSP()) { + setOperationAction(ISD::SADDSAT, MVT::i32, Legal); + setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); + } // i64 operation support. 
setOperationAction(ISD::MUL, MVT::i64, Expand); @@ -1622,6 +1632,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::SMMLAR: return "ARMISD::SMMLAR"; case ARMISD::SMMLSR: return "ARMISD::SMMLSR"; + case ARMISD::QADD16b: return "ARMISD::QADD16b"; + case ARMISD::QSUB16b: return "ARMISD::QSUB16b"; + case ARMISD::QADD8b: return "ARMISD::QADD8b"; + case ARMISD::QSUB8b: return "ARMISD::QSUB8b"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -4445,6 +4459,35 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } +static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + EVT VT = Op.getValueType(); + if (!Subtarget->hasDSP()) + return SDValue(); + if (!VT.isSimple()) + return SDValue(); + + unsigned NewOpcode; + bool IsAdd = Op->getOpcode() == ISD::SADDSAT; + switch (VT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::i8: + NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b; + break; + case MVT::i16: + NewOpcode = IsAdd ? 
ARMISD::QADD16b : ARMISD::QSUB16b; + break; + } + + SDLoc dl(Op); + SDValue Add = + DAG.getNode(NewOpcode, dl, MVT::i32, + DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32), + DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Add); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -9121,6 +9164,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UADDO: case ISD::USUBO: return LowerUnsignedALUO(Op, DAG); + case ISD::SADDSAT: + case ISD::SSUBSAT: + return LowerSADDSUBSAT(Op, DAG, Subtarget); case ISD::LOAD: return LowerPredicateLoad(Op, DAG); case ISD::STORE: @@ -9205,6 +9251,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res.getValue(0)); Results.push_back(Res.getValue(1)); return; + case ISD::SADDSAT: + case ISD::SSUBSAT: + Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget); + break; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; @@ -11044,10 +11094,7 @@ static SDValue findMUL_LOHI(SDValue V) { static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - if (Subtarget->isThumb()) { - if (!Subtarget->hasDSP()) - return SDValue(); - } else if (!Subtarget->hasV5TEOps()) + if (!Subtarget->hasBaseDSP()) return SDValue(); // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and @@ -14382,7 +14429,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } - case ARMISD::SMLALBB: { + case ARMISD::SMLALBB: + case ARMISD::QADD16b: + case ARMISD::QSUB16b: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || @@ -14418,6 +14467,15 @@ SDValue 
ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } + case ARMISD::QADD8b: + case ARMISD::QSUB8b: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8); + if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) + return SDValue(); + break; + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { @@ -16935,16 +16993,15 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, } /// Return the correct alignment for the current calling convention. -unsigned -ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const { +Align ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + const Align ABITypeAlign(DL.getABITypeAlignment(ArgTy)); if (!ArgTy->isVectorTy()) - return DL.getABITypeAlignment(ArgTy); + return ABITypeAlign; // Avoid over-aligning vector parameters. It would require realigning the // stack and waste space for no real benefit. - return std::min(DL.getABITypeAlignment(ArgTy), - (unsigned)DL.getStackAlignment().value()); + return std::min(ABITypeAlign, DL.getStackAlignment()); } /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 98e0684fd4030..53813fad5afd1 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -219,6 +219,12 @@ class VectorType; SMMLAR, // Signed multiply long, round and add SMMLSR, // Signed multiply long, subtract and round + // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b stands for. 
+ QADD8b, + QSUB8b, + QADD16b, + QSUB16b, + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -611,8 +617,8 @@ class VectorType; void finalizeLowering(MachineFunction &MF) const override; /// Return the correct alignment for the current calling convention. - unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const override; + Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const override; bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index f75343675dad0..fe696222ec70a 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -238,6 +238,11 @@ def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; +def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>; +def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; +def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; +def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; + // Vector operations shared between NEON and MVE def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; @@ -3750,6 +3755,23 @@ let DecoderMethod = "DecodeQADDInstruction" in [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; } +def : ARMV5TEPat<(saddsat GPR:$a, GPR:$b), + (QADD GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(ssubsat GPR:$a, GPR:$b), + (QSUB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (QDADD rGPR:$Rm, rGPR:$Rn)>; +def : ARMV5TEPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), + (QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), + (QADD8 
rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), + (QSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), + (QADD16 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), + (QSUB16 rGPR:$Rm, rGPR:$Rn)>; + def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index ef5d0908b771c..25a45b39fa0c0 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2395,6 +2395,23 @@ def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)), (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(saddsat rGPR:$Rm, rGPR:$Rn), + (t2QADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ssubsat rGPR:$Rm, rGPR:$Rn), + (t2QSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (t2QDADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), + (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), + (t2QADD8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), + (t2QSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), + (t2QADD16 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), + (t2QSUB16 rGPR:$Rm, rGPR:$Rn)>; + // Signed/Unsigned add/subtract def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index e80c2c6412321..bb136e92329ba 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h 
@@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index af6f6914448ab..ef460342a69e3 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -672,6 +672,12 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool hasSB() const { return HasSB; } bool genLongCalls() const { return GenLongCalls; } bool genExecuteOnly() const { return GenExecuteOnly; } + bool hasBaseDSP() const { + if (isThumb()) + return hasDSP(); + else + return hasV5TEOps(); + } bool hasFP16() const { return HasFP16; } bool hasD32() const { return HasD32; } diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index 9d986b6167e06..bc0a80b177ed2 100644 --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Debug.h" #include #include diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index c44702a78ec81..ae5a82a993033 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -473,7 +473,7 @@ class CALL class CALLX : TYPE_ALU_JMP { bits<32> BrDst; diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp index 4c46289656b40..f82f166eda4d0 100644 --- a/llvm/lib/Target/BPF/BPFMIChecking.cpp +++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp @@ -19,6 +19,7 @@ #include "BPFTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp 
b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index 72fd131c4594f..e9eecc55c3c32 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index 8cebc5b537fa3..9c689aed64178 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -30,6 +30,7 @@ #include "BPFTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h index 875533fd6b0db..27265dd537949 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h @@ -30,7 +30,7 @@ class TargetRegisterClass; class HexagonFrameLowering : public TargetFrameLowering { public: explicit HexagonFrameLowering() - : TargetFrameLowering(StackGrowsDown, Align(8), 0, 1, true) {} + : TargetFrameLowering(StackGrowsDown, Align(8), 0, Align::None(), true) {} // All of the prolog/epilog functionality, including saving and restoring // callee-saved registers is handled in emitPrologue. 
This is to have the diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 7024dafd47969..fab5edefb553f 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -130,7 +130,7 @@ INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer", "Hexagon Packetizer", false, false) HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, - MachineLoopInfo &MLI, AliasAnalysis *AA, + MachineLoopInfo &MLI, AAResults *AA, const MachineBranchProbabilityInfo *MBPI, bool Minimal) : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI), Minimal(Minimal) { diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h index daa86b6f53933..943b9ac7ecc49 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -69,8 +69,7 @@ class HexagonPacketizerList : public VLIWPacketizerList { public: HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA, - const MachineBranchProbabilityInfo *MBPI, + AAResults *AA, const MachineBranchProbabilityInfo *MBPI, bool Minimal); // initPacketizerState - initialize some internal flags. 
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp index 52178931aa6d2..af86c7b1956bc 100644 --- a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" #include diff --git a/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp index 45e7c26e4d306..ce5affdc25b0a 100644 --- a/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/llvm/lib/Target/MSP430/MSP430FrameLowering.h index 0d32fb6277147..70e2840530211 100644 --- a/llvm/lib/Target/MSP430/MSP430FrameLowering.h +++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.h @@ -23,7 +23,7 @@ class MSP430FrameLowering : public TargetFrameLowering { public: explicit MSP430FrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(2), -2, - 2) {} + Align(2)) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index a83fd131ac357..64169d1f5eb19 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -353,6 +353,9 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, } } +unsigned MSP430TargetLowering::getShiftAmountThreshold(EVT VT) const { + return 2; +} //===----------------------------------------------------------------------===// // MSP430 Inline Assembly Support //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/llvm/lib/Target/MSP430/MSP430ISelLowering.h index ee6b6316d7a91..9224e5e3d005a 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.h +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.h @@ -124,6 +124,8 @@ namespace llvm { bool isZExtFree(EVT VT1, EVT VT2) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; + unsigned getShiftAmountThreshold(EVT VT) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp index 2853203398c0a..cad82953af50c 100644 --- a/llvm/lib/Target/Mips/MipsCallLowering.cpp +++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp @@ -690,7 +690,7 @@ void MipsCallLowering::subTargetRegTypeForCallingConv( if (i == 0) Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL)); else - Flags.setOrigAlign(1); + Flags.setOrigAlign(Align::None()); ISDArgs.emplace_back(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo], 0); diff --git a/llvm/lib/Target/Mips/MipsFrameLowering.h b/llvm/lib/Target/Mips/MipsFrameLowering.h index 1043a6b1015dd..612b2b712fa88 100644 --- a/llvm/lib/Target/Mips/MipsFrameLowering.h +++ b/llvm/lib/Target/Mips/MipsFrameLowering.h @@ -25,8 +25,8 @@ class MipsFrameLowering : public 
TargetFrameLowering { public: explicit MipsFrameLowering(const MipsSubtarget &sti, Align Alignment) - : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment.value()), - STI(sti) {} + : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) { + } static const MipsFrameLowering *create(const MipsSubtarget &ST); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index f5daa28cf83bb..0a5cddd45afbf 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -304,11 +304,12 @@ class TargetRegisterClass; unsigned &NumIntermediates, MVT &RegisterVT) const override; /// Return the correct alignment for the current calling convention. - unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const override { + Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const override { + const Align ABIAlign(DL.getABITypeAlignment(ArgTy)); if (ArgTy->isVectorTy()) - return std::min(DL.getABITypeAlignment(ArgTy), 8U); - return DL.getABITypeAlignment(ArgTy); + return std::min(ABIAlign, Align(8)); + return ABIAlign; } ISD::NodeType getExtendForAtomicOps() const override { diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index ba7c2222c2af1..66236b72a1a39 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -536,6 +536,7 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; const bool IsDarwin = TM.getTargetTriple().isOSDarwin(); + const bool IsPPC64 = Subtarget->isPPC64(); const bool IsAIX = Subtarget->isAIXABI(); const Module *M = MF->getFunction().getParent(); PICLevel::Level PL = M->getPICLevel(); @@ -759,8 +760,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDIStocHA: { - assert((IsAIX && !Subtarget->isPPC64() && 
- TM.getCodeModel() == CodeModel::Large) && + assert((IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large) && "This pseudo should only be selected for 32-bit large code model on" " AIX."); @@ -790,8 +790,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::LWZtocL: { - assert(IsAIX && !Subtarget->isPPC64() && - TM.getCodeModel() == CodeModel::Large && + assert(IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large && "This pseudo should only be selected for 32-bit large code model on" " AIX."); @@ -921,7 +920,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDISgotTprelHA: { // Transform: %xd = ADDISgotTprelHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -940,14 +939,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to LD. - TmpInst.setOpcode(Subtarget->isPPC64() ? PPC::LD : PPC::LWZ); + TmpInst.setOpcode(IsPPC64 ? PPC::LD : PPC::LWZ); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *Exp = MCSymbolRefExpr::create( - MOSymbol, - Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO - : MCSymbolRefExpr::VK_PPC_GOT_TPREL, + MOSymbol, IsPPC64 ? 
MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO + : MCSymbolRefExpr::VK_PPC_GOT_TPREL, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -1001,7 +999,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStlsgdHA: { // Transform: %xd = ADDIStlsgdHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -1024,11 +1022,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create( - MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO - : MCSymbolRefExpr::VK_PPC_GOT_TLSGD, + MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO + : MCSymbolRefExpr::VK_PPC_GOT_TLSGD, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? 
PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsGD)); @@ -1046,7 +1044,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStlsldHA: { // Transform: %xd = ADDIStlsldHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -1069,11 +1067,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create( - MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO - : MCSymbolRefExpr::VK_PPC_GOT_TLSLD, + MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO + : MCSymbolRefExpr::VK_PPC_GOT_TLSLD, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsLD)); @@ -1102,7 +1100,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutContext); EmitToStreamer( *OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) + MCInstBuilder(IsPPC64 ? PPC::ADDIS8 : PPC::ADDIS) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -1121,7 +1119,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? 
PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -1168,7 +1166,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // suite shows a handful of test cases that fail this check for // Darwin. Those need to be investigated before this sanity test // can be enabled for those subtargets. - if (!Subtarget->isDarwin()) { + if (!IsDarwin) { unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1; const MachineOperand &MO = MI->getOperand(OpNum); if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 54c5726dedfd7..19ab30cb0908d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -248,7 +248,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index 2e774c06674d7..0f74f2bb344c4 100644 --- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -35,7 +35,7 @@ DisableLeafProc("disable-sparc-leaf-proc", SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, ST.is64Bit() ? Align(16) : Align(8), 0, - ST.is64Bit() ? 16 : 8) {} + ST.is64Bit() ? 
Align(16) : Align(8)) {} void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 04278d695b053..0b8b6880accc8 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -47,7 +47,7 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { SystemZFrameLowering::SystemZFrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), - -SystemZMC::CallFrameSize, 8, + -SystemZMC::CallFrameSize, Align(8), false /* StackRealignable */) { // Create a mapping from register number to save slot offset. RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index d69f578735ac0..e0ca9da93561c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -847,7 +847,7 @@ supportedAddressingMode(Instruction *I, bool HasVector) { } if (isa(I) && I->hasOneUse()) { - auto *SingleUser = dyn_cast(*I->user_begin()); + auto *SingleUser = cast(*I->user_begin()); if (SingleUser->getParent() == I->getParent()) { if (isa(SingleUser)) { if (auto *C = dyn_cast(SingleUser->getOperand(1))) diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp index eb9745f71b7db..3fc25034dded1 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" +#include "llvm/CodeGen/MachineLoopInfo.h" using namespace llvm; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 
4893ca686c44d..7e867edaaa27f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/MC/MCAsmInfo.h" using namespace llvm; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index c18d6040375bf..157ea9d525c96 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -56,6 +56,7 @@ #include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index 95ddc1c88b58d..fdc0f561dcd96 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -31,7 +31,7 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering { WebAssemblyFrameLowering() : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/Align(16), /*LocalAreaOffset=*/0, - /*TransientStackAlignment=*/16, + /*TransientStackAlignment=*/Align(16), /*StackRealignable=*/true) {} MachineBasicBlock::iterator diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 68db96a1ce2f8..8e8126c90e724 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -38,7 +38,7 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) RI(STI.getTargetTriple()) 
{} bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( - const MachineInstr &MI, AliasAnalysis *AA) const { + const MachineInstr &MI, AAResults *AA) const { switch (MI.getOpcode()) { case WebAssembly::CONST_I32: case WebAssembly::CONST_I64: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index df1051b4f42cf..fe6211663c317 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -43,7 +43,7 @@ class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp index f3d4dd68170b8..75d04252cbe99 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "wasm-late-eh-prepare" diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index a241362a271d3..e287f6625115c 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "X86DisassemblerDecoder.h" +#include "llvm/ADT/StringRef.h" + #include /* for va_*() */ #include /* for 
vsnprintf() */ #include /* for exit() */ #include /* for memset() */ -#include "X86DisassemblerDecoder.h" - using namespace llvm::X86Disassembler; /// Specifies whether a ModR/M byte is needed and (if so) which diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index 7c195e51d1629..69c6b3356cbb0 100644 --- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -35,6 +35,7 @@ #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4edb3b37afd4c..5211b1bcebb89 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6808,9 +6808,9 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl &Mask, } // Replace target shuffle mask elements with known undef/zero sentinels. -static void resolveTargetShuffleAndZeroables(SmallVectorImpl &Mask, - const APInt &KnownUndef, - const APInt &KnownZero) { +static void resolveTargetShuffleFromZeroables(SmallVectorImpl &Mask, + const APInt &KnownUndef, + const APInt &KnownZero) { unsigned NumElts = Mask.size(); assert(KnownUndef.getBitWidth() == NumElts && KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch"); @@ -6823,6 +6823,22 @@ static void resolveTargetShuffleAndZeroables(SmallVectorImpl &Mask, } } +// Extract target shuffle mask sentinel elements to known undef/zero bitmasks. 
+static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl &Mask, + APInt &KnownUndef, + APInt &KnownZero) { + unsigned NumElts = Mask.size(); + KnownUndef = KnownZero = APInt::getNullValue(NumElts); + + for (unsigned i = 0; i != NumElts; ++i) { + int M = Mask[i]; + if (SM_SentinelUndef == M) + KnownUndef.setBit(i); + if (SM_SentinelZero == M) + KnownZero.setBit(i); + } +} + // Forward declaration (for getFauxShuffleMask recursive check). // TODO: Use DemandedElts variant. static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, @@ -7273,19 +7289,12 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) { if (ResolveKnownElts) - resolveTargetShuffleAndZeroables(Mask, KnownUndef, KnownZero); + resolveTargetShuffleFromZeroables(Mask, KnownUndef, KnownZero); return true; } if (getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth, ResolveKnownElts)) { - KnownUndef = KnownZero = APInt::getNullValue(Mask.size()); - for (int i = 0, e = Mask.size(); i != e; ++i) { - int M = Mask[i]; - if (SM_SentinelUndef == M) - KnownUndef.setBit(i); - if (SM_SentinelZero == M) - KnownZero.setBit(i); - } + resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero); return true; } return false; @@ -14269,8 +14278,16 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef Mask, DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); // Unpack the bytes to form the i16s that will be shuffled into place. + bool EvenInUse = false, OddInUse = false; + for (int i = 0; i < 16; i += 2) { + EvenInUse |= (Mask[i + 0] >= 0); + OddInUse |= (Mask[i + 1] >= 0); + if (EvenInUse && OddInUse) + break; + } V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, - MVT::v16i8, V1, V1); + MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8), + OddInUse ? 
V1 : DAG.getUNDEF(MVT::v16i8)); int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; for (int i = 0; i < 16; ++i) @@ -18510,6 +18527,16 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, return Result; } +/// Horizontal vector math instructions may be slower than normal math with +/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch +/// implementation, and likely shuffle complexity of the alternate sequence. +static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool HasFastHOps = Subtarget.hasFastHorizontalOps(); + return !IsSingleSource || IsOptimizingSize || HasFastHOps; +} + /// 64-bit unsigned integer to double expansion. static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -18564,8 +18591,7 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; - if (Subtarget.hasSSE3()) { - // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'. + if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) { Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1}); @@ -19623,16 +19649,6 @@ static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) { return Op; } -/// Horizontal vector math instructions may be slower than normal math with -/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch -/// implementation, and likely shuffle complexity of the alternate sequence. 
-static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize(); - bool HasFastHOps = Subtarget.hasFastHorizontalOps(); - return !IsSingleSource || IsOptimizingSize || HasFastHOps; -} - /// Depending on uarch and/or optimizing for size, we might prefer to use a /// vector operation in place of the typical scalar operation. static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG, @@ -33040,7 +33056,7 @@ static SDValue combineX86ShufflesRecursively( OpZero, DAG, Depth, false)) return SDValue(); - resolveTargetShuffleAndZeroables(OpMask, OpUndef, OpZero); + resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero); // Add the inputs to the Ops list, avoiding duplicates. SmallVector Ops(SrcOps.begin(), SrcOps.end()); diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 3c68ac2291f20..c29029daeec90 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -480,7 +480,7 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { } bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { + AAResults *AA) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 0502bbd7a013a..22b7b1d4cb193 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -206,7 +206,7 @@ class X86InstrInfo final : public X86GenInstrInfo { int &FrameIndex) const override; bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr 
&Orig, diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h index b999e2e86af68..ec3db7b1e9e87 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.h +++ b/llvm/lib/Target/X86/X86TargetMachine.h @@ -16,7 +16,6 @@ #include "X86Subtarget.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include @@ -26,6 +25,7 @@ namespace llvm { class StringRef; class X86Subtarget; class X86RegisterBankInfo; +class TargetTransformInfo; class X86TargetMachine final : public LLVMTargetMachine { std::unique_ptr TLOF; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 11a54bdb0a752..95f47345d8fd0 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -2939,7 +2939,7 @@ struct AANoCaptureImpl : public AANoCapture { // Check what state the associated function can actually capture. if (F) - determineFunctionCaptureCapabilities(*F, *this); + determineFunctionCaptureCapabilities(IRP, *F, *this); else indicatePessimisticFixpoint(); } @@ -2965,7 +2965,8 @@ struct AANoCaptureImpl : public AANoCapture { /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known /// depending on the ability of the function associated with \p IRP to capture /// state in memory and through "returning/throwing", respectively. - static void determineFunctionCaptureCapabilities(const Function &F, + static void determineFunctionCaptureCapabilities(const IRPosition &IRP, + const Function &F, IntegerState &State) { // TODO: Once we have memory behavior attributes we should use them here. @@ -2987,6 +2988,21 @@ struct AANoCaptureImpl : public AANoCapture { // exceptions and doesn not return values. if (F.doesNotThrow() && F.getReturnType()->isVoidTy()) State.addKnownBits(NOT_CAPTURED_IN_RET); + + // Check existing "returned" attributes. 
+ int ArgNo = IRP.getArgNo(); + if (F.doesNotThrow() && ArgNo >= 0) { + for (unsigned u = 0, e = F.arg_size(); u< e; ++u) + if (F.hasParamAttribute(u, Attribute::Returned)) { + if (u == unsigned(ArgNo)) + State.removeAssumedBits(NOT_CAPTURED_IN_RET); + else if (F.onlyReadsMemory()) + State.addKnownBits(NO_CAPTURE); + else + State.addKnownBits(NOT_CAPTURED_IN_RET); + break; + } + } } /// See AbstractState::getAsStr(). @@ -3158,15 +3174,54 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { const Function *F = getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); assert(F && "Expected a function!"); - const auto &IsDeadAA = A.getAAFor(*this, IRPosition::function(*F)); + const IRPosition &FnPos = IRPosition::function(*F); + const auto &IsDeadAA = A.getAAFor(*this, FnPos); AANoCapture::StateType T; - // TODO: Once we have memory behavior attributes we should use them here - // similar to the reasoning in - // AANoCaptureImpl::determineFunctionCaptureCapabilities(...). - // TODO: Use the AAReturnedValues to learn if the argument can return or - // not. + // Readonly means we cannot capture through memory. + const auto &FnMemAA = A.getAAFor(*this, FnPos); + if (FnMemAA.isAssumedReadOnly()) { + T.addKnownBits(NOT_CAPTURED_IN_MEM); + if (FnMemAA.isKnownReadOnly()) + addKnownBits(NOT_CAPTURED_IN_MEM); + } + + // Make sure all returned values are different than the underlying value. + // TODO: we could do this in a more sophisticated way inside + // AAReturnedValues, e.g., track all values that escape through returns + // directly somehow. 
+ auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) { + bool SeenConstant = false; + for (auto &It : RVAA.returned_values()) { + if (isa(It.first)) { + if (SeenConstant) + return false; + SeenConstant = true; + } else if (!isa(It.first) || + It.first == getAssociatedArgument()) + return false; + } + return true; + }; + + const auto &NoUnwindAA = A.getAAFor(*this, FnPos); + if (NoUnwindAA.isAssumedNoUnwind()) { + bool IsVoidTy = F->getReturnType()->isVoidTy(); + const AAReturnedValues *RVAA = + IsVoidTy ? nullptr : &A.getAAFor(*this, FnPos); + if (IsVoidTy || CheckReturnedArgs(*RVAA)) { + T.addKnownBits(NOT_CAPTURED_IN_RET); + if (T.isKnown(NOT_CAPTURED_IN_MEM)) + return ChangeStatus::UNCHANGED; + if (NoUnwindAA.isKnownNoUnwind() && + (IsVoidTy || RVAA->getState().isAtFixpoint())) { + addKnownBits(NOT_CAPTURED_IN_RET); + if (isKnown(NOT_CAPTURED_IN_MEM)) + return indicateOptimisticFixpoint(); + } + } + } // Use the CaptureTracker interface and logic with the specialized tracker, // defined in AACaptureUseTracker, that can look at in-flight abstract diff --git a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp index 5f3d0c2e3733c..e20159ba0db59 100644 --- a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -104,7 +104,7 @@ void CrossDSOCFI::buildCFICheck(Module &M) { FunctionCallee C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx)); - Function *F = dyn_cast(C.getCallee()); + Function *F = cast(C.getCallee()); // Take over the existing function. The frontend emits a weak stub so that the // linker knows about the symbol; this pass replaces the function body. 
F->deleteBody(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 5d306cd8eea9d..8bc34825f8a7b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1097,12 +1097,13 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) { return nullptr; } -static Instruction * -canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( - BinaryOperator &I, InstCombiner::BuilderTy &Builder) { +Instruction * +InstCombiner::canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + BinaryOperator &I) { assert((I.getOpcode() == Instruction::Add || + I.getOpcode() == Instruction::Or || I.getOpcode() == Instruction::Sub) && - "Expecting add/sub instruction"); + "Expecting add/or/sub instruction"); // We have a subtraction/addition between a (potentially truncated) *logical* // right-shift of X and a "select". @@ -1114,7 +1115,7 @@ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( m_Value(Select)))) return nullptr; - // `add` is commutative; but for `sub`, "select" *must* be on RHS. + // `add`/`or` is commutative; but for `sub`, "select" *must* be on RHS. if (I.getOpcode() == Instruction::Sub && I.getOperand(1) != Select) return nullptr; @@ -1140,13 +1141,13 @@ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( X->getType()->getScalarSizeInBits())))) return nullptr; - // Sign-extending value can be sign-extended itself if we `add` it, - // or zero-extended if we `sub`tract it. + // Sign-extending value can be zero-extended if we `sub`tract it, + // or sign-extended otherwise. auto SkipExtInMagic = [&I](Value *&V) { - if (I.getOpcode() == Instruction::Add) - match(V, m_SExtOrSelf(m_Value(V))); - else + if (I.getOpcode() == Instruction::Sub) match(V, m_ZExtOrSelf(m_Value(V))); + else + match(V, m_SExtOrSelf(m_Value(V))); }; // Now, finally validate the sign-extending magic. 
@@ -1157,7 +1158,7 @@ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( const APInt *Thr; Value *SignExtendingValue, *Zero; bool ShouldSignext; - // It must be a select between two values we will later estabilish to be a + // It must be a select between two values we will later establish to be a // sign-extending value and a zero constant. The condition guarding the // sign-extension must be based on a sign bit of the same X we had in `lshr`. if (!match(Select, m_Select(m_ICmp(Pred, m_Specific(X), m_APInt(Thr)), @@ -1169,7 +1170,7 @@ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( if (!ShouldSignext) std::swap(SignExtendingValue, Zero); - // If we should not perform sign-extension then we must add/subtract zero. + // If we should not perform sign-extension then we must add/or/subtract zero. if (!match(Zero, m_Zero())) return nullptr; // Otherwise, it should be some constant, left-shifted by the same NBits we @@ -1181,10 +1182,10 @@ canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( m_Shl(m_Constant(SignExtendingValueBaseConstant), m_ZExtOrSelf(m_Specific(NBits))))) return nullptr; - // If we `add`, then the constant should be all-ones, else it should be one. - if (I.getOpcode() == Instruction::Add - ? !match(SignExtendingValueBaseConstant, m_AllOnes()) - : !match(SignExtendingValueBaseConstant, m_One())) + // If we `sub`, then the constant should be one, else it should be all-ones. + if (I.getOpcode() == Instruction::Sub + ? 
!match(SignExtendingValueBaseConstant, m_One()) + : !match(SignExtendingValueBaseConstant, m_AllOnes())) return nullptr; auto *NewAShr = BinaryOperator::CreateAShr(X, LowBitsToSkip, @@ -1403,8 +1404,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return V; if (Instruction *V = - canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( - I, Builder)) + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) return V; if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I)) @@ -2006,8 +2006,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } if (Instruction *V = - canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( - I, Builder)) + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) return V; if (Instruction *Ext = narrowMathIfNoOverflow(I)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index db5095b6fea5e..4a30b60ca9315 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2725,6 +2725,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) + return V; + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 8d4b0dc0a7a71..c15fb27a4c7af 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1059,9 +1059,9 @@ Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) { // If we can unconditionally load from this address, replace with a // load/select idiom. 
TODO: use DT for context sensitive query - if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment, - II.getModule()->getDataLayout(), - &II, nullptr)) { + if (isDereferenceableAndAlignedPointer( + LoadPtr, II.getType(), MaybeAlign(Alignment), + II.getModule()->getDataLayout(), &II, nullptr)) { Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment, "unmaskedload"); return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3)); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index ee51bc03312fb..a9f64feb600c9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1358,19 +1358,28 @@ Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { /// Fold equality-comparison between zero and any (maybe truncated) right-shift /// by one-less-than-bitwidth into a sign test on the original value. 
-Instruction *foldSignBitTest(ICmpInst &I) { +Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) { + Instruction *Val; ICmpInst::Predicate Pred; - Value *X; - Constant *C; - if (!I.isEquality() || - !match(&I, m_ICmp(Pred, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))), - m_Zero()))) + if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero()))) return nullptr; - Type *XTy = X->getType(); - unsigned XBitWidth = XTy->getScalarSizeInBits(); - if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, - APInt(XBitWidth, XBitWidth - 1)))) + Value *X; + Type *XTy; + + Constant *C; + if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) { + XTy = X->getType(); + unsigned XBitWidth = XTy->getScalarSizeInBits(); + if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(XBitWidth, XBitWidth - 1)))) + return nullptr; + } else if (isa(Val) && + (X = reassociateShiftAmtsOfTwoSameDirectionShifts( + cast(Val), SQ.getWithInstruction(Val), + /*AnalyzeForSignBitExtraction=*/true))) { + XTy = X->getType(); + } else return nullptr; return ICmpInst::Create(Instruction::ICmp, @@ -3067,6 +3076,28 @@ Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp, } break; } + + case Intrinsic::uadd_sat: { + // uadd.sat(a, b) == 0 -> (a | b) == 0 + if (C.isNullValue()) { + Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1)); + return replaceInstUsesWith(Cmp, Builder.CreateICmp( + Cmp.getPredicate(), Or, Constant::getNullValue(Ty))); + + } + break; + } + + case Intrinsic::usub_sat: { + // usub.sat(a, b) == 0 -> a <= b + if (C.isNullValue()) { + ICmpInst::Predicate NewPred = Cmp.getPredicate() == ICmpInst::ICMP_EQ + ? 
ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; + return ICmpInst::Create(Instruction::ICmp, NewPred, + II->getArgOperand(0), II->getArgOperand(1)); + } + break; + } default: break; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index e04cd346b6fc5..4917a355cadde 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -390,6 +390,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Instruction *visitOr(BinaryOperator &I); Instruction *visitXor(BinaryOperator &I); Instruction *visitShl(BinaryOperator &I); + Value *reassociateShiftAmtsOfTwoSameDirectionShifts( + BinaryOperator *Sh0, const SimplifyQuery &SQ, + bool AnalyzeForSignBitExtraction = false); + Instruction *canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + BinaryOperator &I); Instruction *foldVariableSignZeroExtensionOfVariableHighBitExtract( BinaryOperator &OldAShr); Instruction *visitAShr(BinaryOperator &I); @@ -853,7 +858,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner int DmaskIdx = -1); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt &UndefElts, unsigned Depth = 0); + APInt &UndefElts, unsigned Depth = 0, + bool AllowMultipleUsers = false); /// Canonicalize the position of binops relative to shufflevector. 
Instruction *foldVectorBinop(BinaryOperator &Inst); @@ -912,6 +918,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); Instruction *foldICmpEquality(ICmpInst &Cmp); Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); + Instruction *foldSignBitTest(ICmpInst &I); Instruction *foldICmpWithZero(ICmpInst &Cmp); Value *foldUnsignedMultiplicationOverflowCheck(ICmpInst &Cmp); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4c5e1cc43760a..3e035f43370f0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -175,7 +175,7 @@ static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType()); if (!AllocaSize) return false; - return isDereferenceableAndAlignedPointer(V, AI->getAlignment(), + return isDereferenceableAndAlignedPointer(V, Align(AI->getAlignment()), APInt(64, AllocaSize), DL); } @@ -1020,11 +1020,11 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // if (SelectInst *SI = dyn_cast(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 
- unsigned Align = LI.getAlignment(); - if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align, - DL, SI) && - isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align, - DL, SI)) { + const MaybeAlign Alignment(LI.getAlignment()); + if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), + Alignment, DL, SI) && + isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), + Alignment, DL, SI)) { LoadInst *V1 = Builder.CreateLoad(LI.getType(), SI->getOperand(1), SI->getOperand(1)->getName() + ".val"); @@ -1032,9 +1032,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Builder.CreateLoad(LI.getType(), SI->getOperand(2), SI->getOperand(2)->getName() + ".val"); assert(LI.isUnordered() && "implied by above"); - V1->setAlignment(MaybeAlign(Align)); + V1->setAlignment(Alignment); V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); - V2->setAlignment(MaybeAlign(Align)); + V2->setAlignment(Alignment); V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); return SelectInst::Create(SI->getCondition(), V1, V2); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index cc0e35e4a9cac..64294838644f3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -25,10 +25,12 @@ using namespace PatternMatch; // we should rewrite it as // x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x) // This is valid for any shift, but they must be identical. -static Instruction * -reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, - const SimplifyQuery &SQ, - InstCombiner::BuilderTy &Builder) { +// +// AnalyzeForSignBitExtraction indicates that we will only analyze whether this +// pattern has any 2 right-shifts that sum to 1 less than original bit width. 
+Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts( + BinaryOperator *Sh0, const SimplifyQuery &SQ, + bool AnalyzeForSignBitExtraction) { // Look for a shift of some instruction, ignore zext of shift amount if any. Instruction *Sh0Op0; Value *ShAmt0; @@ -56,14 +58,25 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, if (ShAmt0->getType() != ShAmt1->getType()) return nullptr; - // The shift opcodes must be identical. + // We are only looking for signbit extraction if we have two right shifts. + bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) && + match(Sh1, m_Shr(m_Value(), m_Value())); + // ... and if it's not two right-shifts, we know the answer already. + if (AnalyzeForSignBitExtraction && !HadTwoRightShifts) + return nullptr; + + // The shift opcodes must be identical, unless we are just checking whether + // this pattern can be interpreted as a sign-bit-extraction. Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode(); - if (ShiftOpcode != Sh1->getOpcode()) + bool IdenticalShOpcodes = Sh0->getOpcode() == Sh1->getOpcode(); + if (!IdenticalShOpcodes && !AnalyzeForSignBitExtraction) return nullptr; // If we saw truncation, we'll need to produce extra instruction, - // and for that one of the operands of the shift must be one-use. - if (Trunc && !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + // and for that one of the operands of the shift must be one-use, + // unless of course we don't actually plan to produce any instructions here. + if (Trunc && !AnalyzeForSignBitExtraction && + !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) return nullptr; // Can we fold (ShAmt0+ShAmt1) ? @@ -80,14 +93,22 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, return nullptr; // FIXME: could perform constant-folding. // If there was a truncation, and we have a right-shift, we can only fold if - // we are left with the original sign bit. + // we are left with the original sign bit. 
Likewise, if we were just checking + // that this is a signbit extraction, this is the place to check it. // FIXME: zero shift amount is also legal here, but we can't *easily* check // more than one predicate so it's not really worth it. - if (Trunc && ShiftOpcode != Instruction::BinaryOps::Shl && - !match(NewShAmt, - m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, - APInt(NewShAmtBitWidth, XBitWidth - 1)))) - return nullptr; + if (HadTwoRightShifts && (Trunc || AnalyzeForSignBitExtraction)) { + // If it's not a sign bit extraction, then we're done. + if (!match(NewShAmt, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(NewShAmtBitWidth, XBitWidth - 1)))) + return nullptr; + // If it is, and that was the question, return the base value. + if (AnalyzeForSignBitExtraction) + return X; + } + + assert(IdenticalShOpcodes && "Should not get here with different shifts."); // All good, we can do this fold. NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType()); @@ -287,8 +308,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; - if (Instruction *NewShift = - reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ, Builder)) + if (auto *NewShift = cast_or_null( + reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))) return NewShift; // (C1 shift (A add C2)) -> (C1 shift C2) shift A) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 4680cb3006000..d30ab8001897f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1074,16 +1074,22 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, } /// The specified value produces a vector with any number of elements. 
+/// This method analyzes which elements of the operand are undef and returns +/// that information in UndefElts. +/// /// DemandedElts contains the set of elements that are actually used by the -/// caller. This method analyzes which elements of the operand are undef and -/// returns that information in UndefElts. +/// caller, and by default (AllowMultipleUsers equals false) the value is +/// simplified only if it has a single caller. If AllowMultipleUsers is set +/// to true, DemandedElts refers to the union of sets of elements that are +/// used by all callers. /// /// If the information about demanded elements can be used to simplify the /// operation, the operation is simplified, then the resultant value is /// returned. This returns null if no change was made. Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, - unsigned Depth) { + unsigned Depth, + bool AllowMultipleUsers) { unsigned VWidth = V->getType()->getVectorNumElements(); APInt EltMask(APInt::getAllOnesValue(VWidth)); assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); @@ -1137,19 +1143,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (Depth == 10) return nullptr; - // If multiple users are using the root value, proceed with - // simplification conservatively assuming that all elements - // are needed. - if (!V->hasOneUse()) { - // Quit if we find multiple users of a non-root value though. - // They'll be handled when it's their turn to be visited by - // the main instcombine process. - if (Depth != 0) - // TODO: Just compute the UndefElts information recursively. - return nullptr; + if (!AllowMultipleUsers) { + // If multiple users are using the root value, proceed with + // simplification conservatively assuming that all elements + // are needed. + if (!V->hasOneUse()) { + // Quit if we find multiple users of a non-root value though. 
+ // They'll be handled when it's their turn to be visited by + // the main instcombine process. + if (Depth != 0) + // TODO: Just compute the UndefElts information recursively. + return nullptr; - // Conservatively assume that all elements are needed. - DemandedElts = EltMask; + // Conservatively assume that all elements are needed. + DemandedElts = EltMask; + } } Instruction *I = dyn_cast(V); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 13c38caa1e661..9c890748e5ab8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -253,6 +253,69 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, return nullptr; } +/// Find elements of V demanded by UserInstr. +static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { + unsigned VWidth = V->getType()->getVectorNumElements(); + + // Conservatively assume that all elements are needed. 
+ APInt UsedElts(APInt::getAllOnesValue(VWidth)); + + switch (UserInstr->getOpcode()) { + case Instruction::ExtractElement: { + ExtractElementInst *EEI = cast(UserInstr); + assert(EEI->getVectorOperand() == V); + ConstantInt *EEIIndexC = dyn_cast(EEI->getIndexOperand()); + if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) { + UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue()); + } + break; + } + case Instruction::ShuffleVector: { + ShuffleVectorInst *Shuffle = cast(UserInstr); + unsigned MaskNumElts = UserInstr->getType()->getVectorNumElements(); + + UsedElts = APInt(VWidth, 0); + for (unsigned i = 0; i < MaskNumElts; i++) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal == -1u || MaskVal >= 2 * VWidth) + continue; + if (Shuffle->getOperand(0) == V && (MaskVal < VWidth)) + UsedElts.setBit(MaskVal); + if (Shuffle->getOperand(1) == V && + ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth))) + UsedElts.setBit(MaskVal - VWidth); + } + break; + } + default: + break; + } + return UsedElts; +} + +/// Find union of elements of V demanded by all its users. +/// If it is known by querying findDemandedEltsBySingleUser that +/// no user demands an element of V, then the corresponding bit +/// remains unset in the returned value. 
+static APInt findDemandedEltsByAllUsers(Value *V) { + unsigned VWidth = V->getType()->getVectorNumElements(); + + APInt UnionUsedElts(VWidth, 0); + for (const Use &U : V->uses()) { + if (Instruction *I = dyn_cast(U.getUser())) { + UnionUsedElts |= findDemandedEltsBySingleUser(V, I); + } else { + UnionUsedElts = APInt::getAllOnesValue(VWidth); + break; + } + + if (UnionUsedElts.isAllOnesValue()) + break; + } + + return UnionUsedElts; +} + Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Value *SrcVec = EI.getVectorOperand(); Value *Index = EI.getIndexOperand(); @@ -271,19 +334,35 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { return nullptr; // This instruction only demands the single element from the input vector. - // If the input vector has a single use, simplify it based on this use - // property. - if (SrcVec->hasOneUse() && NumElts != 1) { - APInt UndefElts(NumElts, 0); - APInt DemandedElts(NumElts, 0); - DemandedElts.setBit(IndexC->getZExtValue()); - if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts, - UndefElts)) { - EI.setOperand(0, V); - return &EI; + if (NumElts != 1) { + // If the input vector has a single use, simplify it based on this use + // property. + if (SrcVec->hasOneUse()) { + APInt UndefElts(NumElts, 0); + APInt DemandedElts(NumElts, 0); + DemandedElts.setBit(IndexC->getZExtValue()); + if (Value *V = + SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) { + EI.setOperand(0, V); + return &EI; + } + } else { + // If the input vector has multiple uses, simplify it based on a union + // of all elements used. 
+ APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); + if (!DemandedElts.isAllOnesValue()) { + APInt UndefElts(NumElts, 0); + if (Value *V = SimplifyDemandedVectorElts( + SrcVec, DemandedElts, UndefElts, 0 /* Depth */, + true /* AllowMultipleUsers */)) { + if (V != SrcVec) { + SrcVec->replaceAllUsesWith(V); + return &EI; + } + } + } } } - if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) return I; diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ba16afaa89689..6be715c1036cd 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -860,6 +860,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, break; case Instruction::Add: case Instruction::Sub: + case Instruction::Mul: BBChanged |= processBinOp(cast(II), LVI); break; case Instruction::And: diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp index 1f01ba2fbfc6c..c87e41484b132 100644 --- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -539,7 +539,7 @@ class GVNHoist { // Check for unsafe hoistings due to side effects. 
if (K == InsKind::Store) { - if (hasEHOrLoadsOnPath(NewPt, dyn_cast(U), NBBsOnAllPaths)) + if (hasEHOrLoadsOnPath(NewPt, cast(U), NBBsOnAllPaths)) return false; } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) return false; diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index e14f44bb70692..2697d78095681 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -591,7 +591,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, else Result = RC.getCheckInst(); } - + assert(Result && "Failed to find result value"); Result->setName("wide.chk"); } return true; diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 6299deca08c92..5519a00c12c97 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2268,8 +2268,8 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) continue; - const auto *AR = dyn_cast(SE->getSCEV(Phi)); - + const auto *AR = cast(SE->getSCEV(Phi)); + // AR may be a pointer type, while BECount is an integer type. // AR may be wider than BECount. With eq/ne tests overflow is immaterial. // AR may not be a narrower type, or we may never exit. @@ -2717,6 +2717,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { if (isa(MaxExitCount)) return false; + // Visit our exit blocks in order of dominance. We know from the fact that + // all exits (left) are analyzable that there must be a total dominance order + // between them as each must dominate the latch. The visit order only + // matters for the provably equal case. + llvm::sort(ExitingBlocks, + [&](BasicBlock *A, BasicBlock *B) { + // std::sort sorts in ascending order, so we want the inverse of + // the normal dominance relation. 
+ if (DT->properlyDominates(A, B)) return true; + if (DT->properlyDominates(B, A)) return false; + llvm_unreachable("expected total dominance order!"); + }); +#ifdef ASSERT + for (unsigned i = 1; i < ExitingBlocks.size(); i++) { + assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); + } +#endif + auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { BranchInst *BI = cast(ExitingBB->getTerminator()); bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); @@ -2729,6 +2747,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { }; bool Changed = false; + SmallSet DominatingExitCounts; for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); assert(!isa(ExitCount) && "checked above"); @@ -2766,10 +2785,22 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { continue; } - // TODO: If we can prove that the exiting iteration is equal to the exit - // count for this exit and that no previous exit oppurtunities exist within - // the loop, then we can discharge all other exits. (May fall out of - // previous TODO.) + // As we run, keep track of which exit counts we've encountered. If we + // find a duplicate, we've found an exit which would have exited on the + // exiting iteration, but (from the visit order) strictly follows another + // which does the same and is thus dead. + if (!DominatingExitCounts.insert(ExitCount).second) { + FoldExit(ExitingBB, false); + Changed = true; + continue; + } + + // TODO: There might be another opportunity to leverage SCEV's reasoning + // here. If we kept track of the min of dominating exits so far, we could + // discharge exits with EC >= MDEC. This is less powerful than the existing + // transform (since later exits aren't considered), but potentially more + // powerful for any case where SCEV can prove a >=u b, but neither a == b + // or a >u b. Such a case is not currently known. 
} return Changed; } diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 262d64f161859..6ce4831a73592 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2033,7 +2033,8 @@ bool llvm::promoteLoopAccessesToScalars( if (!DereferenceableInPH) { DereferenceableInPH = isDereferenceableAndAlignedPointer( Store->getPointerOperand(), Store->getValueOperand()->getType(), - Store->getAlignment(), MDL, Preheader->getTerminator(), DT); + MaybeAlign(Store->getAlignment()), MDL, + Preheader->getTerminator(), DT); } } else return false; // Not a load or store. diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 37d6d6085e430..2364748efb057 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -513,9 +513,13 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, ToLift.push_back(C); for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k) - if (auto *A = dyn_cast(C->getOperand(k))) - if (A->getParent() == SI->getParent()) + if (auto *A = dyn_cast(C->getOperand(k))) { + if (A->getParent() == SI->getParent()) { + // Cannot hoist user of P above P + if(A == P) return false; Args.insert(A); + } + } } // We made it, we need to lift diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 4b816832c31ec..74b8ff9130502 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1199,7 +1199,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { // TODO: Allow recursive phi users. // TODO: Allow stores. 
BasicBlock *BB = PN.getParent(); - unsigned MaxAlign = 0; + MaybeAlign MaxAlign; uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType()); APInt MaxSize(APWidth, 0); bool HaveLoad = false; @@ -1221,7 +1221,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { return false; uint64_t Size = DL.getTypeStoreSize(LI->getType()); - MaxAlign = std::max(MaxAlign, LI->getAlignment()); + MaxAlign = std::max(MaxAlign, MaybeAlign(LI->getAlignment())); MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize; HaveLoad = true; } @@ -1340,11 +1340,11 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) { // Both operands to the select need to be dereferenceable, either // absolutely (e.g. allocas) or at this point because we can see other // accesses to it. - if (!isSafeToLoadUnconditionally(TValue, LI->getType(), LI->getAlignment(), - DL, LI)) + if (!isSafeToLoadUnconditionally(TValue, LI->getType(), + MaybeAlign(LI->getAlignment()), DL, LI)) return false; - if (!isSafeToLoadUnconditionally(FValue, LI->getType(), LI->getAlignment(), - DL, LI)) + if (!isSafeToLoadUnconditionally(FValue, LI->getType(), + MaybeAlign(LI->getAlignment()), DL, LI)) return false; } diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index f0b79079d8175..b27a36b67d62e 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -341,7 +341,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { const DataLayout &DL = L->getModule()->getDataLayout(); if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) || !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(), - L->getAlignment(), DL, L)) + MaybeAlign(L->getAlignment()), DL, L)) return false; } } diff --git a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp index 455fcbb1cf98a..3c7c8d872595f 100644 --- 
a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp +++ b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 361b559ac02e5..0324993a8203d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -177,7 +177,8 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, if (!isOnlyUsedInComparisonWithZero(CI)) return false; - if (!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL)) + if (!isDereferenceableAndAlignedPointer(Str, Align::None(), APInt(64, Len), + DL)) return false; if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory)) diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp index f819c67d69d0d..1519751197d24 100644 --- a/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -10,66 +10,28 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/SizeOpts.h" - using namespace llvm; -cl::opt EnablePGSO( +static cl::opt ProfileGuidedSizeOpt( "pgso", cl::Hidden, cl::init(true), - cl::desc("Enable the profile guided size optimizations. ")); - -cl::opt PGSOLargeWorkingSetSizeOnly( - "pgso-lwss-only", cl::Hidden, cl::init(true), - cl::desc("Apply the profile guided size optimizations only " - "if the working set size is large (except for cold code.)")); - -cl::opt ForcePGSO( - "force-pgso", cl::Hidden, cl::init(false), - cl::desc("Force the (profiled-guided) size optimizations. 
")); - -cl::opt PgsoCutoffInstrProf( - "pgso-cutoff-instr-prof", cl::Hidden, cl::init(250000), cl::ZeroOrMore, - cl::desc("The profile guided size optimization profile summary cutoff " - "for instrumentation profile.")); - -cl::opt PgsoCutoffSampleProf( - "pgso-cutoff-sample-prof", cl::Hidden, cl::init(800000), cl::ZeroOrMore, - cl::desc("The profile guided size optimization profile summary cutoff " - "for sample profile.")); - -namespace { -struct BasicBlockBFIAdapter { - static bool isFunctionColdInCallGraph(const Function *F, - ProfileSummaryInfo *PSI, - BlockFrequencyInfo &BFI) { - return PSI->isFunctionColdInCallGraph(F, BFI); - } - static bool isFunctionHotInCallGraphNthPercentile(int CutOff, - const Function *F, - ProfileSummaryInfo *PSI, - BlockFrequencyInfo &BFI) { - return PSI->isFunctionHotInCallGraphNthPercentile(CutOff, F, BFI); - } - static bool isColdBlock(const BasicBlock *BB, - ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { - return PSI->isColdBlock(BB, BFI); - } - static bool isHotBlockNthPercentile(int CutOff, - const BasicBlock *BB, - ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { - return PSI->isHotBlockNthPercentile(CutOff, BB, BFI); - } -}; -} // end anonymous namespace + cl::desc("Enable the profile guided size optimization. 
")); -bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI, +bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { - return shouldFuncOptimizeForSizeImpl(F, PSI, BFI); + assert(F); + if (!PSI || !BFI || !PSI->hasProfileSummary()) + return false; + return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI); } -bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI, +bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { - return shouldOptimizeForSizeImpl(BB, PSI, BFI); + assert(BB); + if (!PSI || !BFI || !PSI->hasProfileSummary()) + return false; + return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 0a20ab71dfaaa..f43842be53574 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -741,9 +741,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Arbitrarily try a vector of 2 elements. Type *VecTy = VectorType::get(T, /*NumElements=*/2); assert(VecTy && "did not find vectorized version of stored type"); - unsigned Alignment = getLoadStoreAlignment(ST); + const MaybeAlign Alignment = getLoadStoreAlignment(ST); assert(Alignment && "Alignment should be set"); - if (!TTI->isLegalNTStore(VecTy, Align(Alignment))) { + if (!TTI->isLegalNTStore(VecTy, *Alignment)) { reportVectorizationFailure( "nontemporal store instruction cannot be vectorized", "nontemporal store instruction cannot be vectorized", @@ -758,9 +758,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // supported on the target (arbitrarily try a vector of 2 elements). 
Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); assert(VecTy && "did not find vectorized version of load type"); - unsigned Alignment = getLoadStoreAlignment(LD); + const MaybeAlign Alignment = getLoadStoreAlignment(LD); assert(Alignment && "Alignment should be set"); - if (!TTI->isLegalNTLoad(VecTy, Align(Alignment))) { + if (!TTI->isLegalNTLoad(VecTy, *Alignment)) { reportVectorizationFailure( "nontemporal load instruction cannot be vectorized", "nontemporal load instruction cannot be vectorized", diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 51d0b47714407..8f0bf70f873cf 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1190,16 +1190,16 @@ class LoopVectorizationCostModel { /// Returns true if the target machine supports masked store operation /// for the given \p DataType and kind of access to \p Ptr. - bool isLegalMaskedStore(Type *DataType, Value *Ptr, unsigned Alignment) { + bool isLegalMaskedStore(Type *DataType, Value *Ptr, MaybeAlign Alignment) { return Legal->isConsecutivePtr(Ptr) && - TTI.isLegalMaskedStore(DataType, MaybeAlign(Alignment)); + TTI.isLegalMaskedStore(DataType, Alignment); } /// Returns true if the target machine supports masked load operation /// for the given \p DataType and kind of access to \p Ptr. 
- bool isLegalMaskedLoad(Type *DataType, Value *Ptr, unsigned Alignment) { + bool isLegalMaskedLoad(Type *DataType, Value *Ptr, MaybeAlign Alignment) { return Legal->isConsecutivePtr(Ptr) && - TTI.isLegalMaskedLoad(DataType, MaybeAlign(Alignment)); + TTI.isLegalMaskedLoad(DataType, Alignment); } /// Returns true if the target machine supports masked scatter operation @@ -2359,12 +2359,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Type *ScalarDataTy = getMemInstValueType(Instr); Type *DataTy = VectorType::get(ScalarDataTy, VF); Value *Ptr = getLoadStorePointerOperand(Instr); - unsigned Alignment = getLoadStoreAlignment(Instr); // An alignment of 0 means target abi alignment. We need to use the scalar's // target abi alignment in such a case. const DataLayout &DL = Instr->getModule()->getDataLayout(); - if (!Alignment) - Alignment = DL.getABITypeAlignment(ScalarDataTy); + const Align Alignment = + DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy); unsigned AddressSpace = getLoadStoreAddressSpace(Instr); // Determine if the pointer operand of the access is either consecutive or @@ -2428,8 +2427,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, if (CreateGatherScatter) { Value *MaskPart = isMaskRequired ? 
Mask[Part] : nullptr; Value *VectorGep = getOrCreateVectorValue(Ptr, Part); - NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, - MaskPart); + NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, + Alignment.value(), MaskPart); } else { if (Reverse) { // If we store to reverse consecutive memory locations, then we need @@ -2440,10 +2439,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, } auto *VecPtr = CreateVecPtr(Part, Ptr); if (isMaskRequired) - NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, - Mask[Part]); + NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, + Alignment.value(), Mask[Part]); else - NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); + NewSI = + Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment.value()); } addMetadata(NewSI, SI); } @@ -2458,18 +2458,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, if (CreateGatherScatter) { Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr; Value *VectorGep = getOrCreateVectorValue(Ptr, Part); - NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart, + NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart, nullptr, "wide.masked.gather"); addMetadata(NewLI, LI); } else { auto *VecPtr = CreateVecPtr(Part, Ptr); if (isMaskRequired) - NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part], + NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part], UndefValue::get(DataTy), "wide.masked.load"); else - NewLI = - Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load"); + NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment.value(), + "wide.load"); // Add metadata to the load, but setVectorValue to the reverse shuffle. 
addMetadata(NewLI, LI); @@ -4553,7 +4553,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne return false; auto *Ptr = getLoadStorePointerOperand(I); auto *Ty = getMemInstValueType(I); - unsigned Alignment = getLoadStoreAlignment(I); // We have already decided how to vectorize this instruction, get that // result. if (VF > 1) { @@ -4562,6 +4561,7 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne "Widening decision should be ready at this moment"); return WideningDecision == CM_Scalarize; } + const MaybeAlign Alignment = getLoadStoreAlignment(I); return isa(I) ? !(isLegalMaskedLoad(Ty, Ptr, Alignment) || isLegalMaskedGather(Ty)) : !(isLegalMaskedStore(Ty, Ptr, Alignment) || isLegalMaskedScatter(Ty)); @@ -4607,9 +4607,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I, "Masked interleave-groups for predicated accesses are not enabled."); auto *Ty = getMemInstValueType(I); - unsigned Alignment = getLoadStoreAlignment(I); - return isa(I) ? TTI.isLegalMaskedLoad(Ty, MaybeAlign(Alignment)) - : TTI.isLegalMaskedStore(Ty, MaybeAlign(Alignment)); + const MaybeAlign Alignment = getLoadStoreAlignment(I); + return isa(I) ? TTI.isLegalMaskedLoad(Ty, Alignment) + : TTI.isLegalMaskedStore(Ty, Alignment); } bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I, @@ -5731,7 +5731,6 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, Type *ValTy = getMemInstValueType(I); auto SE = PSE.getSE(); - unsigned Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); Value *Ptr = getLoadStorePointerOperand(I); Type *PtrTy = ToVectorTy(Ptr->getType(), VF); @@ -5745,9 +5744,9 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // Don't pass *I here, since it is scalar but will actually be part of a // vectorized loop where the user of it is a vectorized instruction. 
- Cost += VF * - TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, - AS); + const MaybeAlign Alignment = getLoadStoreAlignment(I); + Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), + Alignment ? Alignment->value() : 0, AS); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. @@ -5772,18 +5771,20 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); Value *Ptr = getLoadStorePointerOperand(I); unsigned AS = getLoadStoreAddressSpace(I); int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && "Stride should be 1 or -1 for consecutive memory access"); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned Cost = 0; if (Legal->isMaskRequired(I)) - Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, + Alignment ? Alignment->value() : 0, AS); else - Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, + Alignment ? Alignment->value() : 0, AS, I); bool Reverse = ConsecutiveStride < 0; if (Reverse) @@ -5795,33 +5796,37 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); if (isa(I)) { return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) + + TTI.getMemoryOpCost(Instruction::Load, ValTy, + Alignment ? 
Alignment->value() : 0, AS) + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); } StoreInst *SI = cast(I); bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand()); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) + - (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost( - Instruction::ExtractElement, - VectorTy, VF - 1)); + TTI.getMemoryOpCost(Instruction::Store, ValTy, + Alignment ? Alignment->value() : 0, AS) + + (isLoopInvariantStoreValue + ? 0 + : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, + VF - 1)); } unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); Value *Ptr = getLoadStorePointerOperand(I); return TTI.getAddressComputationCost(VectorTy) + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, - Legal->isMaskRequired(I), Alignment); + Legal->isMaskRequired(I), + Alignment ? Alignment->value() : 0); } unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, @@ -5868,11 +5873,12 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, // moment. if (VF == 1) { Type *ValTy = getMemInstValueType(I); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); + TTI.getMemoryOpCost(I->getOpcode(), ValTy, + Alignment ? 
Alignment->value() : 0, AS, I); } return getWideningCost(I, VF); } diff --git a/llvm/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll b/llvm/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll index 1c72440a7492c..5260fe90e6b7d 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-trip-count-address-space.ll @@ -65,4 +65,4 @@ for.end: ; preds = %for.cond.for.end_cr ; CHECK: Determining loop execution counts for: @test ; CHECK-NEXT: backedge-taken count is -; CHECK-NEXT: max backedge-taken count is -1 +; CHECK-NEXT: max backedge-taken count is 4294967294 diff --git a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll index 53b882bdcbfe6..e3ba313a690b8 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -97,7 +97,7 @@ for.end: ; preds = %for.cond.for.end_cr ; CHECK: Determining loop execution counts for: @test ; CHECK-NEXT: backedge-taken count is -; CHECK-NEXT: max backedge-taken count is -1 +; CHECK-NEXT: max backedge-taken count is 4294967294 ; PR19799: Indvars miscompile due to an incorrect max backedge taken count from SCEV. 
; CHECK-LABEL: @pr19799 diff --git a/llvm/test/Analysis/ScalarEvolution/sext-mul.ll b/llvm/test/Analysis/ScalarEvolution/sext-mul.ll index 42810be6ed77c..4a10749819712 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-mul.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-mul.ll @@ -11,7 +11,7 @@ ; CHECK: %tmp15 = getelementptr inbounds i32, i32* %arg, i64 %tmp14 ; CHECK-NEXT: --> {{.*}} Exits: (4 + (4 * (sext i32 (-2 + (2 * %arg2)) to i64)) + %arg) ; CHECK:Loop %bb7: backedge-taken count is (-1 + (zext i32 %arg2 to i64)) -; CHECK-NEXT:Loop %bb7: max backedge-taken count is -1 +; CHECK-NEXT:Loop %bb7: max backedge-taken count is 4294967294 ; CHECK-NEXT:Loop %bb7: Predicated backedge-taken count is (-1 + (zext i32 %arg2 to i64)) define void @foo(i32* nocapture %arg, i32 %arg1, i32 %arg2) { @@ -52,7 +52,7 @@ bb7: ; preds = %bb7, %bb3 ; CHECK: %t14 = or i128 %t10, 1 ; CHECK-NEXT: --> {{.*}} Exits: (1 + (sext i127 (-633825300114114700748351602688 + (633825300114114700748351602688 * (zext i32 %arg5 to i127))) to i128)) ; CHECK: Loop %bb7: backedge-taken count is (-1 + (zext i32 %arg5 to i128)) -; CHECK-NEXT: Loop %bb7: max backedge-taken count is -1 +; CHECK-NEXT: Loop %bb7: max backedge-taken count is 4294967294 ; CHECK-NEXT: Loop %bb7: Predicated backedge-taken count is (-1 + (zext i32 %arg5 to i128)) define void @goo(i32* nocapture %arg3, i32 %arg4, i32 %arg5) { diff --git a/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll b/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll index 7637ae92116f2..fb8f59fe42520 100644 --- a/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll +++ b/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll @@ -9,15 +9,15 @@ define void @umin_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: 
[0,4294967296) Exits: ((zext i32 %len to i64) umin (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + ((zext i32 %len to i64) umin (sext i32 %len to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + (zext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %and = and i1 %cmp1, %cmp2 ; CHECK-NEXT: --> %and U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @umin_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) umin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is 4294967295 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) umin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; @@ -44,15 +44,15 @@ define void @ule_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) umin (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: 
[1,4294967297) S: [1,4294967297) Exits: (1 + ((zext i32 %len to i64) umin (sext i32 %len to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + (zext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %sel = select i1 %cmp1, i64 %len.zext, i64 %len.sext -; CHECK-NEXT: --> ((zext i32 %len to i64) umin (sext i32 %len to i64)) U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) umin (sext i32 %len to i64)) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> (zext i32 %len to i64) U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @ule_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) umin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is 4294967295 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) umin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; @@ -79,15 +79,15 @@ define void @uge_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((zext i32 %len to i64) umax (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((zext i32 %len to i64) umax (sext i32 %len 
to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %sel = select i1 %cmp1, i64 %len.zext, i64 %len.sext -; CHECK-NEXT: --> ((zext i32 %len to i64) umax (sext i32 %len to i64)) U: full-set S: full-set Exits: ((zext i32 %len to i64) umax (sext i32 %len to i64)) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @uge_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) umax (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (sext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) umax (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (sext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; @@ -114,15 +114,15 @@ define void @ult_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) umin (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + ((zext i32 %len to i64) umin (sext i32 %len to i64))) LoopDispositions: { %loop: Computable } +; 
CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + (zext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = select i1 %cmp1, i64 %len.zext, i64 %len.sext -; CHECK-NEXT: --> ((zext i32 %len to i64) umin (sext i32 %len to i64)) U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) umin (sext i32 %len to i64)) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> (zext i32 %len to i64) U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @ult_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) umin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is 4294967295 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) umin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; @@ -149,15 +149,15 @@ define void @ugt_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((zext i32 %len to i64) umax (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((zext i32 %len to i64) umax (sext i32 %len to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to 
i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umax = select i1 %cmp1, i64 %len.zext, i64 %len.sext -; CHECK-NEXT: --> ((zext i32 %len to i64) umax (sext i32 %len to i64)) U: full-set S: full-set Exits: ((zext i32 %len to i64) umax (sext i32 %len to i64)) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @ugt_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) umax (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (sext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) umax (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (sext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; @@ -184,15 +184,15 @@ define void @sle_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) umin (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + ((zext i32 %len to i64) umin (sext i32 %len to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + (zext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; 
CHECK-NEXT: %sel = select i1 %cmp1, i64 %len.zext, i64 %len.sext -; CHECK-NEXT: --> ((zext i32 %len to i64) umin (sext i32 %len to i64)) U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) umin (sext i32 %len to i64)) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> (zext i32 %len to i64) U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @sle_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) umin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is 4294967295 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) umin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; @@ -219,15 +219,15 @@ define void @sge_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) smax (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + ((zext i32 %len to i64) smax (sext i32 %len to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + (zext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %sel = select i1 %cmp1, i64 %len.zext, 
i64 %len.sext -; CHECK-NEXT: --> ((zext i32 %len to i64) smax (sext i32 %len to i64)) U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) smax (sext i32 %len to i64)) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> (zext i32 %len to i64) U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @sge_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) smax (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is 4294967295 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) smax (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; @@ -254,15 +254,15 @@ define void @slt_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((zext i32 %len to i64) smin (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((zext i32 %len to i64) smin (sext i32 %len to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = select i1 %cmp1, i64 %len.zext, i64 %len.sext -; CHECK-NEXT: --> ((zext i32 %len to i64) smin (sext i32 %len to i64)) U: 
[-2147483648,2147483648) S: [-2147483648,2147483648) Exits: ((zext i32 %len to i64) smin (sext i32 %len to i64)) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @slt_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) smin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (sext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) smin (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (sext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; @@ -289,15 +289,15 @@ define void @sgt_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: ((zext i32 %len to i64) smax (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + ((zext i32 %len to i64) smax (sext i32 %len to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,4294967297) S: [1,4294967297) Exits: (1 + (zext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umax = select i1 %cmp1, i64 %len.zext, i64 %len.sext -; CHECK-NEXT: --> ((zext i32 %len to i64) smax (sext i32 %len to i64)) U: [0,4294967296) S: 
[0,4294967296) Exits: ((zext i32 %len to i64) smax (sext i32 %len to i64)) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> (zext i32 %len to i64) U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @sgt_sext_x_zext_x -; CHECK-NEXT: Loop %loop: backedge-taken count is ((zext i32 %len to i64) smax (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Loop %loop: max backedge-taken count is 4294967295 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %len to i64) smax (sext i32 %len to i64)) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (zext i32 %len to i64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 ; diff --git a/llvm/test/CodeGen/AArch64/wrong-callee-save-size-after-livedebugvariables.mir b/llvm/test/CodeGen/AArch64/wrong-callee-save-size-after-livedebugvariables.mir new file mode 100644 index 0000000000000..231de4e189664 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wrong-callee-save-size-after-livedebugvariables.mir @@ -0,0 +1,92 @@ +# RUN: llc -start-before=prologepilog -filetype=obj -o %t %s +# RUN: llvm-dwarfdump --name=obj1 %t | FileCheck %s --check-prefix=CHECKDWARF1 +# RUN: llvm-dwarfdump --name=obj2 %t | FileCheck %s --check-prefix=CHECKDWARF2 +# RUN: llvm-objdump --disassemble %t | FileCheck %s --check-prefix=CHECKASM +# +# Test that the location for obj1 and obj2 in the debug information is +# the same as the location used by load instructions. 
+# +# CHECKDWARF1: DW_AT_location (DW_OP_fbreg -1) +# CHECKDWARF2: DW_AT_location (DW_OP_fbreg -2) +# CHECKASM: ldurb w0, [x29, #-1] +# CHECKASM: ldurb w1, [x29, #-2] +--- | + ; ModuleID = 'wrong-callee-save-size-after-livedebugvariables.c' + source_filename = "wrong-callee-save-size-after-livedebugvariables.c" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-unknown-linux-gnu" + + ; Function Attrs: noinline nounwind optnone + define dso_local i8 @foo() #0 !dbg !7 { + entry: + %obj1 = alloca i8, align 1 + %obj2 = alloca i8, align 1 + %obj3 = alloca [238 x i8], align 1 + ret i8 undef, !dbg !24 + } + + declare dso_local i8 @bar(i8, i8, i8*) #0 + + attributes #0 = { noinline nounwind optnone "frame-pointer"="all" } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + !llvm.ident = !{!6} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) + !1 = !DIFile(filename: "wrong-callee-save-size-after-livedebugvariables.c", directory: "") + !2 = !{} + !3 = !{i32 2, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = !{!"clang version 10.0.0"} + !7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !8 = !DISubroutineType(types: !9) + !9 = !{!10} + !10 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_unsigned_char) + !11 = !DILocalVariable(name: "obj1", scope: !7, file: !1, line: 4, type: !10) + !12 = !DILocation(line: 4, column: 8, scope: !7) + !13 = !DILocalVariable(name: "obj2", scope: !7, file: !1, line: 5, type: !10) + !14 = !DILocation(line: 5, column: 8, scope: !7) + !15 = !DILocalVariable(name: "obj3", scope: !7, file: !1, line: 6, type: !16) + !16 = !DICompositeType(tag: 
DW_TAG_array_type, baseType: !10, size: 1904, elements: !17) + !17 = !{!18} + !18 = !DISubrange(count: 238) + !19 = !DILocation(line: 6, column: 8, scope: !7) + !20 = !DILocation(line: 7, column: 14, scope: !7) + !21 = !DILocation(line: 7, column: 20, scope: !7) + !22 = !DILocation(line: 7, column: 27, scope: !7) + !23 = !DILocation(line: 7, column: 10, scope: !7) + !24 = !DILocation(line: 7, column: 3, scope: !7) + +... +--- +name: foo +tracksRegLiveness: true +frameInfo: + hasCalls: true +fixedStack: [] +stack: + - { id: 0, name: obj1, type: default, offset: 0, size: 1, alignment: 1, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -1, debug-info-variable: '!11', debug-info-expression: '!DIExpression()', + debug-info-location: '!12' } + - { id: 1, name: obj2, type: default, offset: 0, size: 1, alignment: 1, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -2, debug-info-variable: '!13', debug-info-expression: '!DIExpression()', + debug-info-location: '!14' } + - { id: 2, name: obj3, type: default, offset: 0, size: 238, alignment: 1, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -240, debug-info-variable: '!15', debug-info-expression: '!DIExpression()', + debug-info-location: '!19' } +body: | + bb.1.entry: + renamable $x2 = ADDXri %stack.2.obj3, 0, 0 + renamable $w0 = LDRBBui %stack.0.obj1, 0, debug-location !20 :: (load 1 from %ir.obj1) + renamable $w1 = LDRBBui %stack.1.obj2, 0, debug-location !21 :: (load 1 from %ir.obj2) + ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp, debug-location !23 + BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $x2, implicit-def $w0, debug-location !23 + ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp, debug-location !23 + RET_ReallyLR implicit killed $w0, debug-location !24 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/call-constant.ll b/llvm/test/CodeGen/AMDGPU/call-constant.ll new file mode 100644 index 0000000000000..19aadfc96ad13 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/call-constant.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s + +; FIXME: Emitting unnecessary flat_scratch setup + +; GCN-LABEL: {{^}}test_call_undef: +; GCN: s_mov_b32 s8, s7 +; GCN: s_mov_b32 flat_scratch_lo, s5 +; GCN: s_add_u32 s4, s4, s8 +; GCN: s_lshr_b32 +; GCN: s_endpgm +define amdgpu_kernel void @test_call_undef() #0 { + %val = call i32 undef(i32 1) + %op = add i32 %val, 1 + store volatile i32 %op, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}test_tail_call_undef: +; GCN: s_waitcnt +; GCN-NEXT: .Lfunc_end +define i32 @test_tail_call_undef() #0 { + %call = tail call i32 undef(i32 1) + ret i32 %call +} + +; GCN-LABEL: {{^}}test_call_null: +; GCN: s_mov_b32 s8, s7 +; GCN: s_mov_b32 flat_scratch_lo, s5 +; GCN: s_add_u32 s4, s4, s8 +; GCN: s_lshr_b32 +; GCN: s_endpgm +define amdgpu_kernel void @test_call_null() #0 { + %val = call i32 null(i32 1) + %op = add i32 %val, 1 + store volatile i32 %op, i32 addrspace(1)* null + ret void +} + +; GCN-LABEL: {{^}}test_tail_call_null: +; GCN: s_waitcnt +; GCN-NEXT: .Lfunc_end +define i32 @test_tail_call_null() #0 { + %call = tail call i32 null(i32 1) + ret i32 %call +} diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index 47ca6054f5237..1cdf1d391f2ea 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -44,7 +44,10 @@ done: ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset: ; GCN: s_and_saveexec_b64 ; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} -; GFX9: global_load_sbyte {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, off{{$}} + +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xf000, +; GFX9: 
v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, +; GFX9: global_load_sbyte {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, off offset:4095{{$}} ; GCN: {{^}}BB1_2: ; GCN: s_or_b64 exec define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { diff --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll index 05e7626878912..ee16e7ebc8ed4 100644 --- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll +++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll @@ -245,7 +245,6 @@ define amdgpu_kernel void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0 ; SI-NEXT: s_mov_b32 s6, s10 ; SI-NEXT: s_mov_b32 s7, s11 ; SI-NEXT: s_movk_i32 s1, 0xff -; SI-NEXT: s_movk_i32 s2, 0x900 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -257,7 +256,7 @@ define amdgpu_kernel void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0 ; SI-NEXT: v_add_i32_e32 v1, vcc, 9, v1 ; SI-NEXT: v_or_b32_e32 v0, v2, v0 ; SI-NEXT: v_and_b32_e32 v1, s1, v1 -; SI-NEXT: v_add_i32_e32 v0, vcc, s2, v0 +; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 ; SI-NEXT: v_or_b32_e32 v1, v3, v1 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 @@ -326,7 +325,6 @@ define amdgpu_kernel void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %o ; SI-NEXT: buffer_load_dword v0, v[0:1], s[12:15], 0 addr64 ; SI-NEXT: s_mov_b32 s16, 0xff00 ; SI-NEXT: s_movk_i32 s17, 0xff -; SI-NEXT: s_movk_i32 s18, 0x900 ; SI-NEXT: s_mov_b32 s10, -1 ; SI-NEXT: s_mov_b32 s8, s4 ; SI-NEXT: s_mov_b32 s9, s5 @@ -345,7 +343,7 @@ define amdgpu_kernel void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %o ; SI-NEXT: v_and_b32_e32 v3, s17, v3 ; SI-NEXT: v_or_b32_e32 v2, v2, v3 ; SI-NEXT: v_and_b32_e32 v1, s17, v1 -; SI-NEXT: v_add_i32_e32 v2, vcc, s18, v2 +; SI-NEXT: v_add_i32_e32 v2, vcc, 0x900, v2 ; SI-NEXT: v_or_b32_e32 v1, v4, v1 ; SI-NEXT: v_and_b32_e32 v2, 
0xffff, v2 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 4f693e204b632..c65d7fc02d97d 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -280,7 +280,6 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n ; SI-NEXT: s_mov_b32 s10, s2 ; SI-NEXT: s_mov_b32 s11, s3 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; SI-NEXT: s_movk_i32 s13, 0x900 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v1 ; SI-NEXT: v_add_i32_e32 v7, vcc, 9, v1 @@ -298,7 +297,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n ; SI-NEXT: v_or_b32_e32 v0, v6, v7 ; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; SI-NEXT: v_and_b32_e32 v1, s12, v4 -; SI-NEXT: v_add_i32_e32 v0, vcc, s13, v0 +; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 ; SI-NEXT: v_or_b32_e32 v1, v5, v1 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 diff --git a/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll index e2c7f1c47cf9f..5997e27fd815e 100644 --- a/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -7,8 +7,6 @@ declare void @llvm.amdgcn.s.barrier() #1 ; Function Attrs: nounwind ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop: -; SI: s_movk_i32 [[K_0X88:s[0-9]+]], 0x -; SI: s_movk_i32 [[K_0X100:s[0-9]+]], 0x100 ; CHECK: BB0_1: ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]], ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] @@ -16,9 +14,9 @@ declare void @llvm.amdgcn.s.barrier() #1 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR8]] ; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]] -; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, 
[[K_0X88]], [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, 0x88, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x88]] -; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, [[K_0X100]], [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]] ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:2 diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll index 6f7fb53f76ced..23c96373dcb05 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll @@ -4,7 +4,7 @@ ; Test for ICE in SelectionDAG::computeKnownBits when visiting EXTRACT_SUBVECTOR ; with DemandedElts already as wide as the source vector. -define <3 x i32> @quux() #0 { +define <3 x i32> @quux() { ; CHECK-LABEL: quux: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -26,5 +26,3 @@ bb: %tmp10 = lshr <3 x i32> %tmp9, ret <3 x i32> %tmp10 } - -attributes #0 = { noinline optnone } diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll index 7de4f1796b08a..8f5a06d01fa22 100644 --- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll @@ -54,8 +54,7 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) { } ; GCN-LABEL: {{^}}test_global -; GCN: s_movk_i32 [[K:s[0-9]+]], 0x888 -; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}} +; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}} ; GCN: flat_store_dword ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll index 5c45528f9df6d..e48e8c96cb5d0 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll @@ -1,14 
+1,7 @@ -; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s -; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s -; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s -; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s - -; Disable optimizations in case there are optimizations added that -; specialize away generic pointer accesses. - - -; These testcases might become useless when there are optimizations to -; remove generic pointers. +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI,HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s ; CHECK-LABEL: {{^}}store_flat_i32: ; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]], @@ -191,7 +184,11 @@ define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8* %fptr, i8 %x) #0 { } ; CHECK-LABEL: {{^}}store_flat_i8_neg_offset: -; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}} +; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}} + +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v +; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, +; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4094{{$}} define amdgpu_kernel void @store_flat_i8_neg_offset(i8* %fptr, i8 %x) #0 { %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2 store volatile i8 %x, i8* %fptr.offset @@ -216,7 +213,11 @@ define amdgpu_kernel void 
@load_flat_i8_max_offset_p1(i8* %fptr) #0 { } ; CHECK-LABEL: {{^}}load_flat_i8_neg_offset: -; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}} +; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}} + +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v +; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, +; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4094{{$}} define amdgpu_kernel void @load_flat_i8_neg_offset(i8* %fptr) #0 { %fptr.offset = getelementptr inbounds i8, i8* %fptr, i64 -2 %val = load volatile i8, i8* %fptr.offset diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr.ll b/llvm/test/CodeGen/AMDGPU/global-saddr.ll index b21fd98522679..4df1ad683df64 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr.ll @@ -46,7 +46,7 @@ entry: ; Test various offset boundaries. ; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}} -; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4088{{$}} ; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:2040{{$}} %gep11 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 511 %load11 = load i64, i64 addrspace(1)* %gep11 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll index 72f748578b342..618c70083077d 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -25,7 +25,9 @@ entry: ; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0 ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} -; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}} +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0x8000, +; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc +; 
GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3232{{$}} define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000 @@ -40,7 +42,10 @@ entry: ; VI: flat_atomic_add -; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off{{$}} +; GFX9: v_mov_b32_e32 [[HIGH_K:v[0-9]+]], 0xabcd +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xd000, +; GFX9-NEXT: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, [[HIGH_K]], v{{[0-9]+}}, vcc +; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off offset:3756{{$}} define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll index 0ef58fc1fb82d..b996646a098a1 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll @@ -991,7 +991,9 @@ entry: ; CIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x11940 ; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} -; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0x11000, +; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc +; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:2368{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(1)* %out, i64 %in, i64 %old) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 9000 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll index a7a8fa7f2ccfc..9c39593d90379 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll @@ -114,7 +114,7 @@ define 
amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 { ; LOOP: s_mov_b32 m0, -1 ; LOOP: ds_write_b32 define amdgpu_kernel void @gws_init_save_m0_init_constant_offset(i32 %val) #0 { - store i32 1, i32 addrspace(3)* @lds + store volatile i32 1, i32 addrspace(3)* @lds call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 10) store i32 2, i32 addrspace(3)* @lds ret void diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll new file mode 100644 index 0000000000000..f7538c081e6d4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll @@ -0,0 +1,1470 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +; Test splitting flat instruction offsets into the low and high bits +; when the offset doesn't fit in the offset field. 
+ +define i8 @flat_inst_valu_offset_1(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, 1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 1 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_11bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 2047 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_12bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: 
flat_inst_valu_offset_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 4095 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_13bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 8191 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_neg_11bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -2048 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_neg_12bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -4096 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_neg_13bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -8192 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_2x_11bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 4095 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_2x_12bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_2x_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, 
v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 8191 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_2x_13bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_2x_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x3fff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 16383 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_2x_neg_11bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; 
GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -4096 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_2x_neg_12bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -8192 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define i8 @flat_inst_valu_offset_2x_neg_13bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; 
GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -16384 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 11-bit low-bits (1ull << 33) | 2047 +define i8 @flat_inst_valu_offset_64bit_11bit_split0(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 8589936639 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 11-bit low-bits (1ull << 33) | 2048 +define i8 @flat_inst_valu_offset_64bit_11bit_split1(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 8589936640 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 12-bit low-bits (1ull << 33) | 4095 +define i8 @flat_inst_valu_offset_64bit_12bit_split0(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 8589938687 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 12-bit low-bits (1ull << 33) | 4096 +define i8 @flat_inst_valu_offset_64bit_12bit_split1(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, 
vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 8589938688 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 13-bit low-bits (1ull << 33) | 8191 +define i8 @flat_inst_valu_offset_64bit_13bit_split0(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 8589942783 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 13-bit low-bits (1ull << 33) | 8192 +define i8 @flat_inst_valu_offset_64bit_13bit_split1(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi 
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 8589942784 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 +define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -9223372036854773761 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 +define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: 
flat_inst_valu_offset_64bit_11bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -9223372036854773760 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 +define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -9223372036854771713 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 +define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: 
v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -9223372036854771712 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 +define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -9223372036854767617 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +; Fill 13-bit low-bits, 
negative high bits (1ull << 63) | 8192 +define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(i8* %p) { +; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8* %p, i64 -9223372036854767616 + %load = load i8, i8* %gep, align 4 + ret i8 %load +} + +define amdgpu_kernel void @flat_inst_salu_offset_1(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 1 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; 
GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 1 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 2047 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xfff +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; 
GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 4095 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 8191 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff800 +; GFX10-NEXT: s_addc_u32 s1, s1, -1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -2048 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000 +; GFX10-NEXT: s_addc_u32 s1, s1, -1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -4096 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(i8* %p) { +; 
GFX9-LABEL: flat_inst_salu_offset_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000 +; GFX10-NEXT: s_addc_u32 s1, s1, -1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -8192 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_2x_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xfff +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 4095 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_2x_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_2x_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 8191 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_2x_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: 
flat_inst_salu_offset_2x_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x3fff +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 16383 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000 +; GFX10-NEXT: s_addc_u32 s1, s1, -1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -4096 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000 +; GFX10-NEXT: s_addc_u32 s1, s1, -1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -8192 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xffffc000 +; GFX10-NEXT: s_addc_u32 s1, s1, -1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, 
v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -16384 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 11-bit low-bits (1ull << 33) | 2047 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff +; GFX10-NEXT: s_addc_u32 s1, s1, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 8589936639 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 11-bit low-bits (1ull << 33) | 2048 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; 
GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x800 +; GFX10-NEXT: s_addc_u32 s1, s1, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 8589936640 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 12-bit low-bits (1ull << 33) | 4095 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xfff +; GFX10-NEXT: s_addc_u32 s1, s1, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 8589938687 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 12-bit low-bits (1ull << 33) | 4096 +define amdgpu_kernel 
void @flat_inst_salu_offset_64bit_12bit_split1(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x1000 +; GFX10-NEXT: s_addc_u32 s1, s1, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 8589938688 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 13-bit low-bits (1ull << 33) | 8191 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff +; GFX10-NEXT: s_addc_u32 s1, s1, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 8589942783 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 13-bit low-bits (1ull << 33) | 8192 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x2000 +; GFX10-NEXT: s_addc_u32 s1, s1, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 8589942784 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; 
GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff +; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -9223372036854773761 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x800 +; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 +; 
GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -9223372036854773760 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0xfff +; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -9223372036854771713 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: 
v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x1000 +; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -9223372036854771712 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 
0x1fff +; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -9223372036854767617 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} + +; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192 +define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(i8* %p) { +; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: flat_load_ubyte v0, v[0:1] +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NEXT: flat_store_byte v[0:1], v0 +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s0, s0, 0x2000 +; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: flat_load_ubyte v0, v[0:1] +; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-NEXT: flat_store_byte v[0:1], v0 +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8* %p, i64 -9223372036854767616 + %load = load volatile i8, i8* %gep, align 1 + store i8 %load, i8* undef + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll new file mode 100644 index 0000000000000..add4e687926b1 --- /dev/null +++ 
b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll @@ -0,0 +1,1408 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s + +; Test splitting flat instruction offsets into the low and high bits +; when the offset doesn't fit in the offset field. + +define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret 
i8 %load +} + +define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) { +; 
GFX9-LABEL: global_inst_valu_offset_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; 
GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX10-LABEL: global_inst_valu_offset_2x_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x3800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max: +; GFX10: ; 
%bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: 
global_inst_valu_offset_2x_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 11-bit low-bits (1ull << 33) | 2047 +define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 11-bit low-bits (1ull << 33) | 2048 +define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; 
GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 12-bit low-bits (1ull << 33) | 4095 +define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 12-bit low-bits (1ull << 33) | 4096 +define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 13-bit low-bits (1ull << 33) | 8191 +define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 13-bit low-bits (1ull << 33) | 8192 +define i8 
@global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 +define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; 
GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 +define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 +define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: +; GFX10: ; 
%bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 +define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 +define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617 + %load = load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192 +define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616 + %load = 
load i8, i8 addrspace(1)* %gep, align 4 + ret i8 %load +} + +define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off 
+; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 
offset:-4096 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel 
void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0 +; 
GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x3800, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: 
global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_2x_neg_11bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void 
@global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffc000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 11-bit low-bits (1ull << 33) | 2047 +define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 11-bit low-bits (1ull << 33) | 2048 +define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 12-bit low-bits (1ull << 33) | 4095 +define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) 
+; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 12-bit low-bits (1ull << 33) | 4096 +define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: 
global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 13-bit low-bits (1ull << 33) | 8191 +define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 13-bit low-bits (1ull << 33) | 8192 +define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, 
v1, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x2000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 +define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; 
GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 +define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 +define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 +define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; 
GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 +define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} + +; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192 +define amdgpu_kernel 
void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) { +; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_byte v[0:1], v0, off +; GFX10-NEXT: s_endpgm + %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616 + %load = load volatile i8, i8 addrspace(1)* %gep, align 1 + store i8 %load, i8 addrspace(1)* undef + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index a9fc318ce0e11..14e65fe0ee6ca 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -14,14 +14,15 @@ define amdgpu_kernel void @clmem_read_simplified(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], 
v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 + entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -75,15 +76,15 @@ define hidden amdgpu_kernel void @clmem_read(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 
v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off entry: @@ -184,7 +185,7 @@ while.end: ; preds = %while.cond.loopexit } ; using 32bit address. -define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) { +define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) { ; GCN-LABEL: Address32: ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] @@ -197,16 +198,16 @@ define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072 -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-4096 -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-3072 -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-1024 -; GFX9: global_load_dword 
{{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024 +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072 +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -265,10 +266,10 @@ define amdgpu_kernel void @Offset64(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -306,10 +307,10 @@ define amdgpu_kernel void @p32Offset64(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-1024 -; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dword {{v[0-9]+}}, 
v[{{[0-9]+:[0-9]+}}], off offset:3072 +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}} entry: %call = tail call i64 @_Z13get_global_idj(i32 0) %conv = and i64 %call, 255 @@ -347,13 +348,13 @@ define amdgpu_kernel void @DiffBase(i8 addrspace(1)* %buffer1, ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] -; -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 + +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} i8 addrspace(1)* %buffer2) { entry: %call = tail call i64 @_Z13get_global_idj(i32 0) @@ -403,13 +404,13 @@ define amdgpu_kernel void @ReverseOrder(i8 addrspace(1)* %buffer) { ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] ; -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: 
global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096 -; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 +; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}} ; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048 entry: %call = tail call i64 @_Z13get_global_idj(i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll index 3791337446c22..e8d6b24efd3eb 100644 --- a/llvm/test/CodeGen/AMDGPU/store-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900 %s -; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX906,NO-D16-HI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX906,GFX9,NO-D16-HI %s ; RUN: llc -march=amdgcn 
-mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s ; GCN-LABEL: {{^}}store_global_hi_v2i16: @@ -311,16 +311,16 @@ entry: ; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset: ; GCN: s_waitcnt -; GCN: v_add{{(_co)?}}_{{i|u}}32_e32 - +; GFX803: v_add{{(_co)?}}_{{i|u}}32_e32 ; GFX803: v_addc_u32_e32 -; GFX900: v_addc_co_u32_e32 -; GFX906-NEXT: v_lshrrev_b32_e32 -; GFX906-NEXT: v_addc_co_u32_e32 -; GFX906: flat_store_short v[0:1], v2 +; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v +; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v -; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}} +; GFX906-DAG: v_lshrrev_b32_e32 +; GFX906: flat_store_short v[0:1], v2 offset:2050{{$}} + +; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:2050{{$}} ; GFX803: flat_store_short v[0:1], v2{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 @@ -359,17 +359,17 @@ entry: ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset: ; GCN: s_waitcnt -; GCN-DAG: v_add{{(_co)?}}_{{i|u}}32_e32 +; GFX803-DAG: v_add_u32_e32 ; GFX803-DAG: v_addc_u32_e32 -; GFX900-DAG: v_addc_co_u32_e32 -; GFX906-DAG: v_add_co_u32_e32 -; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}} +; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v +; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v{{[0-9]+}}, vcc -; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX906-NEXT: v_addc_co_u32_e32 -; GFX906-NEXT: flat_store_byte v[0:1], v2{{$}} +; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:1{{$}} + +; GFX906-DAG: v_lshrrev_b32_e32 v2, 16, v2 +; GFX906: flat_store_byte v[0:1], v2 offset:1{{$}} ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2 ; GFX803: flat_store_byte v[0:1], v2{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll index 303a0d6a1140d..3cff62735a102 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll +++ 
b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll @@ -74,3 +74,13 @@ define amdgpu_ps i32 @test_call_from_shader() { %call = call i32 @defined_function(i32 0) ret i32 %call } + +; FIXME: Bad error message +; GCN: error: :0:0: in function test_call_absolute void (): unsupported indirect call to function +; R600: error: :0:0: in function test_call_absolute void (): unsupported call to function +define amdgpu_kernel void @test_call_absolute() #0 { + %val = call i32 inttoptr (i64 1234 to i32(i32)*) (i32 1) + %op = add i32 %val, 1 + store volatile i32 %op, i32 addrspace(1)* undef + ret void +} diff --git a/llvm/test/CodeGen/ARM/qdadd.ll b/llvm/test/CodeGen/ARM/qdadd.ll new file mode 100644 index 0000000000000..94442ca93afad --- /dev/null +++ b/llvm/test/CodeGen/ARM/qdadd.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP +; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP +; RUN: llc < %s -mtriple=armv5te-none-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM6 +; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM8 + +define i32 @qdadd(i32 %x, i32 %y) nounwind { +; CHECK-T2NODSP-LABEL: qdadd: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r7, lr} +; CHECK-T2NODSP-NEXT: push {r7, lr} +; CHECK-T2NODSP-NEXT: movs r3, #0 +; CHECK-T2NODSP-NEXT: adds.w r12, r0, r0 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r3, #1 +; CHECK-T2NODSP-NEXT: cmp r3, #0 +; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648 +; CHECK-T2NODSP-NEXT: mov.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r12, r0 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r3, r12 +; CHECK-T2NODSP-NEXT: adds r0, r3, r1 +; 
CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi.w lr, #1 +; CHECK-T2NODSP-NEXT: cmp.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r0, r3 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r2, r0 +; CHECK-T2NODSP-NEXT: mov r0, r2 +; CHECK-T2NODSP-NEXT: pop {r7, pc} +; +; CHECK-T2DSP-LABEL: qdadd: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qdadd r0, r0, r1 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: qdadd: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: qdadd r0, r0, r1 +; CHECK-ARM-NEXT: bx lr + %z = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x) + %tmp = call i32 @llvm.sadd.sat.i32(i32 %z, i32 %y) + ret i32 %tmp +} + +define i32 @qdadd_c(i32 %x, i32 %y) nounwind { +; CHECK-T2NODSP-LABEL: qdadd_c: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r7, lr} +; CHECK-T2NODSP-NEXT: push {r7, lr} +; CHECK-T2NODSP-NEXT: movs r3, #0 +; CHECK-T2NODSP-NEXT: adds.w r12, r0, r0 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r3, #1 +; CHECK-T2NODSP-NEXT: cmp r3, #0 +; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648 +; CHECK-T2NODSP-NEXT: mov.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r12, r0 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r3, r12 +; CHECK-T2NODSP-NEXT: adds r0, r1, r3 +; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi.w lr, #1 +; CHECK-T2NODSP-NEXT: cmp.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r2, r0 +; CHECK-T2NODSP-NEXT: mov r0, r2 +; CHECK-T2NODSP-NEXT: pop {r7, pc} +; +; CHECK-T2DSP-LABEL: qdadd_c: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qdadd r0, r0, r1 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: qdadd_c: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: 
qdadd r0, r0, r1 +; CHECK-ARM-NEXT: bx lr + %z = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x) + %tmp = call i32 @llvm.sadd.sat.i32(i32 %y, i32 %z) + ret i32 %tmp +} + +define i32 @qdsub(i32 %x, i32 %y) nounwind { +; CHECK-T2NODSP-LABEL: qdsub: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r7, lr} +; CHECK-T2NODSP-NEXT: push {r7, lr} +; CHECK-T2NODSP-NEXT: movs r3, #0 +; CHECK-T2NODSP-NEXT: adds.w r12, r0, r0 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r3, #1 +; CHECK-T2NODSP-NEXT: cmp r3, #0 +; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648 +; CHECK-T2NODSP-NEXT: mov.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r12, r0 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r3, r12 +; CHECK-T2NODSP-NEXT: subs r0, r1, r3 +; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi.w lr, #1 +; CHECK-T2NODSP-NEXT: cmp.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r1, r3 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r2, r0 +; CHECK-T2NODSP-NEXT: mov r0, r2 +; CHECK-T2NODSP-NEXT: pop {r7, pc} +; +; CHECK-T2DSP-LABEL: qdsub: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qdsub r0, r1, r0 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: qdsub: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: qdsub r0, r1, r0 +; CHECK-ARM-NEXT: bx lr + %z = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x) + %tmp = call i32 @llvm.ssub.sat.i32(i32 %y, i32 %z) + ret i32 %tmp +} + +define i32 @qdsub_c(i32 %x, i32 %y) nounwind { +; CHECK-T2NODSP-LABEL: qdsub_c: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r7, lr} +; CHECK-T2NODSP-NEXT: push {r7, lr} +; CHECK-T2NODSP-NEXT: movs r3, #0 +; CHECK-T2NODSP-NEXT: adds.w r12, r0, r0 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r3, #1 +; CHECK-T2NODSP-NEXT: cmp r3, #0 +; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648 +; CHECK-T2NODSP-NEXT: 
mov.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r12, r0 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r3, r12 +; CHECK-T2NODSP-NEXT: subs r0, r3, r1 +; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi.w lr, #1 +; CHECK-T2NODSP-NEXT: cmp.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r3, r1 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r2, r0 +; CHECK-T2NODSP-NEXT: mov r0, r2 +; CHECK-T2NODSP-NEXT: pop {r7, pc} +; +; CHECK-T2DSP-LABEL: qdsub_c: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qadd r0, r0, r0 +; CHECK-T2DSP-NEXT: qsub r0, r0, r1 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARM-LABEL: qdsub_c: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: qadd r0, r0, r0 +; CHECK-ARM-NEXT: qsub r0, r0, r1 +; CHECK-ARM-NEXT: bx lr + %z = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x) + %tmp = call i32 @llvm.ssub.sat.i32(i32 %z, i32 %y) + ret i32 %tmp +} + +declare i32 @llvm.sadd.sat.i32(i32, i32) +declare i32 @llvm.ssub.sat.i32(i32, i32) diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll index e56bd420bb536..386e750ddd1b8 100644 --- a/llvm/test/CodeGen/ARM/sadd_sat.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat.ll @@ -2,7 +2,9 @@ ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 ; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP ; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP -; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM +; RUN: llc < %s -mtriple=armv5t-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMNODPS +; RUN: llc < %s -mtriple=armv5te-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP +; RUN: llc < %s 
-mtriple=armv6-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMDSP declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) @@ -41,34 +43,49 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; CHECK-T1-NEXT: .LCPI0_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; -; CHECK-T2-LABEL: func: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: adds r2, r0, r1 -; CHECK-T2-NEXT: mov.w r3, #0 -; CHECK-T2-NEXT: mov.w r1, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r3, #1 -; CHECK-T2-NEXT: cmp r3, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r1, #-2147483648 -; CHECK-T2-NEXT: cmp r2, r0 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r1, r2 -; CHECK-T2-NEXT: mov r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: adds r2, r0, r1 +; CHECK-T2NODSP-NEXT: mov.w r3, #0 +; CHECK-T2NODSP-NEXT: mov.w r1, #-2147483648 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r3, #1 +; CHECK-T2NODSP-NEXT: cmp r3, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r1, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r2, r0 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r1, r2 +; CHECK-T2NODSP-NEXT: mov r0, r1 +; CHECK-T2NODSP-NEXT: bx lr ; -; CHECK-ARM-LABEL: func: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: adds r2, r0, r1 -; CHECK-ARM-NEXT: mov r3, #0 -; CHECK-ARM-NEXT: movwmi r3, #1 -; CHECK-ARM-NEXT: mov r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r3, #0 -; CHECK-ARM-NEXT: mvnne r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r2, r0 -; CHECK-ARM-NEXT: movvc r1, r2 -; CHECK-ARM-NEXT: mov r0, r1 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: func: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qadd r0, r0, r1 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: func: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: adds r2, r0, r1 +; CHECK-ARMNODPS-NEXT: mov r3, #0 +; CHECK-ARMNODPS-NEXT: movmi r3, #1 +; CHECK-ARMNODPS-NEXT: mov r1, #-2147483648 +; CHECK-ARMNODPS-NEXT: 
cmp r3, #0 +; CHECK-ARMNODPS-NEXT: mvnne r1, #-2147483648 +; CHECK-ARMNODPS-NEXT: cmp r2, r0 +; CHECK-ARMNODPS-NEXT: movvc r1, r2 +; CHECK-ARMNODPS-NEXT: mov r0, r1 +; CHECK-ARMNODPS-NEXT: bx lr +; +; CHECK-ARMBASEDSP-LABEL: func: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: qadd r0, r0, r1 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: func: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: qadd r0, r0, r1 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %y) ret i32 %tmp } @@ -179,29 +196,29 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: .save {r11, lr} ; CHECK-ARM-NEXT: push {r11, lr} -; CHECK-ARM-NEXT: adds r0, r0, r2 -; CHECK-ARM-NEXT: mov r2, #0 -; CHECK-ARM-NEXT: adc r12, r1, r3 ; CHECK-ARM-NEXT: cmn r1, #1 -; CHECK-ARM-NEXT: mov r1, #0 ; CHECK-ARM-NEXT: mov lr, #0 -; CHECK-ARM-NEXT: movwgt r1, #1 -; CHECK-ARM-NEXT: cmn r12, #1 -; CHECK-ARM-NEXT: movwgt r2, #1 -; CHECK-ARM-NEXT: subs r2, r1, r2 -; CHECK-ARM-NEXT: movwne r2, #1 +; CHECK-ARM-NEXT: movgt lr, #1 +; CHECK-ARM-NEXT: adds r0, r0, r2 +; CHECK-ARM-NEXT: adc r2, r1, r3 +; CHECK-ARM-NEXT: mov r1, #0 +; CHECK-ARM-NEXT: cmn r2, #1 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: movgt r1, #1 +; CHECK-ARM-NEXT: subs r1, lr, r1 +; CHECK-ARM-NEXT: movne r1, #1 ; CHECK-ARM-NEXT: cmn r3, #1 -; CHECK-ARM-NEXT: movwgt lr, #1 -; CHECK-ARM-NEXT: sub r1, r1, lr -; CHECK-ARM-NEXT: clz r1, r1 -; CHECK-ARM-NEXT: lsr r1, r1, #5 -; CHECK-ARM-NEXT: ands r2, r1, r2 -; CHECK-ARM-NEXT: asrne r0, r12, #31 +; CHECK-ARM-NEXT: movgt r12, #1 +; CHECK-ARM-NEXT: sub r3, lr, r12 +; CHECK-ARM-NEXT: clz r3, r3 +; CHECK-ARM-NEXT: lsr r3, r3, #5 +; CHECK-ARM-NEXT: ands r3, r3, r1 ; CHECK-ARM-NEXT: mov r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r12, #0 -; CHECK-ARM-NEXT: mvnmi r1, #-2147483648 +; CHECK-ARM-NEXT: asrne r0, r2, #31 ; CHECK-ARM-NEXT: cmp r2, #0 -; CHECK-ARM-NEXT: moveq r1, r12 +; CHECK-ARM-NEXT: mvnmi r1, #-2147483648 +; CHECK-ARM-NEXT: 
cmp r3, #0 +; CHECK-ARM-NEXT: moveq r1, r2 ; CHECK-ARM-NEXT: pop {r11, pc} %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y) ret i64 %tmp @@ -231,31 +248,55 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind { ; CHECK-T1-NEXT: .LCPI2_1: ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: add r0, r1 -; CHECK-T2-NEXT: movw r1, #32767 -; CHECK-T2-NEXT: cmp r0, r1 -; CHECK-T2-NEXT: it lt -; CHECK-T2-NEXT: movlt r1, r0 -; CHECK-T2-NEXT: movw r0, #32768 -; CHECK-T2-NEXT: cmn.w r1, #32768 -; CHECK-T2-NEXT: movt r0, #65535 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: add r0, r1 +; CHECK-T2NODSP-NEXT: movw r1, #32767 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r1, r0 +; CHECK-T2NODSP-NEXT: movw r0, #32768 +; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 +; CHECK-T2NODSP-NEXT: movt r0, #65535 +; CHECK-T2NODSP-NEXT: it gt +; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: bx lr ; -; CHECK-ARM-LABEL: func16: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: add r0, r0, r1 -; CHECK-ARM-NEXT: movw r1, #32767 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movlt r1, r0 -; CHECK-ARM-NEXT: movw r0, #32768 -; CHECK-ARM-NEXT: movt r0, #65535 -; CHECK-ARM-NEXT: cmn r1, #32768 -; CHECK-ARM-NEXT: movgt r0, r1 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qadd16 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: func16: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: add r0, r0, r1 +; CHECK-ARMNODPS-NEXT: mov r1, #255 +; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512 +; CHECK-ARMNODPS-NEXT: cmp r0, r1 +; CHECK-ARMNODPS-NEXT: movlt r1, r0 +; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0 +; CHECK-ARMNODPS-NEXT: cmn r1, #32768 +; CHECK-ARMNODPS-NEXT: movgt r0, r1 
+; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODPS-NEXT: .p2align 2 +; CHECK-ARMNODPS-NEXT: @ %bb.1: +; CHECK-ARMNODPS-NEXT: .LCPI2_0: +; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000 +; +; CHECK-ARMBASEDSP-LABEL: func16: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #16 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #16 +; CHECK-ARMBASEDSP-NEXT: qadd r0, r0, r1 +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #16 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: func16: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r1 +; CHECK-ARMDSP-NEXT: sxth r0, r0 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y) ret i16 %tmp } @@ -278,25 +319,45 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { ; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: add r0, r1 -; CHECK-T2-NEXT: cmp r0, #127 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #127 -; CHECK-T2-NEXT: cmn.w r0, #128 -; CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #127 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: add r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #127 +; CHECK-T2NODSP-NEXT: cmn.w r0, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: bx lr ; -; CHECK-ARM-LABEL: func8: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: add r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #127 -; CHECK-ARM-NEXT: movge r0, #127 -; CHECK-ARM-NEXT: cmn r0, #128 -; CHECK-ARM-NEXT: mvnle r0, #127 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qadd8 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: func8: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: add r0, r0, r1 +; CHECK-ARMNODPS-NEXT: cmp r0, #127 +; CHECK-ARMNODPS-NEXT: movge r0, 
#127 +; CHECK-ARMNODPS-NEXT: cmn r0, #128 +; CHECK-ARMNODPS-NEXT: mvnle r0, #127 +; CHECK-ARMNODPS-NEXT: bx lr +; +; CHECK-ARMBASEDSP-LABEL: func8: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #24 +; CHECK-ARMBASEDSP-NEXT: qadd r0, r0, r1 +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #24 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: func8: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r1 +; CHECK-ARMDSP-NEXT: sxtb r0, r0 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y) ret i8 %tmp } @@ -319,25 +380,49 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; CHECK-T1-NEXT: .LBB4_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func3: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: add r0, r1 -; CHECK-T2-NEXT: cmp r0, #7 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #7 -; CHECK-T2-NEXT: cmn.w r0, #8 -; CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #7 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func3: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: add r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #7 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #7 +; CHECK-T2NODSP-NEXT: cmn.w r0, #8 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: mvnle r0, #7 +; CHECK-T2NODSP-NEXT: bx lr ; -; CHECK-ARM-LABEL: func3: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: add r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #7 -; CHECK-ARM-NEXT: movge r0, #7 -; CHECK-ARM-NEXT: cmn r0, #8 -; CHECK-ARM-NEXT: mvnle r0, #7 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: func3: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: lsls r1, r1, #28 +; CHECK-T2DSP-NEXT: lsls r0, r0, #28 +; CHECK-T2DSP-NEXT: qadd r0, r0, r1 +; CHECK-T2DSP-NEXT: asrs r0, r0, #28 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: func3: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: add r0, r0, r1 +; CHECK-ARMNODPS-NEXT: cmp r0, #7 +; CHECK-ARMNODPS-NEXT: movge r0, #7 +; CHECK-ARMNODPS-NEXT: cmn 
r0, #8 +; CHECK-ARMNODPS-NEXT: mvnle r0, #7 +; CHECK-ARMNODPS-NEXT: bx lr +; +; CHECK-ARMBASEDSP-LABEL: func3: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #28 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #28 +; CHECK-ARMBASEDSP-NEXT: qadd r0, r0, r1 +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #28 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: func3: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: lsl r0, r0, #28 +; CHECK-ARMDSP-NEXT: lsl r1, r1, #28 +; CHECK-ARMDSP-NEXT: qadd r0, r0, r1 +; CHECK-ARMDSP-NEXT: asr r0, r0, #28 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y) ret i4 %tmp } diff --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll index 94aca12a78b58..041506816a634 100644 --- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll @@ -42,35 +42,33 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-T1-NEXT: .LCPI0_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; -; CHECK-T2-LABEL: func32: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: mla r2, r1, r2, r0 -; CHECK-T2-NEXT: movs r3, #0 -; CHECK-T2-NEXT: mov.w r1, #-2147483648 -; CHECK-T2-NEXT: cmp r2, #0 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r3, #1 -; CHECK-T2-NEXT: cmp r3, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r1, #-2147483648 -; CHECK-T2-NEXT: cmp r2, r0 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r1, r2 -; CHECK-T2-NEXT: mov r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func32: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: mla r2, r1, r2, r0 +; CHECK-T2NODSP-NEXT: movs r3, #0 +; CHECK-T2NODSP-NEXT: mov.w r1, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r2, #0 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r3, #1 +; CHECK-T2NODSP-NEXT: cmp r3, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r1, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r2, r0 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r1, r2 +; CHECK-T2NODSP-NEXT: 
mov r0, r1 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func32: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: qadd r0, r0, r1 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func32: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: mla r2, r1, r2, r0 -; CHECK-ARM-NEXT: mov r3, #0 -; CHECK-ARM-NEXT: mov r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r2, #0 -; CHECK-ARM-NEXT: movwmi r3, #1 -; CHECK-ARM-NEXT: cmp r3, #0 -; CHECK-ARM-NEXT: mvnne r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r2, r0 -; CHECK-ARM-NEXT: movvc r1, r2 -; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: mul r1, r1, r2 +; CHECK-ARM-NEXT: qadd r0, r0, r1 ; CHECK-ARM-NEXT: bx lr %a = mul i32 %y, %z %tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %a) @@ -258,29 +256,15 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw ; CHECK-T2DSP-LABEL: func16: ; CHECK-T2DSP: @ %bb.0: ; CHECK-T2DSP-NEXT: muls r1, r2, r1 -; CHECK-T2DSP-NEXT: sxtah r0, r0, r1 -; CHECK-T2DSP-NEXT: movw r1, #32767 -; CHECK-T2DSP-NEXT: cmp r0, r1 -; CHECK-T2DSP-NEXT: it lt -; CHECK-T2DSP-NEXT: movlt r1, r0 -; CHECK-T2DSP-NEXT: movw r0, #32768 -; CHECK-T2DSP-NEXT: cmn.w r1, #32768 -; CHECK-T2DSP-NEXT: movt r0, #65535 -; CHECK-T2DSP-NEXT: it gt -; CHECK-T2DSP-NEXT: movgt r0, r1 +; CHECK-T2DSP-NEXT: qadd16 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxth r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: sxtah r0, r0, r1 -; CHECK-ARM-NEXT: movw r1, #32767 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movlt r1, r0 -; CHECK-ARM-NEXT: movw r0, #32768 -; CHECK-ARM-NEXT: movt r0, #65535 -; CHECK-ARM-NEXT: cmn r1, #32768 -; CHECK-ARM-NEXT: movgt r0, r1 +; CHECK-ARM-NEXT: qadd16 r0, r0, r1 +; CHECK-ARM-NEXT: sxth r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i16 %y, %z %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a) @@ -323,23 +307,15 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind 
{ ; CHECK-T2DSP-LABEL: func8: ; CHECK-T2DSP: @ %bb.0: ; CHECK-T2DSP-NEXT: muls r1, r2, r1 -; CHECK-T2DSP-NEXT: sxtab r0, r0, r1 -; CHECK-T2DSP-NEXT: cmp r0, #127 -; CHECK-T2DSP-NEXT: it ge -; CHECK-T2DSP-NEXT: movge r0, #127 -; CHECK-T2DSP-NEXT: cmn.w r0, #128 -; CHECK-T2DSP-NEXT: it le -; CHECK-T2DSP-NEXT: mvnle r0, #127 +; CHECK-T2DSP-NEXT: qadd8 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxtb r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: sxtab r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #127 -; CHECK-ARM-NEXT: movge r0, #127 -; CHECK-ARM-NEXT: cmn r0, #128 -; CHECK-ARM-NEXT: mvnle r0, #127 +; CHECK-ARM-NEXT: qadd8 r0, r0, r1 +; CHECK-ARM-NEXT: sxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i8 %y, %z %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a) @@ -367,28 +343,35 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; CHECK-T1-NEXT: .LBB4_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func4: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: lsls r1, r1, #28 -; CHECK-T2-NEXT: add.w r0, r0, r1, asr #28 -; CHECK-T2-NEXT: cmp r0, #7 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #7 -; CHECK-T2-NEXT: cmn.w r0, #8 -; CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #7 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func4: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: lsls r1, r1, #28 +; CHECK-T2NODSP-NEXT: add.w r0, r0, r1, asr #28 +; CHECK-T2NODSP-NEXT: cmp r0, #7 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #7 +; CHECK-T2NODSP-NEXT: cmn.w r0, #8 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: mvnle r0, #7 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func4: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: lsls r0, r0, #28 +; CHECK-T2DSP-NEXT: lsls r1, r1, #28 +; CHECK-T2DSP-NEXT: qadd r0, r0, r1 +; CHECK-T2DSP-NEXT: asrs r0, r0, 
#28 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func4: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 +; CHECK-ARM-NEXT: lsl r0, r0, #28 ; CHECK-ARM-NEXT: lsl r1, r1, #28 -; CHECK-ARM-NEXT: add r0, r0, r1, asr #28 -; CHECK-ARM-NEXT: cmp r0, #7 -; CHECK-ARM-NEXT: movge r0, #7 -; CHECK-ARM-NEXT: cmn r0, #8 -; CHECK-ARM-NEXT: mvnle r0, #7 +; CHECK-ARM-NEXT: qadd r0, r0, r1 +; CHECK-ARM-NEXT: asr r0, r0, #28 ; CHECK-ARM-NEXT: bx lr %a = mul i4 %y, %z %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %a) diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll index 93be348d80a39..9c3c5babc73fd 100644 --- a/llvm/test/CodeGen/ARM/ssub_sat.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat.ll @@ -2,7 +2,9 @@ ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 ; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP ; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP -; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM +; RUN: llc < %s -mtriple=armv5t-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMNODPS +; RUN: llc < %s -mtriple=armv5te-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMBASEDSP +; RUN: llc < %s -mtriple=armv6-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARMDSP declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) @@ -44,34 +46,49 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; CHECK-T1-NEXT: .LCPI0_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; -; CHECK-T2-LABEL: func: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs.w r12, r0, r1 -; CHECK-T2-NEXT: mov.w r3, #0 -; CHECK-T2-NEXT: mov.w r2, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r3, #1 -; CHECK-T2-NEXT: cmp r3, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r2, #-2147483648 -; CHECK-T2-NEXT: 
cmp r0, r1 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r2, r12 -; CHECK-T2-NEXT: mov r0, r2 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs.w r12, r0, r1 +; CHECK-T2NODSP-NEXT: mov.w r3, #0 +; CHECK-T2NODSP-NEXT: mov.w r2, #-2147483648 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r3, #1 +; CHECK-T2NODSP-NEXT: cmp r3, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r2, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r2, r12 +; CHECK-T2NODSP-NEXT: mov r0, r2 +; CHECK-T2NODSP-NEXT: bx lr ; -; CHECK-ARM-LABEL: func: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: subs r12, r0, r1 -; CHECK-ARM-NEXT: mov r3, #0 -; CHECK-ARM-NEXT: movwmi r3, #1 -; CHECK-ARM-NEXT: mov r2, #-2147483648 -; CHECK-ARM-NEXT: cmp r3, #0 -; CHECK-ARM-NEXT: mvnne r2, #-2147483648 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movvc r2, r12 -; CHECK-ARM-NEXT: mov r0, r2 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: func: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qsub r0, r0, r1 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: func: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: subs r12, r0, r1 +; CHECK-ARMNODPS-NEXT: mov r3, #0 +; CHECK-ARMNODPS-NEXT: movmi r3, #1 +; CHECK-ARMNODPS-NEXT: mov r2, #-2147483648 +; CHECK-ARMNODPS-NEXT: cmp r3, #0 +; CHECK-ARMNODPS-NEXT: mvnne r2, #-2147483648 +; CHECK-ARMNODPS-NEXT: cmp r0, r1 +; CHECK-ARMNODPS-NEXT: movvc r2, r12 +; CHECK-ARMNODPS-NEXT: mov r0, r2 +; CHECK-ARMNODPS-NEXT: bx lr +; +; CHECK-ARMBASEDSP-LABEL: func: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: qsub r0, r0, r1 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: func: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: qsub r0, r0, r1 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %y) ret i32 %tmp } @@ -184,19 +201,19 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-ARM-NEXT: push {r4, 
lr} ; CHECK-ARM-NEXT: cmn r3, #1 ; CHECK-ARM-NEXT: mov lr, #0 -; CHECK-ARM-NEXT: movwgt lr, #1 +; CHECK-ARM-NEXT: movgt lr, #1 ; CHECK-ARM-NEXT: cmn r1, #1 ; CHECK-ARM-NEXT: mov r4, #0 ; CHECK-ARM-NEXT: mov r12, #0 -; CHECK-ARM-NEXT: movwgt r4, #1 +; CHECK-ARM-NEXT: movgt r4, #1 ; CHECK-ARM-NEXT: subs lr, r4, lr -; CHECK-ARM-NEXT: movwne lr, #1 +; CHECK-ARM-NEXT: movne lr, #1 ; CHECK-ARM-NEXT: subs r0, r0, r2 ; CHECK-ARM-NEXT: sbc r2, r1, r3 ; CHECK-ARM-NEXT: cmn r2, #1 -; CHECK-ARM-NEXT: movwgt r12, #1 +; CHECK-ARM-NEXT: movgt r12, #1 ; CHECK-ARM-NEXT: subs r1, r4, r12 -; CHECK-ARM-NEXT: movwne r1, #1 +; CHECK-ARM-NEXT: movne r1, #1 ; CHECK-ARM-NEXT: ands r3, lr, r1 ; CHECK-ARM-NEXT: asrne r0, r2, #31 ; CHECK-ARM-NEXT: mov r1, #-2147483648 @@ -233,31 +250,55 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind { ; CHECK-T1-NEXT: .LCPI2_1: ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: movw r1, #32767 -; CHECK-T2-NEXT: cmp r0, r1 -; CHECK-T2-NEXT: it lt -; CHECK-T2-NEXT: movlt r1, r0 -; CHECK-T2-NEXT: movw r0, #32768 -; CHECK-T2-NEXT: cmn.w r1, #32768 -; CHECK-T2-NEXT: movt r0, #65535 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: movw r1, #32767 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r1, r0 +; CHECK-T2NODSP-NEXT: movw r0, #32768 +; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 +; CHECK-T2NODSP-NEXT: movt r0, #65535 +; CHECK-T2NODSP-NEXT: it gt +; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: bx lr ; -; CHECK-ARM-LABEL: func16: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: movw r1, #32767 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movlt r1, r0 -; CHECK-ARM-NEXT: movw r0, #32768 -; CHECK-ARM-NEXT: movt r0, 
#65535 -; CHECK-ARM-NEXT: cmn r1, #32768 -; CHECK-ARM-NEXT: movgt r0, r1 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qsub16 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: func16: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: sub r0, r0, r1 +; CHECK-ARMNODPS-NEXT: mov r1, #255 +; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512 +; CHECK-ARMNODPS-NEXT: cmp r0, r1 +; CHECK-ARMNODPS-NEXT: movlt r1, r0 +; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0 +; CHECK-ARMNODPS-NEXT: cmn r1, #32768 +; CHECK-ARMNODPS-NEXT: movgt r0, r1 +; CHECK-ARMNODPS-NEXT: bx lr +; CHECK-ARMNODPS-NEXT: .p2align 2 +; CHECK-ARMNODPS-NEXT: @ %bb.1: +; CHECK-ARMNODPS-NEXT: .LCPI2_0: +; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000 +; +; CHECK-ARMBASEDSP-LABEL: func16: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #16 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #16 +; CHECK-ARMBASEDSP-NEXT: qsub r0, r0, r1 +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #16 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: func16: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: qsub16 r0, r0, r1 +; CHECK-ARMDSP-NEXT: sxth r0, r0 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y) ret i16 %tmp } @@ -280,25 +321,45 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { ; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: cmp r0, #127 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #127 -; CHECK-T2-NEXT: cmn.w r0, #128 -; CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #127 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #127 +; CHECK-T2NODSP-NEXT: cmn.w r0, #128 +; CHECK-T2NODSP-NEXT: it le +; 
CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: bx lr ; -; CHECK-ARM-LABEL: func8: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #127 -; CHECK-ARM-NEXT: movge r0, #127 -; CHECK-ARM-NEXT: cmn r0, #128 -; CHECK-ARM-NEXT: mvnle r0, #127 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qsub8 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: func8: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: sub r0, r0, r1 +; CHECK-ARMNODPS-NEXT: cmp r0, #127 +; CHECK-ARMNODPS-NEXT: movge r0, #127 +; CHECK-ARMNODPS-NEXT: cmn r0, #128 +; CHECK-ARMNODPS-NEXT: mvnle r0, #127 +; CHECK-ARMNODPS-NEXT: bx lr +; +; CHECK-ARMBASEDSP-LABEL: func8: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #24 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #24 +; CHECK-ARMBASEDSP-NEXT: qsub r0, r0, r1 +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #24 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: func8: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: qsub8 r0, r0, r1 +; CHECK-ARMDSP-NEXT: sxtb r0, r0 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y) ret i8 %tmp } @@ -321,25 +382,49 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; CHECK-T1-NEXT: .LBB4_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func3: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: cmp r0, #7 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #7 -; CHECK-T2-NEXT: cmn.w r0, #8 -; CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #7 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func3: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #7 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #7 +; CHECK-T2NODSP-NEXT: cmn.w r0, #8 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: mvnle r0, #7 +; CHECK-T2NODSP-NEXT: bx lr ; -; CHECK-ARM-LABEL: 
func3: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #7 -; CHECK-ARM-NEXT: movge r0, #7 -; CHECK-ARM-NEXT: cmn r0, #8 -; CHECK-ARM-NEXT: mvnle r0, #7 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: func3: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: lsls r1, r1, #28 +; CHECK-T2DSP-NEXT: lsls r0, r0, #28 +; CHECK-T2DSP-NEXT: qsub r0, r0, r1 +; CHECK-T2DSP-NEXT: asrs r0, r0, #28 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: func3: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: sub r0, r0, r1 +; CHECK-ARMNODPS-NEXT: cmp r0, #7 +; CHECK-ARMNODPS-NEXT: movge r0, #7 +; CHECK-ARMNODPS-NEXT: cmn r0, #8 +; CHECK-ARMNODPS-NEXT: mvnle r0, #7 +; CHECK-ARMNODPS-NEXT: bx lr +; +; CHECK-ARMBASEDSP-LABEL: func3: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: lsl r0, r0, #28 +; CHECK-ARMBASEDSP-NEXT: lsl r1, r1, #28 +; CHECK-ARMBASEDSP-NEXT: qsub r0, r0, r1 +; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #28 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: func3: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: lsl r0, r0, #28 +; CHECK-ARMDSP-NEXT: lsl r1, r1, #28 +; CHECK-ARMDSP-NEXT: qsub r0, r0, r1 +; CHECK-ARMDSP-NEXT: asr r0, r0, #28 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y) ret i4 %tmp } @@ -446,85 +531,148 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-T1-NEXT: .LCPI5_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; -; CHECK-T2-LABEL: vec: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-T2-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-T2-NEXT: .pad #4 -; CHECK-T2-NEXT: sub sp, #4 -; CHECK-T2-NEXT: ldr r4, [sp, #24] -; CHECK-T2-NEXT: mov lr, r0 -; CHECK-T2-NEXT: ldr r7, [sp, #28] -; CHECK-T2-NEXT: movs r5, #0 -; CHECK-T2-NEXT: subs r6, r0, r4 -; CHECK-T2-NEXT: mov.w r0, #0 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r0, #1 -; CHECK-T2-NEXT: cmp r0, #0 -; CHECK-T2-NEXT: mov.w r0, #-2147483648 -; CHECK-T2-NEXT: mov.w r12, 
#-2147483648 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r0, #-2147483648 -; CHECK-T2-NEXT: cmp lr, r4 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r0, r6 -; CHECK-T2-NEXT: subs r6, r1, r7 -; CHECK-T2-NEXT: mov.w r4, #0 -; CHECK-T2-NEXT: mov.w lr, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r4, #1 -; CHECK-T2-NEXT: cmp r4, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne lr, #-2147483648 -; CHECK-T2-NEXT: cmp r1, r7 -; CHECK-T2-NEXT: ldr r1, [sp, #32] -; CHECK-T2-NEXT: mov.w r4, #0 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc lr, r6 -; CHECK-T2-NEXT: subs r6, r2, r1 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r4, #1 -; CHECK-T2-NEXT: cmp r4, #0 -; CHECK-T2-NEXT: mov.w r4, #-2147483648 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r4, #-2147483648 -; CHECK-T2-NEXT: cmp r2, r1 -; CHECK-T2-NEXT: ldr r1, [sp, #36] -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r4, r6 -; CHECK-T2-NEXT: subs r2, r3, r1 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r5, #1 -; CHECK-T2-NEXT: cmp r5, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r12, #-2147483648 -; CHECK-T2-NEXT: cmp r3, r1 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r12, r2 -; CHECK-T2-NEXT: mov r1, lr -; CHECK-T2-NEXT: mov r2, r4 -; CHECK-T2-NEXT: mov r3, r12 -; CHECK-T2-NEXT: add sp, #4 -; CHECK-T2-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-T2NODSP-LABEL: vec: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-T2NODSP-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-T2NODSP-NEXT: .pad #4 +; CHECK-T2NODSP-NEXT: sub sp, #4 +; CHECK-T2NODSP-NEXT: ldr r4, [sp, #24] +; CHECK-T2NODSP-NEXT: mov lr, r0 +; CHECK-T2NODSP-NEXT: ldr r7, [sp, #28] +; CHECK-T2NODSP-NEXT: movs r5, #0 +; CHECK-T2NODSP-NEXT: subs r6, r0, r4 +; CHECK-T2NODSP-NEXT: mov.w r0, #0 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r0, #1 +; CHECK-T2NODSP-NEXT: cmp r0, #0 +; CHECK-T2NODSP-NEXT: mov.w r0, #-2147483648 +; CHECK-T2NODSP-NEXT: mov.w r12, #-2147483648 +; 
CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r0, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp lr, r4 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r0, r6 +; CHECK-T2NODSP-NEXT: subs r6, r1, r7 +; CHECK-T2NODSP-NEXT: mov.w r4, #0 +; CHECK-T2NODSP-NEXT: mov.w lr, #-2147483648 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r4, #1 +; CHECK-T2NODSP-NEXT: cmp r4, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne lr, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r1, r7 +; CHECK-T2NODSP-NEXT: ldr r1, [sp, #32] +; CHECK-T2NODSP-NEXT: mov.w r4, #0 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc lr, r6 +; CHECK-T2NODSP-NEXT: subs r6, r2, r1 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r4, #1 +; CHECK-T2NODSP-NEXT: cmp r4, #0 +; CHECK-T2NODSP-NEXT: mov.w r4, #-2147483648 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r4, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r2, r1 +; CHECK-T2NODSP-NEXT: ldr r1, [sp, #36] +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r4, r6 +; CHECK-T2NODSP-NEXT: subs r2, r3, r1 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi r5, #1 +; CHECK-T2NODSP-NEXT: cmp r5, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r12, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r3, r1 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r12, r2 +; CHECK-T2NODSP-NEXT: mov r1, lr +; CHECK-T2NODSP-NEXT: mov r2, r4 +; CHECK-T2NODSP-NEXT: mov r3, r12 +; CHECK-T2NODSP-NEXT: add sp, #4 +; CHECK-T2NODSP-NEXT: pop {r4, r5, r6, r7, pc} ; -; CHECK-ARM-LABEL: vec: -; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: vmov d17, r2, r3 -; CHECK-ARM-NEXT: mov r12, sp -; CHECK-ARM-NEXT: vld1.64 {d18, d19}, [r12] -; CHECK-ARM-NEXT: vmov d16, r0, r1 -; CHECK-ARM-NEXT: vmvn.i32 q11, #0x80000000 -; CHECK-ARM-NEXT: vsub.i32 q10, q8, q9 -; CHECK-ARM-NEXT: vcgt.s32 q9, q9, #0 -; CHECK-ARM-NEXT: vclt.s32 q12, q10, #0 -; CHECK-ARM-NEXT: vmvn q13, q12 -; CHECK-ARM-NEXT: vcgt.s32 q8, q8, q10 -; CHECK-ARM-NEXT: vbsl 
q11, q12, q13 -; CHECK-ARM-NEXT: veor q8, q9, q8 -; CHECK-ARM-NEXT: vbsl q8, q11, q10 -; CHECK-ARM-NEXT: vmov r0, r1, d16 -; CHECK-ARM-NEXT: vmov r2, r3, d17 -; CHECK-ARM-NEXT: bx lr +; CHECK-T2DSP-LABEL: vec: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: ldr.w r12, [sp] +; CHECK-T2DSP-NEXT: qsub r0, r0, r12 +; CHECK-T2DSP-NEXT: ldr.w r12, [sp, #4] +; CHECK-T2DSP-NEXT: qsub r1, r1, r12 +; CHECK-T2DSP-NEXT: ldr.w r12, [sp, #8] +; CHECK-T2DSP-NEXT: qsub r2, r2, r12 +; CHECK-T2DSP-NEXT: ldr.w r12, [sp, #12] +; CHECK-T2DSP-NEXT: qsub r3, r3, r12 +; CHECK-T2DSP-NEXT: bx lr +; +; CHECK-ARMNODPS-LABEL: vec: +; CHECK-ARMNODPS: @ %bb.0: +; CHECK-ARMNODPS-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-ARMNODPS-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARMNODPS-NEXT: ldr r4, [sp, #24] +; CHECK-ARMNODPS-NEXT: mov lr, r0 +; CHECK-ARMNODPS-NEXT: ldr r7, [sp, #28] +; CHECK-ARMNODPS-NEXT: mov r5, #0 +; CHECK-ARMNODPS-NEXT: subs r6, r0, r4 +; CHECK-ARMNODPS-NEXT: mov r0, #0 +; CHECK-ARMNODPS-NEXT: movmi r0, #1 +; CHECK-ARMNODPS-NEXT: cmp r0, #0 +; CHECK-ARMNODPS-NEXT: mov r0, #-2147483648 +; CHECK-ARMNODPS-NEXT: mov r12, #-2147483648 +; CHECK-ARMNODPS-NEXT: mvnne r0, #-2147483648 +; CHECK-ARMNODPS-NEXT: cmp lr, r4 +; CHECK-ARMNODPS-NEXT: movvc r0, r6 +; CHECK-ARMNODPS-NEXT: subs r6, r1, r7 +; CHECK-ARMNODPS-NEXT: mov r4, #0 +; CHECK-ARMNODPS-NEXT: mov lr, #-2147483648 +; CHECK-ARMNODPS-NEXT: movmi r4, #1 +; CHECK-ARMNODPS-NEXT: cmp r4, #0 +; CHECK-ARMNODPS-NEXT: mvnne lr, #-2147483648 +; CHECK-ARMNODPS-NEXT: cmp r1, r7 +; CHECK-ARMNODPS-NEXT: ldr r1, [sp, #32] +; CHECK-ARMNODPS-NEXT: movvc lr, r6 +; CHECK-ARMNODPS-NEXT: mov r4, #0 +; CHECK-ARMNODPS-NEXT: subs r6, r2, r1 +; CHECK-ARMNODPS-NEXT: movmi r4, #1 +; CHECK-ARMNODPS-NEXT: cmp r4, #0 +; CHECK-ARMNODPS-NEXT: mov r4, #-2147483648 +; CHECK-ARMNODPS-NEXT: mvnne r4, #-2147483648 +; CHECK-ARMNODPS-NEXT: cmp r2, r1 +; CHECK-ARMNODPS-NEXT: ldr r1, [sp, #36] +; CHECK-ARMNODPS-NEXT: movvc r4, r6 +; CHECK-ARMNODPS-NEXT: subs r2, 
r3, r1 +; CHECK-ARMNODPS-NEXT: movmi r5, #1 +; CHECK-ARMNODPS-NEXT: cmp r5, #0 +; CHECK-ARMNODPS-NEXT: mvnne r12, #-2147483648 +; CHECK-ARMNODPS-NEXT: cmp r3, r1 +; CHECK-ARMNODPS-NEXT: movvc r12, r2 +; CHECK-ARMNODPS-NEXT: mov r1, lr +; CHECK-ARMNODPS-NEXT: mov r2, r4 +; CHECK-ARMNODPS-NEXT: mov r3, r12 +; CHECK-ARMNODPS-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-ARMBASEDSP-LABEL: vec: +; CHECK-ARMBASEDSP: @ %bb.0: +; CHECK-ARMBASEDSP-NEXT: ldr r12, [sp] +; CHECK-ARMBASEDSP-NEXT: qsub r0, r0, r12 +; CHECK-ARMBASEDSP-NEXT: ldr r12, [sp, #4] +; CHECK-ARMBASEDSP-NEXT: qsub r1, r1, r12 +; CHECK-ARMBASEDSP-NEXT: ldr r12, [sp, #8] +; CHECK-ARMBASEDSP-NEXT: qsub r2, r2, r12 +; CHECK-ARMBASEDSP-NEXT: ldr r12, [sp, #12] +; CHECK-ARMBASEDSP-NEXT: qsub r3, r3, r12 +; CHECK-ARMBASEDSP-NEXT: bx lr +; +; CHECK-ARMDSP-LABEL: vec: +; CHECK-ARMDSP: @ %bb.0: +; CHECK-ARMDSP-NEXT: ldr r12, [sp] +; CHECK-ARMDSP-NEXT: qsub r0, r0, r12 +; CHECK-ARMDSP-NEXT: ldr r12, [sp, #4] +; CHECK-ARMDSP-NEXT: qsub r1, r1, r12 +; CHECK-ARMDSP-NEXT: ldr r12, [sp, #8] +; CHECK-ARMDSP-NEXT: qsub r2, r2, r12 +; CHECK-ARMDSP-NEXT: ldr r12, [sp, #12] +; CHECK-ARMDSP-NEXT: qsub r3, r3, r12 +; CHECK-ARMDSP-NEXT: bx lr %tmp = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp } diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll index 9a598408d9e23..147c0a0e5857d 100644 --- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll @@ -44,39 +44,36 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { ; CHECK-T1-NEXT: .LCPI0_0: ; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; -; CHECK-T2-LABEL: func32: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: .save {r7, lr} -; CHECK-T2-NEXT: push {r7, lr} -; CHECK-T2-NEXT: mls r12, r1, r2, r0 -; CHECK-T2-NEXT: mov.w lr, #0 -; CHECK-T2-NEXT: mov.w r3, #-2147483648 -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: cmp.w r12, #0 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: 
movmi.w lr, #1 -; CHECK-T2-NEXT: cmp.w lr, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r3, #-2147483648 -; CHECK-T2-NEXT: cmp r0, r1 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r3, r12 -; CHECK-T2-NEXT: mov r0, r3 -; CHECK-T2-NEXT: pop {r7, pc} +; CHECK-T2NODSP-LABEL: func32: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: .save {r7, lr} +; CHECK-T2NODSP-NEXT: push {r7, lr} +; CHECK-T2NODSP-NEXT: mls r12, r1, r2, r0 +; CHECK-T2NODSP-NEXT: mov.w lr, #0 +; CHECK-T2NODSP-NEXT: mov.w r3, #-2147483648 +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: cmp.w r12, #0 +; CHECK-T2NODSP-NEXT: it mi +; CHECK-T2NODSP-NEXT: movmi.w lr, #1 +; CHECK-T2NODSP-NEXT: cmp.w lr, #0 +; CHECK-T2NODSP-NEXT: it ne +; CHECK-T2NODSP-NEXT: mvnne r3, #-2147483648 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it vc +; CHECK-T2NODSP-NEXT: movvc r3, r12 +; CHECK-T2NODSP-NEXT: mov r0, r3 +; CHECK-T2NODSP-NEXT: pop {r7, pc} +; +; CHECK-T2DSP-LABEL: func32: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: qsub r0, r0, r1 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func32: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: mls r3, r1, r2, r0 -; CHECK-ARM-NEXT: mul r12, r1, r2 -; CHECK-ARM-NEXT: mov r2, #0 -; CHECK-ARM-NEXT: mov r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r3, #0 -; CHECK-ARM-NEXT: movwmi r2, #1 -; CHECK-ARM-NEXT: cmp r2, #0 -; CHECK-ARM-NEXT: mvnne r1, #-2147483648 -; CHECK-ARM-NEXT: cmp r0, r12 -; CHECK-ARM-NEXT: movvc r1, r3 -; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: mul r1, r1, r2 +; CHECK-ARM-NEXT: qsub r0, r0, r1 ; CHECK-ARM-NEXT: bx lr %a = mul i32 %y, %z %tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %a) @@ -245,34 +242,34 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw ; CHECK-T1-NEXT: .LCPI2_1: ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: sxth r1, r1 -; CHECK-T2-NEXT: subs 
r0, r0, r1 -; CHECK-T2-NEXT: movw r1, #32767 -; CHECK-T2-NEXT: cmp r0, r1 -; CHECK-T2-NEXT: it lt -; CHECK-T2-NEXT: movlt r1, r0 -; CHECK-T2-NEXT: movw r0, #32768 -; CHECK-T2-NEXT: movt r0, #65535 -; CHECK-T2-NEXT: cmn.w r1, #32768 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: sxth r1, r1 +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: movw r1, #32767 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r1, r0 +; CHECK-T2NODSP-NEXT: movw r0, #32768 +; CHECK-T2NODSP-NEXT: movt r0, #65535 +; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 +; CHECK-T2NODSP-NEXT: it gt +; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: qsub16 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: sxth r1, r1 -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: movw r1, #32767 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movlt r1, r0 -; CHECK-ARM-NEXT: movw r0, #32768 -; CHECK-ARM-NEXT: movt r0, #65535 -; CHECK-ARM-NEXT: cmn r1, #32768 -; CHECK-ARM-NEXT: movgt r0, r1 +; CHECK-ARM-NEXT: qsub16 r0, r0, r1 +; CHECK-ARM-NEXT: sxth r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i16 %y, %z %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %a) @@ -299,28 +296,31 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind { ; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: sxtb r1, r1 -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: cmp r0, #127 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #127 -; CHECK-T2-NEXT: cmn.w r0, #128 -; 
CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #127 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: sxtb r1, r1 +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #127 +; CHECK-T2NODSP-NEXT: cmn.w r0, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: qsub8 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: sxtb r1, r1 -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #127 -; CHECK-ARM-NEXT: movge r0, #127 -; CHECK-ARM-NEXT: cmn r0, #128 -; CHECK-ARM-NEXT: mvnle r0, #127 +; CHECK-ARM-NEXT: qsub8 r0, r0, r1 +; CHECK-ARM-NEXT: sxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i8 %y, %z %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %a) @@ -348,28 +348,35 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; CHECK-T1-NEXT: .LBB4_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func4: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: lsls r1, r1, #28 -; CHECK-T2-NEXT: sub.w r0, r0, r1, asr #28 -; CHECK-T2-NEXT: cmp r0, #7 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #7 -; CHECK-T2-NEXT: cmn.w r0, #8 -; CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #7 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func4: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: lsls r1, r1, #28 +; CHECK-T2NODSP-NEXT: sub.w r0, r0, r1, asr #28 +; CHECK-T2NODSP-NEXT: cmp r0, #7 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #7 +; CHECK-T2NODSP-NEXT: cmn.w r0, #8 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: 
mvnle r0, #7 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func4: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: lsls r0, r0, #28 +; CHECK-T2DSP-NEXT: lsls r1, r1, #28 +; CHECK-T2DSP-NEXT: qsub r0, r0, r1 +; CHECK-T2DSP-NEXT: asrs r0, r0, #28 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func4: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 +; CHECK-ARM-NEXT: lsl r0, r0, #28 ; CHECK-ARM-NEXT: lsl r1, r1, #28 -; CHECK-ARM-NEXT: sub r0, r0, r1, asr #28 -; CHECK-ARM-NEXT: cmp r0, #7 -; CHECK-ARM-NEXT: movge r0, #7 -; CHECK-ARM-NEXT: cmn r0, #8 -; CHECK-ARM-NEXT: mvnle r0, #7 +; CHECK-ARM-NEXT: qsub r0, r0, r1 +; CHECK-ARM-NEXT: asr r0, r0, #28 ; CHECK-ARM-NEXT: bx lr %a = mul i4 %y, %z %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %a) diff --git a/llvm/test/CodeGen/BPF/callx.ll b/llvm/test/CodeGen/BPF/callx.ll new file mode 100644 index 0000000000000..bb31e11898520 --- /dev/null +++ b/llvm/test/CodeGen/BPF/callx.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=bpfel | FileCheck %s +; source: +; int test(int (*f)(void)) { return f(); } + +; Function Attrs: nounwind +define dso_local i32 @test(i32 ()* nocapture %f) local_unnamed_addr #0 { +entry: + %call = tail call i32 %f() #1 +; CHECK: callx r{{[0-9]+}} + ret i32 %call +} + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project.git 7015a5c54b53d8d2297a3aa38bc32aab167bdcfc)"} diff --git a/llvm/test/CodeGen/MSP430/shift-amount-threshold.ll 
b/llvm/test/CodeGen/MSP430/shift-amount-threshold.ll new file mode 100644 index 0000000000000..633cd785dc1bf --- /dev/null +++ b/llvm/test/CodeGen/MSP430/shift-amount-threshold.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=msp430-- < %s | FileCheck %s + +define i16 @testSimplifySetCC_0(i16 %a) { +; CHECK-LABEL: testSimplifySetCC_0: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: bit #32, r12 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: and #1, r12 +; CHECK-NEXT: ret +entry: + %and = and i16 %a, 32 + %cmp = icmp ne i16 %and, 0 + %conv = zext i1 %cmp to i16 + ret i16 %conv +} + +define i16 @testSimplifySetCC_1(i16 %a) { +; CHECK-LABEL: testSimplifySetCC_1: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: bit #32, r12 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: and #1, r12 +; CHECK-NEXT: ret +entry: + %and = and i16 %a, 32 + %cmp = icmp eq i16 %and, 32 + %conv = zext i1 %cmp to i16 + ret i16 %conv +} + +define i16 @testSiymplifySelect(i16 %a) { +; CHECK-LABEL: testSiymplifySelect: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov r12, r13 +; CHECK-NEXT: clr r12 +; CHECK-NEXT: bit #2048, r13 +; CHECK-NEXT: jeq .LBB2_2 +; CHECK-NEXT: ; %bb.1: ; %entry +; CHECK-NEXT: mov #3, r12 +; CHECK-NEXT: .LBB2_2: ; %entry +; CHECK-NEXT: ret +entry: + %and = and i16 %a, 2048 + %cmp = icmp eq i16 %and, 0 + %cond = select i1 %cmp, i16 0, i16 3 + ret i16 %cond +} + +define i16 @testExtendSignBit(i16 %a) { +; CHECK-LABEL: testExtendSignBit: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: inv r12 +; CHECK-NEXT: swpb r12 +; CHECK-NEXT: mov.b r12, r12 +; CHECK-NEXT: clrc +; CHECK-NEXT: rrc r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i16 %a, -1 + %cond = select i1 %cmp, i16 1, i16 0 + ret i16 %cond +} + +define i16 @testShiftAnd_0(i16 %a) { +; CHECK-LABEL: testShiftAnd_0: +; CHECK: ; %bb.0: ; %entry 
+; CHECK-NEXT: swpb r12 +; CHECK-NEXT: sxt r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: ret +entry: + %cmp = icmp slt i16 %a, 0 + %cond = select i1 %cmp, i16 -1, i16 0 + ret i16 %cond +} + +define i16 @testShiftAnd_1(i16 %a) { +; CHECK-LABEL: testShiftAnd_1: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: swpb r12 +; CHECK-NEXT: mov.b r12, r12 +; CHECK-NEXT: clrc +; CHECK-NEXT: rrc r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: ret +entry: + %cmp = icmp slt i16 %a, 0 + %cond = select i1 %cmp, i16 1, i16 0 + ret i16 %cond +} + +define i16 @testShiftAnd_2(i16 %a) { +; CHECK-LABEL: testShiftAnd_2: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: swpb r12 +; CHECK-NEXT: mov.b r12, r12 +; CHECK-NEXT: clrc +; CHECK-NEXT: rrc r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: and #2, r12 +; CHECK-NEXT: ret +entry: + %cmp = icmp slt i16 %a, 0 + %cond = select i1 %cmp, i16 2, i16 0 + ret i16 %cond +} + +define i16 @testShiftAnd_3(i16 %a) { +; CHECK-LABEL: testShiftAnd_3: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: swpb r12 +; CHECK-NEXT: sxt r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: rra r12 +; CHECK-NEXT: and #3, r12 +; CHECK-NEXT: ret +entry: + %cmp = icmp slt i16 %a, 0 + %cond = select i1 %cmp, i16 3, i16 0 + ret i16 %cond +} + +define i16 @testShiftAnd_4(i16 %a, i16 %b) { +; CHECK-LABEL: testShiftAnd_4: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov r12, r14 +; CHECK-NEXT: mov #1, r12 +; CHECK-NEXT: cmp r14, r13 +; CHECK-NEXT: jl .LBB8_2 +; CHECK-NEXT: ; %bb.1: ; %entry +; CHECK-NEXT: clr r12 +; CHECK-NEXT: .LBB8_2: ; %entry +; CHECK-NEXT: add r12, 
r12 +; CHECK-NEXT: add r12, r12 +; CHECK-NEXT: add r12, r12 +; CHECK-NEXT: add r12, r12 +; CHECK-NEXT: add r12, r12 +; CHECK-NEXT: ret +entry: + %cmp = icmp sgt i16 %a, %b + %cond = select i1 %cmp, i16 32, i16 0 + ret i16 %cond +} diff --git a/llvm/test/CodeGen/PowerPC/pr42492.ll b/llvm/test/CodeGen/PowerPC/pr42492.ll index 8dface68b6a65..3aa9ccdbf69a3 100644 --- a/llvm/test/CodeGen/PowerPC/pr42492.ll +++ b/llvm/test/CodeGen/PowerPC/pr42492.ll @@ -4,13 +4,25 @@ define void @f(i8*, i8*, i64*) { ; Check we don't assert and this is not a Hardware Loop ; CHECK-LABEL: f: -; CHECK: .LBB0_2: # -; CHECK-NEXT: cmplwi -; CHECK-NEXT: cmpd -; CHECK-NEXT: sldi -; CHECK-NEXT: cror -; CHECK-NEXT: addi -; CHECK-NEXT: bc +; CHECK: # %bb.0: +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: beqlr 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld 6, 8(5) +; CHECK-NEXT: not 3, 3 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_2: # +; CHECK-NEXT: cmplwi 4, 14 +; CHECK-NEXT: cmpd 1, 3, 4 +; CHECK-NEXT: sldi 6, 6, 4 +; CHECK-NEXT: cror 20, 6, 1 +; CHECK-NEXT: addi 4, 4, 1 +; CHECK-NEXT: bc 4, 20, .LBB0_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: std 6, 8(5) +; CHECK-NEXT: blr %4 = icmp eq i8* %0, %1 br i1 %4, label %9, label %5 diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index d7611d1de0a78..134e378aa1ae9 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -1841,7 +1841,8 @@ define <2 x double> @test_mm_cvtu64_sd(<2 x double> %__A, i64 %__B) { ; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 ; X86-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] ; X86-NEXT: vsubpd {{\.LCPI.*}}, %xmm1, %xmm1 -; X86-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 +; X86-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; X86-NEXT: vaddsd %xmm1, %xmm2, %xmm1 ; X86-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; X86-NEXT: retl ; 
diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll index 8565d6ead1f69..7d5886d7e594b 100644 --- a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll +++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll @@ -185,8 +185,8 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; SSE2-NEXT: pcmpgtb %xmm3, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] ; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: movmskpd %xmm1, %eax @@ -196,7 +196,7 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; SSSE3-LABEL: v2i8: ; SSSE3: # %bb.0: ; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0 -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = ; SSSE3-NEXT: pshufb %xmm1, %xmm0 ; SSSE3-NEXT: pcmpgtb %xmm3, %xmm2 ; SSSE3-NEXT: pshufb %xmm1, %xmm2 @@ -450,8 +450,8 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2 -; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1 ; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax ; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll index 0dcdb3ec9eeec..d7871ff21e844 100644 --- a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll +++ b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll @@ -157,7 +157,7 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) { ; SSSE3-LABEL: v2i8: ; SSSE3: # %bb.0: ; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0 -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,0,0,u,u,0,0,u,u,1,1,u,u,1,1] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,0,u,u,u,0,u,u,u,1,u,u,u,1] ; SSSE3-NEXT: movmskpd %xmm0, %eax ; SSSE3-NEXT: # kill: def $al killed $al killed $eax ; SSSE3-NEXT: retq diff --git a/llvm/test/CodeGen/X86/haddsub-3.ll b/llvm/test/CodeGen/X86/haddsub-3.ll index 29b9a626dd2c4..f603ace202a1e 100644 --- a/llvm/test/CodeGen/X86/haddsub-3.ll +++ b/llvm/test/CodeGen/X86/haddsub-3.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3,SSSE3-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSSE3,SSSE3-FAST +; RUN: llc < %s 
-mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1,AVX1-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX1,AVX1-FAST +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 define float @pr26491(<4 x float> %a0) { ; SSE2-LABEL: pr26491: @@ -58,37 +60,68 @@ define <4 x double> @PR41414(i64 %x, <4 x double> %y) { ; SSE2-NEXT: addpd %xmm2, %xmm1 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: PR41414: -; SSSE3: # %bb.0: -; SSSE3-NEXT: movq %rdi, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] -; SSSE3-NEXT: subpd {{.*}}(%rip), %xmm2 -; SSSE3-NEXT: haddpd %xmm2, %xmm2 -; SSSE3-NEXT: divpd %xmm2, %xmm1 -; SSSE3-NEXT: divpd %xmm2, %xmm0 -; SSSE3-NEXT: xorpd %xmm2, %xmm2 -; SSSE3-NEXT: addpd %xmm2, %xmm0 -; SSSE3-NEXT: addpd %xmm2, %xmm1 -; SSSE3-NEXT: retq +; SSSE3-SLOW-LABEL: PR41414: +; SSSE3-SLOW: # %bb.0: +; SSSE3-SLOW-NEXT: movq %rdi, %xmm2 +; SSSE3-SLOW-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] +; SSSE3-SLOW-NEXT: subpd {{.*}}(%rip), %xmm2 +; SSSE3-SLOW-NEXT: movapd %xmm2, %xmm3 +; SSSE3-SLOW-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1] +; SSSE3-SLOW-NEXT: addpd %xmm2, %xmm3 +; SSSE3-SLOW-NEXT: movddup {{.*#+}} xmm2 = xmm3[0,0] +; SSSE3-SLOW-NEXT: divpd %xmm2, %xmm1 +; SSSE3-SLOW-NEXT: divpd %xmm2, %xmm0 +; SSSE3-SLOW-NEXT: xorpd %xmm2, %xmm2 +; SSSE3-SLOW-NEXT: addpd %xmm2, %xmm0 +; SSSE3-SLOW-NEXT: addpd %xmm2, %xmm1 +; SSSE3-SLOW-NEXT: retq +; +; SSSE3-FAST-LABEL: PR41414: +; SSSE3-FAST: # %bb.0: +; SSSE3-FAST-NEXT: movq %rdi, %xmm2 +; SSSE3-FAST-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] +; SSSE3-FAST-NEXT: subpd {{.*}}(%rip), %xmm2 +; SSSE3-FAST-NEXT: haddpd %xmm2, %xmm2 +; SSSE3-FAST-NEXT: divpd %xmm2, %xmm1 +; SSSE3-FAST-NEXT: divpd %xmm2, %xmm0 +; SSSE3-FAST-NEXT: xorpd %xmm2, %xmm2 +; SSSE3-FAST-NEXT: addpd %xmm2, %xmm0 +; SSSE3-FAST-NEXT: addpd %xmm2, %xmm1 +; SSSE3-FAST-NEXT: 
retq +; +; AVX1-SLOW-LABEL: PR41414: +; AVX1-SLOW: # %bb.0: +; AVX1-SLOW-NEXT: vmovq %rdi, %xmm1 +; AVX1-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; AVX1-SLOW-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-SLOW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX1-SLOW-NEXT: vaddpd %xmm1, %xmm2, %xmm1 +; AVX1-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] +; AVX1-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 +; AVX1-SLOW-NEXT: vdivpd %ymm1, %ymm0, %ymm0 +; AVX1-SLOW-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-SLOW-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; AVX1-SLOW-NEXT: retq ; -; AVX1-LABEL: PR41414: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovq %rdi, %xmm1 -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] -; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 -; AVX1-NEXT: vdivpd %ymm1, %ymm0, %ymm0 -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 -; AVX1-NEXT: retq +; AVX1-FAST-LABEL: PR41414: +; AVX1-FAST: # %bb.0: +; AVX1-FAST-NEXT: vmovq %rdi, %xmm1 +; AVX1-FAST-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; AVX1-FAST-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-FAST-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 +; AVX1-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 +; AVX1-FAST-NEXT: vdivpd %ymm1, %ymm0, %ymm0 +; AVX1-FAST-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-FAST-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; AVX1-FAST-NEXT: retq ; ; AVX2-LABEL: PR41414: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovq %rdi, %xmm1 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] ; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] +; AVX2-NEXT: vaddsd %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 ; AVX2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 diff --git 
a/llvm/test/CodeGen/X86/haddsub-broadcast.ll b/llvm/test/CodeGen/X86/haddsub-broadcast.ll index a37cf80ff79e4..ec617bb2b03af 100644 --- a/llvm/test/CodeGen/X86/haddsub-broadcast.ll +++ b/llvm/test/CodeGen/X86/haddsub-broadcast.ll @@ -9,7 +9,8 @@ define <4 x double> @PR43402(i64 %x) { ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 -; CHECK-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; CHECK-NEXT: vaddsd %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 ; CHECK-NEXT: retl %conv = uitofp i64 %x to double diff --git a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll index 580da1535e394..6386ffbcc378b 100644 --- a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll +++ b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll @@ -610,8 +610,9 @@ define double @u64_to_d(i64 %a) nounwind { ; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; AVX512F_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 -; AVX512F_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 -; AVX512F_32-NEXT: vmovlpd %xmm0, (%esp) +; AVX512F_32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512F_32-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; AVX512F_32-NEXT: vmovsd %xmm0, (%esp) ; AVX512F_32-NEXT: fldl (%esp) ; AVX512F_32-NEXT: movl %ebp, %esp ; AVX512F_32-NEXT: popl %ebp @@ -669,6 +670,110 @@ define double @u64_to_d(i64 %a) nounwind { ret double %r } +define double @u64_to_d_optsize(i64 %a) nounwind optsize { +; AVX512DQVL_32-LABEL: u64_to_d_optsize: +; AVX512DQVL_32: # %bb.0: +; AVX512DQVL_32-NEXT: pushl %ebp +; AVX512DQVL_32-NEXT: movl %esp, %ebp +; AVX512DQVL_32-NEXT: andl $-8, %esp +; AVX512DQVL_32-NEXT: subl $8, %esp +; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQVL_32-NEXT: vcvtuqq2pd %ymm0, %ymm0 +; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp) 
+; AVX512DQVL_32-NEXT: fldl (%esp) +; AVX512DQVL_32-NEXT: movl %ebp, %esp +; AVX512DQVL_32-NEXT: popl %ebp +; AVX512DQVL_32-NEXT: vzeroupper +; AVX512DQVL_32-NEXT: retl +; +; AVX512_64-LABEL: u64_to_d_optsize: +; AVX512_64: # %bb.0: +; AVX512_64-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0 +; AVX512_64-NEXT: retq +; +; AVX512DQ_32-LABEL: u64_to_d_optsize: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %ebp +; AVX512DQ_32-NEXT: movl %esp, %ebp +; AVX512DQ_32-NEXT: andl $-8, %esp +; AVX512DQ_32-NEXT: subl $8, %esp +; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) +; AVX512DQ_32-NEXT: fldl (%esp) +; AVX512DQ_32-NEXT: movl %ebp, %esp +; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl +; +; AVX512F_32-LABEL: u64_to_d_optsize: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $8, %esp +; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512F_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX512F_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; AVX512F_32-NEXT: vmovlpd %xmm0, (%esp) +; AVX512F_32-NEXT: fldl (%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; +; SSE2_32-LABEL: u64_to_d_optsize: +; SSE2_32: # %bb.0: +; SSE2_32-NEXT: pushl %ebp +; SSE2_32-NEXT: movl %esp, %ebp +; SSE2_32-NEXT: andl $-8, %esp +; SSE2_32-NEXT: subl $8, %esp +; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2_32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; SSE2_32-NEXT: subpd {{\.LCPI.*}}, %xmm0 +; SSE2_32-NEXT: movapd %xmm0, %xmm1 +; SSE2_32-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE2_32-NEXT: addsd %xmm0, %xmm1 +; SSE2_32-NEXT: movsd %xmm1, (%esp) +; SSE2_32-NEXT: fldl (%esp) +; SSE2_32-NEXT: movl %ebp, %esp +; 
SSE2_32-NEXT: popl %ebp +; SSE2_32-NEXT: retl +; +; SSE2_64-LABEL: u64_to_d_optsize: +; SSE2_64: # %bb.0: +; SSE2_64-NEXT: movq %rdi, %xmm1 +; SSE2_64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; SSE2_64-NEXT: subpd {{.*}}(%rip), %xmm1 +; SSE2_64-NEXT: movapd %xmm1, %xmm0 +; SSE2_64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SSE2_64-NEXT: addsd %xmm1, %xmm0 +; SSE2_64-NEXT: retq +; +; X87-LABEL: u64_to_d_optsize: +; X87: # %bb.0: +; X87-NEXT: pushl %ebp +; X87-NEXT: movl %esp, %ebp +; X87-NEXT: andl $-8, %esp +; X87-NEXT: subl $16, %esp +; X87-NEXT: movl 8(%ebp), %eax +; X87-NEXT: movl 12(%ebp), %ecx +; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: xorl %eax, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: setns %al +; X87-NEXT: fildll (%esp) +; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: movl %ebp, %esp +; X87-NEXT: popl %ebp +; X87-NEXT: retl + %r = uitofp i64 %a to double + ret double %r +} + define double @s64_to_d(i64 %a) nounwind { ; AVX512DQVL_32-LABEL: s64_to_d: ; AVX512DQVL_32: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index 6b863456dfa5f..269879e7f1a31 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -1708,14 +1708,13 @@ define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) { define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) { ; SSE2-LABEL: sitofp_8i8_to_8f32: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: psrad $24, %xmm1 -; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: psrad $24, %xmm0 -; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] +; SSE2-NEXT: psrad $24, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_8i8_to_8f32: @@ -1756,14 +1755,13 @@ define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) { define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) { ; SSE2-LABEL: sitofp_16i8_to_8f32: ; SSE2: # %bb.0: -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: psrad $24, %xmm1 -; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: psrad $24, %xmm0 -; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] +; SSE2-NEXT: psrad $24, %xmm1 +; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_16i8_to_8f32: @@ -4458,8 +4456,8 @@ define <8 x float> @sitofp_load_8i16_to_8f32(<8 x i16> *%a) { define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) { ; SSE2-LABEL: sitofp_load_8i8_to_8f32: ; SSE2: # %bb.0: -; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: movq {{.*#+}} 
xmm0 = mem[0],zero +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: psrad $24, %xmm0 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll index 88b0f0b467948..927f51e9b1b1e 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll @@ -1587,40 +1587,60 @@ define i8 @test_v4i8(<4 x i8> %a0) { ; ; SSE41-LABEL: test_v4i8: ; SSE41: # %bb.0: -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,xmm1[6],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero +; SSE41-NEXT: pmullw %xmm1, %xmm0 +; SSE41-NEXT: pextrb $0, %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4i8: ; AVX: # %bb.0: -; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX-NEXT: vpunpcklbw {{.*#+}} 
xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; AVX-NEXT: vpmullw %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero ; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpextrb $0, %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; -; AVX512-LABEL: test_v4i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; AVX512-NEXT: vpmullw %xmm0, %xmm1, %xmm0 -; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero -; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax -; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: retq +; AVX512BW-LABEL: test_v4i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero +; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: test_v4i8: +; AVX512VL: # 
%bb.0: +; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] +; AVX512VL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero +; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: test_v4i8: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero +; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax +; AVX512DQ-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> %a0) ret i8 %1 } @@ -1645,28 +1665,25 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; ; SSE41-LABEL: test_v8i8: ; SSE41: # %bb.0: -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; 
SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,xmm1[6],zero,xmm1[10],zero,xmm1[14],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero +; SSE41-NEXT: pmullw %xmm1, %xmm0 +; SSE41-NEXT: pextrb $0, %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8i8: ; AVX: # %bb.0: -; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; AVX-NEXT: vpmullw %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpshufb {{.*#+}} xmm1 = 
xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero @@ -1677,10 +1694,9 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; ; AVX512BW-LABEL: test_v8i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512BW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; AVX512BW-NEXT: vpmullw %xmm0, %xmm1, %xmm0 +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero @@ -1691,10 +1707,10 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; ; AVX512VL-LABEL: test_v8i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; AVX512VL-NEXT: vpmullw %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX512VL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3] ; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = 
xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero @@ -1705,10 +1721,9 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; ; AVX512DQ-LABEL: test_v8i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; AVX512DQ-NEXT: vpmullw %xmm0, %xmm1, %xmm0 +; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero @@ -1755,21 +1770,19 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmullw %xmm1, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] ; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: packuswb %xmm2, %xmm0 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; SSE41-NEXT: pmullw %xmm3, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: packuswb %xmm3, %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: 
pmullw %xmm2, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] ; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm2, %xmm0 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE41-NEXT: pmullw %xmm3, %xmm0 +; SSE41-NEXT: packuswb %xmm3, %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: pmullw %xmm2, %xmm0 ; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm3, %xmm0 ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pmullw %xmm0, %xmm1 @@ -1782,9 +1795,9 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,8,10,10,12,12,14,14,0,0,2,2,4,4,6,6] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,4,6,6,4,4,6,6,8,8,10,10,12,12,14,14] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 @@ -1948,30 +1961,31 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; SSE41-NEXT: pmullw %xmm1, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: pand %xmm1, %xmm4 ; SSE41-NEXT: pmullw %xmm2, %xmm3 -; SSE41-NEXT: pshufb {{.*#+}} xmm3 = xmm3[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: pmullw %xmm1, %xmm3 -; SSE41-NEXT: pmullw %xmm0, %xmm3 -; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: packuswb %xmm0, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] -; SSE41-NEXT: pmullw %xmm1, %xmm3 -; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: packuswb %xmm0, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3] -; SSE41-NEXT: pmullw %xmm1, %xmm3 -; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: packuswb %xmm0, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pand %xmm1, %xmm3 +; SSE41-NEXT: packuswb %xmm4, %xmm3 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero +; SSE41-NEXT: pmullw %xmm0, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] +; SSE41-NEXT: pand %xmm1, %xmm2 
+; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: packuswb %xmm3, %xmm2 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; SSE41-NEXT: pmullw %xmm0, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; SSE41-NEXT: pand %xmm1, %xmm2 +; SSE41-NEXT: packuswb %xmm3, %xmm2 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; SSE41-NEXT: pmullw %xmm0, %xmm2 +; SSE41-NEXT: pand %xmm1, %xmm2 +; SSE41-NEXT: packuswb %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pmullw %xmm3, %xmm0 +; SSE41-NEXT: pmullw %xmm2, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1986,9 +2000,9 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,8,10,10,12,12,14,14,0,0,2,2,4,4,6,6] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,4,6,6,4,4,6,6,8,8,10,10,12,12,14,14] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -2204,52 +2218,48 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; ; SSE41-LABEL: test_v64i8: ; SSE41: # %bb.0: -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = 
xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE41-NEXT: pmullw %xmm2, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm0, %xmm6 -; SSE41-NEXT: pand %xmm2, %xmm6 -; SSE41-NEXT: pmullw %xmm4, %xmm5 -; SSE41-NEXT: pand %xmm2, %xmm5 -; SSE41-NEXT: packuswb %xmm6, %xmm5 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm6 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] ; SSE41-NEXT: pmullw %xmm3, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; SSE41-NEXT: pmullw %xmm1, %xmm2 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; SSE41-NEXT: pmullw %xmm2, %xmm0 +; SSE41-NEXT: pmullw %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm2, %xmm1 +; SSE41-NEXT: pmullw %xmm4, %xmm5 +; SSE41-NEXT: pshufb %xmm2, %xmm5 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: movdqa %xmm0, %xmm3 ; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: pmullw %xmm4, %xmm6 -; SSE41-NEXT: pand %xmm2, %xmm6 -; SSE41-NEXT: packuswb %xmm3, %xmm6 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero -; SSE41-NEXT: pmullw %xmm3, %xmm4 -; SSE41-NEXT: pshufb {{.*#+}} xmm4 = xmm4[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero -; SSE41-NEXT: pmullw %xmm1, %xmm3 -; SSE41-NEXT: pmullw %xmm0, %xmm3 -; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: packuswb %xmm0, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] -; SSE41-NEXT: pmullw %xmm1, %xmm3 -; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: packuswb %xmm0, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3] -; SSE41-NEXT: pmullw %xmm1, %xmm3 -; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: packuswb %xmm0, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmullw %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: packuswb %xmm3, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pmullw %xmm3, %xmm0 +; SSE41-NEXT: pmullw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -2273,9 +2283,9 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; AVX1-NEXT: vpmullw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,8,10,10,12,12,14,14,0,0,2,2,4,4,6,6] +; AVX1-NEXT: vpshufd 
{{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,4,6,6,4,4,6,6,8,8,10,10,12,12,14,14] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -2555,78 +2565,82 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; ; SSE41-LABEL: test_v128i8: ; SSE41: # %bb.0: -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm8 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm9 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15] -; SSE41-NEXT: pmullw %xmm7, %xmm3 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm10 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] -; SSE41-NEXT: pmullw %xmm3, %xmm5 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm8 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] ; SSE41-NEXT: pmullw %xmm5, %xmm1 -; 
SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm7 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE41-NEXT: pmullw %xmm6, %xmm2 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm6 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero +; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pmullw %xmm10, %xmm8 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm10 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero +; SSE41-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15] +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero +; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15] +; SSE41-NEXT: pmullw %xmm7, %xmm3 +; SSE41-NEXT: movdqa %xmm3, %xmm7 +; SSE41-NEXT: pmullw %xmm1, %xmm3 +; SSE41-NEXT: pand %xmm9, %xmm1 +; SSE41-NEXT: pand %xmm9, %xmm8 +; SSE41-NEXT: packuswb %xmm1, %xmm8 +; SSE41-NEXT: pand %xmm9, %xmm7 +; SSE41-NEXT: pmullw %xmm10, %xmm5 +; SSE41-NEXT: pand %xmm9, %xmm5 +; SSE41-NEXT: packuswb %xmm7, %xmm5 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm10 = 
xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15] -; SSE41-NEXT: pmullw %xmm2, %xmm4 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm7 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmullw %xmm4, %xmm0 -; SSE41-NEXT: pmullw %xmm10, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> -; SSE41-NEXT: pshufb %xmm4, %xmm3 -; SSE41-NEXT: pmullw %xmm8, %xmm9 -; SSE41-NEXT: pshufb %xmm4, %xmm9 -; SSE41-NEXT: pmullw %xmm6, %xmm2 -; SSE41-NEXT: pshufb %xmm4, %xmm2 -; SSE41-NEXT: pmullw %xmm5, %xmm7 -; SSE41-NEXT: pshufb %xmm4, %xmm7 -; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm0, %xmm6 -; SSE41-NEXT: pand %xmm5, %xmm6 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: pmullw %xmm7, %xmm2 -; SSE41-NEXT: pand %xmm5, %xmm2 -; SSE41-NEXT: packuswb %xmm6, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm6 -; SSE41-NEXT: pand %xmm5, %xmm6 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm7 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero,xmm9[4],zero,xmm9[5],zero,xmm9[6],zero,xmm9[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: pmullw 
%xmm7, %xmm3 -; SSE41-NEXT: pand %xmm5, %xmm3 -; SSE41-NEXT: packuswb %xmm6, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: pmullw %xmm3, %xmm2 -; SSE41-NEXT: pshufb %xmm4, %xmm2 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: pmullw %xmm1, %xmm2 -; SSE41-NEXT: pmullw %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm5, %xmm2 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: packuswb %xmm0, %xmm2 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] -; SSE41-NEXT: pmullw %xmm1, %xmm2 -; SSE41-NEXT: pand %xmm5, %xmm2 -; SSE41-NEXT: packuswb %xmm0, %xmm2 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero +; SSE41-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15] ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3] -; SSE41-NEXT: pmullw %xmm1, %xmm2 -; SSE41-NEXT: pand %xmm5, %xmm2 -; SSE41-NEXT: packuswb %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pmullw %xmm2, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: 
punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; SSE41-NEXT: pmullw %xmm6, %xmm2 +; SSE41-NEXT: pmullw %xmm2, %xmm3 +; SSE41-NEXT: pmullw %xmm0, %xmm3 +; SSE41-NEXT: pand %xmm9, %xmm0 +; SSE41-NEXT: pmullw %xmm10, %xmm7 +; SSE41-NEXT: pand %xmm9, %xmm7 +; SSE41-NEXT: packuswb %xmm0, %xmm7 +; SSE41-NEXT: pand %xmm9, %xmm2 +; SSE41-NEXT: pmullw %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm9, %xmm1 +; SSE41-NEXT: packuswb %xmm2, %xmm1 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm0, %xmm1 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero,xmm8[4],zero,xmm8[5],zero,xmm8[6],zero,xmm8[7],zero +; SSE41-NEXT: pmullw %xmm2, %xmm4 +; SSE41-NEXT: pshufb %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pand %xmm9, %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmullw %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm9, %xmm1 +; SSE41-NEXT: packuswb %xmm0, %xmm1 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmullw %xmm3, %xmm0 +; SSE41-NEXT: 
pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE41-NEXT: pand %xmm9, %xmm0 +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: pmullw %xmm1, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE41-NEXT: pand %xmm9, %xmm0 +; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: pmullw %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm9, %xmm0 +; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: pextrb $0, %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2667,9 +2681,9 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpmullw %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,8,10,10,12,12,14,14,0,0,2,2,4,4,6,6] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,4,6,6,4,4,6,6,8,8,10,10,12,12,14,14] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index 81071907584dc..48ff42319a4f7 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -267,22 +267,20 @@ entry: define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: sext_16i8_to_8i32: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; 
SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: psrad $24, %xmm2 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: psrad $24, %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] ; SSE2-NEXT: psrad $24, %xmm1 -; SSE2-NEXT: movdqa %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: sext_16i8_to_8i32: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: psrad $24, %xmm2 -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: psrad $24, %xmm0 +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] ; SSSE3-NEXT: psrad $24, %xmm1 -; SSSE3-NEXT: movdqa %xmm2, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: sext_16i8_to_8i32: @@ -313,12 +311,11 @@ define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp ; ; X32-SSE2-LABEL: sext_16i8_to_8i32: ; X32-SSE2: # %bb.0: # %entry -; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; X32-SSE2-NEXT: psrad $24, %xmm2 -; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = 
xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X32-SSE2-NEXT: psrad $24, %xmm0 +; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] ; X32-SSE2-NEXT: psrad $24, %xmm1 -; X32-SSE2-NEXT: movdqa %xmm2, %xmm0 ; X32-SSE2-NEXT: retl ; ; X32-SSE41-LABEL: sext_16i8_to_8i32: @@ -337,7 +334,6 @@ entry: define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: sext_16i8_to_16i32: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] ; SSE2-NEXT: psrad $24, %xmm4 @@ -353,7 +349,6 @@ define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ss ; ; SSSE3-LABEL: sext_16i8_to_16i32: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] ; SSSE3-NEXT: psrad $24, %xmm4 @@ -408,7 +403,6 @@ define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ss ; ; X32-SSE2-LABEL: sext_16i8_to_16i32: ; X32-SSE2: # %bb.0: # %entry -; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 ; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; X32-SSE2-NEXT: punpcklwd 
{{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] ; X32-SSE2-NEXT: psrad $24, %xmm4 @@ -568,42 +562,40 @@ entry: define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: sext_16i8_to_8i64: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSE2-NEXT: psrad $24, %xmm1 -; SSE2-NEXT: pxor %xmm5, %xmm5 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] -; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE2-NEXT: pxor %xmm4, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] ; SSE2-NEXT: psrad $24, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 ; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] -; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] -; SSE2-NEXT: movdqa %xmm4, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] ; SSE2-NEXT: 
retq ; ; SSSE3-LABEL: sext_16i8_to_8i64: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSSE3-NEXT: psrad $24, %xmm1 -; SSSE3-NEXT: pxor %xmm5, %xmm5 -; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 -; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSSE3-NEXT: pxor %xmm4, %xmm4 +; SSSE3-NEXT: pxor %xmm3, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] ; SSSE3-NEXT: psrad $24, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 ; SSSE3-NEXT: movdqa %xmm3, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] -; SSSE3-NEXT: movdqa %xmm4, %xmm0 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: sext_16i8_to_8i64: @@ -648,22 +640,21 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp ; ; X32-SSE2-LABEL: sext_16i8_to_8i64: ; 
X32-SSE2: # %bb.0: # %entry -; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; X32-SSE2-NEXT: psrad $24, %xmm1 -; X32-SSE2-NEXT: pxor %xmm5, %xmm5 -; X32-SSE2-NEXT: pxor %xmm2, %xmm2 -; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 -; X32-SSE2-NEXT: movdqa %xmm1, %xmm4 -; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] -; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE2-NEXT: pxor %xmm4, %xmm4 +; X32-SSE2-NEXT: pxor %xmm3, %xmm3 +; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 +; X32-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] ; X32-SSE2-NEXT: psrad $24, %xmm3 -; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 +; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 ; X32-SSE2-NEXT: movdqa %xmm3, %xmm2 -; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] -; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] -; X32-SSE2-NEXT: movdqa %xmm4, %xmm0 +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] ; X32-SSE2-NEXT: retl ; ; X32-SSE41-LABEL: sext_16i8_to_8i64: @@ -2134,8 +2125,8 @@ entry: define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) { ; 
SSE2-LABEL: load_sext_8i8_to_8i64: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSE2-NEXT: psrad $24, %xmm1 ; SSE2-NEXT: pxor %xmm4, %xmm4 @@ -2154,8 +2145,8 @@ define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) { ; ; SSSE3-LABEL: load_sext_8i8_to_8i64: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSSE3-NEXT: psrad $24, %xmm1 ; SSSE3-NEXT: pxor %xmm4, %xmm4 @@ -2204,8 +2195,8 @@ define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) { ; X32-SSE2-LABEL: load_sext_8i8_to_8i64: ; X32-SSE2: # %bb.0: # %entry ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; X32-SSE2-NEXT: psrad $24, %xmm1 ; X32-SSE2-NEXT: pxor %xmm4, %xmm4 @@ -2303,8 +2294,8 @@ entry: define <8 x i32> 
@load_sext_8i8_to_8i32(<8 x i8> *%ptr) { ; SSE2-LABEL: load_sext_8i8_to_8i32: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: psrad $24, %xmm0 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] @@ -2313,8 +2304,8 @@ define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) { ; ; SSSE3-LABEL: load_sext_8i8_to_8i32: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSSE3-NEXT: psrad $24, %xmm0 ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] @@ -2347,8 +2338,8 @@ define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) { ; X32-SSE2-LABEL: load_sext_8i8_to_8i32: ; X32-SSE2: # %bb.0: # %entry ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; 
X32-SSE2-NEXT: psrad $24, %xmm0 ; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 8dd211b5566c5..31c71354ee472 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -3106,16 +3106,14 @@ define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) { ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,8,10,10,12,12,14,14,9,9,11,11,13,13,15,15] -; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] -; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[8,u,10,u,12,u,14,u,9,u,11,u,13,u,15,u] +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[u,8,u,10,u,12,u,14,u,9,u,11,u,13,u,15] ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] -; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,2,2,4,4,6,6,1,1,3,3,5,5,7,7] -; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,u,2,u,4,u,6,u,1,u,3,u,5,u,7,u] +; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,0,u,2,u,4,u,6,u,1,u,3,u,5,u,7] ; AVX1-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vinsertf128 
$1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq diff --git a/llvm/test/CodeGen/X86/widen_conv-3.ll b/llvm/test/CodeGen/X86/widen_conv-3.ll index 85f7c37b99688..21e80c99dcfa9 100644 --- a/llvm/test/CodeGen/X86/widen_conv-3.ll +++ b/llvm/test/CodeGen/X86/widen_conv-3.ll @@ -60,8 +60,8 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr) ; X86-SSE2-NEXT: pslld $16, %xmm2 ; X86-SSE2-NEXT: pandn %xmm2, %xmm1 ; X86-SSE2-NEXT: por %xmm0, %xmm1 -; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X86-SSE2-NEXT: psrad $24, %xmm0 ; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; X86-SSE2-NEXT: movss %xmm0, (%eax) @@ -97,8 +97,8 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr) ; X64-SSE2-NEXT: pslld $16, %xmm2 ; X64-SSE2-NEXT: pandn %xmm2, %xmm1 ; X64-SSE2-NEXT: por %xmm0, %xmm1 -; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X64-SSE2-NEXT: psrad $24, %xmm0 ; X64-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; X64-SSE2-NEXT: movlps %xmm0, (%rdi) diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-reg-copy.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-reg-copy.mir index ba748b989c435..3abde776904ec 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-reg-copy.mir +++ 
b/llvm/test/DebugInfo/MIR/X86/live-debug-values-reg-copy.mir @@ -1,4 +1,4 @@ -# RUN: llc -run-pass=livedebugvalues %s -o - | FileCheck %s +# RUN: llc -start-before=prologepilog -stop-after=livedebugvalues %s -o - | FileCheck %s # # This test tests tracking variables value transferring from one register to another. # This example is altered additionally in order to test transferring from one float register diff --git a/llvm/test/Object/invalid.test b/llvm/test/Object/invalid.test index cbf770afbdc64..56372bf37c246 100644 --- a/llvm/test/Object/invalid.test +++ b/llvm/test/Object/invalid.test @@ -226,6 +226,7 @@ Sections: Type: SHT_SYMTAB_SHNDX Entries: [ 0, 1 ] Link: .symtab +Symbols: [] ## Check that llvm-readobj reports an error if the e_phentsize field is broken. @@ -274,8 +275,8 @@ Sections: Type: SHT_RELA ShOffset: 0x10000 -## Check that llvm-objdump reports an error when .shstrtab has a broken sh_offset -## so large that sh_offset + sh_size overflows the platform address size type. +## Check that llvm-objdump reports an error when we try to print symbols and +## .shstrtab has a broken sh_offset so large that sh_offset + sh_size overflows the platform address size type. # RUN: yaml2obj %s --docnum=14 -o %t14 # RUN: not llvm-readobj --symbols %t14 2>&1 | FileCheck -DFILE=%t14 --check-prefix=INVALID-SECTION-SIZE2 %s @@ -292,6 +293,7 @@ Sections: - Name: .shstrtab Type: SHT_STRTAB ShOffset: 0xFFFFFFFF +Symbols: [] ## Check that llvm-readobj reports an error when trying to dump sections ## when the e_shnum field is broken (is greater than the actual number of sections). 
@@ -565,7 +567,7 @@ Sections: # RUN: yaml2obj --docnum=26 %s -o %t26 # RUN: not llvm-readobj -h %t26 2>&1 | FileCheck -DFILE=%t26 --check-prefix=INVALID-SEC-NUM1 %s -# INVALID-SEC-NUM1: error: '[[FILE]]': invalid section header table offset (e_shoff = 0x78) or invalid number of sections specified in the first section header's sh_size field (0x3ffffffffffffff) +# INVALID-SEC-NUM1: error: '[[FILE]]': invalid section header table offset (e_shoff = 0x58) or invalid number of sections specified in the first section header's sh_size field (0x3ffffffffffffff) --- !ELF FileHeader: diff --git a/llvm/test/Object/objdump-sectionheaders.test b/llvm/test/Object/objdump-sectionheaders.test index 434d73e77ad4a..fba4fa799805c 100644 --- a/llvm/test/Object/objdump-sectionheaders.test +++ b/llvm/test/Object/objdump-sectionheaders.test @@ -37,6 +37,5 @@ Sections: - Name: .rela.text Type: SHT_RELA Address: 0x0000000000000038 - Link: .symtab Info: .text Relocations: diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll b/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll index 786c1b5c8809a..288d114cb3704 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll @@ -7,7 +7,7 @@ define i8 @test0(i8 %a) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[A:%.*]], 3 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[A]], 50 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i8 [[A]], 50 ; CHECK-NEXT: ret i8 [[MUL]] ; CHECK: exit: ; CHECK-NEXT: ret i8 0 @@ -30,7 +30,7 @@ define i8 @test1(i8 %a) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[A:%.*]], 4 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[A]], 50 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i8 [[A]], 50 ; CHECK-NEXT: ret i8 [[MUL]] ; CHECK: exit: ; CHECK-NEXT: ret i8 0 @@ -53,7 +53,7 @@ define i8 @test2(i8 %a) { ; CHECK-NEXT: [[CMP:%.*]] 
= icmp ult i8 [[A:%.*]], 6 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[A]], 50 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i8 [[A]], 50 ; CHECK-NEXT: ret i8 [[MUL]] ; CHECK: exit: ; CHECK-NEXT: ret i8 0 @@ -101,7 +101,7 @@ define i8 @test4(i8 %a) { ; CHECK-NEXT: [[COND:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[COND]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[A]], 50 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i8 [[A]], 50 ; CHECK-NEXT: ret i8 [[MUL]] ; CHECK: exit: ; CHECK-NEXT: ret i8 0 diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll new file mode 100644 index 0000000000000..0a3cd1c7401ca --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll @@ -0,0 +1,15 @@ +; RUN: opt -S -early-cse < %s | FileCheck %s + +@var = global i32 undef +declare void @foo() nounwind + +define void @test() { +; CHECK-LABEL: @test( +; CHECK-NOT: store + store i32 1, i32* @var +; CHECK: call void @foo() + call void @foo() writeonly +; CHECK: store i32 2, i32* @var + store i32 2, i32* @var + ret void +} diff --git a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll index 18050dba201fb..7afaab6637e42 100644 --- a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll @@ -405,35 +405,33 @@ entry: ; ; Make sure the returned flag on %r is strong enough to justify nocapture on %b but **not** on %r. 
; -; FIXME: The "returned" information is not propagated to the fullest extend causing us to miss "nocapture" on %b in the following: -; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either1(i32* readonly %b, i32* readonly returned %r) +; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either1(i32* nocapture readonly %b, i32* readonly returned %r) ; -; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either2(i32* readonly %b, i32* readonly returned %r) -; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either3(i32* readonly %b, i32* readonly returned %r) +; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either2(i32* nocapture readonly %b, i32* readonly returned %r) +; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either3(i32* nocapture readonly %b, i32* readonly returned %r) ; -; FIXME: The "nounwind" information is not derived to the fullest extend causing us to miss "nocapture" on %b in the following: -; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either4(i32* readonly %b, i32* readonly returned %r) -define i32* @not_captured_by_readonly_call_not_returned_either1(i32* %b, i32* returned %r) #0 { +; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either4(i32* nocapture readonly %b, i32* readonly returned %r) +define i32* @not_captured_by_readonly_call_not_returned_either1(i32* %b, i32* returned %r) { entry: %call = call i32* @readonly_unknown(i32* %b, i32* %r) nounwind ret i32* %call } declare i32* @readonly_unknown_r1a(i32*, i32* returned) readonly -define i32* @not_captured_by_readonly_call_not_returned_either2(i32* %b, i32* %r) #0 { +define i32* @not_captured_by_readonly_call_not_returned_either2(i32* %b, i32* %r) { entry: %call = call i32* @readonly_unknown_r1a(i32* %b, i32* %r) nounwind ret i32* %call } declare i32* @readonly_unknown_r1b(i32*, i32* returned) readonly nounwind -define i32* 
@not_captured_by_readonly_call_not_returned_either3(i32* %b, i32* %r) #0 { +define i32* @not_captured_by_readonly_call_not_returned_either3(i32* %b, i32* %r) { entry: %call = call i32* @readonly_unknown_r1b(i32* %b, i32* %r) ret i32* %call } -define i32* @not_captured_by_readonly_call_not_returned_either4(i32* %b, i32* %r) #0 { +define i32* @not_captured_by_readonly_call_not_returned_either4(i32* %b, i32* %r) nounwind { entry: %call = call i32* @readonly_unknown_r1a(i32* %b, i32* %r) ret i32* %call diff --git a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll index 3fe960c87e6cd..d927cdf79278f 100644 --- a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll +++ b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll @@ -1,5 +1,5 @@ ; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR -; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR +; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR ; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -functionattrs -S < %s | FileCheck %s --check-prefix=BOTH ; ; Test cases specifically designed for the "returned" argument attribute. diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index e0580849983b8..f8927a60cb14b 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -39,6 +39,20 @@ l1: ret i1 1 ; escaping value not caught by def-use chaining. 
} +; c4b is c4 but without the escaping part +; FNATTR: define i1 @c4b(i32* %q, i32 %bitno) +; ATTRIBUTOR: define i1 @c4b(i32* nocapture readnone %q, i32 %bitno) +define i1 @c4b(i32* %q, i32 %bitno) { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = trunc i32 %tmp2 to i1 + br i1 %bit, label %l1, label %l0 +l0: + ret i1 0 ; not escaping! +l1: + ret i1 0 ; not escaping! +} + @lookup_table = global [2 x i1] [ i1 0, i1 1 ] ; FNATTR: define i1 @c5(i32* %q, i32 %bitno) @@ -331,5 +345,20 @@ entry: ret void } +declare i8* @unknownpi8pi8(i8*,i8* returned) +define i8* @test_returned1(i8* %A, i8* returned %B) nounwind readonly { +; ATTRIBUTOR: define i8* @test_returned1(i8* nocapture readonly %A, i8* readonly returned %B) +entry: + %p = call i8* @unknownpi8pi8(i8* %A, i8* %B) + ret i8* %p +} + +define i8* @test_returned2(i8* %A, i8* %B) { +; ATTRIBUTOR: define i8* @test_returned2(i8* nocapture readonly %A, i8* readonly returned %B) +entry: + %p = call i8* @unknownpi8pi8(i8* %A, i8* %B) nounwind readonly + ret i8* %p +} + declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll index 84d6720032f45..9a7eb114eaeed 100644 --- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -220,7 +220,7 @@ bb: define dso_local noalias i32* @f3(i32* %arg) { ; FIXME: missing nonnull. It should be nonnull @f3(i32* nonnull readonly %arg) -; ATTRIBUTOR: define dso_local noalias i32* @f3(i32* readonly %arg) +; ATTRIBUTOR: define dso_local noalias i32* @f3(i32* nocapture readonly %arg) bb: ; FIXME: missing nonnull. 
It should be @f1(i32* nonnull readonly %arg) ; ATTRIBUTOR: %tmp = call i32* @f1(i32* readonly %arg) diff --git a/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll index 5f9e477679c3a..6fbc54502b150 100644 --- a/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll @@ -1,4 +1,4 @@ -; RUN: opt -functionattrs -enable-nonnull-arg-prop -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=7 -S < %s | FileCheck %s +; RUN: opt -functionattrs -enable-nonnull-arg-prop -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=8 -S < %s | FileCheck %s ; ; This is an evolved example to stress test SCC parameter attribute propagation. ; The SCC in this test is made up of the following six function, three of which diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll index b1c6c4d2ade8c..726f3cd7f2f26 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll @@ -185,5 +185,39 @@ exit: ret void } +define void @mixed_width(i32 %len) { +; CHECK-LABEL: @mixed_width( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN_ZEXT:%.*]] = zext i32 [[LEN:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IV]], [[LEN_ZEXT]] +; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[EXIT:%.*]] +; CHECK: backedge: +; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: br i1 true, label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; 
CHECK-NEXT: ret void +; +entry: + %len.zext = zext i32 %len to i64 + br label %loop +loop: + %iv = phi i64 [0, %entry], [%iv.next, %backedge] + %iv2 = phi i32 [0, %entry], [%iv2.next, %backedge] + %iv.next = add i64 %iv, 1 + %iv2.next = add i32 %iv2, 1 + %cmp1 = icmp ult i64 %iv, %len.zext + br i1 %cmp1, label %backedge, label %exit + +backedge: + call void @side_effect() + %cmp2 = icmp ult i32 %iv2, %len + br i1 %cmp2, label %loop, label %exit +exit: + ret void +} declare void @side_effect() diff --git a/llvm/test/Transforms/IndVarSimplify/loop-predication.ll b/llvm/test/Transforms/IndVarSimplify/loop-predication.ll index 94363706b5b1c..77c18ef23d7df 100644 --- a/llvm/test/Transforms/IndVarSimplify/loop-predication.ll +++ b/llvm/test/Transforms/IndVarSimplify/loop-predication.ll @@ -464,7 +464,6 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] ; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] @@ -478,7 +477,7 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -784,7 +783,7 @@ exit: ; If we have a dominating exit (exit1) which 
can't be itself rewritten, we ; can't rewrite a later exit (exit2). Doing so would cause the loop to exit ; from the exit2 when it should have exited from exit1. -define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) { +define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %length2, i32 %n) { ; CHECK-LABEL: @neg_dominating_exit( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -798,7 +797,7 @@ define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) { ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 [[RESULT]] ; CHECK: guarded: -; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH2:%.*]] ; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0 ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() @@ -830,7 +829,7 @@ deopt: ; preds = %loop ret i32 %result guarded: ; preds = %loop - %within.bounds2 = icmp ult i32 %i, %length + %within.bounds2 = icmp ult i32 %i, %length2 br i1 %within.bounds2, label %guarded2, label %deopt2, !prof !0 deopt2: ; preds = %loop diff --git a/llvm/test/Transforms/IndVarSimplify/pr38674.ll b/llvm/test/Transforms/IndVarSimplify/pr38674.ll index 1c839ffd2acb4..390a68d7cbbe4 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr38674.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr38674.ll @@ -14,10 +14,9 @@ define i32 @test_01() { ; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 1 to i32 ; CHECK-NEXT: br label [[FOR_BODY6:%.*]] ; CHECK: for.cond4: -; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC:%.*]], 2 -; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END:%.*]] +; CHECK-NEXT: br i1 true, label [[FOR_BODY6]], label [[FOR_END:%.*]] ; CHECK: for.body6: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC]], [[FOR_COND4:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC:%.*]], [[FOR_COND4:%.*]] ] ; 
CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]] diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll index db7533a37230e..389ca6010049c 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll @@ -152,11 +152,10 @@ define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32> ret <3 x float> %shuf } -; FIXME: Not handled even though only 2 elts used ; CHECK-LABEL: @extract_elt0_elt1_buffer_load_v4f32_2( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) -; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0 -; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 1 +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) +; CHECK-NEXT: %elt0 = extractelement <2 x float> %data, i32 0 +; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: %ins0 = insertvalue { float, float } undef, float %elt0, 0 ; CHECK-NEXT: %ins1 = insertvalue { float, float } %ins0, float %elt1, 1 ; CHECK-NEXT: ret { float, float } %ins1 @@ -169,6 +168,74 @@ define amdgpu_ps { float, float } @extract_elt0_elt1_buffer_load_v4f32_2(<4 x i3 ret { float, float } %ins1 } +; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_2( +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) +; CHECK-NEXT: %elt0 = extractelement <3 x float> %data, i32 0 +; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 1 +; CHECK-NEXT: %elt2 = extractelement <3 x float> %data, i32 2 +; CHECK-NEXT: %ins0 = 
insertvalue { float, float, float } undef, float %elt0, 0 +; CHECK-NEXT: %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1 +; CHECK-NEXT: %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2 +; CHECK-NEXT: ret { float, float, float } %ins2 +define amdgpu_ps { float, float, float } @extract_elt0_elt1_elt2_buffer_load_v4f32_2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { + %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + %elt1 = extractelement <4 x float> %data, i32 1 + %elt2 = extractelement <4 x float> %data, i32 2 + %ins0 = insertvalue { float, float, float } undef, float %elt0, 0 + %ins1 = insertvalue { float, float, float } %ins0, float %elt1, 1 + %ins2 = insertvalue { float, float, float } %ins1, float %elt2, 2 + ret { float, float, float } %ins2 +} + +; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3( +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) +; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf +define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { + %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + %elt2 = extractelement <4 x float> %data, i32 2 + %ins0 = insertelement <2 x float> undef, float %elt0, i32 0 + %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1 + %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %ret = fadd <2 x float> %ins1, %shuf + ret <2 x float> %ret +} + +; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4( +; CHECK-NEXT: 
%data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) +; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf +; CHECK-NEXT: ret <2 x float> %ret +define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { + %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) + %elt0 = extractelement <4 x float> %data, i32 0 + %elt2 = extractelement <4 x float> %data, i32 2 + %ins0 = insertelement <2 x float> undef, float %elt0, i32 0 + %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1 + %shuf = shufflevector <4 x float> undef, <4 x float> %data, <2 x i32> + %ret = fadd <2 x float> %ins1, %shuf + ret <2 x float> %ret +} + +; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_5( +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) +; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf +define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_5(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { + %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) + %elt2 = extractelement <4 x float> %data, i32 2 + %ins0 = insertelement <2 x float> undef, float %elt2, i32 0 + %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1 + %shuf = shufflevector <4 x float> %data, <4 x float> %data, <2 x i32> + %ret = fadd <2 x float> %ins1, %shuf + ret <2 x float> %ret +} + ; CHECK-LABEL: @extract_elt0_buffer_load_v3f32( ; CHECK-NEXT: %data 
= call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) ; CHECK-NEXT: ret float %data diff --git a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll index 027755641e87e..cb4d38d6641ea 100644 --- a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll +++ b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll @@ -45,6 +45,37 @@ define i32 @t0_notrunc_add(i32 %data, i32 %nbits) { ret i32 %signextended } +define i32 @t0_notrunc_or(i32 %data, i32 %nbits) { +; CHECK-LABEL: @t0_notrunc_or( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 
%all_bits_except_low_nbits) + call void @use32(i32 %magic) + + %signextended = or i32 %high_bits_extracted, %magic + ret i32 %signextended +} + define i32 @t1_notrunc_sub(i32 %data, i32 %nbits) { ; CHECK-LABEL: @t1_notrunc_sub( ; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] @@ -113,6 +144,43 @@ define i32 @t2_trunc_add(i64 %data, i32 %nbits) { ret i32 %signextended } +define i32 @t2_trunc_or(i64 %data, i32 %nbits) { +; CHECK-LABEL: @t2_trunc_or( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] +; CHECK-NEXT: [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64 +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32 +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]] +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]]) +; CHECK-NEXT: call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[DATA]], [[LOW_BITS_TO_SKIP_WIDE]] +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 64, %nbits + %low_bits_to_skip_wide = zext i32 %low_bits_to_skip to i64 + %high_bits_extracted_wide = lshr i64 %data, %low_bits_to_skip_wide + %high_bits_extracted = trunc i64 %high_bits_extracted_wide to i32 + %should_signext = icmp slt i64 %data, 0 + %all_bits_except_low_nbits = shl i32 -1, %nbits + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 ; one-use + + call void @use32(i32 %low_bits_to_skip) + call void @use64(i64 
%low_bits_to_skip_wide) + call void @use64(i64 %high_bits_extracted_wide) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + + %signextended = or i32 %magic, %high_bits_extracted + ret i32 %signextended +} + define i32 @t3_trunc_sub(i64 %data, i32 %nbits) { ; CHECK-LABEL: @t3_trunc_sub( ; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]] @@ -1038,3 +1106,34 @@ define i32 @n28_sub_sext_of_magic(i32 %data, i8 %nbits) { %signextended = sub i32 %high_bits_extracted, %magic_wide ret i32 %signextended } + +define i32 @n290_or_with_wrong_magic(i32 %data, i32 %nbits) { +; CHECK-LABEL: @n290_or_with_wrong_magic( +; CHECK-NEXT: [[LOW_BITS_TO_SKIP:%.*]] = sub i32 32, [[NBITS:%.*]] +; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i32 [[DATA:%.*]], [[LOW_BITS_TO_SKIP]] +; CHECK-NEXT: [[SHOULD_SIGNEXT:%.*]] = icmp slt i32 [[DATA]], 0 +; CHECK-NEXT: [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 1, [[NBITS]] +; CHECK-NEXT: [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0 +; CHECK-NEXT: call void @use32(i32 [[LOW_BITS_TO_SKIP]]) +; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) +; CHECK-NEXT: call void @use1(i1 [[SHOULD_SIGNEXT]]) +; CHECK-NEXT: call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]]) +; CHECK-NEXT: call void @use32(i32 [[MAGIC]]) +; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = or i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]] +; CHECK-NEXT: ret i32 [[SIGNEXTENDED]] +; + %low_bits_to_skip = sub i32 32, %nbits + %high_bits_extracted = lshr i32 %data, %low_bits_to_skip + %should_signext = icmp slt i32 %data, 0 + %all_bits_except_low_nbits = shl i32 1, %nbits ; not -1 + %magic = select i1 %should_signext, i32 %all_bits_except_low_nbits, i32 0 + + call void @use32(i32 %low_bits_to_skip) + call void @use32(i32 %high_bits_extracted) + call void @use1(i1 %should_signext) + call void @use32(i32 %all_bits_except_low_nbits) + call void @use32(i32 
%magic) + + %signextended = or i32 %high_bits_extracted, %magic + ret i32 %signextended +} diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index d53062aa57760..adc9381631a63 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll +++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -1069,3 +1069,53 @@ define <2 x double> @negate_if_true_wrong_constant(<2 x double> %px, i1 %cond) { %r = fmul <2 x double> %x, %sel ret <2 x double> %r } + +; X *fast (C ? 1.0 : 0.0) -> C ? X : 0.0 +define float @fmul_select(float %x, i1 %c) { +; CHECK-LABEL: @fmul_select( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], float 1.000000e+00, float 0.000000e+00 +; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[SEL]], [[X:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sel = select i1 %c, float 1.0, float 0.0 + %mul = fmul fast float %sel, %x + ret float %mul +} + +; X *fast (C ? 1.0 : 0.0) -> C ? X : 0.0 +define <2 x float> @fmul_select_vec(<2 x float> %x, i1 %c) { +; CHECK-LABEL: @fmul_select_vec( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], <2 x float> , <2 x float> zeroinitializer +; CHECK-NEXT: [[MUL:%.*]] = fmul fast <2 x float> [[SEL]], [[X:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sel = select i1 %c, <2 x float> , <2 x float> zeroinitializer + %mul = fmul fast <2 x float> %sel, %x + ret <2 x float> %mul +} + +; Without fast math flags we can't optimize X * (C ? 1.0 : 0.0) -> C ? X : 0.0 +define float @fmul_select_strict(float %x, i1 %c) { +; CHECK-LABEL: @fmul_select_strict( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], float 1.000000e+00, float 0.000000e+00 +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SEL]], [[X:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sel = select i1 %c, float 1.0, float 0.0 + %mul = fmul float %sel, %x + ret float %mul +} + +; sqrt(X) *fast (C ? sqrt(X) : 1.0) -> C ? 
X : sqrt(X) +define double @fmul_sqrt_select(double %x, i1 %c) { +; CHECK-LABEL: @fmul_sqrt_select( +; CHECK-NEXT: [[SQR:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], double [[SQR]], double 1.000000e+00 +; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[SQR]], [[SEL]] +; CHECK-NEXT: ret double [[MUL]] +; + %sqr = call double @llvm.sqrt.f64(double %x) + %sel = select i1 %c, double %sqr, double 1.0 + %mul = fmul fast double %sqr, %sel + ret double %mul +} diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index f106026762bab..5064bbae71d8d 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -595,3 +595,17 @@ define <2 x i8> @negate_if_true_wrong_constant(<2 x i8> %px, i1 %cond) { %r = mul <2 x i8> %x, %sel ret <2 x i8> %r } + +; (C ? (X /exact Y) : 1) * Y -> C ? X : Y +define i32 @mul_div_select(i32 %x, i32 %y, i1 %c) { +; CHECK-LABEL: @mul_div_select( +; CHECK-NEXT: [[DIV:%.*]] = udiv exact i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 [[DIV]], i32 1 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[SEL]], [[Y]] +; CHECK-NEXT: ret i32 [[MUL]] +; + %div = udiv exact i32 %x, %y + %sel = select i1 %c, i32 %div, i32 1 + %mul = mul i32 %sel, %y + ret i32 %mul +} diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index 364c80d205f2d..f5f719864b095 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1156,6 +1156,48 @@ define i8 @test_scalar_uadd_sub_const(i8 %a) { ret i8 %res } +define i1 @scalar_uadd_eq_zero(i8 %a, i8 %b) { +; CHECK-LABEL: @scalar_uadd_eq_zero( +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %sat = call i8 @llvm.uadd.sat.i8(i8 %a, i8 %b) + 
%cmp = icmp eq i8 %sat, 0 + ret i1 %cmp +} + +define i1 @scalar_uadd_ne_zero(i8 %a, i8 %b) { +; CHECK-LABEL: @scalar_uadd_ne_zero( +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[TMP2]] +; + %sat = call i8 @llvm.uadd.sat.i8(i8 %a, i8 %b) + %cmp = icmp ne i8 %sat, 0 + ret i1 %cmp +} + +define i1 @scalar_usub_eq_zero(i8 %a, i8 %b) { +; CHECK-LABEL: @scalar_usub_eq_zero( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %sat = call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) + %cmp = icmp eq i8 %sat, 0 + ret i1 %cmp +} + +define i1 @scalar_usub_ne_zero(i8 %a, i8 %b) { +; CHECK-LABEL: @scalar_usub_ne_zero( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %sat = call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) + %cmp = icmp ne i8 %sat, 0 + ret i1 %cmp +} + ; Raw IR tests define i32 @uadd_sat(i32 %x, i32 %y) { diff --git a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll index 0e5848e7303b5..8e89a0649ebce 100644 --- a/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll +++ b/llvm/test/Transforms/InstCombine/sign-bit-test-via-right-shifting-all-other-bits.ll @@ -45,7 +45,7 @@ define i1 @highest_bit_test_via_lshr_with_truncation(i64 %data, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED_NARROW]]) ; CHECK-NEXT: call void @use32(i32 [[SKIP_ALL_BITS_TILL_SIGNBIT]]) ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[DATA]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %num_low_bits_to_skip = sub i32 64, %nbits @@ -107,7 +107,7 @@ define i1 @highest_bit_test_via_ashr_with_truncation(i64 %data, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 
[[HIGH_BITS_EXTRACTED_NARROW]]) ; CHECK-NEXT: call void @use32(i32 [[SKIP_ALL_BITS_TILL_SIGNBIT]]) ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[DATA]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %num_low_bits_to_skip = sub i32 64, %nbits @@ -138,7 +138,7 @@ define i1 @highest_bit_test_via_lshr_ashr(i32 %data, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) ; CHECK-NEXT: call void @use32(i32 [[SKIP_ALL_BITS_TILL_SIGNBIT]]) ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[DATA]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %num_low_bits_to_skip = sub i32 32, %nbits @@ -169,7 +169,7 @@ define i1 @highest_bit_test_via_lshr_ashe_with_truncation(i64 %data, i32 %nbits) ; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED_NARROW]]) ; CHECK-NEXT: call void @use32(i32 [[SKIP_ALL_BITS_TILL_SIGNBIT]]) ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[DATA]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %num_low_bits_to_skip = sub i32 64, %nbits @@ -200,7 +200,7 @@ define i1 @highest_bit_test_via_ashr_lshr(i32 %data, i32 %nbits) { ; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED]]) ; CHECK-NEXT: call void @use32(i32 [[SKIP_ALL_BITS_TILL_SIGNBIT]]) ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) -; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[DATA]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %num_low_bits_to_skip = sub i32 32, %nbits @@ -231,7 +231,7 @@ define i1 @highest_bit_test_via_ashr_lshr_with_truncation(i64 %data, i32 %nbits) ; CHECK-NEXT: call void @use32(i32 [[HIGH_BITS_EXTRACTED_NARROW]]) ; CHECK-NEXT: call void @use32(i32 [[SKIP_ALL_BITS_TILL_SIGNBIT]]) ; CHECK-NEXT: call void @use32(i32 [[SIGNBIT]]) -; 
CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[DATA]], 0 ; CHECK-NEXT: ret i1 [[ISNEG]] ; %num_low_bits_to_skip = sub i32 64, %nbits diff --git a/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll b/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll new file mode 100644 index 0000000000000..16d107730acd1 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll @@ -0,0 +1,30 @@ +; RUN: opt -memcpyopt -S -o - < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +%my_struct = type { i8, i32 } + +; Function Attrs: inaccessiblemem_or_argmemonly +declare noalias i8* @my_malloc(%my_struct*) #0 + +define void @my_func(%my_struct* %0) { +entry: +; CHECK: entry: + %1 = load %my_struct, %my_struct* %0 + %2 = call i8* @my_malloc(%my_struct* %0) + %3 = bitcast i8* %2 to %my_struct* + store %my_struct %1, %my_struct* %3 +; CHECK-NOT: call void @llvm.memcpy.{{.*}}.{{.*}}.{{.*}} + ret void +} + +attributes #0 = { inaccessiblemem_or_argmemonly } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 14]} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"PIC Level", i32 2} +!3 = !{!"Apple LLVM version 10.0.1 (clang-1001.0.46.4)"} diff --git a/llvm/test/tools/llvm-cxxdump/broken-reloc-sec.test b/llvm/test/tools/llvm-cxxdump/broken-reloc-sec.test new file mode 100644 index 0000000000000..c38786c9f7a57 --- /dev/null +++ b/llvm/test/tools/llvm-cxxdump/broken-reloc-sec.test @@ -0,0 +1,20 @@ +## Check we report an error when trying to dump an object +## which has a relocation section that has a broken sh_info +## field, which is larger than the number of sections. 
+ +# RUN: yaml2obj %s -o %t +# RUN: not llvm-cxxdump %t 2>&1 | FileCheck %s +# CHECK: error: reading file: invalid section index: 255 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: +- Name: .rela.foo + Type: SHT_RELA + Link: 0 + Info: 0xFF + Relocations: [] diff --git a/llvm/test/tools/llvm-dwarfdump/elf-broken-reloc-target.yaml b/llvm/test/tools/llvm-dwarfdump/elf-broken-reloc-target.yaml new file mode 100644 index 0000000000000..925d76e32d6a2 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/elf-broken-reloc-target.yaml @@ -0,0 +1,20 @@ +## Check we report an error if the relocated section identified by the +## sh_info field of a relocation section is invalid. + +# RUN: yaml2obj %s -o %t +# RUN: llvm-dwarfdump %t 2>&1 | FileCheck %s -DFILE=%t --check-prefix=ERR + +# ERR: error: failed to get relocated section: invalid section index: 255 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: +- Name: .rela.debug_info + Type: SHT_RELA + Link: 0 + Info: 0xFF + Relocations: [] diff --git a/llvm/test/tools/llvm-objcopy/ELF/add-section-remove.test b/llvm/test/tools/llvm-objcopy/ELF/add-section-remove.test index fe462dba1e84f..ad41d74947d4c 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/add-section-remove.test +++ b/llvm/test/tools/llvm-objcopy/ELF/add-section-remove.test @@ -23,11 +23,10 @@ Sections: Flags: [ SHF_ALLOC ] Content: "32323232" -# CHECK: SectionHeaderCount: 7 +# CHECK: SectionHeaderCount: 6 # CHECK: Name: .test1 # CHECK: Name: .test3 -# CHECK: Name: .symtab # CHECK: Name: .strtab # CHECK: Name: .shstrtab # CHECK: Name: .test2 diff --git a/llvm/test/tools/llvm-objcopy/ELF/add-section.test b/llvm/test/tools/llvm-objcopy/ELF/add-section.test index 4acbd9ae4e226..e930d2754b860 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/add-section.test +++ b/llvm/test/tools/llvm-objcopy/ELF/add-section.test @@ -24,11 +24,10 @@ Sections: Flags: [ 
SHF_ALLOC ] Content: "32323232" -# CHECK: SectionHeaderCount: 7 +# CHECK: SectionHeaderCount: 6 # CHECK: Name: .test1 # CHECK: Name: .test3 -# CHECK: Name: .symtab # CHECK: Name: .strtab # CHECK: Name: .shstrtab # CHECK: Name: .test2 diff --git a/llvm/test/tools/llvm-objcopy/ELF/add-symbol.test b/llvm/test/tools/llvm-objcopy/ELF/add-symbol.test index fd838602ba705..a9002d4297129 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/add-symbol.test +++ b/llvm/test/tools/llvm-objcopy/ELF/add-symbol.test @@ -58,6 +58,9 @@ ProgramHeaders: Align: 0x1000 Sections: - Section: .data +## TODO (grimar): llvm-objcopy seems produce a broken output without +## the following line, i.e. when there is no symbol table in the input. +Symbols: [] # CHECK: 0: 00000000 0 NOTYPE LOCAL DEFAULT UND # CHECK-NEXT: 1: 00000001 0 NOTYPE GLOBAL DEFAULT ABS abs1 diff --git a/llvm/test/tools/llvm-objcopy/ELF/basic-only-section.test b/llvm/test/tools/llvm-objcopy/ELF/basic-only-section.test index e47a97ac6d731..f95ea38911675 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/basic-only-section.test +++ b/llvm/test/tools/llvm-objcopy/ELF/basic-only-section.test @@ -15,9 +15,7 @@ Sections: Type: SHT_PROGBITS Flags: [ ] -# CHECK: SectionHeaderCount: 5 +# CHECK: SectionHeaderCount: 3 # CHECK: Name: .test -# CHECK: Name: .symtab -# CHECK: Name: .strtab # CHECK: Name: .shstrtab diff --git a/llvm/test/tools/llvm-objcopy/ELF/explicit-keep-remove.test b/llvm/test/tools/llvm-objcopy/ELF/explicit-keep-remove.test index 98ad3ae7592de..6512afac0cf10 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/explicit-keep-remove.test +++ b/llvm/test/tools/llvm-objcopy/ELF/explicit-keep-remove.test @@ -13,9 +13,8 @@ Sections: Type: SHT_PROGBITS Flags: [ ] -# CHECK: SectionHeaderCount: 5 +# CHECK: SectionHeaderCount: 4 # CHECK: Name: .test -# CHECK: Name: .symtab # CHECK: Name: .strtab # CHECK: Name: .shstrtab diff --git a/llvm/test/tools/llvm-objcopy/ELF/explicit-only-section-remove.test 
b/llvm/test/tools/llvm-objcopy/ELF/explicit-only-section-remove.test index 8a152e57a5d73..5baf845abe7de 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/explicit-only-section-remove.test +++ b/llvm/test/tools/llvm-objcopy/ELF/explicit-only-section-remove.test @@ -13,9 +13,7 @@ Sections: Type: SHT_PROGBITS Flags: [ ] -# CHECK: SectionHeaderCount: 5 +# CHECK: SectionHeaderCount: 3 # CHECK: Name: .test -# CHECK: Name: .symtab -# CHECK: Name: .strtab # CHECK: Name: .shstrtab diff --git a/llvm/test/tools/llvm-objcopy/ELF/invalid-e_shoff.test b/llvm/test/tools/llvm-objcopy/ELF/invalid-e_shoff.test index fc2e08484b291..9d50922f1964d 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/invalid-e_shoff.test +++ b/llvm/test/tools/llvm-objcopy/ELF/invalid-e_shoff.test @@ -28,5 +28,6 @@ FileHeader: Sections: - Name: .foo Type: SHT_PROGBITS +Symbols: [] # CASE2: error: '[[INPUT]]': section header table goes past the end of the file: e_shoff = 0x40000000 diff --git a/llvm/test/tools/llvm-objcopy/ELF/keep-only-section.test b/llvm/test/tools/llvm-objcopy/ELF/keep-only-section.test index 7ea8468fe5f99..75b5bbd77d431 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/keep-only-section.test +++ b/llvm/test/tools/llvm-objcopy/ELF/keep-only-section.test @@ -20,19 +20,15 @@ Sections: - Name: .test3 Type: SHT_PROGBITS -# CHECK: SectionHeaderCount: 6 +# CHECK: SectionHeaderCount: 4 # CHECK: Name: .test # CHECK: Name: .test2 -# CHECK: Name: .symtab -# CHECK: Name: .strtab # CHECK: Name: .shstrtab -# REGEX: SectionHeaderCount: 7 +# REGEX: SectionHeaderCount: 5 # REGEX: Name: .test # REGEX: Name: .test2 # REGEX: Name: .test3 -# REGEX: Name: .symtab -# REGEX: Name: .strtab # REGEX: Name: .shstrtab diff --git a/llvm/test/tools/llvm-objcopy/ELF/no-strip-all.test b/llvm/test/tools/llvm-objcopy/ELF/no-strip-all.test index a0158d4b83493..90b4c15765f8c 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/no-strip-all.test +++ b/llvm/test/tools/llvm-objcopy/ELF/no-strip-all.test @@ -37,6 +37,7 @@ Sections: - Name: .alloc 
Type: SHT_PROGBITS Flags: [ SHF_ALLOC ] +Symbols: [] # ALL: SectionHeaderCount: 3 # ALL: Name: .alloc diff --git a/llvm/test/tools/llvm-objcopy/ELF/no-symbol-relocation.test b/llvm/test/tools/llvm-objcopy/ELF/no-symbol-relocation.test index 9def536c239c1..4b13dda6484be 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/no-symbol-relocation.test +++ b/llvm/test/tools/llvm-objcopy/ELF/no-symbol-relocation.test @@ -17,7 +17,6 @@ Sections: Content: "0000000000000000" - Name: .rel.text Type: SHT_REL - Link: .symtab Info: .text Relocations: - Offset: 0x1000 diff --git a/llvm/test/tools/llvm-objcopy/ELF/null-symbol.test b/llvm/test/tools/llvm-objcopy/ELF/null-symbol.test index 94f5ab5d8c2e5..b7ac3e8cf1eb3 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/null-symbol.test +++ b/llvm/test/tools/llvm-objcopy/ELF/null-symbol.test @@ -15,6 +15,7 @@ Sections: Address: 0x1000 AddressAlign: 0x0000000000000010 Size: 8 +Symbols: [] #CHECK: Symbols [ #CHECK-NEXT: Symbol { diff --git a/llvm/test/tools/llvm-objcopy/ELF/only-section-many.test b/llvm/test/tools/llvm-objcopy/ELF/only-section-many.test index 43ccff1c2c2d8..9f1f77d04d059 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/only-section-many.test +++ b/llvm/test/tools/llvm-objcopy/ELF/only-section-many.test @@ -19,10 +19,8 @@ Sections: Type: SHT_PROGBITS Flags: [ ] -# CHECK: SectionHeaderCount: 6 +# CHECK: SectionHeaderCount: 4 # CHECK: Name: .test1 # CHECK: Name: .test2 -# CHECK: Name: .symtab -# CHECK: Name: .strtab # CHECK: Name: .shstrtab diff --git a/llvm/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test b/llvm/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test index 4afd2c9285e1e..c6824c4cb47f9 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test +++ b/llvm/test/tools/llvm-objcopy/ELF/preserve-segment-contents-ehdr-phdrs.test @@ -39,3 +39,4 @@ ProgramHeaders: - Type: PT_LOAD Sections: - Section: .keep_me +Symbols: [] diff --git 
a/llvm/test/tools/llvm-objcopy/ELF/remove-section.test b/llvm/test/tools/llvm-objcopy/ELF/remove-section.test index 8f8b98e90f085..927485a60020f 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/remove-section.test +++ b/llvm/test/tools/llvm-objcopy/ELF/remove-section.test @@ -30,6 +30,7 @@ Sections: - Name: .test3 Type: SHT_PROGBITS Flags: [ ] +Symbols: [] # CHECK: SectionHeaderCount: 6 diff --git a/llvm/test/tools/llvm-objcopy/ELF/rename-section-multiple.test b/llvm/test/tools/llvm-objcopy/ELF/rename-section-multiple.test index e614f656053c7..c508ac80c3068 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/rename-section-multiple.test +++ b/llvm/test/tools/llvm-objcopy/ELF/rename-section-multiple.test @@ -22,7 +22,7 @@ Sections: Flags: [ SHF_ALLOC ] Content: "37373737" -# CHECK: SectionHeaderCount: 7 +# CHECK: SectionHeaderCount: 6 # CHECK: Name: .test2 # CHECK: SectionData ( @@ -36,6 +36,5 @@ Sections: # CHECK: SectionData ( # CHECK-NEXT: 0000: 37373737 # CHECK-NEXT: ) -# CHECK: Name: .symtab # CHECK: Name: .strtab # CHECK: Name: .shstrtab diff --git a/llvm/test/tools/llvm-objcopy/ELF/rename-section.test b/llvm/test/tools/llvm-objcopy/ELF/rename-section.test index 3829b5b0da8ae..c8a1eafaa3029 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/rename-section.test +++ b/llvm/test/tools/llvm-objcopy/ELF/rename-section.test @@ -16,13 +16,12 @@ Sections: Flags: [ SHF_ALLOC ] Content: "c3c3c3c3" -# CHECK: SectionHeaderCount: 5 +# CHECK: SectionHeaderCount: 4 # CHECK: Name: .bar # CHECK: SectionData ( # CHECK-NEXT: 0000: C3C3C3C3 # CHECK-NEXT: ) -# CHECK: Name: .symtab # CHECK: Name: .strtab # CHECK: Name: .shstrtab diff --git a/llvm/test/tools/llvm-objcopy/ELF/segment-shift-section-remove.test b/llvm/test/tools/llvm-objcopy/ELF/segment-shift-section-remove.test index cef783cb9b29b..9a50a10bc2491 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/segment-shift-section-remove.test +++ b/llvm/test/tools/llvm-objcopy/ELF/segment-shift-section-remove.test @@ -40,6 +40,9 @@ ProgramHeaders: 
PAddr: 0x3000 Sections: - Section: .text3 +## TODO (grimar): without the following line (i.e. without an empty symbol table), +## llvm-objcopy adds an empty .strtab section. It doesn't look correct. +Symbols: [] #CHECK: SectionHeaderCount: 4 diff --git a/llvm/test/tools/llvm-objcopy/ELF/segment-test-remove-section.test b/llvm/test/tools/llvm-objcopy/ELF/segment-test-remove-section.test index 60cd5f3483d37..ca83e58f53a96 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/segment-test-remove-section.test +++ b/llvm/test/tools/llvm-objcopy/ELF/segment-test-remove-section.test @@ -42,6 +42,9 @@ ProgramHeaders: - Section: .text - Section: .text2 - Section: .text3 +## TODO (grimar): without the following line (i.e. without an empty symbol table), +## llvm-objcopy adds an empty .strtab section. It doesn't look correct. +Symbols: [] # Make sure that when we remove a section we overwrite it with zeros # DATA: {{^[^[:blank:]]+}} 00 00 00 00 diff --git a/llvm/test/tools/llvm-objcopy/ELF/shstrtab-optimize.test b/llvm/test/tools/llvm-objcopy/ELF/shstrtab-optimize.test index 24b546cbaf5b1..8b1da2141aa50 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/shstrtab-optimize.test +++ b/llvm/test/tools/llvm-objcopy/ELF/shstrtab-optimize.test @@ -11,7 +11,7 @@ # CHECK-NEXT: ] # CHECK-NEXT: Address: # CHECK-NEXT: Offset: -# CHECK-NEXT: Size: 36 +# CHECK-NEXT: Size: 28 !ELF FileHeader: diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-unneeded-remove-debug-keep-link.test b/llvm/test/tools/llvm-objcopy/ELF/strip-unneeded-remove-debug-keep-link.test index b6a72f965bd55..948bfea2d9c53 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/strip-unneeded-remove-debug-keep-link.test +++ b/llvm/test/tools/llvm-objcopy/ELF/strip-unneeded-remove-debug-keep-link.test @@ -20,11 +20,10 @@ Sections: - Name: .debugfoo Type: SHT_PROGBITS -# CHECK: There are 6 section headers +# CHECK: There are 5 section headers # CHECK: [ 0] # CHECK-NEXT: [ 1] .text -# CHECK-NEXT: [ 2] .symtab -# CHECK-NEXT: [ 3] .strtab -# 
CHECK-NEXT: [ 4] .shstrtab -# CHECK-NEXT: [ 5] .gnu_debuglink +# CHECK-NEXT: [ 2] .strtab +# CHECK-NEXT: [ 3] .shstrtab +# CHECK-NEXT: [ 4] .gnu_debuglink diff --git a/llvm/test/tools/llvm-objcopy/ELF/symtab-error-on-remove-strtab.test b/llvm/test/tools/llvm-objcopy/ELF/symtab-error-on-remove-strtab.test index 7c9a46062d9ed..4aac930e46986 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/symtab-error-on-remove-strtab.test +++ b/llvm/test/tools/llvm-objcopy/ELF/symtab-error-on-remove-strtab.test @@ -9,6 +9,7 @@ FileHeader: Data: ELFDATA2LSB Type: ET_REL Machine: EM_X86_64 +Symbols: [] # ERR1: error: '[[INPUT]]': string table '.strtab' cannot be removed because it is referenced by the symbol table '.symtab' # ERR2: error: '[[INPUT]]': string table '.strtab' cannot be removed because it is referenced by the symbol table '.symtab' diff --git a/llvm/test/tools/llvm-objcopy/ELF/symtab-link.test b/llvm/test/tools/llvm-objcopy/ELF/symtab-link.test index 68b8f78dde9f0..8297529cb6529 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/symtab-link.test +++ b/llvm/test/tools/llvm-objcopy/ELF/symtab-link.test @@ -16,6 +16,7 @@ Sections: Link: .symtab Type: SHT_PROGBITS Flags: [ ] +Symbols: [] # CHECK: Name: .foo # CHECK-NEXT: Type: diff --git a/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test b/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test index 0564289672792..685ba16070484 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test +++ b/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test @@ -57,6 +57,7 @@ Sections: Type: SHT_PROGBITS - Name: .bar Type: SHT_PROGBITS +Symbols: [] ## Use a separate test file with special characters for the following tests. 
@@ -128,6 +129,7 @@ Sections: Type: SHT_PROGBITS - Name: .foo Type: SHT_PROGBITS +Symbols: [] # WARN: warning: invalid glob pattern: ][] diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test index 04390eb4e3084..8450b4e1f39fc 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test +++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test @@ -1,8 +1,8 @@ ## Show that --disassemble + --reloc prints relocations inline and does not dump ## the relocation sections. -# RUN: yaml2obj %s -o %t.o -# RUN: llvm-objdump %t.o -d -r | FileCheck %s --implicit-check-not="RELOCATION RECORDS" +# RUN: yaml2obj %s --docnum=1 -o %t1.o +# RUN: llvm-objdump %t1.o -d -r | FileCheck %s --implicit-check-not="RELOCATION RECORDS" # CHECK: 0: e8 00 00 00 00 callq 0 <.text+0x5> # CHECK-NEXT: 0000000000000001: R_X86_64_PC32 foo-4 @@ -40,3 +40,24 @@ Sections: Symbols: - Name: foo - Name: bar + +## Check we report an error if the relocated section identified by the +## sh_info field of a relocation section is invalid. 
+ +# RUN: yaml2obj %s --docnum=2 -o %t2.o +# RUN: not llvm-objdump %t2.o --disassemble --reloc 2>&1 | FileCheck %s -DFILE=%t2.o --check-prefix=ERR + +# ERR: error: '[[FILE]]': section (1): failed to get a relocated section: invalid section index: 255 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: +- Name: .rela.debug_info + Type: SHT_RELA + Link: 0 + Info: 0xFF + Relocations: [] diff --git a/llvm/test/tools/llvm-objdump/full-contents.test b/llvm/test/tools/llvm-objdump/full-contents.test index e4939f2aade33..29cc9fdb800c4 100644 --- a/llvm/test/tools/llvm-objdump/full-contents.test +++ b/llvm/test/tools/llvm-objdump/full-contents.test @@ -64,3 +64,4 @@ Sections: Type: SHT_PROGBITS Flags: [ ] Content: '11112222' +Symbols: [] diff --git a/llvm/test/tools/llvm-objdump/relocations-elf.test b/llvm/test/tools/llvm-objdump/relocations-elf.test index 87fe55c0411f3..751dfbb5807be 100644 --- a/llvm/test/tools/llvm-objdump/relocations-elf.test +++ b/llvm/test/tools/llvm-objdump/relocations-elf.test @@ -74,8 +74,8 @@ Symbols: ## Check we report an error if the relocated section identified by the ## sh_info field of a relocation section is invalid. 
# RUN: yaml2obj --docnum=2 %s > %t2 -# RUN: not llvm-objdump --reloc %t2 2>&1 | FileCheck %s --check-prefix=ERR -# ERR: LLVM ERROR: Invalid data was encountered while parsing the file +# RUN: not llvm-objdump --reloc %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=ERR +# ERR: error: '[[FILE]]': section (1): unable to get a relocation target: invalid section index: 255 --- !ELF FileHeader: @@ -86,8 +86,7 @@ FileHeader: Sections: - Name: .rela.foo Type: SHT_RELA - Link: .symtab - Info: 0x255 + Info: 0xFF Relocations: - Offset: 0x1 Type: R_X86_64_NONE diff --git a/llvm/test/tools/llvm-readobj/check-output-order.test b/llvm/test/tools/llvm-readobj/check-output-order.test index 7097d7faa3d05..36fce3ac64263 100644 --- a/llvm/test/tools/llvm-readobj/check-output-order.test +++ b/llvm/test/tools/llvm-readobj/check-output-order.test @@ -39,6 +39,7 @@ Sections: Entries: - Tag: DT_NULL Value: 0 +Symbols: [] ProgramHeaders: - Type: PT_DYNAMIC Sections: diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test index afdcbe6b6b5ec..1a31fa39e9f9d 100644 --- a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test +++ b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test @@ -34,6 +34,7 @@ Sections: Type: SHT_DYNAMIC Address: 0x1000 Content: "01234567" +Symbols: [] ProgramHeaders: - Type: PT_LOAD VAddr: 0x1000 @@ -76,6 +77,7 @@ Sections: Value: 0 - Tag: DT_NULL Value: 0 +Symbols: [] ProgramHeaders: - Type: PT_LOAD VAddr: 0x1000 @@ -143,6 +145,7 @@ Sections: Value: 1 - Tag: DT_NULL Value: 0 +Symbols: [] ProgramHeaders: - Type: PT_LOAD VAddr: 0x1000 @@ -189,6 +192,7 @@ Sections: Value: 1 - Tag: DT_NULL Value: 0x0 +Symbols: [] ProgramHeaders: - Type: PT_LOAD VAddr: 0x1000 @@ -231,6 +235,7 @@ Sections: Value: 0x1000000 - Tag: DT_NULL Value: 0x0 +Symbols: [] ProgramHeaders: - Type: PT_LOAD VAddr: 0x1000 diff --git a/llvm/test/tools/llvm-readobj/elf-file-headers.test 
b/llvm/test/tools/llvm-readobj/elf-file-headers.test index ce7711c8b2d91..1cb3563a07747 100644 --- a/llvm/test/tools/llvm-readobj/elf-file-headers.test +++ b/llvm/test/tools/llvm-readobj/elf-file-headers.test @@ -21,15 +21,15 @@ # I386-NEXT: Version: 1 # I386-NEXT: Entry: 0x0 # I386-NEXT: ProgramHeaderOffset: 0x0 -# I386-NEXT: SectionHeaderOffset: 0x64 +# I386-NEXT: SectionHeaderOffset: 0x48 # I386-NEXT: Flags [ (0x0) # I386-NEXT: ] # I386-NEXT: HeaderSize: 52 # I386-NEXT: ProgramHeaderEntrySize: 0 # I386-NEXT: ProgramHeaderCount: 0 # I386-NEXT: SectionHeaderEntrySize: 40 -# I386-NEXT: SectionHeaderCount: 4 -# I386-NEXT: StringTableSectionIndex: 3 +# I386-NEXT: SectionHeaderCount: 3 +# I386-NEXT: StringTableSectionIndex: 2 # I386-NEXT:} # I386-NOT:{{.}} @@ -65,15 +65,15 @@ FileHeader: # X86-64-NEXT: Version: 1 # X86-64-NEXT: Entry: 0x0 # X86-64-NEXT: ProgramHeaderOffset: 0x0 -# X86-64-NEXT: SectionHeaderOffset: 0x78 +# X86-64-NEXT: SectionHeaderOffset: 0x58 # X86-64-NEXT: Flags [ (0x0) # X86-64-NEXT: ] # X86-64-NEXT: HeaderSize: 64 # X86-64-NEXT: ProgramHeaderEntrySize: 0 # X86-64-NEXT: ProgramHeaderCount: 0 # X86-64-NEXT: SectionHeaderEntrySize: 64 -# X86-64-NEXT: SectionHeaderCount: 4 -# X86-64-NEXT: StringTableSectionIndex: 3 +# X86-64-NEXT: SectionHeaderCount: 3 +# X86-64-NEXT: StringTableSectionIndex: 2 # X86-64-NEXT:} # X86-64-NOT:{{.}} @@ -116,14 +116,14 @@ FileHeader: # LANAI-NEXT: Version: 1 # LANAI-NEXT: Entry: 0x0 # LANAI-NEXT: ProgramHeaderOffset: 0x0 -# LANAI-NEXT: SectionHeaderOffset: 0x64 +# LANAI-NEXT: SectionHeaderOffset: 0x48 # LANAI-NEXT: Flags [ (0x0) # LANAI-NEXT: ] # LANAI-NEXT: HeaderSize: 52 # LANAI-NEXT: ProgramHeaderEntrySize: 0 # LANAI-NEXT: ProgramHeaderCount: 0 # LANAI-NEXT: SectionHeaderEntrySize: 40 -# LANAI-NEXT: SectionHeaderCount: 4 -# LANAI-NEXT: StringTableSectionIndex: 3 +# LANAI-NEXT: SectionHeaderCount: 3 +# LANAI-NEXT: StringTableSectionIndex: 2 # LANAI-NEXT:} # LANAI-NOT:{{.}} diff --git 
a/llvm/test/tools/llvm-readobj/elf-hidden-versym.test b/llvm/test/tools/llvm-readobj/elf-hidden-versym.test index b6cf05aad48c6..0dcee4ab09d70 100644 --- a/llvm/test/tools/llvm-readobj/elf-hidden-versym.test +++ b/llvm/test/tools/llvm-readobj/elf-hidden-versym.test @@ -2,7 +2,7 @@ # RUN: llvm-readelf -V %t | FileCheck %s --check-prefix=HIDDEN # HIDDEN: Version symbols section '.gnu.version' contains 2 entries: -# HIDDEN-NEXT: Addr: 0000000000200210 Offset: 0x000040 Link: 6 (.dynsym) +# HIDDEN-NEXT: Addr: 0000000000200210 Offset: 0x000040 Link: 5 (.dynsym) # HIDDEN-NEXT: 000: 0 (*local*) 3h(hiddensym) --- !ELF diff --git a/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test b/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test index ba899141f2333..f57f7e83c9a2b 100644 --- a/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test +++ b/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test @@ -4,7 +4,7 @@ # GNU: ELF Header: # GNU: Section header string table index: 255 -# GNU-NEXT: There are 4 section headers, starting at offset 0x78: +# GNU-NEXT: There are 3 section headers, starting at offset 0x58: # GNU: Section Headers: # GNU-NEXT: [Nr] Name # GNU-EMPTY: diff --git a/llvm/test/tools/llvm-readobj/elf-invalid-versioning.test b/llvm/test/tools/llvm-readobj/elf-invalid-versioning.test index ae1b2ad6c5173..e963d6905c2df 100644 --- a/llvm/test/tools/llvm-readobj/elf-invalid-versioning.test +++ b/llvm/test/tools/llvm-readobj/elf-invalid-versioning.test @@ -2,7 +2,7 @@ # RUN: llvm-readelf -V %t | FileCheck %s --check-prefix=INVALID # INVALID: Version symbols section '.gnu.version' contains 2 entries: -# INVALID-NEXT: Addr: 0000000000200210 Offset: 0x000040 Link: 6 (.dynsym) +# INVALID-NEXT: Addr: 0000000000200210 Offset: 0x000040 Link: 5 (.dynsym) # INVALID-NEXT: 000: 0 (*local*) 3 (*invalid*) --- !ELF diff --git a/llvm/test/tools/llvm-readobj/elf-no-phdrs.test b/llvm/test/tools/llvm-readobj/elf-no-phdrs.test index 4bb0067c2d651..f01569596ac95 100644 --- 
a/llvm/test/tools/llvm-readobj/elf-no-phdrs.test +++ b/llvm/test/tools/llvm-readobj/elf-no-phdrs.test @@ -14,7 +14,7 @@ # GNU-EMPTY: # GNU-NEXT: Section to Segment mapping: # GNU-NEXT: Segment Sections... -# GNU-NEXT: None .symtab .strtab .shstrtab +# GNU-NEXT: None .strtab .shstrtab --- !ELF FileHeader: diff --git a/llvm/test/tools/llvm-readobj/elf-no-relocs.test b/llvm/test/tools/llvm-readobj/elf-no-relocs.test index df80b903f4cc2..c632e9751dd4e 100644 --- a/llvm/test/tools/llvm-readobj/elf-no-relocs.test +++ b/llvm/test/tools/llvm-readobj/elf-no-relocs.test @@ -41,8 +41,6 @@ Sections: - Name: .rela.text Type: SHT_RELA Info: .text - Link: .symtab - Name: .rel.text Type: SHT_REL Info: .text - Link: .symtab diff --git a/llvm/test/tools/llvm-readobj/elf-relr-relocs.test b/llvm/test/tools/llvm-readobj/elf-relr-relocs.test index 3badb736ae90e..9d3b5de99281a 100644 --- a/llvm/test/tools/llvm-readobj/elf-relr-relocs.test +++ b/llvm/test/tools/llvm-readobj/elf-relr-relocs.test @@ -91,6 +91,7 @@ Sections: Link: .symtab AddressAlign: 0x0000000000000001 Content: 600D0100000000000301000000000000000002000000000001050F00000000000900405005700A00 +Symbols: [] ... # RUN: yaml2obj -docnum 2 %s \ @@ -169,4 +170,5 @@ Sections: Link: .symtab AddressAlign: 0x00000001 Content: 600D0100030100000000020001050F0009004050 +Symbols: [] ... 
diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test index 46b43430269c7..393889c772a57 100644 --- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test +++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test @@ -164,12 +164,12 @@ DynamicSymbols: # LLVM-NEXT: ] # GNU: Version symbols section '.gnu.version' contains 6 entries: -# GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 7 (.dynsym) +# GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 6 (.dynsym) # GNU-NEXT: 000: 0 (*local*) 2 (VERSION1) 3 (VERSION2) 4 (v1) # GNU-NEXT: 004: 5 (v2) 6 (v3) # GNU-EMPTY: # GNU-NEXT: Version definition section '.gnu.version_d' contains 3 entries: -# GNU-NEXT: Addr: 0000000000000000 Offset: 0x00004c Link: 8 (.dynstr) +# GNU-NEXT: Addr: 0000000000000000 Offset: 0x00004c Link: 7 (.dynstr) # GNU-NEXT: 0x0000: Rev: 1 Flags: none Index: 2 Cnt: 1 Name: VERSION1 # GNU-NEXT: 0x001c: Rev: 1 Flags: none Index: 3 Cnt: 2 Name: VERSION2 # GNU-NEXT: 0x0038: Parent 1: VERSION1 @@ -177,7 +177,7 @@ DynamicSymbols: # GNU-NEXT: 0x0038: Parent 1: VERSION1 # GNU-EMPTY: # GNU-NEXT: Version needs section '.gnu.version_r' contains 2 entries: -# GNU-NEXT: Addr: 0000000000000000 Offset: 0x00008c Link: 8 (.dynstr) +# GNU-NEXT: Addr: 0000000000000000 Offset: 0x00008c Link: 7 (.dynstr) # GNU-NEXT: 0x0000: Version: 1 File: verneed1.so.0 Cnt: 2 # GNU-NEXT: 0x0010: Name: v1 Flags: none Version: 4 # GNU-NEXT: 0x0020: Name: v2 Flags: none Version: 5 diff --git a/llvm/test/tools/llvm-readobj/elf-wrong-shstrtab-type.test b/llvm/test/tools/llvm-readobj/elf-wrong-shstrtab-type.test index 6a600dc3b69af..cc40f49d4a585 100644 --- a/llvm/test/tools/llvm-readobj/elf-wrong-shstrtab-type.test +++ b/llvm/test/tools/llvm-readobj/elf-wrong-shstrtab-type.test @@ -15,7 +15,7 @@ # GNU: Section Headers: # GNU: [Nr] Name Type Address Off Size ES Flg Lk Inf Al # GNU: warning: '[[FILE]]': invalid sh_type for string table section [index 1]: expected SHT_STRTAB, but 
got SHT_PROGBITS -# GNU: [ 1] .shstrtab PROGBITS 0000000000000000 000040 00001b 00 0 0 0 +# GNU: [ 1] .shstrtab PROGBITS 0000000000000000 000040 000013 00 0 0 0 ## Test we report multiple identical warnings (one for each object) when dumping an archive. diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-aarch64.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-aarch64.test index f50668c353920..fdd3b97b6b266 100644 --- a/llvm/test/tools/llvm-readobj/reloc-types-elf-aarch64.test +++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-aarch64.test @@ -140,7 +140,6 @@ Sections: Content: 00 - Name: .rela.text Type: SHT_RELA - Link: .symtab AddressAlign: 0x0000000000000008 EntSize: 0x0000000000000018 Info: .text diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-arm.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-arm.test index 6e29637d35792..ac6bda68587a7 100644 --- a/llvm/test/tools/llvm-readobj/reloc-types-elf-arm.test +++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-arm.test @@ -149,7 +149,6 @@ Sections: Content: 00 - Name: .rel.text Type: SHT_REL - Link: .symtab AddressAlign: 0x0000000000000004 EntSize: 0x0000000000000008 Info: .text diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-lanai.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-lanai.test index 270e2c397d3ac..b5804c9aa5749 100644 --- a/llvm/test/tools/llvm-readobj/reloc-types-elf-lanai.test +++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-lanai.test @@ -26,7 +26,6 @@ Sections: Content: 00 - Name: .rela.text Type: SHT_RELA - Link: .symtab AddressAlign: 0x0000000000000004 EntSize: 0x000000000000000C Info: .text diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-mips.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-mips.test index b948a3d5a6d0f..16dfd2f77ddde 100644 --- a/llvm/test/tools/llvm-readobj/reloc-types-elf-mips.test +++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-mips.test @@ -70,7 +70,6 @@ Sections: Content: 00 - Name: .rel.text Type: SHT_REL - Link: 
.symtab AddressAlign: 0x0000000000000004 EntSize: 0x0000000000000008 Info: .text diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-mips64.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-mips64.test index f04064217bcb6..f1fefb26974c7 100644 --- a/llvm/test/tools/llvm-readobj/reloc-types-elf-mips64.test +++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-mips64.test @@ -70,7 +70,6 @@ Sections: Content: 00 - Name: .rela.text Type: SHT_RELA - Link: .symtab AddressAlign: 0x0000000000000008 EntSize: 0x0000000000000018 Info: .text diff --git a/llvm/test/tools/llvm-readobj/stack-sizes.test b/llvm/test/tools/llvm-readobj/stack-sizes.test index 8786ec665352f..3d660fcd2563e 100644 --- a/llvm/test/tools/llvm-readobj/stack-sizes.test +++ b/llvm/test/tools/llvm-readobj/stack-sizes.test @@ -641,3 +641,26 @@ Sections: Relocations: - Offset: 0 Type: R_X86_64_64 + +## Check we report an error when dumping stack sizes if the relocated section +## identified by the sh_info field is invalid. Here sh_info value is larger than +## the number of sections. 
+ +# RUN: yaml2obj --docnum=13 %s > %t18 +# RUN: not llvm-readelf --stack-sizes %t18 2>&1 | FileCheck %s -DFILE=%t18 --check-prefix=INVALID-TARGET +# RUN: not llvm-readobj --stack-sizes %t18 2>&1 | FileCheck %s -DFILE=%t18 --check-prefix=INVALID-TARGET + +# INVALID-TARGET: error: '[[FILE]]': .rela.stack_sizes: failed to get a relocated section: invalid section index: 255 + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2MSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .rela.stack_sizes + Type: SHT_RELA + Link: 0 + Info: 0xFF + Relocations: [] diff --git a/llvm/test/tools/obj2yaml/elf-llvm-addrsig-section.yaml b/llvm/test/tools/obj2yaml/elf-llvm-addrsig-section.yaml index 6f21c3212bd9f..9519d1f05ba9d 100644 --- a/llvm/test/tools/obj2yaml/elf-llvm-addrsig-section.yaml +++ b/llvm/test/tools/obj2yaml/elf-llvm-addrsig-section.yaml @@ -62,7 +62,6 @@ Symbols: # INVALID-ENTRY: - Name: .llvm_addrsig # INVALID-ENTRY-NEXT: Type: SHT_LLVM_ADDRSIG -# INVALID-ENTRY-NEXT: Link: .symtab # INVALID-ENTRY-NEXT: Content: FFFFFFFFFF --- !ELF @@ -83,7 +82,6 @@ Sections: # EMPTY: - Name: .llvm_addrsig # EMPTY-NEXT: Type: SHT_LLVM_ADDRSIG -# EMPTY-NEXT: Link: .symtab # EMPTY-NEXT: Symbols: [] --- !ELF diff --git a/llvm/test/tools/obj2yaml/elf-no-symtab.yaml b/llvm/test/tools/obj2yaml/elf-no-symtab.yaml new file mode 100644 index 0000000000000..cab5953fb7d62 --- /dev/null +++ b/llvm/test/tools/obj2yaml/elf-no-symtab.yaml @@ -0,0 +1,43 @@ +## Check that obj2yaml doesn't create a "Symbols" tag for the objects +## without a symbol table. + +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: obj2yaml %t1 | FileCheck %s --check-prefix=NOSYMTAB + +# NOSYMTAB: --- !ELF +# NOSYMTAB-NEXT: FileHeader: +# NOSYMTAB-NEXT: Class: ELFCLASS64 +# NOSYMTAB-NEXT: Data: ELFDATA2LSB +# NOSYMTAB-NEXT: Type: ET_DYN +# NOSYMTAB-NEXT: Machine: EM_X86_64 +# NOSYMTAB-NEXT: ... 
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 + +## Check that obj2yaml creates a "Symbols" tag for the objects +## that have a symbol table. + +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: obj2yaml %t2 | FileCheck %s --check-prefix=SYMTAB + +# SYMTAB: --- !ELF +# SYMTAB-NEXT: FileHeader: +# SYMTAB-NEXT: Class: ELFCLASS64 +# SYMTAB-NEXT: Data: ELFDATA2LSB +# SYMTAB-NEXT: Type: ET_DYN +# SYMTAB-NEXT: Machine: EM_X86_64 +# SYMTAB-NEXT: Symbols: [] +# SYMTAB-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Symbols: [] diff --git a/llvm/test/tools/obj2yaml/elf-sht-symtab-shndx.yaml b/llvm/test/tools/obj2yaml/elf-sht-symtab-shndx.yaml index e926019655d95..735ad6ed4abe7 100644 --- a/llvm/test/tools/obj2yaml/elf-sht-symtab-shndx.yaml +++ b/llvm/test/tools/obj2yaml/elf-sht-symtab-shndx.yaml @@ -142,6 +142,7 @@ Sections: Type: SHT_SYMTAB_SHNDX Entries: [ 0 ] Link: .symtab +Symbols: [] ## Check that yaml2obj can't dump the object if SHT_SYMTAB_SHNDX is ## not associated with a SHT_SYMTAB section (this case is illegal). diff --git a/llvm/test/tools/obj2yaml/invalid-section-name.yaml b/llvm/test/tools/obj2yaml/invalid-section-name.yaml new file mode 100644 index 0000000000000..0f1251c212c65 --- /dev/null +++ b/llvm/test/tools/obj2yaml/invalid-section-name.yaml @@ -0,0 +1,31 @@ +## Check we do not crash/assert when dumping a broken section name. +## Here we replace "foo" name with a sequence of characters that +## are not representable as unsigned char. +## We used to have an assert for this case before. + +# RUN: yaml2obj %s -o %t +# RUN: obj2yaml %t | FileCheck %s + +# CHECK: --- !ELF +# CHECK-NEXT: FileHeader: +# CHECK-NEXT: Class: ELFCLASS64 +# CHECK-NEXT: Data: ELFDATA2LSB +# CHECK-NEXT: Type: ET_REL +# CHECK-NEXT: Machine: EM_X86_64 +# CHECK-NEXT: Sections: +# CHECK-NEXT: - Name: "{{.*}}" +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: ... 
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: foo + Type: SHT_PROGBITS + - Name: .shstrtab + Type: SHT_STRTAB + Content: "00FEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFEFE00" diff --git a/llvm/test/tools/obj2yaml/no-symbol-reloc.test b/llvm/test/tools/obj2yaml/no-symbol-reloc.test index 351aad3f7f6b3..f2f1c385ce8a0 100644 --- a/llvm/test/tools/obj2yaml/no-symbol-reloc.test +++ b/llvm/test/tools/obj2yaml/no-symbol-reloc.test @@ -26,6 +26,7 @@ # CHECK-NEXT: - Offset: 0x0000000000000004 # CHECK-NEXT: Symbol: '' # CHECK-NEXT: Type: R_X86_64_NONE +# CHECK-NEXT: Symbols: [] # CHECK-NEXT: ... --- !ELF diff --git a/llvm/test/tools/obj2yaml/section-group.test b/llvm/test/tools/obj2yaml/section-group.test index cd520cb1b361f..111bffda8e582 100644 --- a/llvm/test/tools/obj2yaml/section-group.test +++ b/llvm/test/tools/obj2yaml/section-group.test @@ -66,5 +66,6 @@ Sections: Info: 0xFF Members: - SectionOrType: GRP_COMDAT +Symbols: [] # ERR: Error reading file: {{.*}}2.o: unable to get symbol from section [index 2]: invalid symbol index (255) diff --git a/llvm/test/tools/yaml2obj/dynamic-symbols.yaml b/llvm/test/tools/yaml2obj/dynamic-symbols.yaml index 9ecb1c278f6bf..7eb58fa618922 100644 --- a/llvm/test/tools/yaml2obj/dynamic-symbols.yaml +++ b/llvm/test/tools/yaml2obj/dynamic-symbols.yaml @@ -52,7 +52,7 @@ DynamicSymbols: # NUM: Name: bar # NUM: Section: -# NUM-SAME: .symtab (0x2) +# NUM-SAME: .strtab (0x2) # NUM: error: '[[FILE]]': invalid section index: 255 diff --git a/llvm/test/tools/yaml2obj/elf-comdat-broken-info.yaml b/llvm/test/tools/yaml2obj/elf-comdat-broken-info.yaml index 929213660a1e9..6f2f7acd49254 100644 --- a/llvm/test/tools/yaml2obj/elf-comdat-broken-info.yaml +++ b/llvm/test/tools/yaml2obj/elf-comdat-broken-info.yaml @@ -12,7 +12,6 @@ FileHeader: Sections: - Name: .group Type: SHT_GROUP - Link: .symtab Info: 12345 Members: - SectionOrType: GRP_COMDAT diff --git 
a/llvm/test/tools/yaml2obj/elf-custom-null-section.yaml b/llvm/test/tools/yaml2obj/elf-custom-null-section.yaml index 90cc6f03f996a..e8cbb4edb630f 100644 --- a/llvm/test/tools/yaml2obj/elf-custom-null-section.yaml +++ b/llvm/test/tools/yaml2obj/elf-custom-null-section.yaml @@ -8,9 +8,8 @@ # DEFAULT: Section Headers: # DEFAULT-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al # DEFAULT-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 -# DEFAULT-NEXT: [ 1] .symtab SYMTAB 0000000000000000 000040 000018 18 2 1 8 -# DEFAULT-NEXT: [ 2] .strtab STRTAB 0000000000000000 000058 000001 00 0 0 1 -# DEFAULT-NEXT: [ 3] .shstrtab STRTAB 0000000000000000 000059 00001b 00 0 0 1 +# DEFAULT-NEXT: [ 1] .strtab STRTAB 0000000000000000 000040 000001 00 0 0 1 +# DEFAULT-NEXT: [ 2] .shstrtab STRTAB 0000000000000000 000041 000013 00 0 0 1 --- !ELF FileHeader: @@ -105,9 +104,8 @@ Sections: # OTHER-SECTION-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al # OTHER-SECTION-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 # OTHER-SECTION-NEXT: [ 1] foo PROGBITS 0000000000000000 000040 000000 00 0 0 0 -# OTHER-SECTION-NEXT: [ 2] .symtab SYMTAB 0000000000000000 000040 000018 18 3 1 8 -# OTHER-SECTION-NEXT: [ 3] .strtab STRTAB 0000000000000000 000058 000001 00 0 0 1 -# OTHER-SECTION-NEXT: [ 4] .shstrtab STRTAB 0000000000000000 000059 00001f 00 0 0 1 +# OTHER-SECTION-NEXT: [ 2] .strtab STRTAB 0000000000000000 000040 000001 00 0 0 1 +# OTHER-SECTION-NEXT: [ 3] .shstrtab STRTAB 0000000000000000 000041 000017 00 0 0 1 --- !ELF FileHeader: diff --git a/llvm/test/tools/yaml2obj/elf-header-sh-fields.yaml b/llvm/test/tools/yaml2obj/elf-header-sh-fields.yaml index 6ae98b2c7fb3e..821b77418857c 100644 --- a/llvm/test/tools/yaml2obj/elf-header-sh-fields.yaml +++ b/llvm/test/tools/yaml2obj/elf-header-sh-fields.yaml @@ -6,10 +6,10 @@ # RUN: yaml2obj --docnum=1 %s -o %t1 # RUN: llvm-readelf --file-headers %t1 | FileCheck %s --check-prefix=DEFAULT -# DEFAULT: Start of section headers: 120 
(bytes into file) +# DEFAULT: Start of section headers: 88 (bytes into file) # DEFAULT: Size of section headers: 64 (bytes) -# DEFAULT: Number of section headers: 4 -# DEFAULT: Section header string table index: 3 +# DEFAULT: Number of section headers: 3 +# DEFAULT: Section header string table index: 2 --- !ELF FileHeader: diff --git a/llvm/test/tools/yaml2obj/elf-sht-symtab-shndx.yaml b/llvm/test/tools/yaml2obj/elf-sht-symtab-shndx.yaml index 6e20912d76df9..612087a305c85 100644 --- a/llvm/test/tools/yaml2obj/elf-sht-symtab-shndx.yaml +++ b/llvm/test/tools/yaml2obj/elf-sht-symtab-shndx.yaml @@ -126,4 +126,3 @@ Sections: Type: SHT_SYMTAB_SHNDX Entries: [ 0 ] EntSize: 2 - Link: .symtab diff --git a/llvm/test/tools/yaml2obj/implicit-sections-types.test b/llvm/test/tools/yaml2obj/implicit-sections-types.test index 8360fa73d825c..2860fa3db5dd0 100644 --- a/llvm/test/tools/yaml2obj/implicit-sections-types.test +++ b/llvm/test/tools/yaml2obj/implicit-sections-types.test @@ -24,6 +24,8 @@ FileHeader: Data: ELFDATA2LSB Type: ET_DYN Machine: EM_X86_64 +## Needed to force the creation of the .symtab. +Symbols: [] ## Needed to force the creation of the .dynsym and .dynstr. DynamicSymbols: - Name: foo diff --git a/llvm/test/tools/yaml2obj/implicit-sections.test b/llvm/test/tools/yaml2obj/implicit-sections.test index 77e9e1c2f6308..cde17c23287d7 100644 --- a/llvm/test/tools/yaml2obj/implicit-sections.test +++ b/llvm/test/tools/yaml2obj/implicit-sections.test @@ -84,3 +84,30 @@ Sections: - Name: .text.foo Type: SHT_PROGBITS Address: 0x200 + +## Check we don't add a symbol table when no "Symbols" key is specified. + +# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: llvm-readelf -S %t3 | FileCheck /dev/null --implicit-check-not=.symtab + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 + +## Check we add a symbol table when "Symbols" key is specified. 
+ +# RUN: yaml2obj --docnum=4 %s -o %t4 +# RUN: llvm-readelf -S %t4 | FileCheck %s --check-prefix=SYMTAB + +# SYMTAB: .symtab + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Symbols: [] diff --git a/llvm/test/tools/yaml2obj/invalid-symboless-relocation.yaml b/llvm/test/tools/yaml2obj/invalid-symboless-relocation.yaml index 716778e16f835..9ec956fcf8c8f 100644 --- a/llvm/test/tools/yaml2obj/invalid-symboless-relocation.yaml +++ b/llvm/test/tools/yaml2obj/invalid-symboless-relocation.yaml @@ -16,7 +16,6 @@ Sections: Content: "00000000" - Name: .rel.text Type: SHT_REL - Link: .symtab Info: .text Relocations: - Offset: 0x1000 diff --git a/llvm/test/tools/yaml2obj/reloc-sec-info.yaml b/llvm/test/tools/yaml2obj/reloc-sec-info.yaml index 801fe02e05f2e..bbbdc859d541a 100644 --- a/llvm/test/tools/yaml2obj/reloc-sec-info.yaml +++ b/llvm/test/tools/yaml2obj/reloc-sec-info.yaml @@ -20,7 +20,6 @@ FileHeader: Sections: - Name: .rela.text Type: SHT_RELA - Link: .symtab Info: 12345 Relocations: diff --git a/llvm/test/tools/yaml2obj/symboless-relocation.yaml b/llvm/test/tools/yaml2obj/symboless-relocation.yaml index 99f7af6109599..680e03e5c146a 100644 --- a/llvm/test/tools/yaml2obj/symboless-relocation.yaml +++ b/llvm/test/tools/yaml2obj/symboless-relocation.yaml @@ -14,7 +14,6 @@ Sections: Content: "00000000" - Name: .rel.text Type: SHT_REL - Link: .symtab Info: .text Relocations: - Offset: 0x1000 diff --git a/llvm/test/tools/yaml2obj/symtab-implicit-sections-flags.yaml b/llvm/test/tools/yaml2obj/symtab-implicit-sections-flags.yaml index 55ea02ab88363..d56c0dafd1532 100644 --- a/llvm/test/tools/yaml2obj/symtab-implicit-sections-flags.yaml +++ b/llvm/test/tools/yaml2obj/symtab-implicit-sections-flags.yaml @@ -77,3 +77,4 @@ FileHeader: Data: ELFDATA2LSB Type: ET_DYN Machine: EM_X86_64 +Symbols: [] diff --git a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp index 833312655788c..03e1bab9417e1 
100644 --- a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp +++ b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp @@ -174,7 +174,11 @@ static void dumpCXXData(const ObjectFile *Obj) { SectionRelocMap.clear(); for (const SectionRef &Section : Obj->sections()) { - section_iterator Sec2 = Section.getRelocatedSection(); + Expected ErrOrSec = Section.getRelocatedSection(); + if (!ErrOrSec) + error(ErrOrSec.takeError()); + + section_iterator Sec2 = *ErrOrSec; if (Sec2 != Obj->section_end()) SectionRelocMap[*Sec2].push_back(Section); } diff --git a/llvm/tools/llvm-exegesis/lib/Assembler.cpp b/llvm/tools/llvm-exegesis/lib/Assembler.cpp index c2b304f5e215c..4983823df0fcc 100644 --- a/llvm/tools/llvm-exegesis/lib/Assembler.cpp +++ b/llvm/tools/llvm-exegesis/lib/Assembler.cpp @@ -10,6 +10,7 @@ #include "SnippetRepetitor.h" #include "Target.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/llvm/tools/llvm-exegesis/lib/RegisterValue.cpp b/llvm/tools/llvm-exegesis/lib/RegisterValue.cpp index e83b73eaafc77..f881aa6d5388c 100644 --- a/llvm/tools/llvm-exegesis/lib/RegisterValue.cpp +++ b/llvm/tools/llvm-exegesis/lib/RegisterValue.cpp @@ -8,6 +8,7 @@ #include "RegisterValue.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/StringRef.h" namespace llvm { namespace exegesis { diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index c5381d5371793..34a44b3b7fa94 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -993,8 +993,17 @@ static size_t countSkippableZeroBytes(ArrayRef Buf) { static std::map> getRelocsMap(object::ObjectFile const &Obj) { std::map> Ret; + uint64_t I = (uint64_t)-1; for (SectionRef Sec : Obj.sections()) { - section_iterator Relocated = Sec.getRelocatedSection(); + ++I; + Expected RelocatedOrErr = 
Sec.getRelocatedSection(); + if (!RelocatedOrErr) + reportError(Obj.getFileName(), + "section (" + Twine(I) + + "): failed to get a relocated section: " + + toString(RelocatedOrErr.takeError())); + + section_iterator Relocated = *RelocatedOrErr; if (Relocated == Obj.section_end() || !checkSectionFilter(*Relocated).Keep) continue; std::vector &V = Ret[*Relocated]; @@ -1606,11 +1615,17 @@ void printRelocations(const ObjectFile *Obj) { // sections. Usually, there is an only one relocation section for // each relocated section. MapVector> SecToRelSec; - for (const SectionRef &Section : ToolSectionFilter(*Obj)) { + uint64_t Ndx; + for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) { if (Section.relocation_begin() == Section.relocation_end()) continue; - const SectionRef TargetSec = *Section.getRelocatedSection(); - SecToRelSec[TargetSec].push_back(Section); + Expected SecOrErr = Section.getRelocatedSection(); + if (!SecOrErr) + reportError(Obj->getFileName(), + "section (" + Twine(Ndx) + + "): unable to get a relocation target: " + + toString(SecOrErr.takeError())); + SecToRelSec[**SecOrErr].push_back(Section); } for (std::pair> &P : SecToRelSec) { diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 1470442c38b61..41e9abb82b1fc 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 7e140933393c7..57144882c4b44 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -4894,8 +4894,16 @@ void DumpStyle::printRelocatableStackSizes( if (SectionType != 
ELF::SHT_RELA && SectionType != ELF::SHT_REL) continue; - SectionRef Contents = *Sec.getRelocatedSection(); - const Elf_Shdr *ContentsSec = Obj->getSection(Contents.getRawDataRefImpl()); + Expected RelSecOrErr = Sec.getRelocatedSection(); + if (!RelSecOrErr) + reportError(createStringError(object_error::parse_failed, + "%s: failed to get a relocated section: %s", + SectionName.data(), + toString(RelSecOrErr.takeError()).c_str()), + Obj->getFileName()); + + const Elf_Shdr *ContentsSec = + Obj->getSection((*RelSecOrErr)->getRawDataRefImpl()); Expected ContentsSectionNameOrErr = EF->getSectionName(ContentsSec); if (!ContentsSectionNameOrErr) { diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 2c17b9570e1ba..35c33960c3737 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -200,9 +200,13 @@ template Expected ELFDumper::dump() { return TableOrErr.takeError(); ShndxTable = *TableOrErr; } - if (SymTab) - if (Error E = dumpSymbols(SymTab, Y->Symbols)) + + if (SymTab) { + Y->Symbols.emplace(); + if (Error E = dumpSymbols(SymTab, *Y->Symbols)) return std::move(E); + } + if (DynSymTab) if (Error E = dumpSymbols(DynSymTab, Y->DynamicSymbols)) return std::move(E); diff --git a/llvm/unittests/CodeGen/CMakeLists.txt b/llvm/unittests/CodeGen/CMakeLists.txt index 95cbafbe6115d..fc8cd22bcc7c7 100644 --- a/llvm/unittests/CodeGen/CMakeLists.txt +++ b/llvm/unittests/CodeGen/CMakeLists.txt @@ -19,7 +19,6 @@ add_llvm_unittest(CodeGenTests MachineInstrBundleIteratorTest.cpp MachineInstrTest.cpp MachineOperandTest.cpp - MachineSizeOptsTest.cpp ScalableVectorMVTsTest.cpp TypeTraitsTest.cpp TargetOptionsTest.cpp diff --git a/llvm/unittests/CodeGen/MachineSizeOptsTest.cpp b/llvm/unittests/CodeGen/MachineSizeOptsTest.cpp deleted file mode 100644 index f8b0c23e97177..0000000000000 --- a/llvm/unittests/CodeGen/MachineSizeOptsTest.cpp +++ /dev/null @@ -1,234 +0,0 @@ -//===- MachineSizeOptsTest.cpp 
--------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachineSizeOpts.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/CodeGen/MIRParser/MIRParser.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Target/TargetMachine.h" -#include "gtest/gtest.h" - -using namespace llvm; - -namespace { - -std::unique_ptr createTargetMachine() { - auto TT(Triple::normalize("x86_64--")); - std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); - return std::unique_ptr(static_cast( - TheTarget->createTargetMachine(TT, "", "", TargetOptions(), None, None, - CodeGenOpt::Default))); -} - -class MachineSizeOptsTest : public testing::Test { - protected: - static const char* MIRString; - LLVMContext Context; - std::unique_ptr TM; - std::unique_ptr MMI; - std::unique_ptr Parser; - std::unique_ptr M; - struct BFIData { - std::unique_ptr MDT; - std::unique_ptr MLI; - std::unique_ptr MBPI; - std::unique_ptr MBFI; - BFIData(MachineFunction &MF) { - MDT.reset(new MachineDominatorTree(MF)); - MLI.reset(new MachineLoopInfo(*MDT)); - MBPI.reset(new MachineBranchProbabilityInfo()); - MBFI.reset(new MachineBlockFrequencyInfo(MF, *MBPI, *MLI)); - } - MachineBlockFrequencyInfo *get() { return MBFI.get(); } - }; - - static 
void SetUpTestCase() { - InitializeAllTargets(); - InitializeAllTargetMCs(); - } - - void SetUp() override { - TM = createTargetMachine(); - std::unique_ptr MBuffer = - MemoryBuffer::getMemBuffer(MIRString); - Parser = createMIRParser(std::move(MBuffer), Context); - if (!Parser) - report_fatal_error("null MIRParser"); - M = Parser->parseIRModule(); - if (!M) - report_fatal_error("parseIRModule failed"); - M->setTargetTriple(TM->getTargetTriple().getTriple()); - M->setDataLayout(TM->createDataLayout()); - MMI = std::make_unique(TM.get()); - if (Parser->parseMachineFunctions(*M, *MMI.get())) - report_fatal_error("parseMachineFunctions failed"); - } - - MachineFunction *getMachineFunction(Module *M, StringRef Name) { - auto F = M->getFunction(Name); - if (!F) - report_fatal_error("null Function"); - auto &MF = MMI->getOrCreateMachineFunction(*F); - return &MF; - } -}; - -TEST_F(MachineSizeOptsTest, Test) { - MachineFunction *F = getMachineFunction(M.get(), "f"); - ASSERT_TRUE(F != nullptr); - MachineFunction *G = getMachineFunction(M.get(), "g"); - ASSERT_TRUE(G != nullptr); - MachineFunction *H = getMachineFunction(M.get(), "h"); - ASSERT_TRUE(H != nullptr); - ProfileSummaryInfo PSI = ProfileSummaryInfo(*M.get()); - ASSERT_TRUE(PSI.hasProfileSummary()); - BFIData BFID_F(*F); - BFIData BFID_G(*G); - BFIData BFID_H(*H); - MachineBlockFrequencyInfo *MBFI_F = BFID_F.get(); - MachineBlockFrequencyInfo *MBFI_G = BFID_G.get(); - MachineBlockFrequencyInfo *MBFI_H = BFID_H.get(); - MachineBasicBlock &BB0 = F->front(); - auto iter = BB0.succ_begin(); - MachineBasicBlock *BB1 = *iter; - iter++; - MachineBasicBlock *BB2 = *iter; - iter++; - ASSERT_TRUE(iter == BB0.succ_end()); - MachineBasicBlock *BB3 = *BB1->succ_begin(); - ASSERT_TRUE(BB3 == *BB2->succ_begin()); - EXPECT_FALSE(shouldOptimizeForSize(F, &PSI, MBFI_F)); - EXPECT_TRUE(shouldOptimizeForSize(G, &PSI, MBFI_G)); - EXPECT_FALSE(shouldOptimizeForSize(H, &PSI, MBFI_H)); - EXPECT_FALSE(shouldOptimizeForSize(&BB0, &PSI, 
MBFI_F)); - EXPECT_FALSE(shouldOptimizeForSize(BB1, &PSI, MBFI_F)); - EXPECT_TRUE(shouldOptimizeForSize(BB2, &PSI, MBFI_F)); - EXPECT_FALSE(shouldOptimizeForSize(BB3, &PSI, MBFI_F)); -} - -const char* MachineSizeOptsTest::MIRString = R"MIR( ---- | - define i32 @g(i32 %x) !prof !14 { - ret i32 0 - } - - define i32 @h(i32 %x) !prof !15 { - ret i32 0 - } - - define i32 @f(i32 %x) !prof !16 { - bb0: - %y1 = icmp eq i32 %x, 0 - br i1 %y1, label %bb1, label %bb2, !prof !17 - - bb1: ; preds = %bb0 - %z1 = call i32 @g(i32 %x) - br label %bb3 - - bb2: ; preds = %bb0 - %z2 = call i32 @h(i32 %x) - br label %bb3 - - bb3: ; preds = %bb2, %bb1 - %y2 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ] - ret i32 %y2 - } - - !llvm.module.flags = !{!0} - - !0 = !{i32 1, !"ProfileSummary", !1} - !1 = !{!2, !3, !4, !5, !6, !7, !8, !9} - !2 = !{!"ProfileFormat", !"InstrProf"} - !3 = !{!"TotalCount", i64 10000} - !4 = !{!"MaxCount", i64 10} - !5 = !{!"MaxInternalCount", i64 1} - !6 = !{!"MaxFunctionCount", i64 1000} - !7 = !{!"NumCounts", i64 3} - !8 = !{!"NumFunctions", i64 3} - !9 = !{!"DetailedSummary", !10} - !10 = !{!11, !12, !13} - !11 = !{i32 10000, i64 1000, i32 1} - !12 = !{i32 999000, i64 300, i32 3} - !13 = !{i32 999999, i64 5, i32 10} - !14 = !{!"function_entry_count", i64 1} - !15 = !{!"function_entry_count", i64 100} - !16 = !{!"function_entry_count", i64 400} - !17 = !{!"branch_weights", i32 100, i32 1} - -... ---- -name: g -body: | - bb.0: - %1:gr32 = MOV32r0 implicit-def dead $eflags - $eax = COPY %1 - RET 0, $eax - -... ---- -name: h -body: | - bb.0: - %1:gr32 = MOV32r0 implicit-def dead $eflags - $eax = COPY %1 - RET 0, $eax - -... 
---- -name: f -tracksRegLiveness: true -body: | - bb.0: - successors: %bb.1(0x7ebb907a), %bb.2(0x01446f86) - liveins: $edi - - %1:gr32 = COPY $edi - TEST32rr %1, %1, implicit-def $eflags - JCC_1 %bb.2, 5, implicit $eflags - JMP_1 %bb.1 - - bb.1: - successors: %bb.3(0x80000000) - - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - $edi = COPY %1 - CALL64pcrel32 @g, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %5:gr32 = COPY $eax - %4:gr32 = MOV32r0 implicit-def dead $eflags - JMP_1 %bb.3 - - bb.2: - successors: %bb.3(0x80000000) - - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - $edi = COPY %1 - CALL64pcrel32 @h, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax - ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %3:gr32 = COPY $eax - %2:gr32 = MOV32ri 1 - - bb.3: - %0:gr32 = PHI %2, %bb.2, %4, %bb.1 - $eax = COPY %0 - RET 0, $eax - -... 
-)MIR"; - -} // anonymous namespace diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp index 58a25166d83dc..742c5fa9fd901 100644 --- a/llvm/unittests/IR/ConstantRangeTest.cpp +++ b/llvm/unittests/IR/ConstantRangeTest.cpp @@ -1532,37 +1532,100 @@ TEST(ConstantRange, MakeGuaranteedNoWrapRegion) { EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( Instruction::Sub, One, OBO::NoUnsignedWrap), ConstantRange(APInt::getMinValue(32) + 1, APInt::getMinValue(32))); + + ConstantRange OneLessThanBitWidth(APInt(32, 0), APInt(32, 31) + 1); + ConstantRange UpToBitWidth(APInt(32, 0), APInt(32, 32) + 1); + EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, UpToBitWidth, OBO::NoUnsignedWrap), + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, OneLessThanBitWidth, OBO::NoUnsignedWrap)); + EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, UpToBitWidth, OBO::NoSignedWrap), + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, OneLessThanBitWidth, OBO::NoSignedWrap)); + EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, UpToBitWidth, OBO::NoUnsignedWrap), + ConstantRange(APInt(32, 0), APInt(32, 1) + 1)); + EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, UpToBitWidth, OBO::NoSignedWrap), + ConstantRange(APInt(32, -1), APInt(32, 0) + 1)); + + EXPECT_EQ( + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, ConstantRange::getFull(32), OBO::NoUnsignedWrap), + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, OneLessThanBitWidth, OBO::NoUnsignedWrap)); + EXPECT_EQ( + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, ConstantRange::getFull(32), OBO::NoSignedWrap), + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, OneLessThanBitWidth, OBO::NoSignedWrap)); + + ConstantRange IllegalShAmt(APInt(32, 32), APInt(32, 0) + 1); + EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( + 
Instruction::Shl, IllegalShAmt, OBO::NoUnsignedWrap), + ConstantRange::getFull(32)); + EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, IllegalShAmt, OBO::NoSignedWrap), + ConstantRange::getFull(32)); + + EXPECT_EQ( + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, ConstantRange(APInt(32, -32), APInt(32, 16) + 1), + OBO::NoUnsignedWrap), + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, ConstantRange(APInt(32, 0), APInt(32, 16) + 1), + OBO::NoUnsignedWrap)); + EXPECT_EQ( + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, ConstantRange(APInt(32, -32), APInt(32, 16) + 1), + OBO::NoSignedWrap), + ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, ConstantRange(APInt(32, 0), APInt(32, 16) + 1), + OBO::NoSignedWrap)); + + EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, + ConstantRange(APInt(32, -32), APInt(32, 16) + 1), + OBO::NoUnsignedWrap), + ConstantRange(APInt(32, 0), APInt(32, 65535) + 1)); + EXPECT_EQ(ConstantRange::makeGuaranteedNoWrapRegion( + Instruction::Shl, + ConstantRange(APInt(32, -32), APInt(32, 16) + 1), + OBO::NoSignedWrap), + ConstantRange(APInt(32, -32768), APInt(32, 32767) + 1)); } template void TestNoWrapRegionExhaustive(Instruction::BinaryOps BinOp, unsigned NoWrapKind, Fn OverflowFn) { - // When using 4 bits this test needs ~3s on a debug build. 
- unsigned Bits = 3; - EnumerateTwoConstantRanges(Bits, - [&](const ConstantRange &CR1, const ConstantRange &CR2) { - if (CR2.isEmptySet()) - return; - - ConstantRange NoWrap = - ConstantRange::makeGuaranteedNoWrapRegion(BinOp, CR2, NoWrapKind); - ForeachNumInConstantRange(CR1, [&](const APInt &N1) { - bool NoOverflow = true; - bool Overflow = true; - ForeachNumInConstantRange(CR2, [&](const APInt &N2) { - if (OverflowFn(N1, N2)) - NoOverflow = false; - else - Overflow = false; - }); - EXPECT_EQ(NoOverflow, NoWrap.contains(N1)); + unsigned Bits = 5; + EnumerateConstantRanges(Bits, [&](const ConstantRange &CR) { + if (CR.isEmptySet()) + return; + if (Instruction::isShift(BinOp) && CR.getUnsignedMax().uge(Bits)) + return; - // The no-wrap range is exact for single-element ranges. - if (CR2.isSingleElement()) { - EXPECT_EQ(Overflow, !NoWrap.contains(N1)); - } - }); + ConstantRange NoWrap = + ConstantRange::makeGuaranteedNoWrapRegion(BinOp, CR, NoWrapKind); + ConstantRange Full = ConstantRange::getFull(Bits); + ForeachNumInConstantRange(Full, [&](const APInt &N1) { + bool NoOverflow = true; + bool Overflow = true; + ForeachNumInConstantRange(CR, [&](const APInt &N2) { + if (OverflowFn(N1, N2)) + NoOverflow = false; + else + Overflow = false; }); + EXPECT_EQ(NoOverflow, NoWrap.contains(N1)); + + // The no-wrap range is exact for single-element ranges. 
+ if (CR.isSingleElement()) { + EXPECT_EQ(Overflow, !NoWrap.contains(N1)); + } + }); + }); } // Show that makeGuaranteedNoWrapRegion() is maximal, and for single-element @@ -1610,6 +1673,20 @@ TEST(ConstantRange, NoWrapRegionExhaustive) { (void) N1.smul_ov(N2, Overflow); return Overflow; }); + TestNoWrapRegionExhaustive(Instruction::Shl, + OverflowingBinaryOperator::NoUnsignedWrap, + [](const APInt &N1, const APInt &N2) { + bool Overflow; + (void)N1.ushl_ov(N2, Overflow); + return Overflow; + }); + TestNoWrapRegionExhaustive(Instruction::Shl, + OverflowingBinaryOperator::NoSignedWrap, + [](const APInt &N1, const APInt &N2) { + bool Overflow; + (void)N1.sshl_ov(N2, Overflow); + return Overflow; + }); } TEST(ConstantRange, GetEquivalentICmp) { @@ -1694,85 +1771,6 @@ TEST(ConstantRange, GetEquivalentICmp) { EXPECT_EQ(RHS, APInt(32, -1)); } -TEST(ConstantRange, MakeGuaranteedNoWrapRegionMulUnsignedSingleValue) { - typedef OverflowingBinaryOperator OBO; - - for (uint64_t I = std::numeric_limits::min(); - I <= std::numeric_limits::max(); I++) { - auto Range = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Mul, ConstantRange(APInt(8, I), APInt(8, I + 1)), - OBO::NoUnsignedWrap); - - for (uint64_t V = std::numeric_limits::min(); - V <= std::numeric_limits::max(); V++) { - bool Overflow; - (void)APInt(8, I).umul_ov(APInt(8, V), Overflow); - EXPECT_EQ(!Overflow, Range.contains(APInt(8, V))); - } - } -} - -TEST(ConstantRange, MakeGuaranteedNoWrapRegionMulSignedSingleValue) { - typedef OverflowingBinaryOperator OBO; - - for (int64_t I = std::numeric_limits::min(); - I <= std::numeric_limits::max(); I++) { - auto Range = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Mul, - ConstantRange(APInt(8, I, /*isSigned=*/true), - APInt(8, I + 1, /*isSigned=*/true)), - OBO::NoSignedWrap); - - for (int64_t V = std::numeric_limits::min(); - V <= std::numeric_limits::max(); V++) { - bool Overflow; - (void)APInt(8, I, /*isSigned=*/true) - .smul_ov(APInt(8, V, 
/*isSigned=*/true), Overflow); - EXPECT_EQ(!Overflow, Range.contains(APInt(8, V, /*isSigned=*/true))); - } - } -} - -TEST(ConstantRange, MakeGuaranteedNoWrapRegionMulUnsignedRange) { - typedef OverflowingBinaryOperator OBO; - - for (uint64_t Lo = std::numeric_limits::min(); - Lo <= std::numeric_limits::max(); Lo++) { - for (uint64_t Hi = Lo; Hi <= std::numeric_limits::max(); Hi++) { - EXPECT_EQ( - ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Mul, ConstantRange(APInt(8, Lo), APInt(8, Hi + 1)), - OBO::NoUnsignedWrap), - ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Mul, ConstantRange(APInt(8, Hi), APInt(8, Hi + 1)), - OBO::NoUnsignedWrap)); - } - } -} - -TEST(ConstantRange, MakeGuaranteedNoWrapRegionMulSignedRange) { - typedef OverflowingBinaryOperator OBO; - - int Lo = -12, Hi = 16; - auto Range = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Mul, - ConstantRange(APInt(8, Lo, /*isSigned=*/true), - APInt(8, Hi + 1, /*isSigned=*/true)), - OBO::NoSignedWrap); - - for (int64_t V = std::numeric_limits::min(); - V <= std::numeric_limits::max(); V++) { - bool AnyOverflow = false; - for (int64_t I = Lo; I <= Hi; I++) { - bool Overflow; - (void)APInt(8, I, /*isSigned=*/true) - .smul_ov(APInt(8, V, /*isSigned=*/true), Overflow); - AnyOverflow |= Overflow; - } - EXPECT_EQ(!AnyOverflow, Range.contains(APInt(8, V, /*isSigned=*/true))); - } -} - #define EXPECT_MAY_OVERFLOW(op) \ EXPECT_EQ(ConstantRange::OverflowResult::MayOverflow, (op)) #define EXPECT_ALWAYS_OVERFLOWS_LOW(op) \ diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp index e24e8e045dbd7..de8ac253c45cf 100644 --- a/llvm/unittests/IR/DataLayoutTest.cpp +++ b/llvm/unittests/IR/DataLayoutTest.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DataLayout.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" #include "gtest/gtest.h" using namespace llvm; @@ -44,4 +46,14 @@ 
TEST(DataLayoutTest, FunctionPtrAlign) { EXPECT_EQ(a, c); } +TEST(DataLayoutTest, ValueOrABITypeAlignment) { + const DataLayout DL("Fi8"); + LLVMContext Context; + Type *const FourByteAlignType = Type::getInt32Ty(Context); + EXPECT_EQ(Align(16), + DL.getValueOrABITypeAlignment(MaybeAlign(16), FourByteAlignType)); + EXPECT_EQ(Align(4), + DL.getValueOrABITypeAlignment(MaybeAlign(), FourByteAlignType)); +} + } // anonymous namespace diff --git a/llvm/unittests/IR/ModuleTest.cpp b/llvm/unittests/IR/ModuleTest.cpp index ae420bb5406d5..12eba7025eec8 100644 --- a/llvm/unittests/IR/ModuleTest.cpp +++ b/llvm/unittests/IR/ModuleTest.cpp @@ -8,6 +8,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/Pass.h" #include "llvm/Support/RandomNumberGenerator.h" #include "gtest/gtest.h" diff --git a/llvm/unittests/Support/SignalsTest.cpp b/llvm/unittests/Support/SignalsTest.cpp index 6dfa4bf996de7..8c595c203ae1b 100644 --- a/llvm/unittests/Support/SignalsTest.cpp +++ b/llvm/unittests/Support/SignalsTest.cpp @@ -9,6 +9,7 @@ #if !defined(_WIN32) #include #include +#include #endif // !defined(_WIN32) #include "llvm/Support/Signals.h" diff --git a/llvm/unittests/Target/ARM/CMakeLists.txt b/llvm/unittests/Target/ARM/CMakeLists.txt index 443c5253289df..1634c8cb36e93 100644 --- a/llvm/unittests/Target/ARM/CMakeLists.txt +++ b/llvm/unittests/Target/ARM/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS ARMDesc ARMInfo CodeGen + GlobalISel MC SelectionDAG Support diff --git a/llvm/unittests/Transforms/Utils/CMakeLists.txt b/llvm/unittests/Transforms/Utils/CMakeLists.txt index bc993a84f0212..785b79865dc13 100644 --- a/llvm/unittests/Transforms/Utils/CMakeLists.txt +++ b/llvm/unittests/Transforms/Utils/CMakeLists.txt @@ -14,7 +14,6 @@ add_llvm_unittest(UtilsTests FunctionComparatorTest.cpp IntegerDivisionTest.cpp LocalTest.cpp - SizeOptsTest.cpp SSAUpdaterBulkTest.cpp UnrollLoopTest.cpp ValueMapperTest.cpp diff --git 
a/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp b/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp deleted file mode 100644 index 55ca78635759b..0000000000000 --- a/llvm/unittests/Transforms/Utils/SizeOptsTest.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//===- SizeOptsTest.cpp - SizeOpts unit tests -----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SizeOpts.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/AsmParser/Parser.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/SourceMgr.h" -#include "gtest/gtest.h" - -using namespace llvm; - -namespace { - -class SizeOptsTest : public testing::Test { -protected: - static const char* IRString; - LLVMContext C; - std::unique_ptr M; - struct BFIData { - std::unique_ptr DT; - std::unique_ptr LI; - std::unique_ptr BPI; - std::unique_ptr BFI; - BFIData(Function &F) { - DT.reset(new DominatorTree(F)); - LI.reset(new LoopInfo(*DT)); - BPI.reset(new BranchProbabilityInfo(F, *LI)); - BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); - } - BlockFrequencyInfo *get() { return BFI.get(); } - }; - - void SetUp() override { - SMDiagnostic Err; - M = parseAssemblyString(IRString, Err, C); - } -}; - -TEST_F(SizeOptsTest, Test) { - Function *F = M->getFunction("f"); - Function *G = M->getFunction("g"); - Function *H = M->getFunction("h"); - - ProfileSummaryInfo PSI(*M.get()); - BFIData BFID_F(*F); - BFIData 
BFID_G(*G); - BFIData BFID_H(*H); - BlockFrequencyInfo *BFI_F = BFID_F.get(); - BlockFrequencyInfo *BFI_G = BFID_G.get(); - BlockFrequencyInfo *BFI_H = BFID_H.get(); - BasicBlock &BB0 = F->getEntryBlock(); - BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0); - BasicBlock *BB2 = BB0.getTerminator()->getSuccessor(1); - BasicBlock *BB3 = BB1->getSingleSuccessor(); - - EXPECT_TRUE(PSI.hasProfileSummary()); - EXPECT_FALSE(shouldOptimizeForSize(F, &PSI, BFI_F)); - EXPECT_TRUE(shouldOptimizeForSize(G, &PSI, BFI_G)); - EXPECT_FALSE(shouldOptimizeForSize(H, &PSI, BFI_H)); - EXPECT_FALSE(shouldOptimizeForSize(&BB0, &PSI, BFI_F)); - EXPECT_FALSE(shouldOptimizeForSize(BB1, &PSI, BFI_F)); - EXPECT_TRUE(shouldOptimizeForSize(BB2, &PSI, BFI_F)); - EXPECT_FALSE(shouldOptimizeForSize(BB3, &PSI, BFI_F)); -} - -const char* SizeOptsTest::IRString = R"IR( - define i32 @g(i32 %x) !prof !14 { - ret i32 0 - } - - define i32 @h(i32 %x) !prof !15 { - ret i32 0 - } - - define i32 @f(i32 %x) !prof !16 { - bb0: - %y1 = icmp eq i32 %x, 0 - br i1 %y1, label %bb1, label %bb2, !prof !17 - - bb1: ; preds = %bb0 - %z1 = call i32 @g(i32 %x) - br label %bb3 - - bb2: ; preds = %bb0 - %z2 = call i32 @h(i32 %x) - br label %bb3 - - bb3: ; preds = %bb2, %bb1 - %y2 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ] - ret i32 %y2 - } - - !llvm.module.flags = !{!0} - - !0 = !{i32 1, !"ProfileSummary", !1} - !1 = !{!2, !3, !4, !5, !6, !7, !8, !9} - !2 = !{!"ProfileFormat", !"InstrProf"} - !3 = !{!"TotalCount", i64 10000} - !4 = !{!"MaxCount", i64 10} - !5 = !{!"MaxInternalCount", i64 1} - !6 = !{!"MaxFunctionCount", i64 1000} - !7 = !{!"NumCounts", i64 3} - !8 = !{!"NumFunctions", i64 3} - !9 = !{!"DetailedSummary", !10} - !10 = !{!11, !12, !13} - !11 = !{i32 10000, i64 1000, i32 1} - !12 = !{i32 999000, i64 300, i32 3} - !13 = !{i32 999999, i64 5, i32 10} - !14 = !{!"function_entry_count", i64 1} - !15 = !{!"function_entry_count", i64 100} - !16 = !{!"function_entry_count", i64 400} - !17 = !{!"branch_weights", i32 100, 
i32 1} -)IR"; - -} // end anonymous namespace diff --git a/llvm/utils/gn/secondary/lld/Common/BUILD.gn b/llvm/utils/gn/secondary/lld/Common/BUILD.gn index 6f7e50bdfa502..a01696a7cf061 100644 --- a/llvm/utils/gn/secondary/lld/Common/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/Common/BUILD.gn @@ -16,6 +16,7 @@ static_library("Common") { ] sources = [ "Args.cpp", + "DWARF.cpp", "ErrorHandler.cpp", "Filesystem.cpp", "Memory.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn index 32a6c2ddad17a..8f7c27ebf0b64 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn @@ -116,7 +116,6 @@ static_library("CodeGen") { "MachineSSAUpdater.cpp", "MachineScheduler.cpp", "MachineSink.cpp", - "MachineSizeOpts.cpp", "MachineTraceMetrics.cpp", "MachineVerifier.cpp", "MacroFusion.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index e074a300426e0..94697889e2d91 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -20,7 +20,6 @@ unittest("CodeGenTests") { "MachineInstrBundleIteratorTest.cpp", "MachineInstrTest.cpp", "MachineOperandTest.cpp", - "MachineSizeOptsTest.cpp", "ScalableVectorMVTsTest.cpp", "TargetOptionsTest.cpp", "TypeTraitsTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Utils/BUILD.gn index 3313e12cddb1a..e4585e246c28e 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Utils/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Utils/BUILD.gn @@ -17,7 +17,6 @@ unittest("UtilsTests") { "IntegerDivisionTest.cpp", "LocalTest.cpp", "SSAUpdaterBulkTest.cpp", - "SizeOptsTest.cpp", "UnrollLoopTest.cpp", "ValueMapperTest.cpp", ] diff --git a/llvm/utils/lit/lit/llvm/config.py 
b/llvm/utils/lit/lit/llvm/config.py index 5b13e8de9603c..b0432995df149 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -99,6 +99,8 @@ def __init__(self, lit_config, config): features.add('target-x86_64') elif re.match(r'^aarch64.*', target_triple): features.add('target-aarch64') + elif re.match(r'^arm.*', target_triple): + features.add('target-arm') use_gmalloc = lit_config.params.get('use_gmalloc', None) if lit.util.pythonize_bool(use_gmalloc): diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp index db7bea1917607..61efbef96aef6 100644 --- a/polly/lib/Analysis/ScopBuilder.cpp +++ b/polly/lib/Analysis/ScopBuilder.cpp @@ -2890,7 +2890,7 @@ isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access, auto &DL = scop->getFunction().getParent()->getDataLayout(); if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(), - LI->getAlignment(), DL)) { + MaybeAlign(LI->getAlignment()), DL)) { SafeToLoad = isl::set::universe(AccessRelation.get_space().range()); } else if (BB != LI->getParent()) { // Skip accesses in non-affine subregions as they might not be executed @@ -2940,9 +2940,9 @@ bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA, // TODO: We can provide more information for better but more expensive // results. - if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(), - LInst->getType(), - LInst->getAlignment(), DL)) + if (!isDereferenceableAndAlignedPointer( + LInst->getPointerOperand(), LInst->getType(), + MaybeAlign(LInst->getAlignment()), DL)) return false; // If the location might be overwritten we do not hoist it unconditionally. 
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index da4b9bb179776..f57174f675df5 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -468,8 +468,8 @@ bool ScopDetection::onlyValidRequiredInvariantLoads( for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { if (isSafeToLoadUnconditionally(Load->getPointerOperand(), - Load->getType(), Load->getAlignment(), - DL)) + Load->getType(), + MaybeAlign(Load->getAlignment()), DL)) continue; if (NonAffineRegion->contains(Load) &&