diff --git a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt index 5b2cc93296420..b00f2ea2e6dc1 100644 --- a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt @@ -23,5 +23,5 @@ add_clang_library(clangTidyUtils clangBasic clangLex clangTidy - clangToolingRefactoring + clangTransformer ) diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h index 9a40bb7397b48..bcbc41507db22 100644 --- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h @@ -13,7 +13,7 @@ #include "../utils/IncludeInserter.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Frontend/CompilerInstance.h" -#include "clang/Tooling/Refactoring/Transformer.h" +#include "clang/Tooling/Transformer/Transformer.h" #include #include diff --git a/clang-tools-extra/clangd/xpc/cmake/modules/CreateClangdXPCFramework.cmake b/clang-tools-extra/clangd/xpc/cmake/modules/CreateClangdXPCFramework.cmake index fad58660df0a6..46738a204ace1 100644 --- a/clang-tools-extra/clangd/xpc/cmake/modules/CreateClangdXPCFramework.cmake +++ b/clang-tools-extra/clangd/xpc/cmake/modules/CreateClangdXPCFramework.cmake @@ -28,7 +28,7 @@ macro(create_clangd_xpc_framework target name) # Copy the framework binary. COMMAND ${CMAKE_COMMAND} -E copy - "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/lib${target}.dylib" + "$" "${CLANGD_FRAMEWORK_OUT_LOCATION}/${name}" # Copy the XPC Service PLIST. @@ -38,7 +38,7 @@ macro(create_clangd_xpc_framework target name) # Copy the Clangd binary. COMMAND ${CMAKE_COMMAND} -E copy - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/clangd" + "$" "${CLANGD_XPC_SERVICE_OUT_LOCATION}/MacOS/clangd" COMMAND ${CMAKE_COMMAND} -E create_symlink "A" diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt index 287b431c2cfb0..6c58b98786af4 100644 --- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt @@ -31,7 +31,7 @@ clang_target_link_libraries(ClangTidyTests clangSerialization clangTooling clangToolingCore - clangToolingRefactoring + clangTransformer ) target_link_libraries(ClangTidyTests PRIVATE diff --git a/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp b/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp index a1bdbab22f106..c8e65e9bf9881 100644 --- a/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp @@ -9,9 +9,9 @@ #include "../clang-tidy/utils/TransformerClangTidyCheck.h" #include "ClangTidyTest.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Tooling/Refactoring/RangeSelector.h" -#include "clang/Tooling/Refactoring/Stencil.h" -#include "clang/Tooling/Refactoring/Transformer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" +#include "clang/Tooling/Transformer/Stencil.h" +#include "clang/Tooling/Transformer/Transformer.h" #include "gtest/gtest.h" namespace clang { diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 1563ca8309d07..afde3f91299b0 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -179,7 +179,7 @@ implementation. 
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | task extension | combined taskloop constructs | :none:`unclaimed` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| task extension | master taskloop | :none:`unclaimed` | | +| task extension | master taskloop | :good:`done` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | task extension | parallel master taskloop | :none:`unclaimed` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index d6c734a53a1d0..7f823e38262de 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -60,6 +60,16 @@ Improvements to Clang's diagnostics Non-comprehensive list of changes in this release ------------------------------------------------- +* In both C and C++ (C17 ``6.5.6p8``, C++ ``[expr.add]``), pointer arithmetic is + only permitted within arrays. In particular, the behavior of a program is not + defined if it adds a non-zero offset (or in C, any offset) to a null pointer, + or if it forms a null pointer by subtracting an integer from a non-null + pointer, and the LLVM optimizer now uses those guarantees for transformations. + This may lead to unintended behavior in code that performs these operations. + The Undefined Behavior Sanitizer ``-fsanitize=pointer-overflow`` check has + been extended to detect these cases, so that code relying on them can be + detected and fixed. + - For X86 target, -march=skylake-avx512, -march=icelake-client, -march=icelake-server, -march=cascadelake, -march=cooperlake will default to not using 512-bit zmm registers in vectorized code unless 512-bit intrinsics @@ -70,7 +80,10 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ -- ... +- The -fgnuc-version= flag now controls the value of ``__GNUC__`` and related + macros. This flag does not enable or disable any GCC extensions implemented in + Clang. Setting the version to zero causes Clang to leave ``__GNUC__`` and + other GNU-namespaced macros, such as ``__GXX_WEAK__``, undefined. Deprecated Compiler Flags ------------------------- @@ -238,7 +251,40 @@ Static Analyzer Undefined Behavior Sanitizer (UBSan) ------------------------------------ -- ... +- * The ``pointer-overflow`` check was extended to catch the cases where + a non-zero offset is applied to a null pointer, or the result of + applying the offset is a null pointer. + + .. code-block:: c++ + + #include <stdint.h> // for intptr_t + + static char *getelementpointer_inbounds(char *base, unsigned long offset) { + // Potentially UB. + return base + offset; + } + + char *getelementpointer_unsafe(char *base, unsigned long offset) { + // Always apply offset. UB if base is ``nullptr`` and ``offset`` is not + // zero, or if ``base`` is non-``nullptr`` and ``offset`` is + // ``-reinterpret_cast<intptr_t>(base)``.
+ return getelementpointer_inbounds(base, offset); + } + + char *getelementpointer_safe(char *base, unsigned long offset) { + // Cast pointer to integer, perform usual arithmetic addition, + // and cast to pointer. This is legal. + char *computed = + reinterpret_cast<char *>(reinterpret_cast<intptr_t>(base) + offset); + // If either the pointer becomes non-``nullptr``, or becomes + // ``nullptr``, we must use the ``computed`` result. + if (((base == nullptr) && (computed != nullptr)) || + ((base != nullptr) && (computed == nullptr))) + return computed; + // Else we can use ``getelementpointer_inbounds()``. + return getelementpointer_inbounds(base, offset); + } + Core Analysis Improvements ========================== diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 06d533f4931e2..0f6a42a2113f5 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -130,7 +130,8 @@ Available checks are: ``__builtin_object_size``, and consequently may be able to detect more problems at higher optimization levels. - ``-fsanitize=pointer-overflow``: Performing pointer arithmetic which - overflows. + overflows, or where either the old or new pointer value is a null pointer + (or in C, when they both are). - ``-fsanitize=return``: In C++, reaching the end of a value-returning function without returning a value. - ``-fsanitize=returns-nonnull-attribute``: Returning null pointer diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 419714d38cdc4..de28d77671517 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -701,6 +701,13 @@ Other Options ------------- Clang options that don't fit neatly into other categories. +.. option:: -fgnuc-version= + + This flag controls the value of ``__GNUC__`` and related macros. This flag + does not enable or disable any GCC extensions implemented in Clang. Setting + the version to zero causes Clang to leave ``__GNUC__`` and other + GNU-namespaced macros, such as ``__GXX_WEAK__``, undefined. + .. option:: -MV When emitting a dependency file, use formatting conventions appropriate diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 52c34dc9603c0..5cbcc3b7f586a 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2555,7 +2555,12 @@ enum CXCursorKind { */ CXCursor_BuiltinBitCastExpr = 280, - CXCursor_LastStmt = CXCursor_BuiltinBitCastExpr, + /** OpenMP master taskloop directive. */ CXCursor_OMPMasterTaskLoopDirective = 281, + + + CXCursor_LastStmt = CXCursor_OMPMasterTaskLoopDirective, /** * Cursor that represents the translation unit itself. diff --git a/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def b/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def new file mode 100644 index 0000000000000..bd4d8247aeca5 --- /dev/null +++ b/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def @@ -0,0 +1,236 @@ +//===-- CXXRecordDeclDefinitionBits.def - Class definition bits -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file enumerates the various bitfields that we want to store on C++ class +// definitions.
+// +//===----------------------------------------------------------------------===// +// +/// @file CXXRecordDeclDefinitionBits.def +/// +/// In this file, each of the bitfields representing data about a C++ class +/// results in an expansion of the FIELD macro, which should be defined before +/// including this file. +/// +/// The macro has three operands: +/// +/// Name: The name of the field, as a member of CXXRecordDecl::DefinitionData. +/// +/// BitWidth: The width of the field in bits. +/// +/// MergePolicy: How to behave when the value of the field is different in +/// multiple translation units, one of: +/// NO_MERGE: It is an ODR violation if the fields do not match. +/// MERGE_OR: Merge the fields by ORing them together. + +#ifndef FIELD +#error define FIELD before including this file +#endif + +/// True if this class has any user-declared constructors. +FIELD(UserDeclaredConstructor, 1, NO_MERGE) + +/// The user-declared special members which this class has. +FIELD(UserDeclaredSpecialMembers, 6, NO_MERGE) + +/// True when this class is an aggregate. +FIELD(Aggregate, 1, NO_MERGE) + +/// True when this class is a POD-type. +FIELD(PlainOldData, 1, NO_MERGE) + +/// True when this class is empty for traits purposes, that is: +/// * has no data members other than 0-width bit-fields and empty fields +/// marked [[no_unique_address]] +/// * has no virtual function/base, and +/// * doesn't inherit from a non-empty class. +/// Doesn't take union-ness into account. +FIELD(Empty, 1, NO_MERGE) + +/// True when this class is polymorphic, i.e., has at +/// least one virtual member or derives from a polymorphic class. +FIELD(Polymorphic, 1, NO_MERGE) + +/// True when this class is abstract, i.e., has at least +/// one pure virtual function, (that can come from a base class). +FIELD(Abstract, 1, NO_MERGE) + +/// True when this class is standard-layout, per the applicable +/// language rules (including DRs). +FIELD(IsStandardLayout, 1, NO_MERGE) + +/// True when this class was standard-layout under the C++11 +/// definition. +/// +/// C++11 [class]p7. A standard-layout class is a class that: +/// * has no non-static data members of type non-standard-layout class (or +/// array of such types) or reference, +/// * has no virtual functions (10.3) and no virtual base classes (10.1), +/// * has the same access control (Clause 11) for all non-static data +/// members +/// * has no non-standard-layout base classes, +/// * either has no non-static data members in the most derived class and at +/// most one base class with non-static data members, or has no base +/// classes with non-static data members, and +/// * has no base classes of the same type as the first non-static data +/// member. +FIELD(IsCXX11StandardLayout, 1, NO_MERGE) + +/// True when any base class has any declared non-static data +/// members or bit-fields. +/// This is a helper bit of state used to implement IsStandardLayout more +/// efficiently. +FIELD(HasBasesWithFields, 1, NO_MERGE) + +/// True when any base class has any declared non-static data +/// members. +/// This is a helper bit of state used to implement IsCXX11StandardLayout +/// more efficiently. +FIELD(HasBasesWithNonStaticDataMembers, 1, NO_MERGE) + +/// True when there are private non-static data members. +FIELD(HasPrivateFields, 1, NO_MERGE) + +/// True when there are protected non-static data members. +FIELD(HasProtectedFields, 1, NO_MERGE) + +/// True when there are public non-static data members.
+FIELD(HasPublicFields, 1, NO_MERGE) + +/// True if this class (or any subobject) has mutable fields. +FIELD(HasMutableFields, 1, NO_MERGE) + +/// True if this class (or any nested anonymous struct or union) +/// has variant members. +FIELD(HasVariantMembers, 1, NO_MERGE) + +/// True if there no non-field members declared by the user. +FIELD(HasOnlyCMembers, 1, NO_MERGE) + +/// True if any field has an in-class initializer, including those +/// within anonymous unions or structs. +FIELD(HasInClassInitializer, 1, NO_MERGE) + +/// True if any field is of reference type, and does not have an +/// in-class initializer. +/// +/// In this case, value-initialization of this class is illegal in C++98 +/// even if the class has a trivial default constructor. +FIELD(HasUninitializedReferenceMember, 1, NO_MERGE) + +/// True if any non-mutable field whose type doesn't have a user- +/// provided default ctor also doesn't have an in-class initializer. +FIELD(HasUninitializedFields, 1, NO_MERGE) + +/// True if there are any member using-declarations that inherit +/// constructors from a base class. +FIELD(HasInheritedConstructor, 1, NO_MERGE) + +/// True if there are any member using-declarations named +/// 'operator='. +FIELD(HasInheritedAssignment, 1, NO_MERGE) + +/// These flags are \c true if a defaulted corresponding special +/// member can't be fully analyzed without performing overload resolution. +/// @{ +FIELD(NeedOverloadResolutionForCopyConstructor, 1, NO_MERGE) +FIELD(NeedOverloadResolutionForMoveConstructor, 1, NO_MERGE) +FIELD(NeedOverloadResolutionForMoveAssignment, 1, NO_MERGE) +FIELD(NeedOverloadResolutionForDestructor, 1, NO_MERGE) +/// @} + +/// These flags are \c true if an implicit defaulted corresponding +/// special member would be defined as deleted. +/// @{ +FIELD(DefaultedCopyConstructorIsDeleted, 1, NO_MERGE) +FIELD(DefaultedMoveConstructorIsDeleted, 1, NO_MERGE) +FIELD(DefaultedMoveAssignmentIsDeleted, 1, NO_MERGE) +FIELD(DefaultedDestructorIsDeleted, 1, NO_MERGE) +/// @} + +/// The trivial special members which this class has, per +/// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25, +/// C++11 [class.dtor]p5, or would have if the member were not suppressed. +/// +/// This excludes any user-declared but not user-provided special members +/// which have been declared but not yet defined. +FIELD(HasTrivialSpecialMembers, 6, MERGE_OR) + +/// These bits keep track of the triviality of special functions for the +/// purpose of calls. Only the bits corresponding to SMF_CopyConstructor, +/// SMF_MoveConstructor, and SMF_Destructor are meaningful here. +FIELD(HasTrivialSpecialMembersForCall, 6, MERGE_OR) + +/// The declared special members of this class which are known to be +/// non-trivial. +/// +/// This excludes any user-declared but not user-provided special members +/// which have been declared but not yet defined, and any implicit special +/// members which have not yet been declared. +FIELD(DeclaredNonTrivialSpecialMembers, 6, MERGE_OR) + +/// These bits keep track of the declared special members that are +/// non-trivial for the purpose of calls. +/// Only the bits corresponding to SMF_CopyConstructor, +/// SMF_MoveConstructor, and SMF_Destructor are meaningful here. +FIELD(DeclaredNonTrivialSpecialMembersForCall, 6, MERGE_OR) + +/// True when this class has a destructor with no semantic effect. 
+FIELD(HasIrrelevantDestructor, 1, NO_MERGE) + +/// True when this class has at least one user-declared constexpr +/// constructor which is neither the copy nor move constructor. +FIELD(HasConstexprNonCopyMoveConstructor, 1, MERGE_OR) + +/// True if this class has a (possibly implicit) defaulted default +/// constructor. +FIELD(HasDefaultedDefaultConstructor, 1, MERGE_OR) + +/// True if a defaulted default constructor for this class would +/// be constexpr. +FIELD(DefaultedDefaultConstructorIsConstexpr, 1, NO_MERGE) + +/// True if this class has a constexpr default constructor. +/// +/// This is true for either a user-declared constexpr default constructor +/// or an implicitly declared constexpr default constructor. +FIELD(HasConstexprDefaultConstructor, 1, MERGE_OR) + +/// True if a defaulted destructor for this class would be constexpr. +FIELD(DefaultedDestructorIsConstexpr, 1, NO_MERGE) + +/// True when this class contains at least one non-static data +/// member or base class of non-literal or volatile type. +FIELD(HasNonLiteralTypeFieldsOrBases, 1, NO_MERGE) + +/// Whether we have a C++11 user-provided default constructor (not +/// explicitly deleted or defaulted). +FIELD(UserProvidedDefaultConstructor, 1, NO_MERGE) + +/// The special members which have been declared for this class, +/// either by the user or implicitly. +FIELD(DeclaredSpecialMembers, 6, MERGE_OR) + +/// Whether an implicit copy constructor could have a const-qualified +/// parameter, for initializing virtual bases and for other subobjects. +FIELD(ImplicitCopyConstructorCanHaveConstParamForVBase, 1, NO_MERGE) +FIELD(ImplicitCopyConstructorCanHaveConstParamForNonVBase, 1, NO_MERGE) + +/// Whether an implicit copy assignment operator would have a +/// const-qualified parameter. +FIELD(ImplicitCopyAssignmentHasConstParam, 1, NO_MERGE) + +/// Whether any declared copy constructor has a const-qualified +/// parameter. +FIELD(HasDeclaredCopyConstructorWithConstParam, 1, MERGE_OR) + +/// Whether any declared copy assignment operator has either a +/// const-qualified reference parameter or a non-reference parameter. +FIELD(HasDeclaredCopyAssignmentWithConstParam, 1, MERGE_OR) + +#undef FIELD diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index cbac50daf14c7..ce674e09c44d4 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -4114,13 +4114,9 @@ class BlockDecl : public Decl, public DeclContext { void setCaptures(ASTContext &Context, ArrayRef Captures, bool CapturesCXXThis); - unsigned getBlockManglingNumber() const { - return ManglingNumber; - } + unsigned getBlockManglingNumber() const { return ManglingNumber; } - Decl *getBlockManglingContextDecl() const { - return ManglingContextDecl; - } + Decl *getBlockManglingContextDecl() const { return ManglingContextDecl; } void setBlockMangling(unsigned Number, Decl *Ctx) { ManglingNumber = Number; diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index de0aa5f9b900f..0c6c2846c9b8b 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -277,210 +277,9 @@ class CXXRecordDecl : public RecordDecl { }; struct DefinitionData { - /// True if this class has any user-declared constructors. - unsigned UserDeclaredConstructor : 1; - - /// The user-declared special members which this class has. - unsigned UserDeclaredSpecialMembers : 6; - - /// True when this class is an aggregate. - unsigned Aggregate : 1; - - /// True when this class is a POD-type. 
- unsigned PlainOldData : 1; - - /// True when this class is empty for traits purposes, that is: - /// * has no data members other than 0-width bit-fields and empty fields - /// marked [[no_unique_address]] - /// * has no virtual function/base, and - /// * doesn't inherit from a non-empty class. - /// Doesn't take union-ness into account. - unsigned Empty : 1; - - /// True when this class is polymorphic, i.e., has at - /// least one virtual member or derives from a polymorphic class. - unsigned Polymorphic : 1; - - /// True when this class is abstract, i.e., has at least - /// one pure virtual function, (that can come from a base class). - unsigned Abstract : 1; - - /// True when this class is standard-layout, per the applicable - /// language rules (including DRs). - unsigned IsStandardLayout : 1; - - /// True when this class was standard-layout under the C++11 - /// definition. - /// - /// C++11 [class]p7. A standard-layout class is a class that: - /// * has no non-static data members of type non-standard-layout class (or - /// array of such types) or reference, - /// * has no virtual functions (10.3) and no virtual base classes (10.1), - /// * has the same access control (Clause 11) for all non-static data - /// members - /// * has no non-standard-layout base classes, - /// * either has no non-static data members in the most derived class and at - /// most one base class with non-static data members, or has no base - /// classes with non-static data members, and - /// * has no base classes of the same type as the first non-static data - /// member. - unsigned IsCXX11StandardLayout : 1; - - /// True when any base class has any declared non-static data - /// members or bit-fields. - /// This is a helper bit of state used to implement IsStandardLayout more - /// efficiently. - unsigned HasBasesWithFields : 1; - - /// True when any base class has any declared non-static data - /// members. - /// This is a helper bit of state used to implement IsCXX11StandardLayout - /// more efficiently. - unsigned HasBasesWithNonStaticDataMembers : 1; - - /// True when there are private non-static data members. - unsigned HasPrivateFields : 1; - - /// True when there are protected non-static data members. - unsigned HasProtectedFields : 1; - - /// True when there are private non-static data members. - unsigned HasPublicFields : 1; - - /// True if this class (or any subobject) has mutable fields. - unsigned HasMutableFields : 1; - - /// True if this class (or any nested anonymous struct or union) - /// has variant members. - unsigned HasVariantMembers : 1; - - /// True if there no non-field members declared by the user. - unsigned HasOnlyCMembers : 1; - - /// True if any field has an in-class initializer, including those - /// within anonymous unions or structs. - unsigned HasInClassInitializer : 1; - - /// True if any field is of reference type, and does not have an - /// in-class initializer. - /// - /// In this case, value-initialization of this class is illegal in C++98 - /// even if the class has a trivial default constructor. - unsigned HasUninitializedReferenceMember : 1; - - /// True if any non-mutable field whose type doesn't have a user- - /// provided default ctor also doesn't have an in-class initializer. - unsigned HasUninitializedFields : 1; - - /// True if there are any member using-declarations that inherit - /// constructors from a base class. - unsigned HasInheritedConstructor : 1; - - /// True if there are any member using-declarations named - /// 'operator='. 
- unsigned HasInheritedAssignment : 1; - - /// These flags are \c true if a defaulted corresponding special - /// member can't be fully analyzed without performing overload resolution. - /// @{ - unsigned NeedOverloadResolutionForCopyConstructor : 1; - unsigned NeedOverloadResolutionForMoveConstructor : 1; - unsigned NeedOverloadResolutionForMoveAssignment : 1; - unsigned NeedOverloadResolutionForDestructor : 1; - /// @} - - /// These flags are \c true if an implicit defaulted corresponding - /// special member would be defined as deleted. - /// @{ - unsigned DefaultedCopyConstructorIsDeleted : 1; - unsigned DefaultedMoveConstructorIsDeleted : 1; - unsigned DefaultedMoveAssignmentIsDeleted : 1; - unsigned DefaultedDestructorIsDeleted : 1; - /// @} - - /// The trivial special members which this class has, per - /// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25, - /// C++11 [class.dtor]p5, or would have if the member were not suppressed. - /// - /// This excludes any user-declared but not user-provided special members - /// which have been declared but not yet defined. - unsigned HasTrivialSpecialMembers : 6; - - /// These bits keep track of the triviality of special functions for the - /// purpose of calls. Only the bits corresponding to SMF_CopyConstructor, - /// SMF_MoveConstructor, and SMF_Destructor are meaningful here. - unsigned HasTrivialSpecialMembersForCall : 6; - - /// The declared special members of this class which are known to be - /// non-trivial. - /// - /// This excludes any user-declared but not user-provided special members - /// which have been declared but not yet defined, and any implicit special - /// members which have not yet been declared. - unsigned DeclaredNonTrivialSpecialMembers : 6; - - /// These bits keep track of the declared special members that are - /// non-trivial for the purpose of calls. - /// Only the bits corresponding to SMF_CopyConstructor, - /// SMF_MoveConstructor, and SMF_Destructor are meaningful here. - unsigned DeclaredNonTrivialSpecialMembersForCall : 6; - - /// True when this class has a destructor with no semantic effect. - unsigned HasIrrelevantDestructor : 1; - - /// True when this class has at least one user-declared constexpr - /// constructor which is neither the copy nor move constructor. - unsigned HasConstexprNonCopyMoveConstructor : 1; - - /// True if this class has a (possibly implicit) defaulted default - /// constructor. - unsigned HasDefaultedDefaultConstructor : 1; - - /// True if a defaulted default constructor for this class would - /// be constexpr. - unsigned DefaultedDefaultConstructorIsConstexpr : 1; - - /// True if this class has a constexpr default constructor. - /// - /// This is true for either a user-declared constexpr default constructor - /// or an implicitly declared constexpr default constructor. - unsigned HasConstexprDefaultConstructor : 1; - - /// True if a defaulted destructor for this class would be constexpr. - unsigned DefaultedDestructorIsConstexpr : 1; - - /// True when this class contains at least one non-static data - /// member or base class of non-literal or volatile type. - unsigned HasNonLiteralTypeFieldsOrBases : 1; - - /// True when visible conversion functions are already computed - /// and are available. - unsigned ComputedVisibleConversions : 1; - - /// Whether we have a C++11 user-provided default constructor (not - /// explicitly deleted or defaulted). 
- unsigned UserProvidedDefaultConstructor : 1; - - /// The special members which have been declared for this class, - /// either by the user or implicitly. - unsigned DeclaredSpecialMembers : 6; - - /// Whether an implicit copy constructor could have a const-qualified - /// parameter, for initializing virtual bases and for other subobjects. - unsigned ImplicitCopyConstructorCanHaveConstParamForVBase : 1; - unsigned ImplicitCopyConstructorCanHaveConstParamForNonVBase : 1; - - /// Whether an implicit copy assignment operator would have a - /// const-qualified parameter. - unsigned ImplicitCopyAssignmentHasConstParam : 1; - - /// Whether any declared copy constructor has a const-qualified - /// parameter. - unsigned HasDeclaredCopyConstructorWithConstParam : 1; - - /// Whether any declared copy assignment operator has either a - /// const-qualified reference parameter or a non-reference parameter. - unsigned HasDeclaredCopyAssignmentWithConstParam : 1; + #define FIELD(Name, Width, Merge) \ + unsigned Name : Width; + #include "CXXRecordDeclDefinitionBits.def" /// Whether this class describes a C++ lambda. unsigned IsLambda : 1; @@ -488,6 +287,10 @@ class CXXRecordDecl : public RecordDecl { /// Whether we are currently parsing base specifiers. unsigned IsParsingBaseSpecifiers : 1; + /// True when visible conversion functions are already computed + /// and are available. + unsigned ComputedVisibleConversions : 1; + unsigned HasODRHash : 1; /// A hash of parts of the class to help in ODR checking. diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 426c256b6a48d..20b298093cfc5 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2788,6 +2788,9 @@ DEF_TRAVERSE_STMT(OMPTaskLoopDirective, DEF_TRAVERSE_STMT(OMPTaskLoopSimdDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPMasterTaskLoopDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPDistributeDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 9d58c1d793a82..90eb541a9c949 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -1164,6 +1164,7 @@ class OMPLoopDirective : public OMPExecutableDirective { T->getStmtClass() == OMPParallelForSimdDirectiveClass || T->getStmtClass() == OMPTaskLoopDirectiveClass || T->getStmtClass() == OMPTaskLoopSimdDirectiveClass || + T->getStmtClass() == OMPMasterTaskLoopDirectiveClass || T->getStmtClass() == OMPDistributeDirectiveClass || T->getStmtClass() == OMPTargetParallelForDirectiveClass || T->getStmtClass() == OMPDistributeParallelForDirectiveClass || @@ -3119,6 +3120,74 @@ class OMPTaskLoopSimdDirective : public OMPLoopDirective { } }; +/// This represents '#pragma omp master taskloop' directive. +/// +/// \code +/// #pragma omp master taskloop private(a,b) grainsize(val) num_tasks(num) +/// \endcode +/// In this example directive '#pragma omp master taskloop' has clauses +/// 'private' with the variables 'a' and 'b', 'grainsize' with expression 'val' +/// and 'num_tasks' with expression 'num'. +/// +class OMPMasterTaskLoopDirective : public OMPLoopDirective { + friend class ASTStmtReader; + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. 
+ /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + OMPMasterTaskLoopDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, unsigned NumClauses) + : OMPLoopDirective(this, OMPMasterTaskLoopDirectiveClass, + OMPD_master_taskloop, StartLoc, EndLoc, CollapsedNum, + NumClauses) {} + + /// Build an empty directive. + /// + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + explicit OMPMasterTaskLoopDirective(unsigned CollapsedNum, + unsigned NumClauses) + : OMPLoopDirective(this, OMPMasterTaskLoopDirectiveClass, + OMPD_master_taskloop, SourceLocation(), + SourceLocation(), CollapsedNum, NumClauses) {} + +public: + /// Creates directive with a list of \a Clauses. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. + /// \param CollapsedNum Number of collapsed loops. + /// \param Clauses List of clauses. + /// \param AssociatedStmt Statement, associated with the directive. + /// \param Exprs Helper expressions for CodeGen. + /// + static OMPMasterTaskLoopDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, + Stmt *AssociatedStmt, const HelperExprs &Exprs); + + /// Creates an empty directive with the place + /// for \a NumClauses clauses. + /// + /// \param C AST context. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + static OMPMasterTaskLoopDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, + EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPMasterTaskLoopDirectiveClass; + } +}; + /// This represents '#pragma omp distribute' directive. /// /// \code diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 736f201a12b45..d2266cc2d613c 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -278,6 +278,10 @@ CODEGENOPT(EmitLLVMUseLists, 1, 0) ///< Control whether to serialize use-lists. CODEGENOPT(WholeProgramVTables, 1, 0) ///< Whether to apply whole-program /// vtable optimization. +CODEGENOPT(VirtualFunctionElimination, 1, 0) ///< Whether to apply the dead + /// virtual function elimination + /// optimization. + /// Whether to use public LTO visibility for entities in std and stdext /// namespaces. This is enabled by clang-cl's /MT and /MTd flags. 
CODEGENOPT(LTOVisibilityPublicStd, 1, 0) diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index 63207a0e2254a..04d767445a8f4 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -126,7 +126,8 @@ def note_constexpr_lifetime_ended : Note< "%plural{8:storage duration|:lifetime}0 has ended">; def note_constexpr_access_uninit : Note< "%select{read of|read of|assignment to|increment of|decrement of|" - "member call on|dynamic_cast of|typeid applied to||destruction of}0 " + "member call on|dynamic_cast of|typeid applied to|" + "construction of subobject of|destruction of}0 " "%select{object outside its lifetime|uninitialized object}1 " "is not allowed in a constant expression">; def note_constexpr_use_uninit_reference : Note< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index eca8b7d0d822d..696d6d3d0cb40 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -111,6 +111,7 @@ BENIGN_LANGOPT(DollarIdents , 1, 1, "'$' in identifiers") BENIGN_LANGOPT(AsmPreprocessor, 1, 0, "preprocessor in asm mode") LANGOPT(GNUMode , 1, 1, "GNU extensions") LANGOPT(GNUKeywords , 1, 1, "GNU keywords") +VALUE_LANGOPT(GNUCVersion , 32, 0, "GNU C compatibility version") BENIGN_LANGOPT(ImplicitInt, 1, !C99 && !CPlusPlus, "C89 implicit 'int'") LANGOPT(Digraphs , 1, 0, "digraphs") BENIGN_LANGOPT(HexFloats , 1, C99, "C99 hexadecimal float constants") diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index e8a40de3e3cdb..6f9067d698bcc 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -92,6 +92,9 @@ #ifndef OPENMP_TASKLOOP_SIMD_CLAUSE # define OPENMP_TASKLOOP_SIMD_CLAUSE(Name) #endif +#ifndef OPENMP_MASTER_TASKLOOP_CLAUSE +# define OPENMP_MASTER_TASKLOOP_CLAUSE(Name) +#endif #ifndef OPENMP_CRITICAL_CLAUSE # define OPENMP_CRITICAL_CLAUSE(Name) #endif @@ -258,6 +261,7 @@ OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd, "target teams di OPENMP_DIRECTIVE_EXT(target_teams_distribute_simd, "target teams distribute simd") OPENMP_DIRECTIVE(allocate) OPENMP_DIRECTIVE_EXT(declare_variant, "declare variant") +OPENMP_DIRECTIVE_EXT(master_taskloop, "master taskloop") // OpenMP clauses. OPENMP_CLAUSE(allocator, OMPAllocatorClause) @@ -666,6 +670,25 @@ OPENMP_TASKLOOP_SIMD_CLAUSE(reduction) OPENMP_TASKLOOP_SIMD_CLAUSE(in_reduction) OPENMP_TASKLOOP_SIMD_CLAUSE(allocate) +// Clauses allowed for OpenMP directive 'master taskloop'. +OPENMP_MASTER_TASKLOOP_CLAUSE(if) +OPENMP_MASTER_TASKLOOP_CLAUSE(shared) +OPENMP_MASTER_TASKLOOP_CLAUSE(private) +OPENMP_MASTER_TASKLOOP_CLAUSE(firstprivate) +OPENMP_MASTER_TASKLOOP_CLAUSE(lastprivate) +OPENMP_MASTER_TASKLOOP_CLAUSE(default) +OPENMP_MASTER_TASKLOOP_CLAUSE(collapse) +OPENMP_MASTER_TASKLOOP_CLAUSE(final) +OPENMP_MASTER_TASKLOOP_CLAUSE(untied) +OPENMP_MASTER_TASKLOOP_CLAUSE(mergeable) +OPENMP_MASTER_TASKLOOP_CLAUSE(priority) +OPENMP_MASTER_TASKLOOP_CLAUSE(grainsize) +OPENMP_MASTER_TASKLOOP_CLAUSE(nogroup) +OPENMP_MASTER_TASKLOOP_CLAUSE(num_tasks) +OPENMP_MASTER_TASKLOOP_CLAUSE(reduction) +OPENMP_MASTER_TASKLOOP_CLAUSE(in_reduction) +OPENMP_MASTER_TASKLOOP_CLAUSE(allocate) + // Clauses allowed for OpenMP directive 'critical'. 
OPENMP_CRITICAL_CLAUSE(hint) @@ -978,6 +1001,7 @@ OPENMP_MATCH_KIND(implementation) #undef OPENMP_ALLOCATE_CLAUSE #undef OPENMP_DECLARE_MAPPER_CLAUSE #undef OPENMP_TASKGROUP_CLAUSE +#undef OPENMP_MASTER_TASKLOOP_CLAUSE #undef OPENMP_TASKLOOP_SIMD_CLAUSE #undef OPENMP_TASKLOOP_CLAUSE #undef OPENMP_LINEAR_KIND diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index be364de1a76cf..eb5af6f4153d5 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -242,6 +242,7 @@ def OMPCancellationPointDirective : DStmt; def OMPCancelDirective : DStmt; def OMPTaskLoopDirective : DStmt; def OMPTaskLoopSimdDirective : DStmt; +def OMPMasterTaskLoopDirective : DStmt; def OMPDistributeDirective : DStmt; def OMPDistributeParallelForDirective : DStmt; def OMPDistributeParallelForSimdDirective : DStmt; diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 428c22d1a0111..a52ed496580d6 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -1651,10 +1651,10 @@ let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in { // v8.2-A FP16 fused multiply-add long instructions. let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in { - def VFMLAL_LOW : SInst<"vfmlal_low", "ffHH", "hQh">; - def VFMLSL_LOW : SInst<"vfmlsl_low", "ffHH", "hQh">; - def VFMLAL_HIGH : SInst<"vfmlal_high", "ffHH", "hQh">; - def VFMLSL_HIGH : SInst<"vfmlsl_high", "ffHH", "hQh">; + def VFMLAL_LOW : SInst<"vfmlal_low", "nndd", "hQh">; + def VFMLSL_LOW : SInst<"vfmlsl_low", "nndd", "hQh">; + def VFMLAL_HIGH : SInst<"vfmlal_high", "nndd", "hQh">; + def VFMLSL_HIGH : SInst<"vfmlsl_high", "nndd", "hQh">; def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "ffH0i", "hQh", OP_FMLAL_LN>; def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "ffH0i", "hQh", OP_FMLSL_LN>; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 915ab509c4c63..a5c38858a4578 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1169,7 +1169,8 @@ def ftrapping_math : Flag<["-"], "ftrapping-math">, Group, Flags<[CC1Op def fno_trapping_math : Flag<["-"], "fno-trapping-math">, Group, Flags<[CC1Option]>; def ffp_contract : Joined<["-"], "ffp-contract=">, Group, Flags<[CC1Option]>, HelpText<"Form fused FP ops (e.g. FMAs): fast (everywhere)" - " | on (according to FP_CONTRACT pragma, default) | off (never fuse)">, Values<"fast,on,off">; + " | on (according to FP_CONTRACT pragma) | off (never fuse). 
Default" + " is 'fast' for CUDA/HIP and 'on' otherwise.">, Values<"fast,on,off">; def fstrict_float_cast_overflow : Flag<["-"], "fstrict-float-cast-overflow">, Group, Flags<[CC1Option]>, @@ -1209,6 +1210,9 @@ def fno_use_line_directives : Flag<["-"], "fno-use-line-directives">, Group, Group, Flags<[CC1Option]>, HelpText<"Assert that the compilation takes place in a freestanding environment">; +def fgnuc_version_EQ : Joined<["-"], "fgnuc-version=">, Group, + HelpText<"Sets various macros to claim compatibility with the given GCC version (default is 4.2.1)">, + Flags<[CC1Option, CoreOption]>; def fgnu_keywords : Flag<["-"], "fgnu-keywords">, Group, Flags<[CC1Option]>, HelpText<"Allow GNU-extension keywords regardless of language standard">; def fgnu89_inline : Flag<["-"], "fgnu89-inline">, Group, Flags<[CC1Option]>, @@ -1843,7 +1847,12 @@ def : Flag<["-"], "fterminated-vtables">, Alias; def fthreadsafe_statics : Flag<["-"], "fthreadsafe-statics">, Group; def ftime_report : Flag<["-"], "ftime-report">, Group, Flags<[CC1Option]>; def ftime_trace : Flag<["-"], "ftime-trace">, Group, - HelpText<"Turn on time profiler">, Flags<[CC1Option, CoreOption]>; + HelpText<"Turn on time profiler. Generates JSON file based on output filename.">, + DocBrief<[{ +Turn on time profiler. Generates JSON file based on output filename. Results +can be analyzed with chrome://tracing or `Speedscope App +`_ for flamegraph visualization.}]>, + Flags<[CC1Option, CoreOption]>; def ftime_trace_granularity_EQ : Joined<["-"], "ftime-trace-granularity=">, Group, HelpText<"Minimum time granularity (in microseconds) traced by time profiler">, Flags<[CC1Option, CoreOption]>; @@ -1906,6 +1915,13 @@ def fforce_emit_vtables : Flag<["-"], "fforce-emit-vtables">, Group, HelpText<"Emits more virtual tables to improve devirtualization">; def fno_force_emit_vtables : Flag<["-"], "fno-force-emit-vtables">, Group, Flags<[CoreOption]>; + +def fvirtual_function_elimination : Flag<["-"], "fvirtual-function-elimination">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Enables dead virtual function elimination optimization. Requires -flto=full">; +def fno_virtual_function_elimination : Flag<["-"], "fno-virtual-function_elimination">, Group, + Flags<[CoreOption]>; + def fwrapv : Flag<["-"], "fwrapv">, Group, Flags<[CC1Option]>, HelpText<"Treat signed integer overflow as two's complement">; def fwritable_strings : Flag<["-"], "fwritable-strings">, Group, Flags<[CC1Option]>, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 24302b811ae91..c220cdfb49b7a 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -57,6 +57,7 @@ #include #include #include +#include #include namespace llvm { @@ -1214,15 +1215,12 @@ class Sema { void WarnOnPendingNoDerefs(ExpressionEvaluationContextRecord &Rec); /// Compute the mangling number context for a lambda expression or - /// block literal. + /// block literal. Also return the extra mangling decl if any. /// /// \param DC - The DeclContext containing the lambda expression or /// block literal. - /// \param[out] ManglingContextDecl - Returns the ManglingContextDecl - /// associated with the context, if relevant. 
- MangleNumberingContext *getCurrentMangleNumberContext( - const DeclContext *DC, - Decl *&ManglingContextDecl); + std::tuple<MangleNumberingContext *, Decl *> + getCurrentMangleNumberContext(const DeclContext *DC); /// SpecialMemberOverloadResult - The overloading result for a special member @@ -3573,6 +3571,19 @@ class Sema { bool DiagnoseMissing); bool isKnownName(StringRef name); + /// Status of the function emission on the CUDA/HIP/OpenMP host/device attrs. + enum class FunctionEmissionStatus { + Emitted, + CUDADiscarded, // Discarded due to CUDA/HIP hostness + OMPDiscarded, // Discarded due to OpenMP hostness + TemplateDiscarded, // Discarded due to uninstantiated templates + Unknown, + }; + FunctionEmissionStatus getEmissionStatus(FunctionDecl *Decl); + + // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. + bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); + void ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc, ArrayRef<Expr *> Args, ADLResult &Functions); @@ -9593,6 +9604,11 @@ class Sema { StmtResult ActOnOpenMPTaskLoopSimdDirective( ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp master taskloop' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPMasterTaskLoopDirective( + ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); /// Called on well-formed '\#pragma omp distribute' after parsing /// of the associated statement. StmtResult diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index be0bbe1a8c816..0a535f9f0e030 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1961,6 +1961,7 @@ namespace serialization { STMT_OMP_CANCEL_DIRECTIVE, STMT_OMP_TASKLOOP_DIRECTIVE, STMT_OMP_TASKLOOP_SIMD_DIRECTIVE, + STMT_OMP_MASTER_TASKLOOP_DIRECTIVE, STMT_OMP_DISTRIBUTE_DIRECTIVE, STMT_OMP_TARGET_UPDATE_DIRECTIVE, STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE, diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 1d0d26589e04d..7d0881343478a 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -168,6 +168,9 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { return It == Cache.end() ? nullptr : It->getValue(); } + llvm::ErrorOr<const CachedFileSystemEntry *> + getOrCreateFileSystemEntry(const StringRef Filename); + DependencyScanningFilesystemSharedCache &SharedCache; /// The local cache is used by the worker thread to cache file system queries /// locally instead of querying the global cache every time.
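The Sema entry point and serialization changes above wire up the new OpenMP 'master taskloop' combined directive. For reference, an illustrative sketch of how the directive is typically written follows; the function, the loop body, and the grainsize(64) value are assumptions chosen for the example, not code taken from this patch:

    // A single thread (the master of the enclosing parallel region) creates the
    // taskloop tasks; the rest of the team helps execute them.
    void scale(int n, float a, float *x) {
    #pragma omp parallel
    #pragma omp master taskloop grainsize(64)
      for (int i = 0; i < n; ++i)
        x[i] *= a;
    }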
diff --git a/clang/include/clang/Tooling/Refactoring/MatchConsumer.h b/clang/include/clang/Tooling/Transformer/MatchConsumer.h similarity index 92% rename from clang/include/clang/Tooling/Refactoring/MatchConsumer.h rename to clang/include/clang/Tooling/Transformer/MatchConsumer.h index d516550d0f02b..51eb6af25a40e 100644 --- a/clang/include/clang/Tooling/Refactoring/MatchConsumer.h +++ b/clang/include/clang/Tooling/Transformer/MatchConsumer.h @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_MATCH_CONSUMER_H_ -#define LLVM_CLANG_TOOLING_REFACTOR_MATCH_CONSUMER_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ #include "clang/AST/ASTTypeTraits.h" #include "clang/ASTMatchers/ASTMatchFinder.h" @@ -55,4 +55,4 @@ MatchConsumer ifBound(std::string ID, MatchConsumer TrueC, } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_MATCH_CONSUMER_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ diff --git a/clang/include/clang/Tooling/Refactoring/RangeSelector.h b/clang/include/clang/Tooling/Transformer/RangeSelector.h similarity index 98% rename from clang/include/clang/Tooling/Refactoring/RangeSelector.h rename to clang/include/clang/Tooling/Transformer/RangeSelector.h index d5b5c8fbd8a5b..e178fb36a8e36 100644 --- a/clang/include/clang/Tooling/Refactoring/RangeSelector.h +++ b/clang/include/clang/Tooling/Transformer/RangeSelector.h @@ -17,7 +17,7 @@ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/SourceLocation.h" -#include "clang/Tooling/Refactoring/MatchConsumer.h" +#include "clang/Tooling/Transformer/MatchConsumer.h" #include "llvm/Support/Error.h" #include #include diff --git a/clang/include/clang/Tooling/Refactoring/SourceCode.h b/clang/include/clang/Tooling/Transformer/SourceCode.h similarity index 95% rename from clang/include/clang/Tooling/Refactoring/SourceCode.h rename to clang/include/clang/Tooling/Transformer/SourceCode.h index 72dbee4a43170..bc9cc3d2a2580 100644 --- a/clang/include/clang/Tooling/Refactoring/SourceCode.h +++ b/clang/include/clang/Tooling/Transformer/SourceCode.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_H -#define LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_H +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H +#define LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceLocation.h" @@ -87,4 +87,4 @@ getRangeForEdit(const CharSourceRange &EditRange, const ASTContext &Context) { } } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_H +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H diff --git a/clang/include/clang/Tooling/Refactoring/SourceCodeBuilders.h b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h similarity index 94% rename from clang/include/clang/Tooling/Refactoring/SourceCodeBuilders.h rename to clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h index 797046f3ecffb..6c79a7588f28d 100644 --- a/clang/include/clang/Tooling/Refactoring/SourceCodeBuilders.h +++ b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_BUILDERS_H_ -#define 
LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_BUILDERS_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ #include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" @@ -83,4 +83,4 @@ llvm::Optional buildArrow(const Expr &E, } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_BUILDERS_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ diff --git a/clang/include/clang/Tooling/Refactoring/Stencil.h b/clang/include/clang/Tooling/Transformer/Stencil.h similarity index 96% rename from clang/include/clang/Tooling/Refactoring/Stencil.h rename to clang/include/clang/Tooling/Transformer/Stencil.h index b80320d409b33..617585cacdbfb 100644 --- a/clang/include/clang/Tooling/Refactoring/Stencil.h +++ b/clang/include/clang/Tooling/Transformer/Stencil.h @@ -17,14 +17,14 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_STENCIL_H_ -#define LLVM_CLANG_TOOLING_REFACTOR_STENCIL_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_STENCIL_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_STENCIL_H_ #include "clang/AST/ASTContext.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/Tooling/Refactoring/MatchConsumer.h" -#include "clang/Tooling/Refactoring/RangeSelector.h" +#include "clang/Tooling/Transformer/MatchConsumer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include @@ -223,4 +223,4 @@ StencilPart dPrint(llvm::StringRef Id); } // namespace stencil } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_STENCIL_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_STENCIL_H_ diff --git a/clang/include/clang/Tooling/Refactoring/Transformer.h b/clang/include/clang/Tooling/Transformer/Transformer.h similarity index 98% rename from clang/include/clang/Tooling/Refactoring/Transformer.h rename to clang/include/clang/Tooling/Transformer/Transformer.h index 0971cc3e66793..0dc1e820e21bf 100644 --- a/clang/include/clang/Tooling/Refactoring/Transformer.h +++ b/clang/include/clang/Tooling/Transformer/Transformer.h @@ -12,15 +12,15 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_TRANSFORMER_H_ -#define LLVM_CLANG_TOOLING_REFACTOR_TRANSFORMER_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_TRANSFORMER_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_TRANSFORMER_H_ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/ASTMatchers/ASTMatchersInternal.h" #include "clang/Tooling/Refactoring/AtomicChange.h" -#include "clang/Tooling/Refactoring/MatchConsumer.h" -#include "clang/Tooling/Refactoring/RangeSelector.h" +#include "clang/Tooling/Transformer/MatchConsumer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Error.h" @@ -317,4 +317,4 @@ class Transformer : public ast_matchers::MatchFinder::MatchCallback { } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_TRANSFORMER_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_TRANSFORMER_H_ diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index e483216e8f370..20539feb44860 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1836,71 +1836,10 @@ Error 
ASTNodeImporter::ImportDefinition( struct CXXRecordDecl::DefinitionData &ToData = ToCXX->data(); struct CXXRecordDecl::DefinitionData &FromData = FromCXX->data(); - ToData.UserDeclaredConstructor = FromData.UserDeclaredConstructor; - ToData.UserDeclaredSpecialMembers = FromData.UserDeclaredSpecialMembers; - ToData.Aggregate = FromData.Aggregate; - ToData.PlainOldData = FromData.PlainOldData; - ToData.Empty = FromData.Empty; - ToData.Polymorphic = FromData.Polymorphic; - ToData.Abstract = FromData.Abstract; - ToData.IsStandardLayout = FromData.IsStandardLayout; - ToData.IsCXX11StandardLayout = FromData.IsCXX11StandardLayout; - ToData.HasBasesWithFields = FromData.HasBasesWithFields; - ToData.HasBasesWithNonStaticDataMembers = - FromData.HasBasesWithNonStaticDataMembers; - ToData.HasPrivateFields = FromData.HasPrivateFields; - ToData.HasProtectedFields = FromData.HasProtectedFields; - ToData.HasPublicFields = FromData.HasPublicFields; - ToData.HasMutableFields = FromData.HasMutableFields; - ToData.HasVariantMembers = FromData.HasVariantMembers; - ToData.HasOnlyCMembers = FromData.HasOnlyCMembers; - ToData.HasInClassInitializer = FromData.HasInClassInitializer; - ToData.HasUninitializedReferenceMember - = FromData.HasUninitializedReferenceMember; - ToData.HasUninitializedFields = FromData.HasUninitializedFields; - ToData.HasInheritedConstructor = FromData.HasInheritedConstructor; - ToData.HasInheritedAssignment = FromData.HasInheritedAssignment; - ToData.NeedOverloadResolutionForCopyConstructor - = FromData.NeedOverloadResolutionForCopyConstructor; - ToData.NeedOverloadResolutionForMoveConstructor - = FromData.NeedOverloadResolutionForMoveConstructor; - ToData.NeedOverloadResolutionForMoveAssignment - = FromData.NeedOverloadResolutionForMoveAssignment; - ToData.NeedOverloadResolutionForDestructor - = FromData.NeedOverloadResolutionForDestructor; - ToData.DefaultedCopyConstructorIsDeleted - = FromData.DefaultedCopyConstructorIsDeleted; - ToData.DefaultedMoveConstructorIsDeleted - = FromData.DefaultedMoveConstructorIsDeleted; - ToData.DefaultedMoveAssignmentIsDeleted - = FromData.DefaultedMoveAssignmentIsDeleted; - ToData.DefaultedDestructorIsDeleted = FromData.DefaultedDestructorIsDeleted; - ToData.HasTrivialSpecialMembers = FromData.HasTrivialSpecialMembers; - ToData.HasIrrelevantDestructor = FromData.HasIrrelevantDestructor; - ToData.HasConstexprNonCopyMoveConstructor - = FromData.HasConstexprNonCopyMoveConstructor; - ToData.HasDefaultedDefaultConstructor - = FromData.HasDefaultedDefaultConstructor; - ToData.DefaultedDefaultConstructorIsConstexpr - = FromData.DefaultedDefaultConstructorIsConstexpr; - ToData.HasConstexprDefaultConstructor - = FromData.HasConstexprDefaultConstructor; - ToData.HasNonLiteralTypeFieldsOrBases - = FromData.HasNonLiteralTypeFieldsOrBases; - // ComputedVisibleConversions not imported. 
- ToData.UserProvidedDefaultConstructor - = FromData.UserProvidedDefaultConstructor; - ToData.DeclaredSpecialMembers = FromData.DeclaredSpecialMembers; - ToData.ImplicitCopyConstructorCanHaveConstParamForVBase - = FromData.ImplicitCopyConstructorCanHaveConstParamForVBase; - ToData.ImplicitCopyConstructorCanHaveConstParamForNonVBase - = FromData.ImplicitCopyConstructorCanHaveConstParamForNonVBase; - ToData.ImplicitCopyAssignmentHasConstParam - = FromData.ImplicitCopyAssignmentHasConstParam; - ToData.HasDeclaredCopyConstructorWithConstParam - = FromData.HasDeclaredCopyConstructorWithConstParam; - ToData.HasDeclaredCopyAssignmentWithConstParam - = FromData.HasDeclaredCopyAssignmentWithConstParam; + + #define FIELD(Name, Width, Merge) \ + ToData.Name = FromData.Name; + #include "clang/AST/CXXRecordDeclDefinitionBits.def" // Copy over the data stored in RecordDeclBits ToCXX->setArgPassingRestrictions(FromCXX->getArgPassingRestrictions()); diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index a085941e68c59..12ec44fa02791 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -96,14 +96,15 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) DefaultedDefaultConstructorIsConstexpr(true), HasConstexprDefaultConstructor(false), DefaultedDestructorIsConstexpr(true), - HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), + HasNonLiteralTypeFieldsOrBases(false), UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), ImplicitCopyConstructorCanHaveConstParamForVBase(true), ImplicitCopyConstructorCanHaveConstParamForNonVBase(true), ImplicitCopyAssignmentHasConstParam(true), HasDeclaredCopyConstructorWithConstParam(false), HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), - IsParsingBaseSpecifiers(false), HasODRHash(false), Definition(D) {} + IsParsingBaseSpecifiers(false), ComputedVisibleConversions(false), + HasODRHash(false), Definition(D) {} CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return Bases.get(Definition->getASTContext().getExternalSource()); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 02639679a40a7..ceee50da30937 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3178,7 +3178,7 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, // Walk the designator's path to find the subobject. for (unsigned I = 0, N = Sub.Entries.size(); /**/; ++I) { // Reading an indeterminate value is undefined, but assigning over one is OK. - if ((O->isAbsent() && handler.AccessKind != AK_Construct) || + if ((O->isAbsent() && !(handler.AccessKind == AK_Construct && I == N)) || (O->isIndeterminate() && handler.AccessKind != AK_Construct && handler.AccessKind != AK_Assign && handler.AccessKind != AK_ReadObjectRepresentation)) { @@ -5441,18 +5441,18 @@ static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, } } } - for (ArrayRef::iterator I = Args.begin(), E = Args.end(); - I != E; ++I) { - if (!Evaluate(ArgValues[I - Args.begin()], Info, *I)) { + for (unsigned Idx = 0; Idx < Args.size(); Idx++) { + if (!Evaluate(ArgValues[Idx], Info, Args[Idx])) { // If we're checking for a potential constant expression, evaluate all // initializers even if some of them fail. 
if (!Info.noteFailure()) return false; Success = false; } else if (!ForbiddenNullArgs.empty() && - ForbiddenNullArgs[I - Args.begin()] && - ArgValues[I - Args.begin()].isNullPointer()) { - Info.CCEDiag(*I, diag::note_non_null_attribute_failed); + ForbiddenNullArgs[Idx] && + ArgValues[Idx].isLValue() && + ArgValues[Idx].isNullPointer()) { + Info.CCEDiag(Args[Idx], diag::note_non_null_attribute_failed); if (!Info.noteFailure()) return false; Success = false; diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 7fda574bae0e8..a61510bc545d6 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -1021,6 +1021,58 @@ OMPTaskLoopSimdDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, return new (Mem) OMPTaskLoopSimdDirective(CollapsedNum, NumClauses); } +OMPMasterTaskLoopDirective *OMPMasterTaskLoopDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs) { + unsigned Size = + llvm::alignTo(sizeof(OMPMasterTaskLoopDirective), alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_master_taskloop)); + OMPMasterTaskLoopDirective *Dir = new (Mem) OMPMasterTaskLoopDirective( + StartLoc, EndLoc, CollapsedNum, Clauses.size()); + Dir->setClauses(Clauses); + Dir->setAssociatedStmt(AssociatedStmt); + Dir->setIterationVariable(Exprs.IterationVarRef); + Dir->setLastIteration(Exprs.LastIteration); + Dir->setCalcLastIteration(Exprs.CalcLastIteration); + Dir->setPreCond(Exprs.PreCond); + Dir->setCond(Exprs.Cond); + Dir->setInit(Exprs.Init); + Dir->setInc(Exprs.Inc); + Dir->setIsLastIterVariable(Exprs.IL); + Dir->setLowerBoundVariable(Exprs.LB); + Dir->setUpperBoundVariable(Exprs.UB); + Dir->setStrideVariable(Exprs.ST); + Dir->setEnsureUpperBound(Exprs.EUB); + Dir->setNextLowerBound(Exprs.NLB); + Dir->setNextUpperBound(Exprs.NUB); + Dir->setNumIterations(Exprs.NumIterations); + Dir->setCounters(Exprs.Counters); + Dir->setPrivateCounters(Exprs.PrivateCounters); + Dir->setInits(Exprs.Inits); + Dir->setUpdates(Exprs.Updates); + Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); + Dir->setPreInits(Exprs.PreInits); + return Dir; +} + +OMPMasterTaskLoopDirective * +OMPMasterTaskLoopDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, EmptyShell) { + unsigned Size = + llvm::alignTo(sizeof(OMPMasterTaskLoopDirective), alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_master_taskloop)); + return new (Mem) OMPMasterTaskLoopDirective(CollapsedNum, NumClauses); +} + OMPDistributeDirective *OMPDistributeDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index e86f9c7063eaa..3d6b02f4dcb5d 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -823,6 +823,12 @@ void StmtPrinter::VisitOMPTaskLoopSimdDirective( PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPMasterTaskLoopDirective( + OMPMasterTaskLoopDirective *Node) { + Indent() << "#pragma omp master taskloop"; + 
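(Aside, not part of the patch: the printer above emits the directive name and then delegates clause and body printing. As a purely hypothetical illustration of the new combined construct, compiled with -fopenmp (and, if required by the build, -fopenmp-version=50), the master thread of the enclosing parallel region creates the taskloop tasks, which matches the OMPD_taskloop capture region registered for OMPD_master_taskloop elsewhere in this patch.)

.. code-block:: c++

  // Hypothetical usage sketch of the new combined construct.
  void scale(float *x, int n, float a) {
  #pragma omp parallel
  #pragma omp master taskloop
    for (int i = 0; i < n; ++i)
      x[i] *= a;
  }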
PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPDistributeDirective(OMPDistributeDirective *Node) { Indent() << "#pragma omp distribute"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index f92c3dc60ba5c..a51523df99e81 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -918,6 +918,11 @@ void StmtProfiler::VisitOMPTaskLoopSimdDirective( VisitOMPLoopDirective(S); } +void StmtProfiler::VisitOMPMasterTaskLoopDirective( + const OMPMasterTaskLoopDirective *S) { + VisitOMPLoopDirective(S); +} + void StmtProfiler::VisitOMPDistributeDirective( const OMPDistributeDirective *S) { VisitOMPLoopDirective(S); diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 9b76652277c9e..63a6510324f75 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1643,6 +1643,7 @@ void TextNodeDumper::VisitCXXRecordDecl(const CXXRecordDecl *D) { FLAG(hasTrivialDestructor, trivial); FLAG(hasNonTrivialDestructor, non_trivial); FLAG(hasUserDeclaredDestructor, user_declared); + FLAG(hasConstexprDestructor, constexpr); FLAG(needsImplicitDestructor, needs_implicit); FLAG(needsOverloadResolutionForDestructor, needs_overload_resolution); if (!D->needsOverloadResolutionForDestructor()) diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index f69efe1aaaba6..46c0a61a708c9 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -646,6 +646,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind, #define OPENMP_TASKLOOP_SIMD_CLAUSE(Name) \ case OMPC_##Name: \ return true; +#include "clang/Basic/OpenMPKinds.def" + default: + break; + } + break; + case OMPD_master_taskloop: + switch (CKind) { +#define OPENMP_MASTER_TASKLOOP_CLAUSE(Name) \ + case OMPC_##Name: \ + return true; #include "clang/Basic/OpenMPKinds.def" default: break; @@ -872,7 +882,8 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_simd || DKind == OMPD_for || DKind == OMPD_for_simd || DKind == OMPD_parallel_for || DKind == OMPD_parallel_for_simd || DKind == OMPD_taskloop || DKind == OMPD_taskloop_simd || - DKind == OMPD_distribute || DKind == OMPD_target_parallel_for || + DKind == OMPD_master_taskloop || DKind == OMPD_distribute || + DKind == OMPD_target_parallel_for || DKind == OMPD_distribute_parallel_for || DKind == OMPD_distribute_parallel_for_simd || DKind == OMPD_distribute_simd || @@ -903,7 +914,8 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) { } bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) { - return DKind == OMPD_taskloop || DKind == OMPD_taskloop_simd; + return DKind == OMPD_taskloop || DKind == OMPD_taskloop_simd || + DKind == OMPD_master_taskloop; } bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) { @@ -1053,6 +1065,7 @@ void clang::getOpenMPCaptureRegions( break; case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: CaptureRegions.push_back(OMPD_taskloop); break; case OMPD_target_teams_distribute_parallel_for: diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 150264a40587a..2eeab9d5abaa7 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -990,6 +990,7 @@ static void addSanitizersAtO0(ModulePassManager &MPM, } if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { + MPM.addPass(MemorySanitizerPass({})); 
MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({}))); } @@ -999,6 +1000,7 @@ static void addSanitizersAtO0(ModulePassManager &MPM, } if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + MPM.addPass(ThreadSanitizerPass()); MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); } } @@ -1178,16 +1180,23 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(BoundsCheckingPass()); }); - if (LangOpts.Sanitize.has(SanitizerKind::Memory)) + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { + MPM.addPass(MemorySanitizerPass({})); + }); PB.registerOptimizerLastEPCallback( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(MemorySanitizerPass({})); }); - if (LangOpts.Sanitize.has(SanitizerKind::Thread)) + } + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + PB.registerPipelineStartEPCallback( + [](ModulePassManager &MPM) { MPM.addPass(ThreadSanitizerPass()); }); PB.registerOptimizerLastEPCallback( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(ThreadSanitizerPass()); }); + } if (LangOpts.Sanitize.has(SanitizerKind::Address)) { PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) { MPM.addPass( diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 272c3586934ff..31cf2d965feb1 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2786,11 +2786,16 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { if (!CGM.getCodeGenOpts().WholeProgramVTables || - !SanOpts.has(SanitizerKind::CFIVCall) || - !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) || !CGM.HasHiddenLTOVisibility(RD)) return false; + if (CGM.getCodeGenOpts().VirtualFunctionElimination) + return true; + + if (!SanOpts.has(SanitizerKind::CFIVCall) || + !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall)) + return false; + std::string TypeName = RD->getQualifiedNameAsString(); return !getContext().getSanitizerBlacklist().isBlacklistedType( SanitizerKind::CFIVCall, TypeName); @@ -2813,8 +2818,13 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( TypeId}); llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); - EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), - SanitizerHandler::CFICheckFail, nullptr, nullptr); + std::string TypeName = RD->getQualifiedNameAsString(); + if (SanOpts.has(SanitizerKind::CFIVCall) && + !getContext().getSanitizerBlacklist().isBlacklistedType( + SanitizerKind::CFIVCall, TypeName)) { + EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), + SanitizerHandler::CFICheckFail, {}, {}); + } return Builder.CreateBitCast( Builder.CreateExtractValue(CheckedLoad, 0), diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 877a96a6e836d..f8712b06e8226 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -4634,7 +4634,7 @@ struct GEPOffsetAndOverflow { llvm::Value *OffsetOverflows; }; -/// Evaluate given GEPVal, which must be an inbounds GEP, +/// Evaluate given GEPVal, which is either an inbounds GEP, or a constant, /// and compute the total offset it applies from it's base pointer BasePtr. 
/// Returns offset in bytes and a boolean flag whether an overflow happened /// during evaluation. @@ -4642,10 +4642,28 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal, llvm::LLVMContext &VMContext, CodeGenModule &CGM, CGBuilderTy Builder) { + const auto &DL = CGM.getDataLayout(); + + // The total (signed) byte offset for the GEP. + llvm::Value *TotalOffset = nullptr; + + // Was the GEP already reduced to a constant? + if (isa(GEPVal)) { + // Compute the offset by casting both pointers to integers and subtracting: + // GEPVal = BasePtr + ptr(Offset) <--> Offset = int(GEPVal) - int(BasePtr) + Value *BasePtr_int = + Builder.CreatePtrToInt(BasePtr, DL.getIntPtrType(BasePtr->getType())); + Value *GEPVal_int = + Builder.CreatePtrToInt(GEPVal, DL.getIntPtrType(GEPVal->getType())); + TotalOffset = Builder.CreateSub(GEPVal_int, BasePtr_int); + return {TotalOffset, /*OffsetOverflows=*/Builder.getFalse()}; + } + auto *GEP = cast(GEPVal); + assert(GEP->getPointerOperand() == BasePtr && + "BasePtr must be the base of the GEP."); assert(GEP->isInBounds() && "Expected inbounds GEP"); - const auto &DL = CGM.getDataLayout(); auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType()); // Grab references to the signed add/mul overflow intrinsics for intptr_t. @@ -4655,8 +4673,6 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal, auto *SMulIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy); - // The total (signed) byte offset for the GEP. - llvm::Value *TotalOffset = nullptr; // The offset overflow flag - true if the total offset overflows. llvm::Value *OffsetOverflows = Builder.getFalse(); @@ -4727,69 +4743,102 @@ CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef IdxList, if (!SanOpts.has(SanitizerKind::PointerOverflow)) return GEPVal; - // If the GEP has already been reduced to a constant, leave it be. - if (isa(GEPVal)) - return GEPVal; + llvm::Type *PtrTy = Ptr->getType(); + + // Perform nullptr-and-offset check unless the nullptr is defined. + bool PerformNullCheck = !NullPointerIsDefined( + Builder.GetInsertBlock()->getParent(), PtrTy->getPointerAddressSpace()); + // Check for overflows unless the GEP got constant-folded, + // and only in the default address space. + bool PerformOverflowCheck = + !isa(GEPVal) && PtrTy->getPointerAddressSpace() == 0; - // Only check for overflows in the default address space. - if (GEPVal->getType()->getPointerAddressSpace()) + if (!(PerformNullCheck || PerformOverflowCheck)) return GEPVal; + const auto &DL = CGM.getDataLayout(); + SanitizerScope SanScope(this); + llvm::Type *IntPtrTy = DL.getIntPtrType(PtrTy); GEPOffsetAndOverflow EvaluatedGEP = EmitGEPOffsetInBytes(Ptr, GEPVal, getLLVMContext(), CGM, Builder); - auto *GEP = cast(GEPVal); - - const auto &DL = CGM.getDataLayout(); - auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType()); + assert((!isa(EvaluatedGEP.TotalOffset) || + EvaluatedGEP.OffsetOverflows == Builder.getFalse()) && + "If the offset got constant-folded, we don't expect that there was an " + "overflow."); auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); - // Common case: if the total offset is zero, don't emit a check. - if (EvaluatedGEP.TotalOffset == Zero) + // Common case: if the total offset is zero, and we are using C++ semantics, + // where nullptr+0 is defined, don't emit a check.
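(Aside, not part of the patch: the constant-zero exemption just above applies only under C++ semantics, and the null-pointer check emitted below is stricter in C. A hedged, hypothetical translation unit showing the difference at runtime:)

.. code-block:: c++

  #include <stddef.h>

  static char *advance(char *base, size_t offset) {
    // With -fsanitize=pointer-overflow this addition is instrumented: it is
    // reported when `base` is null and `offset` is non-zero, or when the
    // result wraps back around to null.
    return base + offset;
  }

  int main(void) {
    char *p = NULL;
    // Under C++ semantics nullptr + 0 is a null pointer and the check passes;
    // compiled as C (e.g. via -x c), even a zero offset on a null base is
    // undefined and is expected to be diagnosed at runtime.
    return advance(p, 0) != NULL;
  }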
+ if (EvaluatedGEP.TotalOffset == Zero && CGM.getLangOpts().CPlusPlus) return GEPVal; // Now that we've computed the total offset, add it to the base pointer (with // wrapping semantics). - auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy); + auto *IntPtr = Builder.CreatePtrToInt(Ptr, IntPtrTy); auto *ComputedGEP = Builder.CreateAdd(IntPtr, EvaluatedGEP.TotalOffset); - llvm::SmallVector, 1> Checks; - - // The GEP is valid if: - // 1) The total offset doesn't overflow, and - // 2) The sign of the difference between the computed address and the base - // pointer matches the sign of the total offset. - llvm::Value *ValidGEP; - auto *NoOffsetOverflow = Builder.CreateNot(EvaluatedGEP.OffsetOverflows); - if (SignedIndices) { - // GEP is computed as `unsigned base + signed offset`, therefore: - // * If offset was positive, then the computed pointer can not be - // [unsigned] less than the base pointer, unless it overflowed. - // * If offset was negative, then the computed pointer can not be - // [unsigned] greater than the bas pointere, unless it overflowed. - auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); - auto *PosOrZeroOffset = - Builder.CreateICmpSGE(EvaluatedGEP.TotalOffset, Zero); - llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); - ValidGEP = Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid); - } else if (!IsSubtraction) { - // GEP is computed as `unsigned base + unsigned offset`, therefore the - // computed pointer can not be [unsigned] less than base pointer, - // unless there was an overflow. - // Equivalent to `@llvm.uadd.with.overflow(%base, %offset)`. - ValidGEP = Builder.CreateICmpUGE(ComputedGEP, IntPtr); - } else { - // GEP is computed as `unsigned base - unsigned offset`, therefore the - // computed pointer can not be [unsigned] greater than base pointer, - // unless there was an overflow. - // Equivalent to `@llvm.usub.with.overflow(%base, sub(0, %offset))`. - ValidGEP = Builder.CreateICmpULE(ComputedGEP, IntPtr); - } - ValidGEP = Builder.CreateAnd(ValidGEP, NoOffsetOverflow); - Checks.emplace_back(ValidGEP, SanitizerKind::PointerOverflow); + llvm::SmallVector, 2> Checks; + + if (PerformNullCheck) { + // In C++, if the base pointer evaluates to a null pointer value, + // the only valid pointer this inbounds GEP can produce is also + // a null pointer, so the offset must also evaluate to zero. + // Likewise, if we have non-zero base pointer, we can not get null pointer + // as a result, so the offset can not be -intptr_t(BasePtr). + // In other words, both pointers are either null, or both are non-null, + // or the behaviour is undefined. + // + // C, however, is more strict in this regard, and gives more + // optimization opportunities: in C, additionally, nullptr+0 is undefined. + // So both the input to the 'gep inbounds' AND the output must not be null. + auto *BaseIsNotNullptr = Builder.CreateIsNotNull(Ptr); + auto *ResultIsNotNullptr = Builder.CreateIsNotNull(ComputedGEP); + auto *Valid = + CGM.getLangOpts().CPlusPlus + ? Builder.CreateICmpEQ(BaseIsNotNullptr, ResultIsNotNullptr) + : Builder.CreateAnd(BaseIsNotNullptr, ResultIsNotNullptr); + Checks.emplace_back(Valid, SanitizerKind::PointerOverflow); + } + + if (PerformOverflowCheck) { + // The GEP is valid if: + // 1) The total offset doesn't overflow, and + // 2) The sign of the difference between the computed address and the base + // pointer matches the sign of the total offset. 
+ llvm::Value *ValidGEP; + auto *NoOffsetOverflow = Builder.CreateNot(EvaluatedGEP.OffsetOverflows); + if (SignedIndices) { + // GEP is computed as `unsigned base + signed offset`, therefore: + // * If offset was positive, then the computed pointer can not be + // [unsigned] less than the base pointer, unless it overflowed. + // * If offset was negative, then the computed pointer can not be + // [unsigned] greater than the base pointer, unless it overflowed. + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + auto *PosOrZeroOffset = + Builder.CreateICmpSGE(EvaluatedGEP.TotalOffset, Zero); + llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); + ValidGEP = + Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid); + } else if (!IsSubtraction) { + // GEP is computed as `unsigned base + unsigned offset`, therefore the + // computed pointer can not be [unsigned] less than base pointer, + // unless there was an overflow. + // Equivalent to `@llvm.uadd.with.overflow(%base, %offset)`. + ValidGEP = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + } else { + // GEP is computed as `unsigned base - unsigned offset`, therefore the + // computed pointer can not be [unsigned] greater than base pointer, + // unless there was an overflow. + // Equivalent to `@llvm.usub.with.overflow(%base, sub(0, %offset))`. + ValidGEP = Builder.CreateICmpULE(ComputedGEP, IntPtr); + } + ValidGEP = Builder.CreateAnd(ValidGEP, NoOffsetOverflow); + Checks.emplace_back(ValidGEP, SanitizerKind::PointerOverflow); + } assert(!Checks.empty() && "Should have produced some checks."); diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index 92ed14c90c54d..5542b5b426678 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -270,6 +270,14 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, // Setting vectorize.width if (Attrs.VectorizeWidth > 0) { + // This implies vectorize.enable = true, but only add it when it is not + // already enabled. + if (Attrs.VectorizeEnable != LoopAttributes::Enable) + Args.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 1))})); + Metadata *Vals[] = { MDString::get(Ctx, "llvm.loop.vectorize.width"), ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index d2f18a5767b18..162890e08e415 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1264,9 +1264,10 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, loadOffloadInfoMetadata(); } -static bool tryEmitAlias(CodeGenModule &CGM, const GlobalDecl &NewGD, - const GlobalDecl &OldGD, llvm::GlobalValue *OrigAddr, - bool IsForDefinition) { +bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, - const GlobalDecl &OldGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { // Emit at least a definition for the aliasee if the address of the // original function is requested. if (IsForDefinition || OrigAddr) @@ -1327,8 +1328,8 @@ void CGOpenMPRuntime::clear() { StringRef MangledName = CGM.getMangledName(Pair.second.second); llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); // If not able to emit alias, just emit original declaration.
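(Aside, not part of the patch, referring back to the CGLoopInfo change above: `vectorize_width(N)` now also emits `llvm.loop.vectorize.enable` when vectorization was not explicitly enabled or disabled. A hypothetical source-level trigger:)

.. code-block:: c++

  // With this change, the loop metadata below carries
  // llvm.loop.vectorize.width = 4 *and* llvm.loop.vectorize.enable = true,
  // even though vectorize(enable) is not spelled out in the pragma.
  void saxpy(float *x, const float *y, float a, int n) {
  #pragma clang loop vectorize_width(4)
    for (int i = 0; i < n; ++i)
      x[i] += a * y[i];
  }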
- (void)tryEmitAlias(CGM, Pair.second.first, Pair.second.second, Addr, - /*IsForDefinition=*/false); + (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, + /*IsForDefinition=*/false); } } @@ -6847,6 +6848,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -7153,6 +7155,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -8924,6 +8927,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -9681,6 +9685,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); @@ -10307,6 +10312,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams_distribute: @@ -11187,7 +11193,7 @@ static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) { return false; llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx); llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx); - return llvm::APSInt::compareValues(LHSVal, RHSVal) <= 0; + return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0; } namespace { @@ -11250,7 +11256,7 @@ static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx, } // If the attribute matches the context, find the attribute with the highest // score. - if (SelectedAttr && (!TopMostAttr || Comparer(TopMostAttr, SelectedAttr))) + if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr))) TopMostAttr = SelectedAttr; } if (!TopMostAttr) @@ -11273,7 +11279,7 @@ bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { if (NewFD == D) return false; GlobalDecl NewGD = GD.getWithDecl(NewFD); - if (tryEmitAlias(CGM, NewGD, GD, Orig, IsForDefinition)) { + if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { DeferredVariantFunction.erase(D); return true; } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index b8137a20d0013..9215bd666c9cf 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -291,6 +291,17 @@ class CGOpenMPRuntime { /// default location. virtual unsigned getDefaultLocationReserved2Flags() const { return 0; } + /// Tries to emit declare variant function for \p OldGD from \p NewGD. + /// \param OrigAddr LLVM IR value for \p OldGD. + /// \param IsForDefinition true if emission of the definition of \p OldGD + /// was requested. + /// \returns true if it was able to emit a definition for \p OldGD that + /// points to \p NewGD. + virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition); + /// Returns default flags for the barriers depending on the directive, for /// which this barrier is going to be emitted.
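(Aside, not part of the patch: for context on the tryEmitDeclareVariant hook and the score comparison fixed above, a minimal, hypothetical use of the directive, compiled with -fopenmp (and an OpenMP version that accepts 5.0 directives). The selector with the highest score now wins, and on NVPTX devices the variant body is re-emitted rather than aliased, as the override that follows shows.)

.. code-block:: c++

  // Hypothetical example: when the implementation selector matches the
  // compiling vendor, calls to base_impl() are expected to resolve to
  // llvm_impl() instead of the base definition.
  int llvm_impl(void) { return 1; }

  #pragma omp declare variant(llvm_impl) match(implementation = {vendor(llvm)})
  int base_impl(void) { return 0; }

  int test(void) { return base_impl(); }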
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 9e70a5a9bcbc3..a5be9aabc5632 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -803,6 +803,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -874,6 +875,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -1038,6 +1040,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -1115,6 +1118,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -1895,6 +1899,19 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { llvm_unreachable("Unknown flags are requested."); } +bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { + // Emit the function in OldGD with the body from NewGD, if NewGD is defined. + auto *NewFD = cast(NewGD.getDecl()); + if (NewFD->isDefined()) { + CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr); + return true; + } + return false; +} + CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index e7fd458e72713..0f78627c95e63 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -193,6 +193,18 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { /// Full/Lightweight runtime mode. Used for better optimization. unsigned getDefaultLocationReserved2Flags() const override; + /// Tries to emit declare variant function for \p OldGD from \p NewGD. + /// \param OrigAddr LLVM IR value for \p OldGD. + /// \param IsForDefinition true if emission of the definition of \p OldGD + /// was requested. + /// \returns true if it was able to emit a definition for \p OldGD that + /// points to \p NewGD. + /// NVPTX backend does not support global aliases, so just use the function + /// emitted for \p NewGD instead of \p OldGD.
+ bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) override; + public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); void clear() override; diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index d9df1ba992344..1da092f787405 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -281,6 +281,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPTaskLoopSimdDirectiveClass: EmitOMPTaskLoopSimdDirective(cast(*S)); break; + case Stmt::OMPMasterTaskLoopDirectiveClass: + EmitOMPMasterTaskLoopDirective(cast(*S)); + break; case Stmt::OMPDistributeDirectiveClass: EmitOMPDistributeDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 0c264bb371c43..e50cdb1b678f8 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5109,6 +5109,16 @@ void CodeGenFunction::EmitOMPTaskLoopSimdDirective( EmitOMPTaskLoopBasedDirective(S); } +void CodeGenFunction::EmitOMPMasterTaskLoopDirective( + const OMPMasterTaskLoopDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); +} + // Generate the instructions for '#pragma omp target update' directive. void CodeGenFunction::EmitOMPTargetUpdateDirective( const OMPTargetUpdateDirective &S) { diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index a74905fd70fd4..f9f25e7e57adc 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -808,7 +808,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, assert(!VTable->isDeclaration() && "Shouldn't set properties on declaration"); CGM.setGVProperties(VTable, RD); - CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); + CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get()); return VTable; } @@ -1039,7 +1039,32 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { return true; } -void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, +llvm::GlobalObject::VCallVisibility +CodeGenModule::GetVCallVisibilityLevel(const CXXRecordDecl *RD) { + LinkageInfo LV = RD->getLinkageAndVisibility(); + llvm::GlobalObject::VCallVisibility TypeVis; + if (!isExternallyVisible(LV.getLinkage())) + TypeVis = llvm::GlobalObject::VCallVisibilityTranslationUnit; + else if (HasHiddenLTOVisibility(RD)) + TypeVis = llvm::GlobalObject::VCallVisibilityLinkageUnit; + else + TypeVis = llvm::GlobalObject::VCallVisibilityPublic; + + for (auto B : RD->bases()) + if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) + TypeVis = std::min(TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + + for (auto B : RD->vbases()) + if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) + TypeVis = std::min(TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + + return TypeVis; +} + +void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { if (!getCodeGenOpts().LTOUnit) return; @@ -1099,4 +1124,10 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD); } } + + 
if (getCodeGenOpts().VirtualFunctionElimination) { + llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD); + if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic) + VTable->addVCallVisibilityMetadata(TypeVis); + } } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 18b2bb0453c98..4a1c5416ab26d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3160,6 +3160,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S); void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); + void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 8e8b3854f790d..a7dd9e5c2613a 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2782,6 +2782,50 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, EmitGlobalFunctionDefinition(GD, GV); } +void CodeGenModule::emitOpenMPDeviceFunctionRedefinition( + GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) { + assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && + OpenMPRuntime && "Expected OpenMP device mode."); + const auto *D = cast(OldGD.getDecl()); + + // Compute the function info and LLVM type. + const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD); + llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); + + // Get or create the prototype for the function. + if (!GV || (GV->getType()->getElementType() != Ty)) { + GV = cast(GetOrCreateLLVMFunction( + getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false, + /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), + ForDefinition)); + SetFunctionAttributes(OldGD, cast(GV), + /*IsIncompleteFunction=*/false, + /*IsThunk=*/false); + } + // We need to set linkage and visibility on the function before + // generating code for it because various parts of IR generation + // want to propagate this information down (e.g. to local static + // declarations). + auto *Fn = cast(GV); + setFunctionLinkage(OldGD, Fn); + + // FIXME: this is redundant with part of + // setFunctionDefinitionAttributes + setGVProperties(Fn, OldGD); + + MaybeHandleStaticInExternC(D, Fn); + + maybeSetTrivialComdat(*D, *Fn); + + CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI); + + setNonAliasAttributes(OldGD, Fn); + SetLLVMFunctionAttributesForDefinition(D, Fn); + + if (D->hasAttr()) + AddGlobalAnnotations(D, Fn); +} + void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index f2ff21f820d0f..f7f52ad17a506 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1283,13 +1283,26 @@ class CodeGenModule : public CodeGenTypeCache { /// \param D Requires declaration void EmitOMPRequiresDecl(const OMPRequiresDecl *D); + /// Emits the definition of \p OldGD function with body from \p NewGD. + /// Required for proper handling of declare variant directive on the GPU. 
+ void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD, + llvm::GlobalValue *GV); + /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable /// optimization. bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); + /// Returns the vcall visibility of the given type. This is the scope in which + /// a virtual function call could be made which ends up being dispatched to a + /// member function of this class. This scope can be wider than the visibility + /// of the class itself when the class has a more-visible dynamic base class. + llvm::GlobalObject::VCallVisibility + GetVCallVisibilityLevel(const CXXRecordDecl *RD); + /// Emit type metadata for the given vtable using the given layout. - void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, + void EmitVTableTypeMetadata(const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable, const VTableLayout &VTLayout); /// Generate a cross-DSO type identifier for MD. diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 0a7a4fe33ac2d..a6f6e38d5f148 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -1278,13 +1278,6 @@ std::string getCoverageSection(const CodeGenModule &CGM) { CGM.getContext().getTargetInfo().getTriple().getObjectFormat()); } -std::string normalizeFilename(StringRef Filename) { - llvm::SmallString<256> Path(Filename); - llvm::sys::fs::make_absolute(Path); - llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); - return Path.str().str(); -} - } // end anonymous namespace static void dump(llvm::raw_ostream &OS, StringRef FunctionName, @@ -1317,6 +1310,24 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, } } +CoverageMappingModuleGen::CoverageMappingModuleGen( + CodeGenModule &CGM, CoverageSourceInfo &SourceInfo) + : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) { + // Honor -fdebug-compilation-dir in paths in coverage data. Otherwise, use the + // regular working directory when normalizing paths. + if (!CGM.getCodeGenOpts().DebugCompilationDir.empty()) + CWD = CGM.getCodeGenOpts().DebugCompilationDir; + else + llvm::sys::fs::current_path(CWD); +} + +std::string CoverageMappingModuleGen::normalizeFilename(StringRef Filename) { + llvm::SmallString<256> Path(Filename); + llvm::sys::fs::make_absolute(CWD, Path); + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + return Path.str().str(); +} + void CoverageMappingModuleGen::addFunctionMappingRecord( llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, const std::string &CoverageMapping, bool IsUsed) { diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h index 3bf51f590479f..2bdc00e256689 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.h +++ b/clang/lib/CodeGen/CoverageMappingGen.h @@ -54,10 +54,14 @@ class CoverageMappingModuleGen { std::vector FunctionNames; llvm::StructType *FunctionRecordTy; std::vector CoverageMappings; + SmallString<256> CWD; + + /// Make the filename absolute, remove dots, and normalize slashes to local + /// path style. 
+ std::string normalizeFilename(StringRef Filename); public: - CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo) - : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) {} + CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo); CoverageSourceInfo &getSourceInfo() const { return SourceInfo; diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 2945dec0bd7dd..8f9b16470b642 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -644,8 +644,6 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty); VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy); } - // Compute the address of the virtual function pointer. - llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset); // Check the address of the function pointer if CFI on member function // pointers is enabled. @@ -653,44 +651,81 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( llvm::Constant *CheckTypeDesc; bool ShouldEmitCFICheck = CGF.SanOpts.has(SanitizerKind::CFIMFCall) && CGM.HasHiddenLTOVisibility(RD); - if (ShouldEmitCFICheck) { - CodeGenFunction::SanitizerScope SanScope(&CGF); - - CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc()); - CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0)); - llvm::Constant *StaticData[] = { - llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall), - CheckSourceLocation, - CheckTypeDesc, - }; - - llvm::Metadata *MD = - CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0)); - llvm::Value *TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD); + bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && + CGM.HasHiddenLTOVisibility(RD); + llvm::Value *VirtualFn = nullptr; - llvm::Value *TypeTest = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {VFPAddr, TypeId}); + { + CodeGenFunction::SanitizerScope SanScope(&CGF); + llvm::Value *TypeId = nullptr; + llvm::Value *CheckResult = nullptr; + + if (ShouldEmitCFICheck || ShouldEmitVFEInfo) { + // If doing CFI or VFE, we will need the metadata node to check against. + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0)); + TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD); + } - if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) { - CGF.EmitTrapCheck(TypeTest); + llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset); + + if (ShouldEmitVFEInfo) { + // If doing VFE, load from the vtable with a type.checked.load intrinsic + // call. Note that we use the GEP to calculate the address to load from + // and pass 0 as the offset to the intrinsic. This is because every + // vtable slot of the correct type is marked with matching metadata, and + // we know that the load must be from one of these slots. 
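(Aside, not part of the patch: virtual function elimination, which the type.checked.load path described just above feeds, is driven by the vcall visibility metadata added earlier and the new driver flag handled further down. A hedged sketch of a hypothetical candidate; the actual elimination only happens at the full-LTO link:)

.. code-block:: c++

  // Hypothetical sketch, built with
  //   -flto=full -fvirtual-function-elimination
  // (which implies -fwhole-program-vtables).  The anonymous namespace gives
  // the class internal linkage, so its vtable gets translation-unit vcall
  // visibility; if whole-program analysis sees no call reaching the slot for
  // unused(), its body may be dropped from the final image.
  namespace {
  struct Widget {
    virtual void draw();
    virtual void unused(); // never called anywhere in the program
    virtual ~Widget() = default;
  };
  void Widget::draw() {}
  void Widget::unused() {}
  } // namespace

  void render(Widget &w) { w.draw(); }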
+ llvm::Value *CheckedLoad = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_checked_load), + {VFPAddr, llvm::ConstantInt::get(CGM.Int32Ty, 0), TypeId}); + CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); + VirtualFn = Builder.CreateExtractValue(CheckedLoad, 0); + VirtualFn = Builder.CreateBitCast(VirtualFn, FTy->getPointerTo(), + "memptr.virtualfn"); } else { - llvm::Value *AllVtables = llvm::MetadataAsValue::get( - CGM.getLLVMContext(), - llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); - llvm::Value *ValidVtable = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables}); - CGF.EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIMFCall), - SanitizerHandler::CFICheckFail, StaticData, - {VTable, ValidVtable}); + // When not doing VFE, emit a normal load, as it allows more + // optimisations than type.checked.load. + if (ShouldEmitCFICheck) { + CheckResult = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), + {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId}); + } + VFPAddr = + Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo()); + VirtualFn = Builder.CreateAlignedLoad(VFPAddr, CGF.getPointerAlign(), + "memptr.virtualfn"); } + assert(VirtualFn && "Virtual function pointer not created!"); + assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) && + "Check result required but not created!"); + + if (ShouldEmitCFICheck) { + // If doing CFI, emit the check. + CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc()); + CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0)); + llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall), + CheckSourceLocation, + CheckTypeDesc, + }; - FnVirtual = Builder.GetInsertBlock(); - } + if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) { + CGF.EmitTrapCheck(CheckResult); + } else { + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables}); + CGF.EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIMFCall), + SanitizerHandler::CFICheckFail, StaticData, + {VTable, ValidVtable}); + } + + FnVirtual = Builder.GetInsertBlock(); + } + } // End of sanitizer scope - // Load the virtual function to call. - VFPAddr = Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo()); - llvm::Value *VirtualFn = Builder.CreateAlignedLoad( - VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn"); CGF.EmitBranch(FnEnd); // In the non-virtual path, the function pointer is actually a @@ -1634,7 +1669,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, EmitFundamentalRTTIDescriptors(RD); if (!VTable->isDeclarationForLinker()) - CGM.EmitVTableTypeMetadata(VTable, VTLayout); + CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); } bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField( diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1f1e72ad48167..000e8a06abce6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4961,13 +4961,36 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fuse-line-directives"); // -fms-compatibility=0 is default.
- if (Args.hasFlag(options::OPT_fms_compatibility, - options::OPT_fno_ms_compatibility, - (IsWindowsMSVC && - Args.hasFlag(options::OPT_fms_extensions, - options::OPT_fno_ms_extensions, true)))) + bool IsMSVCCompat = Args.hasFlag( + options::OPT_fms_compatibility, options::OPT_fno_ms_compatibility, + (IsWindowsMSVC && Args.hasFlag(options::OPT_fms_extensions, + options::OPT_fno_ms_extensions, true))); + if (IsMSVCCompat) CmdArgs.push_back("-fms-compatibility"); + // Handle -fgnuc-version, if present. + VersionTuple GNUCVer; + if (Arg *A = Args.getLastArg(options::OPT_fgnuc_version_EQ)) { + // Check that the version has 1 to 3 components and the minor and patch + // versions fit in two decimal digits. + StringRef Val = A->getValue(); + Val = Val.empty() ? "0" : Val; // Treat "" as 0, which disables the GNU version macros. + bool Invalid = GNUCVer.tryParse(Val); + unsigned Minor = GNUCVer.getMinor().getValueOr(0); + unsigned Patch = GNUCVer.getSubminor().getValueOr(0); + if (Invalid || GNUCVer.getBuild() || Minor >= 100 || Patch >= 100) { + D.Diag(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + } + } else if (!IsMSVCCompat) { + // Imitate GCC 4.2.1 by default if -fms-compatibility is not in effect. + GNUCVer = VersionTuple(4, 2, 1); + } + if (!GNUCVer.empty()) { + CmdArgs.push_back( + Args.MakeArgString("-fgnuc-version=" + GNUCVer.getAsString())); + } + VersionTuple MSVT = TC.computeMSVCVersion(&D, Args); if (!MSVT.empty()) CmdArgs.push_back( @@ -5564,9 +5587,30 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); } - bool WholeProgramVTables = - Args.hasFlag(options::OPT_fwhole_program_vtables, - options::OPT_fno_whole_program_vtables, false); + bool VirtualFunctionElimination = + Args.hasFlag(options::OPT_fvirtual_function_elimination, + options::OPT_fno_virtual_function_elimination, false); + if (VirtualFunctionElimination) { + // VFE requires full LTO (currently, this might be relaxed to allow ThinLTO + // in the future). + if (D.getLTOMode() != LTOK_Full) + D.Diag(diag::err_drv_argument_only_allowed_with) + << "-fvirtual-function-elimination" + << "-flto=full"; + + CmdArgs.push_back("-fvirtual-function-elimination"); + } + + // VFE requires whole-program-vtables, and enables it by default.
+ bool WholeProgramVTables = Args.hasFlag( + options::OPT_fwhole_program_vtables, + options::OPT_fno_whole_program_vtables, VirtualFunctionElimination); + if (VirtualFunctionElimination && !WholeProgramVTables) { + D.Diag(diag::err_drv_argument_not_allowed_with) + << "-fno-whole-program-vtables" + << "-fvirtual-function-elimination"; + } + if (WholeProgramVTables) { if (!D.isUsingLTO()) D.Diag(diag::err_drv_argument_only_allowed_with) diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index cccbd5b506757..6521a628ae3af 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -371,6 +371,11 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, TC.getSubDirectoryPath( toolchains::MSVCToolChain::SubDirectoryType::Lib))); + CmdArgs.push_back(Args.MakeArgString( + Twine("-libpath:") + + TC.getSubDirectoryPath(toolchains::MSVCToolChain::SubDirectoryType::Lib, + "atlmfc"))); + if (TC.useUniversalCRT()) { std::string UniversalCRTLibPath; if (TC.getUniversalCRTLibraryPath(UniversalCRTLibPath)) @@ -595,7 +600,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, EnvVar.substr(0, PrefixLen) + TC.getSubDirectoryPath(SubDirectoryType::Bin) + llvm::Twine(llvm::sys::EnvPathSeparator) + - TC.getSubDirectoryPath(SubDirectoryType::Bin, HostArch) + + TC.getSubDirectoryPath(SubDirectoryType::Bin, "", HostArch) + (EnvVar.size() > PrefixLen ? llvm::Twine(llvm::sys::EnvPathSeparator) + EnvVar.substr(PrefixLen) @@ -871,6 +876,7 @@ static const char *llvmArchToDevDivInternalArch(llvm::Triple::ArchType Arch) { // of hardcoding paths. std::string MSVCToolChain::getSubDirectoryPath(SubDirectoryType Type, + llvm::StringRef SubdirParent, llvm::Triple::ArchType TargetArch) const { const char *SubdirName; const char *IncludeName; @@ -890,6 +896,9 @@ MSVCToolChain::getSubDirectoryPath(SubDirectoryType Type, } llvm::SmallString<256> Path(VCToolChainPath); + if (!SubdirParent.empty()) + llvm::sys::path::append(Path, SubdirParent); + switch (Type) { case SubDirectoryType::Bin: if (VSLayout == ToolsetLayout::VS2017OrNewer) { @@ -1275,6 +1284,8 @@ void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, if (!VCToolChainPath.empty()) { addSystemInclude(DriverArgs, CC1Args, getSubDirectoryPath(SubDirectoryType::Include)); + addSystemInclude(DriverArgs, CC1Args, + getSubDirectoryPath(SubDirectoryType::Include, "atlmfc")); if (useUniversalCRT()) { std::string UniversalCRTSdkPath; diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h index 436547bf4f218..804394c6e2abd 100644 --- a/clang/lib/Driver/ToolChains/MSVC.h +++ b/clang/lib/Driver/ToolChains/MSVC.h @@ -104,12 +104,14 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain { Lib, }; std::string getSubDirectoryPath(SubDirectoryType Type, + llvm::StringRef SubdirParent, llvm::Triple::ArchType TargetArch) const; // Convenience overload. // Uses the current target arch. 
- std::string getSubDirectoryPath(SubDirectoryType Type) const { - return getSubDirectoryPath(Type, getArch()); + std::string getSubDirectoryPath(SubDirectoryType Type, + llvm::StringRef SubdirParent = "") const { + return getSubDirectoryPath(Type, SubdirParent, getArch()); } enum class ToolsetLayout { diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 88f7468122930..9823a5af9c1b3 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -845,7 +845,6 @@ class AnnotatingParser { break; case tok::kw_if: case tok::kw_while: - assert(!Line.startsWith(tok::hash)); if (Tok->is(tok::kw_if) && CurrentToken && CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) next(); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 45342c58701d3..5463cd027a121 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -759,6 +759,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.CodeViewGHash = Args.hasArg(OPT_gcodeview_ghash); Opts.MacroDebugInfo = Args.hasArg(OPT_debug_info_macro); Opts.WholeProgramVTables = Args.hasArg(OPT_fwhole_program_vtables); + Opts.VirtualFunctionElimination = + Args.hasArg(OPT_fvirtual_function_elimination); Opts.LTOVisibilityPublicStd = Args.hasArg(OPT_flto_visibility_public_std); Opts.SplitDwarfFile = Args.getLastArgValue(OPT_split_dwarf_file); Opts.SplitDwarfOutput = Args.getLastArgValue(OPT_split_dwarf_output); @@ -2252,6 +2254,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, Opts.Digraphs = Std.hasDigraphs(); Opts.GNUMode = Std.isGNUMode(); Opts.GNUInline = !Opts.C99 && !Opts.CPlusPlus; + Opts.GNUCVersion = 0; Opts.HexFloats = Std.hasHexFloats(); Opts.ImplicitInt = Std.hasImplicitInt(); @@ -2596,6 +2599,21 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, (Opts.ObjCRuntime.getKind() == ObjCRuntime::FragileMacOSX); } + if (Arg *A = Args.getLastArg(options::OPT_fgnuc_version_EQ)) { + // Check that the version has 1 to 3 components and the minor and patch + // versions fit in two decimal digits. 
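(Aside, not part of the patch: as the code that follows shows, the parsed value is packed as major * 10000 + minor * 100 + patch, so the default 4.2.1 becomes 40201, and the preprocessor changes further down unpack it into the individual macros. A hypothetical probe of the result:)

.. code-block:: c++

  #include <cstdio>

  int main() {
    // With the default (no -fgnuc-version= and no -fms-compatibility) this
    // prints "4 2 1"; with -fgnuc-version=9.2.0 it prints "9 2 0"; and with
    // -fgnuc-version=0 the GNU macros are left undefined, so the #else
    // branch is taken.
  #ifdef __GNUC__
    std::printf("%d %d %d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
  #else
    std::printf("no GNU version macros\n");
  #endif
    return 0;
  }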
+ VersionTuple GNUCVer; + bool Invalid = GNUCVer.tryParse(A->getValue()); + unsigned Major = GNUCVer.getMajor(); + unsigned Minor = GNUCVer.getMinor().getValueOr(0); + unsigned Patch = GNUCVer.getSubminor().getValueOr(0); + if (Invalid || GNUCVer.getBuild() || Minor >= 100 || Patch >= 100) { + Diags.Report(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + } + Opts.GNUCVersion = Major * 100 * 100 + Minor * 100 + Patch; + } + if (Args.hasArg(OPT_fgnu89_inline)) { if (Opts.CPlusPlus) Diags.Report(diag::err_drv_argument_not_allowed_with) diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 3fa2cd4841421..02268fea57c18 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -586,13 +586,22 @@ static void InitializePredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__clang_version__", "\"" CLANG_VERSION_STRING " " + getClangFullRepositoryVersion() + "\""); - if (!LangOpts.MSVCCompat) { - // Currently claim to be compatible with GCC 4.2.1-5621, but only if we're - // not compiling for MSVC compatibility - Builder.defineMacro("__GNUC_MINOR__", "2"); - Builder.defineMacro("__GNUC_PATCHLEVEL__", "1"); - Builder.defineMacro("__GNUC__", "4"); + + if (LangOpts.GNUCVersion != 0) { + // Major, minor, patch, are given two decimal places each, so 4.2.1 becomes + // 40201. + unsigned GNUCMajor = LangOpts.GNUCVersion / 100 / 100; + unsigned GNUCMinor = LangOpts.GNUCVersion / 100 % 100; + unsigned GNUCPatch = LangOpts.GNUCVersion % 100; + Builder.defineMacro("__GNUC__", Twine(GNUCMajor)); + Builder.defineMacro("__GNUC_MINOR__", Twine(GNUCMinor)); + Builder.defineMacro("__GNUC_PATCHLEVEL__", Twine(GNUCPatch)); Builder.defineMacro("__GXX_ABI_VERSION", "1002"); + + if (LangOpts.CPlusPlus) { + Builder.defineMacro("__GNUG__", Twine(GNUCMajor)); + Builder.defineMacro("__GXX_WEAK__"); + } } // Define macros for the C11 / C++11 memory orderings @@ -631,7 +640,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (!LangOpts.GNUMode && !LangOpts.MSVCCompat) Builder.defineMacro("__STRICT_ANSI__"); - if (!LangOpts.MSVCCompat && LangOpts.CPlusPlus11) + if (LangOpts.GNUCVersion && LangOpts.CPlusPlus11) Builder.defineMacro("__GXX_EXPERIMENTAL_CXX0X__"); if (LangOpts.ObjC) { @@ -711,7 +720,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (!LangOpts.MSVCCompat && LangOpts.Exceptions) Builder.defineMacro("__EXCEPTIONS"); - if (!LangOpts.MSVCCompat && LangOpts.RTTI) + if (LangOpts.GNUCVersion && LangOpts.RTTI) Builder.defineMacro("__GXX_RTTI"); if (LangOpts.SjLjExceptions) @@ -725,11 +734,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (LangOpts.Deprecated) Builder.defineMacro("__DEPRECATED"); - if (!LangOpts.MSVCCompat && LangOpts.CPlusPlus) { - Builder.defineMacro("__GNUG__", "4"); - Builder.defineMacro("__GXX_WEAK__"); + if (!LangOpts.MSVCCompat && LangOpts.CPlusPlus) Builder.defineMacro("__private_extern__", "extern"); - } if (LangOpts.MicrosoftExt) { if (LangOpts.WChar) { @@ -939,7 +945,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, else Builder.defineMacro("__FINITE_MATH_ONLY__", "0"); - if (!LangOpts.MSVCCompat) { + if (LangOpts.GNUCVersion) { if (LangOpts.GNUInline || LangOpts.CPlusPlus) Builder.defineMacro("__GNUC_GNU_INLINE__"); else @@ -976,7 +982,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, #undef DEFINE_LOCK_FREE_MACRO }; addLockFreeMacros("__CLANG_ATOMIC_"); - if (!LangOpts.MSVCCompat) + if 
(LangOpts.GNUCVersion) addLockFreeMacros("__GCC_ATOMIC_"); if (LangOpts.NoInlineDefine) diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h index b7af62f609aec..841bd84070e89 100644 --- a/clang/lib/Headers/bmiintrin.h +++ b/clang/lib/Headers/bmiintrin.h @@ -14,27 +14,13 @@ #ifndef __BMIINTRIN_H #define __BMIINTRIN_H -#define _tzcnt_u16(a) (__tzcnt_u16((a))) - -#define _andn_u32(a, b) (__andn_u32((a), (b))) - -/* _bextr_u32 != __bextr_u32 */ -#define _blsi_u32(a) (__blsi_u32((a))) - -#define _blsmsk_u32(a) (__blsmsk_u32((a))) - -#define _blsr_u32(a) (__blsr_u32((a))) - -#define _tzcnt_u32(a) (__tzcnt_u32((a))) - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) - /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT instruction behaves as BSF on non-BMI targets, there is code that expects to use it as a potentially faster version of BSF. */ #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#define _tzcnt_u16(a) (__tzcnt_u16((a))) + /// Counts the number of trailing zero bits in the operand. /// /// \headerfile @@ -51,6 +37,94 @@ __tzcnt_u16(unsigned short __X) return __builtin_ia32_tzcnt_u16(__X); } +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TZCNT instruction. +/// +/// \param __X +/// An unsigned 32-bit integer whose trailing zeros are to be counted. +/// \returns An unsigned 32-bit integer containing the number of trailing zero +/// bits in the operand. +static __inline__ unsigned int __RELAXED_FN_ATTRS +__tzcnt_u32(unsigned int __X) +{ + return __builtin_ia32_tzcnt_u32(__X); +} + +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TZCNT instruction. +/// +/// \param __X +/// An unsigned 32-bit integer whose trailing zeros are to be counted. +/// \returns An 32-bit integer containing the number of trailing zero bits in +/// the operand. +static __inline__ int __RELAXED_FN_ATTRS +_mm_tzcnt_32(unsigned int __X) +{ + return __builtin_ia32_tzcnt_u32(__X); +} + +#define _tzcnt_u32(a) (__tzcnt_u32((a))) + +#ifdef __x86_64__ + +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TZCNT instruction. +/// +/// \param __X +/// An unsigned 64-bit integer whose trailing zeros are to be counted. +/// \returns An unsigned 64-bit integer containing the number of trailing zero +/// bits in the operand. +static __inline__ unsigned long long __RELAXED_FN_ATTRS +__tzcnt_u64(unsigned long long __X) +{ + return __builtin_ia32_tzcnt_u64(__X); +} + +/// Counts the number of trailing zero bits in the operand. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TZCNT instruction. +/// +/// \param __X +/// An unsigned 64-bit integer whose trailing zeros are to be counted. +/// \returns An 64-bit integer containing the number of trailing zero bits in +/// the operand. +static __inline__ long long __RELAXED_FN_ATTRS +_mm_tzcnt_64(unsigned long long __X) +{ + return __builtin_ia32_tzcnt_u64(__X); +} + +#define _tzcnt_u64(a) (__tzcnt_u64((a))) + +#endif /* __x86_64__ */ + +#undef __RELAXED_FN_ATTRS + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) + +#define _andn_u32(a, b) (__andn_u32((a), (b))) + +/* _bextr_u32 != __bextr_u32 */ +#define _blsi_u32(a) (__blsi_u32((a))) + +#define _blsmsk_u32(a) (__blsmsk_u32((a))) + +#define _blsr_u32(a) (__blsr_u32((a))) + /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// @@ -169,38 +243,6 @@ __blsr_u32(unsigned int __X) return __X & (__X - 1); } -/// Counts the number of trailing zero bits in the operand. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TZCNT instruction. -/// -/// \param __X -/// An unsigned 32-bit integer whose trailing zeros are to be counted. -/// \returns An unsigned 32-bit integer containing the number of trailing zero -/// bits in the operand. -static __inline__ unsigned int __RELAXED_FN_ATTRS -__tzcnt_u32(unsigned int __X) -{ - return __builtin_ia32_tzcnt_u32(__X); -} - -/// Counts the number of trailing zero bits in the operand. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TZCNT instruction. -/// -/// \param __X -/// An unsigned 32-bit integer whose trailing zeros are to be counted. -/// \returns An 32-bit integer containing the number of trailing zero bits in -/// the operand. -static __inline__ int __RELAXED_FN_ATTRS -_mm_tzcnt_32(unsigned int __X) -{ - return __builtin_ia32_tzcnt_u32(__X); -} - #ifdef __x86_64__ #define _andn_u64(a, b) (__andn_u64((a), (b))) @@ -212,8 +254,6 @@ _mm_tzcnt_32(unsigned int __X) #define _blsr_u64(a) (__blsr_u64((a))) -#define _tzcnt_u64(a) (__tzcnt_u64((a))) - /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. /// @@ -332,41 +372,10 @@ __blsr_u64(unsigned long long __X) return __X & (__X - 1); } -/// Counts the number of trailing zero bits in the operand. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TZCNT instruction. -/// -/// \param __X -/// An unsigned 64-bit integer whose trailing zeros are to be counted. -/// \returns An unsigned 64-bit integer containing the number of trailing zero -/// bits in the operand. -static __inline__ unsigned long long __RELAXED_FN_ATTRS -__tzcnt_u64(unsigned long long __X) -{ - return __builtin_ia32_tzcnt_u64(__X); -} - -/// Counts the number of trailing zero bits in the operand. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TZCNT instruction. -/// -/// \param __X -/// An unsigned 64-bit integer whose trailing zeros are to be counted. -/// \returns An 64-bit integer containing the number of trailing zero bits in -/// the operand. 
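The relaxed ``tzcnt`` definitions above sit outside the BMI feature guard, so they stay usable without ``-mbmi``. A minimal sketch of the intended use (illustrative only):

.. code-block:: c++

  #include <x86intrin.h>

  unsigned trailing_zeros(unsigned x) {
    // Encodes as TZCNT; on pre-BMI CPUs the same encoding executes as BSF,
    // whose result is undefined for x == 0, so callers there still need to
    // treat zero specially.
    return _tzcnt_u32(x);
  }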
-static __inline__ long long __RELAXED_FN_ATTRS -_mm_tzcnt_64(unsigned long long __X) -{ - return __builtin_ia32_tzcnt_u64(__X); -} - #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS -#undef __RELAXED_FN_ATTRS + +#endif /* !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) */ #endif /* __BMIINTRIN_H */ diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 7555ad82fac75..ae900ee85b760 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -64,9 +64,8 @@ #include #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) +/* No feature check desired due to internal checks */ #include -#endif #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__) #include diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 82a4e700552e1..ad4d935d4da51 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -17,6 +17,7 @@ #include "clang/Parse/RAIIObjectsForParser.h" #include "clang/Sema/Scope.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/UniqueVector.h" using namespace clang; @@ -134,7 +135,8 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { {OMPD_target_teams_distribute_parallel, OMPD_for, OMPD_target_teams_distribute_parallel_for}, {OMPD_target_teams_distribute_parallel_for, OMPD_simd, - OMPD_target_teams_distribute_parallel_for_simd}}; + OMPD_target_teams_distribute_parallel_for_simd}, + {OMPD_master, OMPD_taskloop, OMPD_master_taskloop}}; enum { CancellationPoint = 0, DeclareReduction = 1, TargetData = 2 }; Token Tok = P.getCurToken(); unsigned DKind = @@ -853,7 +855,7 @@ static void parseImplementationSelector( (void)T.expectAndConsume(diag::err_expected_lparen_after, CtxSelectorName.data()); const ExprResult Score = parseContextScore(P); - SmallVector, 4> Vendors; + llvm::UniqueVector> Vendors; do { // Parse . 
StringRef VendorName; @@ -862,7 +864,7 @@ static void parseImplementationSelector( VendorName = P.getPreprocessor().getSpelling(P.getCurToken(), Buffer); (void)P.ConsumeToken(); if (!VendorName.empty()) - Vendors.push_back(VendorName); + Vendors.insert(VendorName); } else { P.Diag(Tok.getLocation(), diag::err_omp_declare_variant_item_expected) << "vendor identifier" @@ -878,10 +880,10 @@ static void parseImplementationSelector( (void)T.consumeClose(); if (!Vendors.empty()) { SmallVector ImplVendors(Vendors.size()); - for (int I = 0, E = Vendors.size(); I < E; ++I) - ImplVendors[I] = Vendors[I]; + llvm::copy(Vendors, ImplVendors.begin()); Sema::OpenMPDeclareVariantCtsSelectorData Data( - OMPDeclareVariantAttr::CtxSetImplementation, CSKind, ImplVendors, + OMPDeclareVariantAttr::CtxSetImplementation, CSKind, + llvm::makeMutableArrayRef(ImplVendors.begin(), ImplVendors.size()), Score); Callback(SourceRange(Loc, Tok.getLocation()), Data); } @@ -1502,6 +1504,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_target_parallel_for: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_distribute: case OMPD_end_declare_target: case OMPD_target_update: @@ -1556,12 +1559,11 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( /// 'parallel for' | 'parallel sections' | 'task' | 'taskyield' | /// 'barrier' | 'taskwait' | 'flush' | 'ordered' | 'atomic' | /// 'for simd' | 'parallel for simd' | 'target' | 'target data' | -/// 'taskgroup' | 'teams' | 'taskloop' | 'taskloop simd' | -/// 'distribute' | 'target enter data' | 'target exit data' | -/// 'target parallel' | 'target parallel for' | -/// 'target update' | 'distribute parallel for' | -/// 'distribute paralle for simd' | 'distribute simd' | -/// 'target parallel for simd' | 'target simd' | +/// 'taskgroup' | 'teams' | 'taskloop' | 'taskloop simd' | 'master +/// taskloop' | 'distribute' | 'target enter data' | 'target exit data' +/// | 'target parallel' | 'target parallel for' | 'target update' | +/// 'distribute parallel for' | 'distribute paralle for simd' | +/// 'distribute simd' | 'target parallel for simd' | 'target simd' | /// 'teams distribute' | 'teams distribute simd' | /// 'teams distribute parallel for simd' | /// 'teams distribute parallel for' | 'target teams' | @@ -1744,6 +1746,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) { case OMPD_target_parallel_for: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_distribute: case OMPD_distribute_parallel_for: case OMPD_distribute_parallel_for_simd: diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 3c9c991c77feb..d0ddfd040c9c9 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -600,40 +600,6 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD, NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); } -// Do we know that we will eventually codegen the given function? -static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return false; - - // When compiling for device, host functions are never emitted. Similarly, - // when compiling for host, device and global functions are never emitted. - // (Technically, we do emit a host-side stub for global functions, but this - // doesn't count for our purposes here.) 
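The policy of the removed helper reappears in the new ``Sema::getEmissionStatus``; for a CUDA compilation it classifies functions roughly as follows (hypothetical declarations):

.. code-block:: c++

  __host__ void host_fn();         // discarded when compiling for the device
  __device__ void device_fn();     // discarded when compiling for the host
  __global__ void kernel_fn();     // the host side only sees the launch stub
  inline __device__ int helper() { // discardable linkage: counted as emitted
    return 0;                      // only once a known-emitted caller uses it
  }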
- Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); - if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) - return false; - if (!S.getLangOpts().CUDAIsDevice && - (T == Sema::CFT_Device || T == Sema::CFT_Global)) - return false; - - // Check whether this function is externally visible -- if so, it's - // known-emitted. - // - // We have to check the GVA linkage of the function's *definition* -- if we - // only have a declaration, we don't know whether or not the function will be - // emitted, because (say) the definition could include "inline". - FunctionDecl *Def = FD->getDefinition(); - - if (Def && - !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) - return true; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return S.DeviceKnownEmittedFns.count(FD) > 0; -} - Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); @@ -647,7 +613,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc, // device code if we're compiling for device. Defer any errors in device // mode until the function is known-emitted. if (getLangOpts().CUDAIsDevice) { - return IsKnownEmitted(*this, dyn_cast(CurContext)) + return (getEmissionStatus(cast(CurContext)) == + FunctionEmissionStatus::Emitted) ? DeviceDiagBuilder::K_ImmediateWithCallStack : DeviceDiagBuilder::K_Deferred; } @@ -675,7 +642,8 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc, if (getLangOpts().CUDAIsDevice) return DeviceDiagBuilder::K_Nop; - return IsKnownEmitted(*this, dyn_cast(CurContext)) + return (getEmissionStatus(cast(CurContext)) == + FunctionEmissionStatus::Emitted) ? DeviceDiagBuilder::K_ImmediateWithCallStack : DeviceDiagBuilder::K_Deferred; default: @@ -702,12 +670,16 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. - bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); + bool CallerKnownEmitted = + getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted; if (CallerKnownEmitted) { // Host-side references to a __global__ function refer to the stub, so the // function itself is never emitted and therefore should not be marked. - if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) - markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted); + if (!shouldIgnoreInHostDeviceCheck(Callee)) + markKnownEmitted( + *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) { + return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; + }); } else { // If we have // host fn calls kernel fn calls host+device, @@ -715,7 +687,7 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { // omitting at the call to the kernel from the callgraph. This ensures // that, when compiling for host, only HD functions actually called from the // host get marked as known-emitted. 
- if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) + if (!shouldIgnoreInHostDeviceCheck(Callee)) DeviceCallGraph[Caller].insert({Callee, Loc}); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 9409dc8072cad..eda7de8e0f31b 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -4284,9 +4284,11 @@ void Sema::handleTagNumbering(const TagDecl *Tag, Scope *TagScope) { } // If this tag isn't a direct child of a class, number it if it is local. + MangleNumberingContext *MCtx; Decl *ManglingContextDecl; - if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext( - Tag->getDeclContext(), ManglingContextDecl)) { + std::tie(MCtx, ManglingContextDecl) = + getCurrentMangleNumberContext(Tag->getDeclContext()); + if (MCtx) { Context.setManglingNumber( Tag, MCtx->getManglingNumber( Tag, getMSManglingNumber(getLangOpts(), TagScope))); @@ -5022,9 +5024,11 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS, if (VarDecl *NewVD = dyn_cast(Anon)) { if (getLangOpts().CPlusPlus && NewVD->isStaticLocal()) { + MangleNumberingContext *MCtx; Decl *ManglingContextDecl; - if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext( - NewVD->getDeclContext(), ManglingContextDecl)) { + std::tie(MCtx, ManglingContextDecl) = + getCurrentMangleNumberContext(NewVD->getDeclContext()); + if (MCtx) { Context.setManglingNumber( NewVD, MCtx->getManglingNumber( NewVD, getMSManglingNumber(getLangOpts(), S))); @@ -7098,9 +7102,11 @@ NamedDecl *Sema::ActOnVariableDeclarator( RegisterLocallyScopedExternCDecl(NewVD, S); if (getLangOpts().CPlusPlus && NewVD->isStaticLocal()) { + MangleNumberingContext *MCtx; Decl *ManglingContextDecl; - if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext( - NewVD->getDeclContext(), ManglingContextDecl)) { + std::tie(MCtx, ManglingContextDecl) = + getCurrentMangleNumberContext(NewVD->getDeclContext()); + if (MCtx) { Context.setManglingNumber( NewVD, MCtx->getManglingNumber( NewVD, getMSManglingNumber(getLangOpts(), S))); @@ -17618,3 +17624,87 @@ void Sema::ActOnPragmaWeakAlias(IdentifierInfo* Name, Decl *Sema::getObjCDeclContext() const { return (dyn_cast_or_null(CurContext)); } + +Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD) { + // Templates are emitted when they're instantiated. + if (FD->isDependentContext()) + return FunctionEmissionStatus::TemplateDiscarded; + + FunctionEmissionStatus OMPES = FunctionEmissionStatus::Unknown; + if (LangOpts.OpenMPIsDevice) { + Optional DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + if (DevTy.hasValue()) { + if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) + OMPES = FunctionEmissionStatus::OMPDiscarded; + else if (DeviceKnownEmittedFns.count(FD) > 0) + OMPES = FunctionEmissionStatus::Emitted; + } + } else if (LangOpts.OpenMP) { + // In OpenMP 4.5 all the functions are host functions. + if (LangOpts.OpenMP <= 45) { + OMPES = FunctionEmissionStatus::Emitted; + } else { + Optional DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + // In OpenMP 5.0 or above, DevTy may be changed later by + // #pragma omp declare target to(*) device_type(*). Therefore DevTy + // having no value does not imply host. The emission status will be + // checked again at the end of compilation unit. 
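A small example of the ordering issue described in the comment, assuming OpenMP 5.0 semantics: the ``device_type`` of a function may only be established by a later directive, so a missing value cannot yet be taken to mean host.

.. code-block:: c++

  int work();   // no device_type information yet: emission status is Unknown

  #pragma omp declare target to(work) device_type(nohost)
  // Only from here on is 'work' known to be discarded from the host build;
  // the status is rechecked at the end of the translation unit.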
+ if (DevTy.hasValue()) { + if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { + OMPES = FunctionEmissionStatus::OMPDiscarded; + } else if (DeviceKnownEmittedFns.count(FD) > 0) { + OMPES = FunctionEmissionStatus::Emitted; + } + } + } + } + if (OMPES == FunctionEmissionStatus::OMPDiscarded || + (OMPES == FunctionEmissionStatus::Emitted && !LangOpts.CUDA)) + return OMPES; + + if (LangOpts.CUDA) { + // When compiling for device, host functions are never emitted. Similarly, + // when compiling for host, device and global functions are never emitted. + // (Technically, we do emit a host-side stub for global functions, but this + // doesn't count for our purposes here.) + Sema::CUDAFunctionTarget T = IdentifyCUDATarget(FD); + if (LangOpts.CUDAIsDevice && T == Sema::CFT_Host) + return FunctionEmissionStatus::CUDADiscarded; + if (!LangOpts.CUDAIsDevice && + (T == Sema::CFT_Device || T == Sema::CFT_Global)) + return FunctionEmissionStatus::CUDADiscarded; + + // Check whether this function is externally visible -- if so, it's + // known-emitted. + // + // We have to check the GVA linkage of the function's *definition* -- if we + // only have a declaration, we don't know whether or not the function will + // be emitted, because (say) the definition could include "inline". + FunctionDecl *Def = FD->getDefinition(); + + if (Def && + !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def)) + && (!LangOpts.OpenMP || OMPES == FunctionEmissionStatus::Emitted)) + return FunctionEmissionStatus::Emitted; + } + + // Otherwise, the function is known-emitted if it's in our set of + // known-emitted functions. + return (DeviceKnownEmittedFns.count(FD) > 0) + ? FunctionEmissionStatus::Emitted + : FunctionEmissionStatus::Unknown; +} + +bool Sema::shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee) { + // Host-side references to a __global__ function refer to the stub, so the + // function itself is never emitted and therefore should not be marked. + // If we have host fn calls kernel fn calls host+device, the HD function + // does not get instantiated on the host. We model this by omitting at the + // call to the kernel from the callgraph. This ensures that, when compiling + // for host, only HD functions actually called from the host get marked as + // known-emitted. 
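The call chain referred to in the comment, spelled out with hypothetical functions (CUDA attributes and launch syntax assumed):

.. code-block:: c++

  __host__ __device__ int hd() { return 0; }  // host+device
  __global__ void kernel() { hd(); }          // kernel calls the HD function
  void host_fn() { kernel<<<1, 1>>>(); }      // the host call binds to the
                                              // launch stub, so 'kernel' (and
                                              // the HD call behind it) is left
                                              // out of the host-side check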
+ return LangOpts.CUDA && !LangOpts.CUDAIsDevice && + IdentifyCUDATarget(Callee) == CFT_Global; +} diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3db36f3bafc16..424971dc88a46 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -14109,10 +14109,11 @@ void Sema::ActOnBlockStart(SourceLocation CaretLoc, Scope *CurScope) { BlockDecl *Block = BlockDecl::Create(Context, CurContext, CaretLoc); if (LangOpts.CPlusPlus) { + MangleNumberingContext *MCtx; Decl *ManglingContextDecl; - if (MangleNumberingContext *MCtx = - getCurrentMangleNumberContext(Block->getDeclContext(), - ManglingContextDecl)) { + std::tie(MCtx, ManglingContextDecl) = + getCurrentMangleNumberContext(Block->getDeclContext()); + if (MCtx) { unsigned ManglingNumber = MCtx->getManglingNumber(Block); Block->setBlockMangling(ManglingNumber, ManglingContextDecl); } diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index b05ed75e8791a..df4f0ddc6c37d 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -272,12 +272,11 @@ static bool isInInlineFunction(const DeclContext *DC) { return false; } -MangleNumberingContext * -Sema::getCurrentMangleNumberContext(const DeclContext *DC, - Decl *&ManglingContextDecl) { +std::tuple +Sema::getCurrentMangleNumberContext(const DeclContext *DC) { // Compute the context for allocating mangling numbers in the current // expression, if the ABI requires them. - ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl; + Decl *ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl; enum ContextKind { Normal, @@ -325,22 +324,18 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC, if ((IsInNonspecializedTemplate && !(ManglingContextDecl && isa(ManglingContextDecl))) || isInInlineFunction(CurContext)) { - ManglingContextDecl = nullptr; while (auto *CD = dyn_cast(DC)) DC = CD->getParent(); - return &Context.getManglingNumberContext(DC); + return std::make_tuple(&Context.getManglingNumberContext(DC), nullptr); } - ManglingContextDecl = nullptr; - return nullptr; + return std::make_tuple(nullptr, nullptr); } case StaticDataMember: // -- the initializers of nonspecialized static members of template classes - if (!IsInNonspecializedTemplate) { - ManglingContextDecl = nullptr; - return nullptr; - } + if (!IsInNonspecializedTemplate) + return std::make_tuple(nullptr, nullptr); // Fall through to get the current context. 
LLVM_FALLTHROUGH; @@ -352,8 +347,10 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC, // -- the initializers of inline variables case VariableTemplate: // -- the initializers of templated variables - return &Context.getManglingNumberContext(ASTContext::NeedExtraManglingDecl, - ManglingContextDecl); + return std::make_tuple( + &Context.getManglingNumberContext(ASTContext::NeedExtraManglingDecl, + ManglingContextDecl), + ManglingContextDecl); } llvm_unreachable("unexpected context"); @@ -431,10 +428,11 @@ CXXMethodDecl *Sema::startLambdaDefinition( if (Mangling) { Class->setLambdaMangling(Mangling->first, Mangling->second); } else { + MangleNumberingContext *MCtx; Decl *ManglingContextDecl; - if (MangleNumberingContext *MCtx = - getCurrentMangleNumberContext(Class->getDeclContext(), - ManglingContextDecl)) { + std::tie(MCtx, ManglingContextDecl) = + getCurrentMangleNumberContext(Class->getDeclContext()); + if (MCtx) { unsigned ManglingNumber = MCtx->getManglingNumber(Method); Class->setLambdaMangling(ManglingNumber, ManglingContextDecl); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5ceee810c6b32..b736c36a299bb 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1565,34 +1565,11 @@ enum class FunctionEmissionStatus { }; } // anonymous namespace -/// Do we know that we will eventually codegen the given function? -static FunctionEmissionStatus isKnownDeviceEmitted(Sema &S, FunctionDecl *FD) { - assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsDevice && - "Expected OpenMP device compilation."); - // Templates are emitted when they're instantiated. - if (FD->isDependentContext()) - return FunctionEmissionStatus::Discarded; - - Optional DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) - return (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) - ? FunctionEmissionStatus::Discarded - : FunctionEmissionStatus::Emitted; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return (S.DeviceKnownEmittedFns.count(FD) > 0) - ? FunctionEmissionStatus::Emitted - : FunctionEmissionStatus::Unknown; -} - Sema::DeviceDiagBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(LangOpts.OpenMP && LangOpts.OpenMPIsDevice && "Expected OpenMP device compilation."); - FunctionEmissionStatus FES = - isKnownDeviceEmitted(*this, getCurFunctionDecl()); + FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl()); DeviceDiagBuilder::Kind Kind = DeviceDiagBuilder::K_Nop; switch (FES) { case FunctionEmissionStatus::Emitted: @@ -1602,42 +1579,23 @@ Sema::DeviceDiagBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, Kind = isOpenMPDeviceDelayedContext(*this) ? DeviceDiagBuilder::K_Deferred : DeviceDiagBuilder::K_Immediate; break; - case FunctionEmissionStatus::Discarded: + case FunctionEmissionStatus::TemplateDiscarded: + case FunctionEmissionStatus::OMPDiscarded: Kind = DeviceDiagBuilder::K_Nop; break; + case FunctionEmissionStatus::CUDADiscarded: + llvm_unreachable("CUDADiscarded unexpected in OpenMP device compilation"); + break; } return DeviceDiagBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this); } -/// Do we know that we will eventually codegen the given function? -static FunctionEmissionStatus isKnownHostEmitted(Sema &S, FunctionDecl *FD) { - assert(S.LangOpts.OpenMP && !S.LangOpts.OpenMPIsDevice && - "Expected OpenMP host compilation."); - // In OpenMP 4.5 all the functions are host functions. 
- if (S.LangOpts.OpenMP <= 45) - return FunctionEmissionStatus::Emitted; - - Optional DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) - return (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) - ? FunctionEmissionStatus::Discarded - : FunctionEmissionStatus::Emitted; - - // Otherwise, the function is known-emitted if it's in our set of - // known-emitted functions. - return (S.DeviceKnownEmittedFns.count(FD) > 0) - ? FunctionEmissionStatus::Emitted - : FunctionEmissionStatus::Unknown; -} - Sema::DeviceDiagBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc, unsigned DiagID) { assert(LangOpts.OpenMP && !LangOpts.OpenMPIsDevice && "Expected OpenMP host compilation."); - FunctionEmissionStatus FES = - isKnownHostEmitted(*this, getCurFunctionDecl()); + FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl()); DeviceDiagBuilder::Kind Kind = DeviceDiagBuilder::K_Nop; switch (FES) { case FunctionEmissionStatus::Emitted: @@ -1646,7 +1604,9 @@ Sema::DeviceDiagBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc, case FunctionEmissionStatus::Unknown: Kind = DeviceDiagBuilder::K_Deferred; break; - case FunctionEmissionStatus::Discarded: + case FunctionEmissionStatus::TemplateDiscarded: + case FunctionEmissionStatus::OMPDiscarded: + case FunctionEmissionStatus::CUDADiscarded: Kind = DeviceDiagBuilder::K_Nop; break; } @@ -1663,31 +1623,34 @@ void Sema::checkOpenMPDeviceFunction(SourceLocation Loc, FunctionDecl *Callee, FunctionDecl *Caller = getCurFunctionDecl(); // host only function are not available on the device. - if (Caller && - (isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Emitted || - (!isOpenMPDeviceDelayedContext(*this) && - isKnownDeviceEmitted(*this, Caller) == - FunctionEmissionStatus::Unknown)) && - isKnownDeviceEmitted(*this, Callee) == - FunctionEmissionStatus::Discarded) { - StringRef HostDevTy = - getOpenMPSimpleClauseTypeName(OMPC_device_type, OMPC_DEVICE_TYPE_host); - Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0; - Diag(Callee->getAttr()->getLocation(), - diag::note_omp_marked_device_type_here) - << HostDevTy; - return; + if (Caller) { + FunctionEmissionStatus CallerS = getEmissionStatus(Caller); + FunctionEmissionStatus CalleeS = getEmissionStatus(Callee); + assert(CallerS != FunctionEmissionStatus::CUDADiscarded && + CalleeS != FunctionEmissionStatus::CUDADiscarded && + "CUDADiscarded unexpected in OpenMP device function check"); + if ((CallerS == FunctionEmissionStatus::Emitted || + (!isOpenMPDeviceDelayedContext(*this) && + CallerS == FunctionEmissionStatus::Unknown)) && + CalleeS == FunctionEmissionStatus::OMPDiscarded) { + StringRef HostDevTy = getOpenMPSimpleClauseTypeName( + OMPC_device_type, OMPC_DEVICE_TYPE_host); + Diag(Loc, diag::err_omp_wrong_device_function_call) << HostDevTy << 0; + Diag(Callee->getAttr()->getLocation(), + diag::note_omp_marked_device_type_here) + << HostDevTy; + return; + } } // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. 
if ((CheckForDelayedContext && !isOpenMPDeviceDelayedContext(*this)) || (!Caller && !CheckForDelayedContext) || - (Caller && - isKnownDeviceEmitted(*this, Caller) == FunctionEmissionStatus::Emitted)) + (Caller && getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted)) markKnownEmitted(*this, Caller, Callee, Loc, [CheckForDelayedContext](Sema &S, FunctionDecl *FD) { return CheckForDelayedContext && - isKnownDeviceEmitted(S, FD) == + S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; }); else if (Caller) @@ -1703,29 +1666,38 @@ void Sema::checkOpenMPHostFunction(SourceLocation Loc, FunctionDecl *Callee, FunctionDecl *Caller = getCurFunctionDecl(); // device only function are not available on the host. - if (Caller && - isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted && - isKnownHostEmitted(*this, Callee) == FunctionEmissionStatus::Discarded) { - StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( - OMPC_device_type, OMPC_DEVICE_TYPE_nohost); - Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1; - Diag(Callee->getAttr()->getLocation(), - diag::note_omp_marked_device_type_here) - << NoHostDevTy; - return; + if (Caller) { + FunctionEmissionStatus CallerS = getEmissionStatus(Caller); + FunctionEmissionStatus CalleeS = getEmissionStatus(Callee); + assert( + (LangOpts.CUDA || (CallerS != FunctionEmissionStatus::CUDADiscarded && + CalleeS != FunctionEmissionStatus::CUDADiscarded)) && + "CUDADiscarded unexpected in OpenMP host function check"); + if (CallerS == FunctionEmissionStatus::Emitted && + CalleeS == FunctionEmissionStatus::OMPDiscarded) { + StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( + OMPC_device_type, OMPC_DEVICE_TYPE_nohost); + Diag(Loc, diag::err_omp_wrong_device_function_call) << NoHostDevTy << 1; + Diag(Callee->getAttr()->getLocation(), + diag::note_omp_marked_device_type_here) + << NoHostDevTy; + return; + } } // If the caller is known-emitted, mark the callee as known-emitted. // Otherwise, mark the call in our call graph so we can traverse it later. 
- if ((!CheckCaller && !Caller) || - (Caller && - isKnownHostEmitted(*this, Caller) == FunctionEmissionStatus::Emitted)) - markKnownEmitted( - *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) { - return CheckCaller && - isKnownHostEmitted(S, FD) == FunctionEmissionStatus::Emitted; - }); - else if (Caller) - DeviceCallGraph[Caller].insert({Callee, Loc}); + if (!shouldIgnoreInHostDeviceCheck(Callee)) { + if ((!CheckCaller && !Caller) || + (Caller && + getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted)) + markKnownEmitted( + *this, Caller, Callee, Loc, [CheckCaller](Sema &S, FunctionDecl *FD) { + return CheckCaller && + S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted; + }); + else if (Caller) + DeviceCallGraph[Caller].insert({Callee, Loc}); + } } void Sema::checkOpenMPDeviceExpr(const Expr *E) { @@ -3265,7 +3237,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { break; } case OMPD_taskloop: - case OMPD_taskloop_simd: { + case OMPD_taskloop_simd: + case OMPD_master_taskloop: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1) .withConst(); @@ -4438,6 +4411,11 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_taskloop); break; + case OMPD_master_taskloop: + Res = ActOnOpenMPMasterTaskLoopDirective( + ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); + AllowedNameModifiers.push_back(OMPD_taskloop); + break; case OMPD_distribute: Res = ActOnOpenMPDistributeDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -6462,6 +6440,7 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { (LangOpts.OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate && DVar.CKind != OMPC_private))) || ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop || + DKind == OMPD_master_taskloop || isOpenMPDistributeDirective(DKind)) && !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) && @@ -9276,6 +9255,42 @@ StmtResult Sema::ActOnOpenMPTaskLoopSimdDirective( NestedLoopCount, Clauses, AStmt, B); } +StmtResult Sema::ActOnOpenMPMasterTaskLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { + if (!AStmt) + return StmtError(); + + assert(isa(AStmt) && "Captured statement expected"); + OMPLoopDirective::HelperExprs B; + // In presence of clause 'collapse' or 'ordered' with number of loops, it will + // define the nested loops number. + unsigned NestedLoopCount = + checkOpenMPLoop(OMPD_master_taskloop, getCollapseNumberExpr(Clauses), + /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack, + VarsWithImplicitDSA, B); + if (NestedLoopCount == 0) + return StmtError(); + + assert((CurContext->isDependentContext() || B.builtAll()) && + "omp for loop exprs were not built"); + + // OpenMP, [2.9.2 taskloop Construct, Restrictions] + // The grainsize clause and num_tasks clause are mutually exclusive and may + // not appear on the same taskloop directive. + if (checkGrainsizeNumTasksClauses(*this, Clauses)) + return StmtError(); + // OpenMP, [2.9.2 taskloop Construct, Restrictions] + // If a reduction clause is present on the taskloop directive, the nogroup + // clause must not be specified. 
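For reference, a sketch of how the new combined construct handled here is written; ``grainsize`` is one of the taskloop clauses validated above:

.. code-block:: c++

  void saxpy(int n, float a, const float *x, float *y) {
  #pragma omp parallel
  #pragma omp master taskloop grainsize(64)
    for (int i = 0; i < n; ++i)
      y[i] = a * x[i] + y[i];
  }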
+ if (checkReductionClauseWithNogroup(*this, Clauses)) + return StmtError(); + + setFunctionHasBranchProtectedScope(); + return OMPMasterTaskLoopDirective::Create(Context, StartLoc, EndLoc, + NestedLoopCount, Clauses, AStmt, B); +} + StmtResult Sema::ActOnOpenMPDistributeDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) { @@ -10182,6 +10197,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_task: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_target_data: // Do not capture if-clause expressions. break; @@ -10254,6 +10270,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_task: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: @@ -10310,6 +10327,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_task: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_target_data: case OMPD_target_enter_data: case OMPD_target_exit_data: @@ -10377,6 +10395,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_task: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_target_data: case OMPD_target_enter_data: case OMPD_target_exit_data: @@ -10444,6 +10463,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_task: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_target_data: case OMPD_target_enter_data: case OMPD_target_exit_data: @@ -10515,6 +10535,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_task: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_target_data: case OMPD_target_enter_data: case OMPD_target_exit_data: @@ -10587,6 +10608,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_task: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: case OMPD_cancel: case OMPD_parallel: case OMPD_parallel_sections: diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 62dc17254c710..284962f3e07ca 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1099,9 +1099,6 @@ QualType Sema::CheckNonTypeTemplateParameterType(QualType T, T->isMemberPointerType() || // -- std::nullptr_t. T->isNullPtrType() || - // If T is a dependent type, we can't do the check now, so we - // assume that it is well-formed. - T->isDependentType() || // Allow use of auto in template parameter declarations. T->isUndeducedType()) { // C++ [temp.param]p5: The top-level cv-qualifiers on the template-parameter @@ -1114,9 +1111,18 @@ QualType Sema::CheckNonTypeTemplateParameterType(QualType T, // A non-type template-parameter of type "array of T" or // "function returning T" is adjusted to be of type "pointer to // T" or "pointer to function returning T", respectively. - else if (T->isArrayType() || T->isFunctionType()) + if (T->isArrayType() || T->isFunctionType()) return Context.getDecayedType(T); + // If T is a dependent type, we can't do the check now, so we + // assume that it is well-formed. Note that stripping off the + // qualifiers here is not really correct if T turns out to be + // an array type, but we'll recompute the type everywhere it's + // used during instantiation, so that should be OK. (Using the + // qualified type is equally wrong.) 
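A sketch of the dependent case the comment is about: the parameter type cannot be validated until instantiation, so it is accepted here (stripped of qualifiers) and rechecked once the template is instantiated.

.. code-block:: c++

  template <typename T, T Value>   // T is dependent: the check is deferred
  struct Constant {};

  Constant<int, 3> ok;             // fine once T = int is known
  // Constant<float, 2.0f> bad;    // rejected when the deferred check runs:
                                   // a floating-point type is not a valid
                                   // non-type template parameter type here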
+ if (T->isDependentType()) + return T.getUnqualifiedType(); + Diag(Loc, diag::err_template_nontype_parm_bad_type) << T; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index f62c0d891a458..98506125151bc 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -8246,6 +8246,17 @@ StmtResult TreeTransform::TransformOMPTaskLoopSimdDirective( return Res; } +template +StmtResult TreeTransform::TransformOMPMasterTaskLoopDirective( + OMPMasterTaskLoopDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock(OMPD_master_taskloop, DirName, + nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPDistributeDirective( OMPDistributeDirective *D) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 55bec61693a37..55f2be3e10317 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1523,9 +1523,9 @@ bool ASTReader::ReadSLocEntry(int ID) { if (Record[3]) FileInfo.setHasLineDirectives(); - const DeclID *FirstDecl = F->FileSortedDecls + Record[6]; unsigned NumFileDecls = Record[7]; if (NumFileDecls && ContextObj) { + const DeclID *FirstDecl = F->FileSortedDecls + Record[6]; assert(F->FileSortedDecls && "FILE_SORTED_DECLS not encountered yet ?"); FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl, NumFileDecls)); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 76928182a1e8e..d906286db5434 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1656,55 +1656,11 @@ void ASTDeclReader::VisitUnresolvedUsingTypenameDecl( void ASTDeclReader::ReadCXXDefinitionData( struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D) { + #define FIELD(Name, Width, Merge) \ + Data.Name = Record.readInt(); + #include "clang/AST/CXXRecordDeclDefinitionBits.def" + // Note: the caller has deserialized the IsLambda bit already. 
- Data.UserDeclaredConstructor = Record.readInt(); - Data.UserDeclaredSpecialMembers = Record.readInt(); - Data.Aggregate = Record.readInt(); - Data.PlainOldData = Record.readInt(); - Data.Empty = Record.readInt(); - Data.Polymorphic = Record.readInt(); - Data.Abstract = Record.readInt(); - Data.IsStandardLayout = Record.readInt(); - Data.IsCXX11StandardLayout = Record.readInt(); - Data.HasBasesWithFields = Record.readInt(); - Data.HasBasesWithNonStaticDataMembers = Record.readInt(); - Data.HasPrivateFields = Record.readInt(); - Data.HasProtectedFields = Record.readInt(); - Data.HasPublicFields = Record.readInt(); - Data.HasMutableFields = Record.readInt(); - Data.HasVariantMembers = Record.readInt(); - Data.HasOnlyCMembers = Record.readInt(); - Data.HasInClassInitializer = Record.readInt(); - Data.HasUninitializedReferenceMember = Record.readInt(); - Data.HasUninitializedFields = Record.readInt(); - Data.HasInheritedConstructor = Record.readInt(); - Data.HasInheritedAssignment = Record.readInt(); - Data.NeedOverloadResolutionForCopyConstructor = Record.readInt(); - Data.NeedOverloadResolutionForMoveConstructor = Record.readInt(); - Data.NeedOverloadResolutionForMoveAssignment = Record.readInt(); - Data.NeedOverloadResolutionForDestructor = Record.readInt(); - Data.DefaultedCopyConstructorIsDeleted = Record.readInt(); - Data.DefaultedMoveConstructorIsDeleted = Record.readInt(); - Data.DefaultedMoveAssignmentIsDeleted = Record.readInt(); - Data.DefaultedDestructorIsDeleted = Record.readInt(); - Data.HasTrivialSpecialMembers = Record.readInt(); - Data.HasTrivialSpecialMembersForCall = Record.readInt(); - Data.DeclaredNonTrivialSpecialMembers = Record.readInt(); - Data.DeclaredNonTrivialSpecialMembersForCall = Record.readInt(); - Data.HasIrrelevantDestructor = Record.readInt(); - Data.HasConstexprNonCopyMoveConstructor = Record.readInt(); - Data.HasDefaultedDefaultConstructor = Record.readInt(); - Data.DefaultedDefaultConstructorIsConstexpr = Record.readInt(); - Data.HasConstexprDefaultConstructor = Record.readInt(); - Data.HasNonLiteralTypeFieldsOrBases = Record.readInt(); - Data.ComputedVisibleConversions = Record.readInt(); - Data.UserProvidedDefaultConstructor = Record.readInt(); - Data.DeclaredSpecialMembers = Record.readInt(); - Data.ImplicitCopyConstructorCanHaveConstParamForVBase = Record.readInt(); - Data.ImplicitCopyConstructorCanHaveConstParamForNonVBase = Record.readInt(); - Data.ImplicitCopyAssignmentHasConstParam = Record.readInt(); - Data.HasDeclaredCopyConstructorWithConstParam = Record.readInt(); - Data.HasDeclaredCopyAssignmentWithConstParam = Record.readInt(); Data.ODRHash = Record.readInt(); Data.HasODRHash = true; @@ -1719,7 +1675,9 @@ void ASTDeclReader::ReadCXXDefinitionData( Data.VBases = ReadGlobalOffset(); Record.readUnresolvedSet(Data.Conversions); - Record.readUnresolvedSet(Data.VisibleConversions); + Data.ComputedVisibleConversions = Record.readInt(); + if (Data.ComputedVisibleConversions) + Record.readUnresolvedSet(Data.VisibleConversions); assert(Data.Definition && "Data.Definition should be already set!"); Data.FirstFriend = ReadDeclID(); @@ -1792,63 +1750,17 @@ void ASTDeclReader::MergeDefinitionData( return; } - // FIXME: Move this out into a .def file? 
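What the non-mergeable bits in the field list below are guarding against: the same class reaching the reader from two modules with diverging definition flags, for example (hypothetical headers):

.. code-block:: c++

  #if defined(MODULE_A_VIEW)
  struct Point { int x, y; };           // PlainOldData bit is set
  #else                                 // module B's diverging view
  struct Point { int x, y; Point(); };  // PlainOldData bit is clear
  #endif
  // When both views are loaded, the mismatched NO_MERGE field is flagged as
  // an ODR violation instead of being silently OR-ed together.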
bool DetectedOdrViolation = false; -#define OR_FIELD(Field) DD.Field |= MergeDD.Field; -#define MATCH_FIELD(Field) \ + + #define FIELD(Name, Width, Merge) Merge(Name) + #define MERGE_OR(Field) DD.Field |= MergeDD.Field; + #define NO_MERGE(Field) \ DetectedOdrViolation |= DD.Field != MergeDD.Field; \ - OR_FIELD(Field) - MATCH_FIELD(UserDeclaredConstructor) - MATCH_FIELD(UserDeclaredSpecialMembers) - MATCH_FIELD(Aggregate) - MATCH_FIELD(PlainOldData) - MATCH_FIELD(Empty) - MATCH_FIELD(Polymorphic) - MATCH_FIELD(Abstract) - MATCH_FIELD(IsStandardLayout) - MATCH_FIELD(IsCXX11StandardLayout) - MATCH_FIELD(HasBasesWithFields) - MATCH_FIELD(HasBasesWithNonStaticDataMembers) - MATCH_FIELD(HasPrivateFields) - MATCH_FIELD(HasProtectedFields) - MATCH_FIELD(HasPublicFields) - MATCH_FIELD(HasMutableFields) - MATCH_FIELD(HasVariantMembers) - MATCH_FIELD(HasOnlyCMembers) - MATCH_FIELD(HasInClassInitializer) - MATCH_FIELD(HasUninitializedReferenceMember) - MATCH_FIELD(HasUninitializedFields) - MATCH_FIELD(HasInheritedConstructor) - MATCH_FIELD(HasInheritedAssignment) - MATCH_FIELD(NeedOverloadResolutionForCopyConstructor) - MATCH_FIELD(NeedOverloadResolutionForMoveConstructor) - MATCH_FIELD(NeedOverloadResolutionForMoveAssignment) - MATCH_FIELD(NeedOverloadResolutionForDestructor) - MATCH_FIELD(DefaultedCopyConstructorIsDeleted) - MATCH_FIELD(DefaultedMoveConstructorIsDeleted) - MATCH_FIELD(DefaultedMoveAssignmentIsDeleted) - MATCH_FIELD(DefaultedDestructorIsDeleted) - OR_FIELD(HasTrivialSpecialMembers) - OR_FIELD(HasTrivialSpecialMembersForCall) - OR_FIELD(DeclaredNonTrivialSpecialMembers) - OR_FIELD(DeclaredNonTrivialSpecialMembersForCall) - MATCH_FIELD(HasIrrelevantDestructor) - OR_FIELD(HasConstexprNonCopyMoveConstructor) - OR_FIELD(HasDefaultedDefaultConstructor) - MATCH_FIELD(DefaultedDefaultConstructorIsConstexpr) - OR_FIELD(HasConstexprDefaultConstructor) - MATCH_FIELD(HasNonLiteralTypeFieldsOrBases) - // ComputedVisibleConversions is handled below. 
- MATCH_FIELD(UserProvidedDefaultConstructor) - OR_FIELD(DeclaredSpecialMembers) - MATCH_FIELD(ImplicitCopyConstructorCanHaveConstParamForVBase) - MATCH_FIELD(ImplicitCopyConstructorCanHaveConstParamForNonVBase) - MATCH_FIELD(ImplicitCopyAssignmentHasConstParam) - OR_FIELD(HasDeclaredCopyConstructorWithConstParam) - OR_FIELD(HasDeclaredCopyAssignmentWithConstParam) - MATCH_FIELD(IsLambda) -#undef OR_FIELD -#undef MATCH_FIELD + MERGE_OR(Field) + #include "clang/AST/CXXRecordDeclDefinitionBits.def" + NO_MERGE(IsLambda) + #undef NO_MERGE + #undef MERGE_OR if (DD.NumBases != MergeDD.NumBases || DD.NumVBases != MergeDD.NumVBases) DetectedOdrViolation = true; diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 8ab0845d151e5..8ee359646ff41 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2276,6 +2276,11 @@ void ASTStmtReader::VisitOMPTaskLoopSimdDirective(OMPTaskLoopSimdDirective *D) { VisitOMPLoopDirective(D); } +void ASTStmtReader::VisitOMPMasterTaskLoopDirective( + OMPMasterTaskLoopDirective *D) { + VisitOMPLoopDirective(D); +} + void ASTStmtReader::VisitOMPDistributeDirective(OMPDistributeDirective *D) { VisitOMPLoopDirective(D); } @@ -3067,6 +3072,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_MASTER_TASKLOOP_DIRECTIVE: { + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; + unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPMasterTaskLoopDirective::CreateEmpty(Context, NumClauses, + CollapsedNum, Empty); + break; + } + case STMT_OMP_DISTRIBUTE_DIRECTIVE: { unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 2d0a643877c28..df89e44680d20 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6141,54 +6141,10 @@ void ASTRecordWriter::AddCXXCtorInitializers( void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { auto &Data = D->data(); Record->push_back(Data.IsLambda); - Record->push_back(Data.UserDeclaredConstructor); - Record->push_back(Data.UserDeclaredSpecialMembers); - Record->push_back(Data.Aggregate); - Record->push_back(Data.PlainOldData); - Record->push_back(Data.Empty); - Record->push_back(Data.Polymorphic); - Record->push_back(Data.Abstract); - Record->push_back(Data.IsStandardLayout); - Record->push_back(Data.IsCXX11StandardLayout); - Record->push_back(Data.HasBasesWithFields); - Record->push_back(Data.HasBasesWithNonStaticDataMembers); - Record->push_back(Data.HasPrivateFields); - Record->push_back(Data.HasProtectedFields); - Record->push_back(Data.HasPublicFields); - Record->push_back(Data.HasMutableFields); - Record->push_back(Data.HasVariantMembers); - Record->push_back(Data.HasOnlyCMembers); - Record->push_back(Data.HasInClassInitializer); - Record->push_back(Data.HasUninitializedReferenceMember); - Record->push_back(Data.HasUninitializedFields); - Record->push_back(Data.HasInheritedConstructor); - Record->push_back(Data.HasInheritedAssignment); - Record->push_back(Data.NeedOverloadResolutionForCopyConstructor); - Record->push_back(Data.NeedOverloadResolutionForMoveConstructor); - Record->push_back(Data.NeedOverloadResolutionForMoveAssignment); - Record->push_back(Data.NeedOverloadResolutionForDestructor); - Record->push_back(Data.DefaultedCopyConstructorIsDeleted); - 
Record->push_back(Data.DefaultedMoveConstructorIsDeleted); - Record->push_back(Data.DefaultedMoveAssignmentIsDeleted); - Record->push_back(Data.DefaultedDestructorIsDeleted); - Record->push_back(Data.HasTrivialSpecialMembers); - Record->push_back(Data.HasTrivialSpecialMembersForCall); - Record->push_back(Data.DeclaredNonTrivialSpecialMembers); - Record->push_back(Data.DeclaredNonTrivialSpecialMembersForCall); - Record->push_back(Data.HasIrrelevantDestructor); - Record->push_back(Data.HasConstexprNonCopyMoveConstructor); - Record->push_back(Data.HasDefaultedDefaultConstructor); - Record->push_back(Data.DefaultedDefaultConstructorIsConstexpr); - Record->push_back(Data.HasConstexprDefaultConstructor); - Record->push_back(Data.HasNonLiteralTypeFieldsOrBases); - Record->push_back(Data.ComputedVisibleConversions); - Record->push_back(Data.UserProvidedDefaultConstructor); - Record->push_back(Data.DeclaredSpecialMembers); - Record->push_back(Data.ImplicitCopyConstructorCanHaveConstParamForVBase); - Record->push_back(Data.ImplicitCopyConstructorCanHaveConstParamForNonVBase); - Record->push_back(Data.ImplicitCopyAssignmentHasConstParam); - Record->push_back(Data.HasDeclaredCopyConstructorWithConstParam); - Record->push_back(Data.HasDeclaredCopyAssignmentWithConstParam); + + #define FIELD(Name, Width, Merge) \ + Record->push_back(Data.Name); + #include "clang/AST/CXXRecordDeclDefinitionBits.def" // getODRHash will compute the ODRHash if it has not been previously computed. Record->push_back(D->getODRHash()); @@ -6210,7 +6166,9 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { AddCXXBaseSpecifiers(Data.vbases()); AddUnresolvedSet(Data.Conversions.get(*Writer->Context)); - AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context)); + Record->push_back(Data.ComputedVisibleConversions); + if (Data.ComputedVisibleConversions) + AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context)); // Data.Definition is the owning decl, no need to write it. 
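The reader, merge, and writer paths above now stamp out the same field list from ``CXXRecordDeclDefinitionBits.def``. A minimal self-contained sketch of the X-macro pattern, using a hypothetical ``fields.def`` with made-up widths:

.. code-block:: c++

  #include <vector>

  // fields.def would contain entries such as:
  //   FIELD(Aggregate, 1, NO_MERGE)
  //   FIELD(Empty,     1, NO_MERGE)
  // and end with '#undef FIELD'.

  struct Bits {
  #define FIELD(Name, Width, Merge) unsigned Name : Width;
  #include "fields.def"
  };

  void serialize(std::vector<unsigned> &Out, const Bits &B) {
  #define FIELD(Name, Width, Merge) Out.push_back(B.Name);
  #include "fields.def"
  }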
AddDeclRef(D->getFirstFriend()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index a6927f32c0eed..7cf4ab80722d1 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2223,6 +2223,12 @@ void ASTStmtWriter::VisitOMPTaskLoopSimdDirective(OMPTaskLoopSimdDirective *D) { Code = serialization::STMT_OMP_TASKLOOP_SIMD_DIRECTIVE; } +void ASTStmtWriter::VisitOMPMasterTaskLoopDirective( + OMPMasterTaskLoopDirective *D) { + VisitOMPLoopDirective(D); + Code = serialization::STMT_OMP_MASTER_TASKLOOP_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPDistributeDirective(OMPDistributeDirective *D) { VisitOMPLoopDirective(D); Code = serialization::STMT_OMP_DISTRIBUTE_DIRECTIVE; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 2e0625640889c..799e26e9b1c0c 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1265,6 +1265,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPCancelDirectiveClass: case Stmt::OMPTaskLoopDirectiveClass: case Stmt::OMPTaskLoopSimdDirectiveClass: + case Stmt::OMPMasterTaskLoopDirectiveClass: case Stmt::OMPDistributeDirectiveClass: case Stmt::OMPDistributeParallelForDirectiveClass: case Stmt::OMPDistributeParallelForSimdDirectiveClass: diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt index 81185d7417d79..05061f0a10a80 100644 --- a/clang/lib/Tooling/CMakeLists.txt +++ b/clang/lib/Tooling/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(Refactoring) add_subdirectory(ASTDiff) add_subdirectory(Syntax) add_subdirectory(DependencyScanning) +add_subdirectory(Transformer) add_clang_library(clangTooling AllTUsExecution.cpp diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index 35ecbd4a7fb4c..7436c72563277 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -122,14 +122,12 @@ DependencyScanningFilesystemSharedCache::get(StringRef Key) { return It.first->getValue(); } -llvm::ErrorOr -DependencyScanningWorkerFilesystem::status(const Twine &Path) { - SmallString<256> OwnedFilename; - StringRef Filename = Path.toStringRef(OwnedFilename); - - // Check the local cache first. - if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename)) - return Entry->getStatus(); +llvm::ErrorOr +DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( + const StringRef Filename) { + if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename)) { + return Entry; + } // FIXME: Handle PCM/PCH files. // FIXME: Handle module map files. @@ -160,7 +158,18 @@ DependencyScanningWorkerFilesystem::status(const Twine &Path) { // Store the result in the local cache. 
setCachedEntry(Filename, Result); - return Result->getStatus(); + return Result; +} + +llvm::ErrorOr +DependencyScanningWorkerFilesystem::status(const Twine &Path) { + SmallString<256> OwnedFilename; + StringRef Filename = Path.toStringRef(OwnedFilename); + const llvm::ErrorOr Result = + getOrCreateFileSystemEntry(Filename); + if (!Result) + return Result.getError(); + return (*Result)->getStatus(); } namespace { @@ -217,30 +226,9 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { SmallString<256> OwnedFilename; StringRef Filename = Path.toStringRef(OwnedFilename); - // Check the local cache first. - if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename)) - return createFile(Entry, PPSkipMappings); - - // FIXME: Handle PCM/PCH files. - // FIXME: Handle module map files. - - bool KeepOriginalSource = IgnoredFiles.count(Filename); - DependencyScanningFilesystemSharedCache::SharedFileSystemEntry - &SharedCacheEntry = SharedCache.get(Filename); - const CachedFileSystemEntry *Result; - { - std::unique_lock LockGuard(SharedCacheEntry.ValueLock); - CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value; - - if (!CacheEntry.isValid()) { - CacheEntry = CachedFileSystemEntry::createFileEntry( - Filename, getUnderlyingFS(), !KeepOriginalSource); - } - - Result = &CacheEntry; - } - - // Store the result in the local cache. - setCachedEntry(Filename, Result); - return createFile(Result, PPSkipMappings); + const llvm::ErrorOr Result = + getOrCreateFileSystemEntry(Filename); + if (!Result) + return Result.getError(); + return createFile(Result.get(), PPSkipMappings); } diff --git a/clang/lib/Tooling/Refactoring/CMakeLists.txt b/clang/lib/Tooling/Refactoring/CMakeLists.txt index e3961db2841ec..db889d2a06b5b 100644 --- a/clang/lib/Tooling/Refactoring/CMakeLists.txt +++ b/clang/lib/Tooling/Refactoring/CMakeLists.txt @@ -6,17 +6,12 @@ add_clang_library(clangToolingRefactoring AtomicChange.cpp Extract/Extract.cpp Extract/SourceExtraction.cpp - RangeSelector.cpp RefactoringActions.cpp Rename/RenamingAction.cpp Rename/SymbolOccurrences.cpp Rename/USRFinder.cpp Rename/USRFindingAction.cpp Rename/USRLocFinder.cpp - SourceCode.cpp - SourceCodeBuilders.cpp - Stencil.cpp - Transformer.cpp LINK_LIBS clangAST diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp index 0aae4906099bb..60d4dce0cb981 100644 --- a/clang/lib/Tooling/Tooling.cpp +++ b/clang/lib/Tooling/Tooling.cpp @@ -91,7 +91,34 @@ static const llvm::opt::ArgStringList *getCC1Arguments( // We expect to get back exactly one Command job, if we didn't something // failed. Extract that job from the Compilation. const driver::JobList &Jobs = Compilation->getJobs(); - if (Jobs.size() != 1 || !isa(*Jobs.begin())) { + const driver::ActionList &Actions = Compilation->getActions(); + bool OffloadCompilation = false; + if (Jobs.size() > 1) { + for (auto A : Actions){ + // On MacOSX real actions may end up being wrapped in BindArchAction + if (isa(A)) + A = *A->input_begin(); + if (isa(A)) { + // Offload compilation has 2 top-level actions, one (at the front) is + // the original host compilation and the other is offload action + // composed of at least one device compilation. For such case, general + // tooling will consider host-compilation only. For tooling on device + // compilation, device compilation only option, such as + // `--cuda-device-only`, needs specifying. 
+ assert(Actions.size() == 2); + assert( + isa(Actions.front()) || + // On MacOSX real actions may end up being wrapped in + // BindArchAction. + (isa(Actions.front()) && + isa(*Actions.front()->input_begin()))); + OffloadCompilation = true; + break; + } + } + } + if (Jobs.size() == 0 || !isa(*Jobs.begin()) || + (Jobs.size() > 1 && !OffloadCompilation)) { SmallString<256> error_msg; llvm::raw_svector_ostream error_stream(error_msg); Jobs.Print(error_stream, "; ", true); diff --git a/clang/lib/Tooling/Transformer/CMakeLists.txt b/clang/lib/Tooling/Transformer/CMakeLists.txt new file mode 100644 index 0000000000000..2e9ba58862cf5 --- /dev/null +++ b/clang/lib/Tooling/Transformer/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS Support) + +add_clang_library(clangTransformer + RangeSelector.cpp + SourceCode.cpp + SourceCodeBuilders.cpp + Stencil.cpp + Transformer.cpp + + LINK_LIBS + clangAST + clangASTMatchers + clangBasic + clangLex + clangToolingCore + clangToolingRefactoring + ) diff --git a/clang/lib/Tooling/Refactoring/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp similarity index 99% rename from clang/lib/Tooling/Refactoring/RangeSelector.cpp rename to clang/lib/Tooling/Transformer/RangeSelector.cpp index 972c7e65540a9..496c2d987c40f 100644 --- a/clang/lib/Tooling/Refactoring/RangeSelector.cpp +++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/RangeSelector.h" +#include "clang/Tooling/Transformer/RangeSelector.h" #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" -#include "clang/Tooling/Refactoring/SourceCode.h" +#include "clang/Tooling/Transformer/SourceCode.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" diff --git a/clang/lib/Tooling/Refactoring/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp similarity index 98% rename from clang/lib/Tooling/Refactoring/SourceCode.cpp rename to clang/lib/Tooling/Transformer/SourceCode.cpp index cee8f43f3e609..836401d1e605c 100644 --- a/clang/lib/Tooling/Refactoring/SourceCode.cpp +++ b/clang/lib/Tooling/Transformer/SourceCode.cpp @@ -9,7 +9,7 @@ // This file provides functions that simplify extraction of source code. 
// //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/SourceCode.h" +#include "clang/Tooling/Transformer/SourceCode.h" #include "clang/Lex/Lexer.h" using namespace clang; diff --git a/clang/lib/Tooling/Refactoring/SourceCodeBuilders.cpp b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp similarity index 98% rename from clang/lib/Tooling/Refactoring/SourceCodeBuilders.cpp rename to clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp index 2499c0f1eb398..56ec45e8fd1dc 100644 --- a/clang/lib/Tooling/Refactoring/SourceCodeBuilders.cpp +++ b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/SourceCodeBuilders.h" +#include "clang/Tooling/Transformer/SourceCodeBuilders.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" -#include "clang/Tooling/Refactoring/SourceCode.h" +#include "clang/Tooling/Transformer/SourceCode.h" #include "llvm/ADT/Twine.h" #include diff --git a/clang/lib/Tooling/Refactoring/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp similarity index 98% rename from clang/lib/Tooling/Refactoring/Stencil.cpp rename to clang/lib/Tooling/Transformer/Stencil.cpp index ebfe78099db21..82fde2bc4db02 100644 --- a/clang/lib/Tooling/Refactoring/Stencil.cpp +++ b/clang/lib/Tooling/Transformer/Stencil.cpp @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/Stencil.h" +#include "clang/Tooling/Transformer/Stencil.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Lex/Lexer.h" -#include "clang/Tooling/Refactoring/SourceCode.h" -#include "clang/Tooling/Refactoring/SourceCodeBuilders.h" +#include "clang/Tooling/Transformer/SourceCode.h" +#include "clang/Tooling/Transformer/SourceCodeBuilders.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Errc.h" #include diff --git a/clang/lib/Tooling/Refactoring/Transformer.cpp b/clang/lib/Tooling/Transformer/Transformer.cpp similarity index 98% rename from clang/lib/Tooling/Refactoring/Transformer.cpp rename to clang/lib/Tooling/Transformer/Transformer.cpp index 905d5944449c9..1aecf6ab8e140 100644 --- a/clang/lib/Tooling/Refactoring/Transformer.cpp +++ b/clang/lib/Tooling/Transformer/Transformer.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/Transformer.h" +#include "clang/Tooling/Transformer/Transformer.h" #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -14,15 +14,15 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Rewrite/Core/Rewriter.h" #include "clang/Tooling/Refactoring/AtomicChange.h" -#include "clang/Tooling/Refactoring/SourceCode.h" +#include "clang/Tooling/Transformer/SourceCode.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" +#include #include #include #include -#include using namespace clang; using namespace tooling; diff --git a/clang/test/ClangScanDeps/Inputs/headerwithdirnamefollowedbyinclude.json b/clang/test/ClangScanDeps/Inputs/headerwithdirnamefollowedbyinclude.json new file mode 100644 
index 0000000000000..de7759d0b110c --- /dev/null +++ b/clang/test/ClangScanDeps/Inputs/headerwithdirnamefollowedbyinclude.json @@ -0,0 +1,7 @@ +[ + { + "directory": "DIR", + "command": "clang -c -IDIR -IInputs DIR/headerwithdirname_input.cpp", + "file": "DIR/headerwithdirname_input.cpp" + } +] diff --git a/clang/test/ClangScanDeps/headerwithdirnamefollowedbyinclude.cpp b/clang/test/ClangScanDeps/headerwithdirnamefollowedbyinclude.cpp new file mode 100644 index 0000000000000..e8e8a69e82e26 --- /dev/null +++ b/clang/test/ClangScanDeps/headerwithdirnamefollowedbyinclude.cpp @@ -0,0 +1,21 @@ +// RUN: rm -rf %t.dir +// RUN: rm -rf %t.dir/foodir +// RUN: rm -rf %t.cdb + +// RUN: mkdir -p %t.dir +// RUN: mkdir -p %t.dir/foodir + +// RUN: cp %S/Inputs/header.h %t.dir/foodir/foodirheader.h +// RUN: cp %s %t.dir/headerwithdirname_input.cpp +// RUN: mkdir %t.dir/Inputs +// RUN: cp %S/Inputs/foodir %t.dir/Inputs/foodir +// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/headerwithdirnamefollowedbyinclude.json > %t.cdb +// +// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | FileCheck %s + +#include +#include "foodir/foodirheader.h" + +// CHECK: headerwithdirname_input.o +// CHECK-NEXT: headerwithdirname_input.cpp +// CHECK-NEXT: Inputs{{/|\\}}foodir diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index 2eddd03786a52..5e35a2dc4dd1c 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ -// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone \ +// RUN: -flax-vector-conversions=none -emit-llvm -o - %s \ // RUN: | opt -S -mem2reg \ // RUN: | FileCheck %s @@ -406,7 +407,7 @@ int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { - return vmla_s16(v1, v2, v3); + return (int8x8_t)vmla_s16(v1, v2, v3); } // CHECK-LABEL: @test_vmla_s32( @@ -527,7 +528,7 @@ int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { - return vmls_s16(v1, v2, v3); + return (int8x8_t)vmls_s16(v1, v2, v3); } // CHECK-LABEL: @test_vmls_s32( @@ -978,7 +979,7 @@ int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> // CHECK: ret <8 x i8> [[TMP4]] int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { - return vbsl_s16(v1, v2, v3); + return (int8x8_t)vbsl_s16(v1, v2, v3); } // CHECK-LABEL: @test_vbsl_s32( @@ -1003,7 +1004,7 @@ int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] // CHECK: ret <1 x i64> [[VBSL5_I]] -uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { +int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) { return vbsl_s64(v1, v2, v3); } @@ -1057,19 +1058,18 @@ uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { } // CHECK-LABEL: @test_vbsl_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32> -// CHECK: 
[[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8> // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[TMP0]], [[VBSL1_I]] -// CHECK: [[TMP4:%.*]] = xor <2 x i32> [[TMP0]], +// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]] +// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]] // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> // CHECK: ret <2 x float> [[TMP5]] -float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { +float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) { return vbsl_f32(v1, v2, v3); } @@ -4661,7 +4661,7 @@ int64x2_t test_vshlq_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vshl_n_u8( // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, // CHECK: ret <8 x i8> [[VSHL_N]] -int8x8_t test_vshl_n_u8(int8x8_t a) { +uint8x8_t test_vshl_n_u8(uint8x8_t a) { return vshl_n_u8(a, 3); } @@ -4670,7 +4670,7 @@ int8x8_t test_vshl_n_u8(int8x8_t a) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], // CHECK: ret <4 x i16> [[VSHL_N]] -int16x4_t test_vshl_n_u16(int16x4_t a) { +uint16x4_t test_vshl_n_u16(uint16x4_t a) { return vshl_n_u16(a, 3); } @@ -4679,14 +4679,14 @@ int16x4_t test_vshl_n_u16(int16x4_t a) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], // CHECK: ret <2 x i32> [[VSHL_N]] -int32x2_t test_vshl_n_u32(int32x2_t a) { +uint32x2_t test_vshl_n_u32(uint32x2_t a) { return vshl_n_u32(a, 3); } // CHECK-LABEL: @test_vshlq_n_u8( // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, // CHECK: ret <16 x i8> [[VSHL_N]] -int8x16_t test_vshlq_n_u8(int8x16_t a) { +uint8x16_t test_vshlq_n_u8(uint8x16_t a) { return vshlq_n_u8(a, 3); } @@ -4695,7 +4695,7 @@ int8x16_t test_vshlq_n_u8(int8x16_t a) { // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], // CHECK: ret <8 x i16> [[VSHL_N]] -int16x8_t test_vshlq_n_u16(int16x8_t a) { +uint16x8_t test_vshlq_n_u16(uint16x8_t a) { return vshlq_n_u16(a, 3); } @@ -4704,7 +4704,7 @@ int16x8_t test_vshlq_n_u16(int16x8_t a) { // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], // CHECK: ret <4 x i32> [[VSHL_N]] -int32x4_t test_vshlq_n_u32(int32x4_t a) { +uint32x4_t test_vshlq_n_u32(uint32x4_t a) { return vshlq_n_u32(a, 3); } @@ -4713,7 +4713,7 @@ int32x4_t test_vshlq_n_u32(int32x4_t a) { // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], // CHECK: ret <2 x i64> [[VSHL_N]] -int64x2_t test_vshlq_n_u64(int64x2_t a) { +uint64x2_t test_vshlq_n_u64(uint64x2_t a) { return vshlq_n_u64(a, 3); } @@ -4779,7 +4779,7 @@ int64x2_t test_vshrq_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vshr_n_u8( // CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, // CHECK: ret <8 x i8> [[VSHR_N]] -int8x8_t test_vshr_n_u8(int8x8_t a) { +uint8x8_t test_vshr_n_u8(uint8x8_t a) { return vshr_n_u8(a, 3); } @@ -4788,7 +4788,7 @@ int8x8_t test_vshr_n_u8(int8x8_t a) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] 
to <4 x i16> // CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], // CHECK: ret <4 x i16> [[VSHR_N]] -int16x4_t test_vshr_n_u16(int16x4_t a) { +uint16x4_t test_vshr_n_u16(uint16x4_t a) { return vshr_n_u16(a, 3); } @@ -4797,14 +4797,14 @@ int16x4_t test_vshr_n_u16(int16x4_t a) { // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], // CHECK: ret <2 x i32> [[VSHR_N]] -int32x2_t test_vshr_n_u32(int32x2_t a) { +uint32x2_t test_vshr_n_u32(uint32x2_t a) { return vshr_n_u32(a, 3); } // CHECK-LABEL: @test_vshrq_n_u8( // CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, // CHECK: ret <16 x i8> [[VSHR_N]] -int8x16_t test_vshrq_n_u8(int8x16_t a) { +uint8x16_t test_vshrq_n_u8(uint8x16_t a) { return vshrq_n_u8(a, 3); } @@ -4813,7 +4813,7 @@ int8x16_t test_vshrq_n_u8(int8x16_t a) { // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], // CHECK: ret <8 x i16> [[VSHR_N]] -int16x8_t test_vshrq_n_u16(int16x8_t a) { +uint16x8_t test_vshrq_n_u16(uint16x8_t a) { return vshrq_n_u16(a, 3); } @@ -4822,7 +4822,7 @@ int16x8_t test_vshrq_n_u16(int16x8_t a) { // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], // CHECK: ret <4 x i32> [[VSHR_N]] -int32x4_t test_vshrq_n_u32(int32x4_t a) { +uint32x4_t test_vshrq_n_u32(uint32x4_t a) { return vshrq_n_u32(a, 3); } @@ -4831,7 +4831,7 @@ int32x4_t test_vshrq_n_u32(int32x4_t a) { // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], // CHECK: ret <2 x i64> [[VSHR_N]] -int64x2_t test_vshrq_n_u64(int64x2_t a) { +uint64x2_t test_vshrq_n_u64(uint64x2_t a) { return vshrq_n_u64(a, 3); } @@ -4915,7 +4915,7 @@ int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] -int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) { +uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { return vsra_n_u8(a, b, 3); } @@ -4927,7 +4927,7 @@ int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) { // CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i16> [[TMP4]] -int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) { +uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { return vsra_n_u16(a, b, 3); } @@ -4939,7 +4939,7 @@ int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) { // CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i32> [[TMP4]] -int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) { +uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { return vsra_n_u32(a, b, 3); } @@ -4947,7 +4947,7 @@ int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) { // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] -int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) { +uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { return vsraq_n_u8(a, b, 3); } @@ -4959,7 +4959,7 @@ int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) { // CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] // CHECK: ret <8 x i16> [[TMP4]] -int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) { +uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { return vsraq_n_u16(a, b, 3); } @@ 
-4971,7 +4971,7 @@ int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) { // CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] // CHECK: ret <4 x i32> [[TMP4]] -int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) { +uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { return vsraq_n_u32(a, b, 3); } @@ -4983,7 +4983,7 @@ int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) { // CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] // CHECK: ret <2 x i64> [[TMP4]] -int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) { +uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { return vsraq_n_u64(a, b, 3); } @@ -5049,7 +5049,7 @@ int64x2_t test_vrshrq_n_s64(int64x2_t a) { // CHECK-LABEL: @test_vrshr_n_u8( // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> ) // CHECK: ret <8 x i8> [[VRSHR_N]] -int8x8_t test_vrshr_n_u8(int8x8_t a) { +uint8x8_t test_vrshr_n_u8(uint8x8_t a) { return vrshr_n_u8(a, 3); } @@ -5058,7 +5058,7 @@ int8x8_t test_vrshr_n_u8(int8x8_t a) { // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) // CHECK: ret <4 x i16> [[VRSHR_N1]] -int16x4_t test_vrshr_n_u16(int16x4_t a) { +uint16x4_t test_vrshr_n_u16(uint16x4_t a) { return vrshr_n_u16(a, 3); } @@ -5067,14 +5067,14 @@ int16x4_t test_vrshr_n_u16(int16x4_t a) { // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) // CHECK: ret <2 x i32> [[VRSHR_N1]] -int32x2_t test_vrshr_n_u32(int32x2_t a) { +uint32x2_t test_vrshr_n_u32(uint32x2_t a) { return vrshr_n_u32(a, 3); } // CHECK-LABEL: @test_vrshrq_n_u8( // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> ) // CHECK: ret <16 x i8> [[VRSHR_N]] -int8x16_t test_vrshrq_n_u8(int8x16_t a) { +uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { return vrshrq_n_u8(a, 3); } @@ -5083,7 +5083,7 @@ int8x16_t test_vrshrq_n_u8(int8x16_t a) { // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) // CHECK: ret <8 x i16> [[VRSHR_N1]] -int16x8_t test_vrshrq_n_u16(int16x8_t a) { +uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { return vrshrq_n_u16(a, 3); } @@ -5092,7 +5092,7 @@ int16x8_t test_vrshrq_n_u16(int16x8_t a) { // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) // CHECK: ret <4 x i32> [[VRSHR_N1]] -int32x4_t test_vrshrq_n_u32(int32x4_t a) { +uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { return vrshrq_n_u32(a, 3); } @@ -5101,7 +5101,7 @@ int32x4_t test_vrshrq_n_u32(int32x4_t a) { // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) // CHECK: ret <2 x i64> [[VRSHR_N1]] -int64x2_t test_vrshrq_n_u64(int64x2_t a) { +uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { return vrshrq_n_u64(a, 3); } @@ -5185,7 +5185,7 @@ int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> ) // CHECK: [[TMP0:%.*]] = add <8 x i8> 
%a, [[VRSHR_N]] // CHECK: ret <8 x i8> [[TMP0]] -int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) { +uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { return vrsra_n_u8(a, b, 3); } @@ -5197,7 +5197,7 @@ int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i16> [[TMP3]] -int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) { +uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { return vrsra_n_u16(a, b, 3); } @@ -5209,7 +5209,7 @@ int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i32> [[TMP3]] -int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) { +uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { return vrsra_n_u32(a, b, 3); } @@ -5217,7 +5217,7 @@ int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) { // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> ) // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] // CHECK: ret <16 x i8> [[TMP0]] -int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) { +uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { return vrsraq_n_u8(a, b, 3); } @@ -5229,7 +5229,7 @@ int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <8 x i16> [[TMP3]] -int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) { +uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { return vrsraq_n_u16(a, b, 3); } @@ -5241,7 +5241,7 @@ int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <4 x i32> [[TMP3]] -int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) { +uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { return vrsraq_n_u32(a, b, 3); } @@ -5253,7 +5253,7 @@ int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) { // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <2 x i64> [[TMP3]] -int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) { +uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { return vrsraq_n_u64(a, b, 3); } @@ -5329,7 +5329,7 @@ int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { // CHECK-LABEL: @test_vsri_n_u8( // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) // CHECK: ret <8 x i8> [[VSRI_N]] -int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) { +uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { return vsri_n_u8(a, b, 3); } @@ -5340,7 +5340,7 @@ int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) { // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3) // CHECK: ret <4 x i16> [[VSRI_N2]] -int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) { +uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { return vsri_n_u16(a, b, 3); } @@ -5351,14 +5351,14 @@ int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) { // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3) // CHECK: ret <2 x i32> [[VSRI_N2]] -int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) { +uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { return vsri_n_u32(a, b, 3); } // CHECK-LABEL: @test_vsriq_n_u8( // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) // CHECK: ret <16 x i8> [[VSRI_N]] -int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) { +uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { return vsriq_n_u8(a, b, 3); } @@ -5369,7 +5369,7 @@ int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) { // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3) // CHECK: ret <8 x i16> [[VSRI_N2]] -int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) { +uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { return vsriq_n_u16(a, b, 3); } @@ -5380,7 +5380,7 @@ int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) { // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3) // CHECK: ret <4 x i32> [[VSRI_N2]] -int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) { +uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { return vsriq_n_u32(a, b, 3); } @@ -5391,7 +5391,7 @@ int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) { // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3) // CHECK: ret <2 x i64> [[VSRI_N2]] -int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) { +uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) { return vsriq_n_u64(a, b, 3); } @@ -5608,7 +5608,7 @@ poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { // CHECK-LABEL: @test_vqshlu_n_s8( // CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> ) // CHECK: ret <8 x i8> [[VQSHLU_N]] -int8x8_t test_vqshlu_n_s8(int8x8_t a) { +uint8x8_t test_vqshlu_n_s8(int8x8_t a) { return vqshlu_n_s8(a, 3); } @@ -5617,7 +5617,7 @@ int8x8_t test_vqshlu_n_s8(int8x8_t a) { // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> ) // CHECK: ret <4 x i16> [[VQSHLU_N1]] -int16x4_t test_vqshlu_n_s16(int16x4_t a) { +uint16x4_t test_vqshlu_n_s16(int16x4_t a) { return vqshlu_n_s16(a, 3); } @@ -5626,14 +5626,14 @@ int16x4_t test_vqshlu_n_s16(int16x4_t a) { // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> ) // CHECK: ret <2 x i32> [[VQSHLU_N1]] -int32x2_t test_vqshlu_n_s32(int32x2_t a) { +uint32x2_t test_vqshlu_n_s32(int32x2_t a) { return vqshlu_n_s32(a, 3); } // CHECK-LABEL: @test_vqshluq_n_s8( // CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> ) // CHECK: ret <16 x i8> [[VQSHLU_N]] -int8x16_t test_vqshluq_n_s8(int8x16_t a) { +uint8x16_t test_vqshluq_n_s8(int8x16_t a) { return vqshluq_n_s8(a, 3); } @@ -5642,7 +5642,7 @@ int8x16_t test_vqshluq_n_s8(int8x16_t a) { // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHLU_N1:%.*]] = 
call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> ) // CHECK: ret <8 x i16> [[VQSHLU_N1]] -int16x8_t test_vqshluq_n_s16(int16x8_t a) { +uint16x8_t test_vqshluq_n_s16(int16x8_t a) { return vqshluq_n_s16(a, 3); } @@ -5651,7 +5651,7 @@ int16x8_t test_vqshluq_n_s16(int16x8_t a) { // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> ) // CHECK: ret <4 x i32> [[VQSHLU_N1]] -int32x4_t test_vqshluq_n_s32(int32x4_t a) { +uint32x4_t test_vqshluq_n_s32(int32x4_t a) { return vqshluq_n_s32(a, 3); } @@ -5660,7 +5660,7 @@ int32x4_t test_vqshluq_n_s32(int32x4_t a) { // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> ) // CHECK: ret <2 x i64> [[VQSHLU_N1]] -int64x2_t test_vqshluq_n_s64(int64x2_t a) { +uint64x2_t test_vqshluq_n_s64(int64x2_t a) { return vqshluq_n_s64(a, 3); } @@ -5795,7 +5795,7 @@ uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3) // CHECK: ret <8 x i8> [[VQSHRUN_N1]] -int8x8_t test_vqshrun_n_s16(int16x8_t a) { +uint8x8_t test_vqshrun_n_s16(int16x8_t a) { return vqshrun_n_s16(a, 3); } @@ -5804,7 +5804,7 @@ int8x8_t test_vqshrun_n_s16(int16x8_t a) { // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9) // CHECK: ret <4 x i16> [[VQSHRUN_N1]] -int16x4_t test_vqshrun_n_s32(int32x4_t a) { +uint16x4_t test_vqshrun_n_s32(int32x4_t a) { return vqshrun_n_s32(a, 9); } @@ -5813,7 +5813,7 @@ int16x4_t test_vqshrun_n_s32(int32x4_t a) { // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19) // CHECK: ret <2 x i32> [[VQSHRUN_N1]] -int32x2_t test_vqshrun_n_s64(int64x2_t a) { +uint32x2_t test_vqshrun_n_s64(int64x2_t a) { return vqshrun_n_s64(a, 19); } @@ -5966,7 +5966,7 @@ uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) // CHECK: ret <8 x i8> [[VQRSHRUN_N1]] -int8x8_t test_vqrshrun_n_s16(int16x8_t a) { +uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { return vqrshrun_n_s16(a, 3); } @@ -5975,7 +5975,7 @@ int8x8_t test_vqrshrun_n_s16(int16x8_t a) { // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) // CHECK: ret <4 x i16> [[VQRSHRUN_N1]] -int16x4_t test_vqrshrun_n_s32(int32x4_t a) { +uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { return vqrshrun_n_s32(a, 9); } @@ -5984,7 +5984,7 @@ int16x4_t test_vqrshrun_n_s32(int32x4_t a) { // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) // CHECK: ret <2 x i32> [[VQRSHRUN_N1]] -int32x2_t test_vqrshrun_n_s64(int64x2_t a) { +uint32x2_t test_vqrshrun_n_s64(int64x2_t a) { return vqrshrun_n_s64(a, 19); } 
diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c index de1dd4a059770..dc15923a41749 100644 --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -146,14 +146,14 @@ int16x8_t test_vcvtq_s16_f16 (float16x8_t a) { // CHECK-LABEL: test_vcvt_u16_f16 // CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16> // CHECK: ret <4 x i16> [[VCVT]] -int16x4_t test_vcvt_u16_f16 (float16x4_t a) { +uint16x4_t test_vcvt_u16_f16 (float16x4_t a) { return vcvt_u16_f16(a); } // CHECK-LABEL: test_vcvtq_u16_f16 // CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16> // CHECK: ret <8 x i16> [[VCVT]] -int16x8_t test_vcvtq_u16_f16 (float16x8_t a) { +uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) { return vcvtq_u16_f16(a); } @@ -167,7 +167,7 @@ int16x4_t test_vcvta_s16_f16 (float16x4_t a) { // CHECK-LABEL: test_vcvta_u16_f16 // CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a) // CHECK: ret <4 x i16> [[VCVT]] -int16x4_t test_vcvta_u16_f16 (float16x4_t a) { +uint16x4_t test_vcvta_u16_f16 (float16x4_t a) { return vcvta_u16_f16(a); } diff --git a/clang/test/CodeGen/arm64-vrnd.c b/clang/test/CodeGen/arm64-vrnd.c index 7729c094a20cc..c710caedf181b 100644 --- a/clang/test/CodeGen/arm64-vrnd.c +++ b/clang/test/CodeGen/arm64-vrnd.c @@ -1,22 +1,22 @@ -// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -flax-vector-conversions=none -emit-llvm -o - %s | FileCheck %s #include -int64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); } +float64x2_t rnd5(float64x2_t a) { return vrndq_f64(a); } // CHECK: call <2 x double> @llvm.trunc.v2f64(<2 x double> -int64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); } +float64x2_t rnd9(float64x2_t a) { return vrndnq_f64(a); } // CHECK: call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> -int64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); } +float64x2_t rnd13(float64x2_t a) { return vrndmq_f64(a); } // CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> -int64x2_t rnd18(float64x2_t a) { return vrndpq_f64(a); } +float64x2_t rnd18(float64x2_t a) { return vrndpq_f64(a); } // CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> -int64x2_t rnd22(float64x2_t a) { return vrndaq_f64(a); } +float64x2_t rnd22(float64x2_t a) { return vrndaq_f64(a); } // CHECK: call <2 x double> @llvm.round.v2f64(<2 x double> -int64x2_t rnd25(float64x2_t a) { return vrndxq_f64(a); } +float64x2_t rnd25(float64x2_t a) { return vrndxq_f64(a); } // CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double> diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c index b09e63883e260..93b11b2c52372 100644 --- a/clang/test/CodeGen/arm_neon_intrinsics.c +++ b/clang/test/CodeGen/arm_neon_intrinsics.c @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\ // RUN: -target-cpu swift -fallow-half-arguments-and-returns \ // RUN: -target-feature +fullfp16 -ffreestanding \ +// RUN: -flax-vector-conversions=none \ // RUN: -disable-O0-optnone -emit-llvm -o - %s \ // RUN: | opt -S -mem2reg | FileCheck %s @@ -2184,8 +2185,8 @@ float32x2_t test_vcreate_f32(uint64_t a) { // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) // CHECK: ret <8 x i8> [[VCLZ_V_I]] 
-uint8x8_t test_vcreate_u8(uint64_t a) { - return vclz_s8(vcreate_u8(a)); +int8x8_t test_vcreate_u8(uint64_t a) { + return vclz_s8((int8x8_t)vcreate_u8(a)); } // CHECK-LABEL: @test_vcreate_u16( @@ -2194,8 +2195,8 @@ uint8x8_t test_vcreate_u8(uint64_t a) { // CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VCLZ_V1_I]] -uint16x4_t test_vcreate_u16(uint64_t a) { - return vclz_s16(vcreate_u16(a)); +int16x4_t test_vcreate_u16(uint64_t a) { + return vclz_s16((int16x4_t)vcreate_u16(a)); } // CHECK-LABEL: @test_vcreate_u32( @@ -2204,8 +2205,8 @@ uint16x4_t test_vcreate_u16(uint64_t a) { // CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false) // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VCLZ_V1_I]] -uint32x2_t test_vcreate_u32(uint64_t a) { - return vclz_s32(vcreate_u32(a)); +int32x2_t test_vcreate_u32(uint64_t a) { + return vclz_s32((int32x2_t)vcreate_u32(a)); } // CHECK-LABEL: @test_vcreate_u64( @@ -2235,7 +2236,7 @@ poly8x8_t test_vcreate_p8(uint64_t a) { // CHECK: ret <4 x i16> [[TMP4]] poly16x4_t test_vcreate_p16(uint64_t a) { poly16x4_t tmp = vcreate_p16(a); - return vbsl_p16(tmp, tmp, tmp); + return vbsl_p16((uint16x4_t)tmp, tmp, tmp); } // CHECK-LABEL: @test_vcreate_s64( @@ -2830,8 +2831,8 @@ int64x1_t test_vdup_n_s64(int64_t a) { // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] // CHECK: ret <1 x i64> [[ADD_I]] -uint64x1_t test_vdup_n_u64(uint64_t a) { - int64x1_t tmp = vdup_n_u64(a); +int64x1_t test_vdup_n_u64(uint64_t a) { + int64x1_t tmp = (int64x1_t)vdup_n_u64(a); return vadd_s64(tmp, tmp); } @@ -2851,7 +2852,7 @@ int64x2_t test_vdupq_n_s64(int64_t a) { // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vdupq_n_u64(uint64_t a) { - int64x2_t tmp = vdupq_n_u64(a); + uint64x2_t tmp = vdupq_n_u64(a); return vaddq_u64(tmp, tmp); } diff --git a/clang/test/CodeGen/bmi-builtins.c b/clang/test/CodeGen/bmi-builtins.c index 9eda3f614d4aa..9f2d776299f81 100644 --- a/clang/test/CodeGen/bmi-builtins.c +++ b/clang/test/CodeGen/bmi-builtins.c @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK_TZCNT +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=CHECK-TZCNT #include @@ -13,12 +14,57 @@ // instruction is identical in hardware, the AMD and Intel // intrinsics are different! 
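(Editorial aside, not part of the patch: a minimal sketch of the naming split the comment above refers to, assuming an x86-64 build where the tzcnt intrinsics are usable — either -mbmi, or the MS-compatibility mode this patch enables — and the usual <x86intrin.h> entry point. The AMD-style double-underscore and Intel-style single-underscore spellings both lower to llvm.cttz, which is what the CHECK-TZCNT lines below verify; the hypothetical function name is illustrative only.)

#include <x86intrin.h>  /* assumed entry point; provides both spellings */

unsigned trailing_zeros(unsigned x) {
  unsigned amd = __tzcnt_u32(x);   /* AMD/BMI double-underscore spelling */
  unsigned intel = _tzcnt_u32(x);  /* Intel single-underscore spelling   */
  /* Both count trailing zero bits; e.g. x == 8 yields 3 from each call. */
  return amd == intel ? amd : ~0u;
}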
+unsigned short test_tzcnt_u16(unsigned short __X) { + // CHECK-TZCNT-LABEL: test_tzcnt_u16 + // CHECK-TZCNT: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false) + return _tzcnt_u16(__X); +} + unsigned short test__tzcnt_u16(unsigned short __X) { - // CHECK-LABEL: test__tzcnt_u16 - // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false) + // CHECK-TZCNT-LABEL: test__tzcnt_u16 + // CHECK-TZCNT: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false) return __tzcnt_u16(__X); } +unsigned int test__tzcnt_u32(unsigned int __X) { + // CHECK-TZCNT-LABEL: test__tzcnt_u32 + // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) + return __tzcnt_u32(__X); +} + +int test_mm_tzcnt_32(unsigned int __X) { + // CHECK-TZCNT-LABEL: test_mm_tzcnt_32 + // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) + return _mm_tzcnt_32(__X); +} + +unsigned int test_tzcnt_u32(unsigned int __X) { + // CHECK-TZCNT-LABEL: test_tzcnt_u32 + // CHECK-TZCNT: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) + return _tzcnt_u32(__X); +} + +#ifdef __x86_64__ +unsigned long long test__tzcnt_u64(unsigned long long __X) { + // CHECK-TZCNT-LABEL: test__tzcnt_u64 + // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) + return __tzcnt_u64(__X); +} + +long long test_mm_tzcnt_64(unsigned long long __X) { + // CHECK-TZCNT-LABEL: test_mm_tzcnt_64 + // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) + return _mm_tzcnt_64(__X); +} + +unsigned long long test_tzcnt_u64(unsigned long long __X) { + // CHECK-TZCNT-LABEL: test_tzcnt_u64 + // CHECK-TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) + return _tzcnt_u64(__X); +} +#endif + +#if !defined(TEST_TZCNT) unsigned int test__andn_u32(unsigned int __X, unsigned int __Y) { // CHECK-LABEL: test__andn_u32 // CHECK: xor i32 %{{.*}}, -1 @@ -53,18 +99,6 @@ unsigned int test__blsr_u32(unsigned int __X) { return __blsr_u32(__X); } -unsigned int test__tzcnt_u32(unsigned int __X) { - // CHECK-LABEL: test__tzcnt_u32 - // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) - return __tzcnt_u32(__X); -} - -int test_mm_tzcnt_32(unsigned int __X) { - // CHECK-LABEL: test_mm_tzcnt_32 - // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) - return _mm_tzcnt_32(__X); -} - #ifdef __x86_64__ unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test__andn_u64 @@ -99,28 +133,10 @@ unsigned long long test__blsr_u64(unsigned long long __X) { // CHECK: and i64 %{{.*}}, %{{.*}} return __blsr_u64(__X); } - -unsigned long long test__tzcnt_u64(unsigned long long __X) { - // CHECK-LABEL: test__tzcnt_u64 - // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) - return __tzcnt_u64(__X); -} - -long long test_mm_tzcnt_64(unsigned long long __X) { - // CHECK-LABEL: test_mm_tzcnt_64 - // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) - return _mm_tzcnt_64(__X); -} #endif // Intel intrinsics -unsigned short test_tzcnt_u16(unsigned short __X) { - // CHECK-LABEL: test_tzcnt_u16 - // CHECK: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false) - return _tzcnt_u16(__X); -} - unsigned int test_andn_u32(unsigned int __X, unsigned int __Y) { // CHECK-LABEL: test_andn_u32 // CHECK: xor i32 %{{.*}}, -1 @@ -160,12 +176,6 @@ unsigned int test_blsr_u32(unsigned int __X) { return _blsr_u32(__X); } -unsigned int test_tzcnt_u32(unsigned int __X) { - // CHECK-LABEL: test_tzcnt_u32 - // CHECK: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false) - return _tzcnt_u32(__X); -} - #ifdef __x86_64__ unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) { // CHECK-LABEL: test_andn_u64 @@ -206,10 +216,6 @@ unsigned long long 
test_blsr_u64(unsigned long long __X) { // CHECK: and i64 %{{.*}}, %{{.*}} return _blsr_u64(__X); } - -unsigned long long test_tzcnt_u64(unsigned long long __X) { - // CHECK-LABEL: test_tzcnt_u64 - // CHECK: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) - return _tzcnt_u64(__X); -} #endif + +#endif // !defined(TEST_TZCNT) diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c new file mode 100644 index 0000000000000..3cf53c86de506 --- /dev/null +++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-NULL-IS-INVALID-PTR +// RUN: %clang_cc1 -x c -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-NULL-IS-VALID-PTR + +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-NULL-IS-INVALID-PTR +// RUN: %clang_cc1 -x c++ -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-NULL-IS-VALID-PTR + +#ifdef __cplusplus +extern "C" { +#endif + +// CHECK-LABEL: @baseline +char *baseline(char *base, unsigned long offset) { + // CHECK: call void @__ubsan_handle_pointer_overflow( + return base + offset; +} + +// CHECK-LABEL: @blacklist_0 +__attribute__((no_sanitize("undefined"))) char *blacklist_0(char *base, unsigned long offset) { + return base + offset; +} + +// CHECK-LABEL: @blacklist_1 +__attribute__((no_sanitize("pointer-overflow"))) char *blacklist_1(char *base, unsigned long offset) { + return base + offset; +} + +// CHECK-LABEL: @ignore_non_default_address_space +__attribute__((address_space(1))) char *ignore_non_default_address_space(__attribute__((address_space(1))) char *base, unsigned long offset) { + return base + offset; +} + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c new file mode 100644 index 0000000000000..09d644a859832 --- /dev/null +++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s + +// RUN: %clang_cc1 -x c++ -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct S { + int x, y; +}; + +// CHECK-LABEL: define i64 @{{.*}}get_offset_of_y_naively{{.*}}( +uintptr_t get_offset_of_y_naively() { + // CHECK: [[ENTRY:.*]]: + // CHECK-NEXT: 
ret i64 ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) + // CHECK-NEXT: } + return ((uintptr_t)(&(((struct S *)0)->y))); +} + +// CHECK-LABEL: define i64 @{{.*}}get_offset_of_y_via_builtin{{.*}}( +uintptr_t get_offset_of_y_via_builtin() { + // CHECK: [[ENTRY:.*]]: + // CHECK-NEXT: ret i64 4 + // CHECK-NEXT: } + return __builtin_offsetof(struct S, y); +} + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c new file mode 100644 index 0000000000000..3f1e2250d3469 --- /dev/null +++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-when-nullptr-is-defined.c @@ -0,0 +1,66 @@ +// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLNOTOK,CHECK-SANITIZE-NULLNOTOK-C,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLNOTOK,CHECK-SANITIZE-NULLNOTOK-C,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLNOTOK,CHECK-SANITIZE-NULLNOTOK-C,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// RUN: %clang_cc1 -x c -fno-delete-null-pointer-checks -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLOK,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -x c -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLOK,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -x c -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLOK,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// RUN: %clang_cc1 -x c++ -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" 
--check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLNOTOK,CHECK-SANITIZE-NULLNOTOK-CPP,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLNOTOK,CHECK-SANITIZE-NULLNOTOK-CPP,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLNOTOK,CHECK-SANITIZE-NULLNOTOK-CPP,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// RUN: %clang_cc1 -x c++ -fno-delete-null-pointer-checks -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c++ -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLOK,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -x c++ -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLOK,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -x c++ -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-NULLOK,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 15 } } + +#ifdef __cplusplus +extern "C" { +#endif + +char *add_unsigned(char *base, unsigned long offset) { + // CHECK: define i8* @add_unsigned(i8* %[[BASE:.*]], i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 
%[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NULLNOTOK-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NULLNOTOK-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-NULLNOTOK-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NULLNOTOK-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NULLNOTOK-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NULLNOTOK-NEXT: br i1 %[[GEP_IS_OKAY]], + // CHECK-SANITIZE-NULLOK-NEXT: br i1 %[[GEP_DID_NOT_OVERFLOW]], + // CHECK-SANITIZE-SAME: label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_100]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_100]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] +#line 100 + return base + offset; +} + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c new file mode 100644 index 0000000000000..60283e76dde75 --- /dev/null +++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset.c @@ -0,0 +1,421 @@ +// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE,CHECK-SANITIZE-C,CHECK-SANITIZE-ANYRECOVER-C,CHECK-SANITIZE-NORECOVER-C,CHECK-SANITIZE-UNREACHABLE-C +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER,CHECK-SANITIZE-C,CHECK-SANITIZE-ANYRECOVER-C,CHECK-SANITIZE-RECOVER-C +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" 
--check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE,CHECK-SANITIZE-C,CHECK-SANITIZE-TRAP-C,CHECK-SANITIZE-UNREACHABLE-C + +// RUN: %clang_cc1 -x c++ -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE,CHECK-SANITIZE-CPP,CHECK-SANITIZE-ANYRECOVER-CPP,CHECK-SANITIZE-NORECOVER-CPP,CHECK-SANITIZE-UNREACHABLE-CPP +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER,CHECK-SANITIZE-CPP,CHECK-SANITIZE-ANYRECOVER-CPP,CHECK-SANITIZE-RECOVER-CPP +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE,CHECK-SANITIZE-CPP,CHECK-SANITIZE-TRAP-CPP,CHECK-SANITIZE-UNREACHABLE-CPP + +// In C++/LLVM IR, if the base pointer evaluates to a null pointer value, +// the only valid pointer this inbounds GEP can produce is also a null pointer. +// Likewise, if the base pointer is non-null, the result cannot be a null +// pointer, so the offset cannot be -int(BasePtr). + +// In other words, the offset cannot change the "null status" of the pointer: +// if the pointer was null, it cannot become non-null, and if it was non-null, +// it cannot become null. + +// In C, however, offsetting a null pointer is completely undefined, even by 0.
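(Editorial illustration, not part of the patch: a minimal standalone sketch of the two situations the comment above describes, with hypothetical helper names. Built with -fsanitize=pointer-overflow, both returns are expected to trip the extended __ubsan_handle_pointer_overflow check that the tests below exercise in detail.)

#include <stddef.h>  /* NULL, size_t */
#include <stdint.h>  /* uintptr_t */

/* UB in C for any offset, and in C++ for any non-zero offset:
   the base is null, so the result cannot be a valid non-null pointer. */
static char *offset_a_null_pointer(size_t n) {
  char *base = NULL;
  return base + n;
}

/* UB in both C and C++: a non-null base is offset by -(uintptr_t)base,
   i.e. the arithmetic tries to change the pointer's "null status". */
static char *offset_back_to_null(char *base) {
  return base - (uintptr_t)base;
}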
+ +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-C-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-C-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-C-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-C-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 15 } } + +#ifdef __cplusplus +extern "C" { +#endif + +char *var_var(char *base, unsigned long offset) { + // CHECK: define i8* @var_var(i8* %[[BASE:.*]], i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 
%[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_100]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_100]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] +#line 100 + return base + offset; +} + +char *var_zero(char *base) { + // CHECK: define i8* @var_zero(i8* %[[BASE:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 0 + // CHECK-SANITIZE-C-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[COMPUTED_GEP_IS_UGE_BASE]], !nosanitize + // CHECK-SANITIZE-C-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-C: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-C-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_200]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-C-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_200]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-C-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-C-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE-C: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] + static const unsigned long offset = 0; +#line 200 + return base + offset; +} + +char *var_one(char *base) { + // CHECK: define i8* @var_one(i8* %[[BASE:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 1 + // 
CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[COMPUTED_GEP_IS_UGE_BASE]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_300]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_300]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] + static const unsigned long offset = 1; +#line 300 + return base + offset; +} + +char *var_allones(char *base) { + // CHECK: define i8* @var_allones(i8* %[[BASE:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 -1 + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], -1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[COMPUTED_GEP_IS_UGE_BASE]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ 
{{{.*}}} }* @[[LINE_400]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_400]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] + static const unsigned long offset = -1; +#line 400 + return base + offset; +} + +//------------------------------------------------------------------------------ + +char *nullptr_var(unsigned long offset) { + // CHECK: define i8* @nullptr_var(i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* null, i64 %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 0, %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 false, %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 false, %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_500]] to i8*), i64 0, i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_500]] to i8*), i64 0, i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] + static char *const base = (char *)0; +#line 500 + return base + offset; +} + +char *nullptr_zero() { + // CHECK: define i8* @nullptr_zero() + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 false, label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-C: [[HANDLER_POINTER_OVERFLOW]]: + 
// CHECK-SANITIZE-NORECOVER-C-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_600]] to i8*), i64 0, i64 0) + // CHECK-SANITIZE-RECOVER-C-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_600]] to i8*), i64 0, i64 0) + // CHECK-SANITIZE-TRAP-C-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-C-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE-C: [[CONT]]: + // CHECK-NEXT: ret i8* null + static char *const base = (char *)0; + static const unsigned long offset = 0; +#line 600 + return base + offset; +} + +char *nullptr_one_BAD() { + // CHECK: define i8* @nullptr_one_BAD() + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 false, label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-CPP-NEXT: br i1 icmp eq (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* null, i64 1) to i64), i64 0), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_700]] to i8*), i64 0, i64 ptrtoint (i8* getelementptr inbounds (i8, i8* null, i64 1) to i64)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_700]] to i8*), i64 0, i64 ptrtoint (i8* getelementptr inbounds (i8, i8* null, i64 1) to i64)) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* getelementptr inbounds (i8, i8* null, i64 1) + static char *const base = (char *)0; + static const unsigned long offset = 1; +#line 700 + return base + offset; +} + +char *nullptr_allones_BAD() { + // CHECK: define i8* @nullptr_allones_BAD() + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 false, label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-CPP-NEXT: br i1 icmp eq (i64 mul (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 -1), i64 0), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_800]] to i8*), i64 0, i64 mul (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 -1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_800]] to i8*), i64 0, i64 mul (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 -1)) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* getelementptr inbounds (i8, i8* null, i64 -1) + static char *const base = (char *)0; + static const unsigned long offset = -1; +#line 800 + return base + offset; +} + +//------------------------------------------------------------------------------ + +char *one_var(unsigned long offset) { + // CHECK: define i8* @one_var(i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, 
i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* inttoptr (i64 1 to i8*), i64 %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 1, %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 icmp ne (i8* inttoptr (i64 1 to i8*), i8* null), %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 icmp ne (i8* inttoptr (i64 1 to i8*), i8* null), %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_900]] to i8*), i64 1, i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_900]] to i8*), i64 1, i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] + static char *const base = (char *)1; +#line 900 + return base + offset; +} + +char *one_zero() { + // CHECK: define i8* @one_zero() + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 icmp ne (i8* inttoptr (i64 1 to i8*), i8* null), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-C: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-C-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_1000]] to i8*), i64 1, i64 1) + // CHECK-SANITIZE-RECOVER-C-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_1000]] to i8*), i64 1, i64 1) + // CHECK-SANITIZE-TRAP-C-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-C-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE-C: [[CONT]]: + // CHECK-NEXT: ret i8* inttoptr (i64 1 to i8*) + static char *const base = (char *)1; + static const unsigned long offset = 0; +#line 1000 + return base + offset; +} + +char *one_one_OK() { + // CHECK: define i8* @one_one_OK() + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 and (i1 icmp ne (i8* inttoptr (i64 1 to i8*), i8* 
null), i1 icmp ne (i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 1) to i64), i64 1), i64 1), i64 0)), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-CPP-NEXT: br i1 xor (i1 icmp eq (i8* inttoptr (i64 1 to i8*), i8* null), i1 icmp ne (i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 1) to i64), i64 1), i64 1), i64 0)), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_1100]] to i8*), i64 1, i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 1) to i64), i64 1), i64 1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_1100]] to i8*), i64 1, i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 1) to i64), i64 1), i64 1)) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 1) + static char *const base = (char *)1; + static const unsigned long offset = 1; +#line 1100 + return base + offset; +} + +char *one_allones_BAD() { + // CHECK: define i8* @one_allones_BAD() + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 and (i1 icmp ne (i8* inttoptr (i64 1 to i8*), i8* null), i1 icmp ne (i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 -1) to i64), i64 1), i64 1), i64 0)), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-CPP-NEXT: br i1 xor (i1 icmp eq (i8* inttoptr (i64 1 to i8*), i8* null), i1 icmp ne (i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 -1) to i64), i64 1), i64 1), i64 0)), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_1200]] to i8*), i64 1, i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 -1) to i64), i64 1), i64 1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_1200]] to i8*), i64 1, i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 -1) to i64), i64 1), i64 1)) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* getelementptr inbounds (i8, i8* inttoptr (i64 1 to i8*), i64 -1) + static char *const base = (char *)1; + static const unsigned long offset = -1; +#line 1200 + return base + offset; +} + +//------------------------------------------------------------------------------ + +char *allones_var(unsigned long offset) { + // CHECK: define i8* @allones_var(i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 
+ // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* inttoptr (i64 -1 to i8*), i64 %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 -1, %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 icmp ne (i8* inttoptr (i64 -1 to i8*), i8* null), %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 icmp ne (i8* inttoptr (i64 -1 to i8*), i8* null), %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], -1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_1300]] to i8*), i64 -1, i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_1300]] to i8*), i64 -1, i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] + static char *const base = (char *)-1; +#line 1300 + return base + offset; +} + +char *allones_zero_OK() { + // CHECK: define i8* @allones_zero_OK() + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 icmp ne (i8* inttoptr (i64 -1 to i8*), i8* null), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-C: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-C-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_1400]] to i8*), i64 -1, i64 -1) + // CHECK-SANITIZE-RECOVER-C-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_1400]] to i8*), i64 -1, i64 -1) + // CHECK-SANITIZE-TRAP-C-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-C-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE-C: [[CONT]]: + // CHECK-NEXT: ret i8* inttoptr (i64 -1 to i8*) + static char *const base = (char *)-1; + static const unsigned long offset = 0; +#line 1400 + return base + offset; +} + +char *allones_one_BAD() { + // CHECK: define i8* @allones_one_BAD() + // CHECK-NEXT: 
[[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 and (i1 icmp ne (i8* inttoptr (i64 -1 to i8*), i8* null), i1 icmp ne (i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 1) to i64), i64 -1), i64 -1), i64 0)), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-CPP-NEXT: br i1 xor (i1 icmp eq (i8* inttoptr (i64 -1 to i8*), i8* null), i1 icmp ne (i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 1) to i64), i64 -1), i64 -1), i64 0)), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_1500]] to i8*), i64 -1, i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 1) to i64), i64 -1), i64 -1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_1500]] to i8*), i64 -1, i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 1) to i64), i64 -1), i64 -1)) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 1) + static char *const base = (char *)-1; + static const unsigned long offset = 1; +#line 1500 + return base + offset; +} + +char *allones_allones_OK() { + // CHECK: define i8* @allones_allones_OK() + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-SANITIZE-C-NEXT: br i1 and (i1 icmp ne (i8* inttoptr (i64 -1 to i8*), i8* null), i1 icmp ne (i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 -1) to i64), i64 -1), i64 -1), i64 0)), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE-CPP-NEXT: br i1 xor (i1 icmp eq (i8* inttoptr (i64 -1 to i8*), i8* null), i1 icmp ne (i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 -1) to i64), i64 -1), i64 -1), i64 0)), label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_1600]] to i8*), i64 -1, i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 -1) to i64), i64 -1), i64 -1)) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_1600]] to i8*), i64 -1, i64 add (i64 sub (i64 ptrtoint (i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 -1) to i64), i64 -1), i64 -1)) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* getelementptr inbounds (i8, i8* inttoptr (i64 -1 to i8*), i64 -1) + static char *const base = (char *)-1; + static const unsigned long offset = -1; +#line 1600 + return base + offset; +} + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGen/catch-pointer-overflow-volatile.c b/clang/test/CodeGen/catch-pointer-overflow-volatile.c new file mode 100644 index 
0000000000000..138a3b660f5e9 --- /dev/null +++ b/clang/test/CodeGen/catch-pointer-overflow-volatile.c @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-C,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-C,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-C,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// RUN: %clang_cc1 -x c++ -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-CPP,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-CPP,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-CPP,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 15 } } + +#ifdef __cplusplus +extern "C" { +#endif + +char *volatile_ptr(char *volatile base, unsigned long offset) { + // CHECK: define i8* @volatile_ptr(i8* %[[BASE:.*]], i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store volatile i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load volatile i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } 
%[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_100]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_100]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] +#line 100 + return base + offset; +} + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGen/catch-pointer-overflow.c b/clang/test/CodeGen/catch-pointer-overflow.c new file mode 100644 index 0000000000000..19115e91ff172 --- /dev/null +++ b/clang/test/CodeGen/catch-pointer-overflow.c @@ -0,0 +1,294 @@ +// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-C,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-C,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-C,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// RUN: %clang_cc1 -x c++ -emit-llvm %s 
-o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-CPP,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-CPP,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-trap=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-CPP,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 15 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 7 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 7 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 } } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 } } + +#ifdef __cplusplus +extern "C" { +#endif + +char *add_unsigned(char *base, unsigned long offset) { + // CHECK: define i8* @add_unsigned(i8* %[[BASE:.*]], i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = 
icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_UGE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_100]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_100]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] +#line 100 + return base + offset; +} + +char *sub_unsigned(char *base, unsigned long offset) { + // CHECK: define i8* @sub_unsigned(i8* %[[BASE:.*]], i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[IDX_NEG:.*]] = sub i64 0, %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 %[[IDX_NEG]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[IDX_NEG]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_ULE_BASE:.*]] = icmp ule i64 %[[COMPUTED_GEP]], 
%[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[COMPUTED_GEP_IS_ULE_BASE]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_200]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_200]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] +#line 200 + return base - offset; +} + +char *add_signed(char *base, signed long offset) { + // CHECK: define i8* @add_signed(i8* %[[BASE:.*]], i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[OFFSET_RELOADED]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[POSORZEROVALID:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[POSORZEROOFFSET:.*]] = icmp sge i64 %[[COMPUTED_OFFSET]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[NEGVALID:.*]] = icmp ult i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[APPLYING_OFFSET_DID_NOT_OVERFLOW:.*]] = select i1 %[[POSORZEROOFFSET]], i1 %[[POSORZEROVALID]], 
i1 %[[NEGVALID]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[APPLYING_OFFSET_DID_NOT_OVERFLOW]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_300]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_300]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] +#line 300 + return base + offset; +} + +char *sub_signed(char *base, signed long offset) { + // CHECK: define i8* @sub_signed(i8* %[[BASE:.*]], i64 %[[OFFSET:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: %[[OFFSET_ADDR:.*]] = alloca i64, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: store i64 %[[OFFSET]], i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 + // CHECK-NEXT: %[[IDX_NEG:.*]] = sub i64 0, %[[OFFSET_RELOADED]] + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i64 %[[IDX_NEG]] + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_AGGREGATE:.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %[[IDX_NEG]]), !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_OVERFLOWED:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET:.*]] = extractvalue { i64, i1 } %[[COMPUTED_OFFSET_AGGREGATE]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], %[[COMPUTED_OFFSET]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW:.*]] = xor i1 %[[COMPUTED_OFFSET_OVERFLOWED]], true, !nosanitize + // CHECK-SANITIZE-NEXT: %[[POSORZEROVALID:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[POSORZEROOFFSET:.*]] = icmp sge i64 %[[COMPUTED_OFFSET]], 0, !nosanitize + // CHECK-SANITIZE-NEXT: %[[NEGVALID:.*]] = icmp ult i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[APPLYING_OFFSET_DID_NOT_OVERFLOW:.*]] = 
select i1 %[[POSORZEROOFFSET]], i1 %[[POSORZEROVALID]], i1 %[[NEGVALID]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_DID_NOT_OVERFLOW:.*]] = and i1 %[[APPLYING_OFFSET_DID_NOT_OVERFLOW]], %[[COMPUTED_OFFSET_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[GEP_DID_NOT_OVERFLOW]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_400]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_400]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: ret i8* %[[ADD_PTR]] +#line 400 + return base - offset; +} + +char *postinc(char *base) { + // CHECK: define i8* @postinc(i8* %[[BASE:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i32 1 + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[COMPUTED_GEP_IS_UGE_BASE]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_500]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_500]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: store i8* %[[ADD_PTR]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: ret i8* %[[ADD_PTR_RELOADED]] +#line 500 + base++; + return base; +} + +char 
*postdec(char *base) { + // CHECK: define i8* @postdec(i8* %[[BASE:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i32 -1 + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], -1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_ULE_BASE:.*]] = icmp ule i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[COMPUTED_GEP_IS_ULE_BASE]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_600]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_600]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: store i8* %[[ADD_PTR]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: ret i8* %[[ADD_PTR_RELOADED]] +#line 600 + base--; + return base; +} + +char *preinc(char *base) { + // CHECK: define i8* @preinc(i8* %[[BASE:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i32 1 + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], 1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: 
%[[COMPUTED_GEP_IS_UGE_BASE:.*]] = icmp uge i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[COMPUTED_GEP_IS_UGE_BASE]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_700]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_700]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: store i8* %[[ADD_PTR]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: ret i8* %[[ADD_PTR_RELOADED]] +#line 700 + ++base; + return base; +} + +char *predec(char *base) { + // CHECK: define i8* @predec(i8* %[[BASE:.*]]) + // CHECK-NEXT: [[ENTRY:.*]]: + // CHECK-NEXT: %[[BASE_ADDR:.*]] = alloca i8*, align 8 + // CHECK-NEXT: store i8* %[[BASE]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[BASE_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR:.*]] = getelementptr inbounds i8, i8* %[[BASE_RELOADED]], i32 -1 + // CHECK-SANITIZE-NEXT: %[[BASE_RELOADED_INT:.*]] = ptrtoint i8* %[[BASE_RELOADED]] to i64, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP:.*]] = add i64 %[[BASE_RELOADED_INT]], -1, !nosanitize + // CHECK-SANITIZE-NEXT: %[[BASE_IS_NOT_NULLPTR:.*]] = icmp ne i8* %[[BASE_RELOADED]], null, !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_NOT_NULL:.*]] = icmp ne i64 %[[COMPUTED_GEP]], 0, !nosanitize + // CHECK-SANITIZE-C-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = and i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-CPP-NEXT: %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL:.*]] = icmp eq i1 %[[BASE_IS_NOT_NULLPTR]], %[[COMPUTED_GEP_IS_NOT_NULL]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[COMPUTED_GEP_IS_ULE_BASE:.*]] = icmp ule i64 %[[COMPUTED_GEP]], %[[BASE_RELOADED_INT]], !nosanitize + // CHECK-SANITIZE-NEXT: %[[GEP_IS_OKAY:.*]] = and i1 %[[BOTH_POINTERS_ARE_NULL_OR_BOTH_ARE_NONNULL]], %[[COMPUTED_GEP_IS_ULE_BASE]], !nosanitize + // CHECK-SANITIZE-NEXT: br i1 %[[GEP_IS_OKAY]], label %[[CONT:.*]], label %[[HANDLER_POINTER_OVERFLOW:[^,]+]],{{.*}} !nosanitize + // CHECK-SANITIZE: [[HANDLER_POINTER_OVERFLOW]]: + // CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_pointer_overflow_abort(i8* bitcast ({ {{{.*}}} }* @[[LINE_800]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_pointer_overflow(i8* bitcast ({ {{{.*}}} }* @[[LINE_800]] to i8*), i64 %[[BASE_RELOADED_INT]], i64 %[[COMPUTED_GEP]]) + // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize + // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize + // CHECK-SANITIZE: [[CONT]]: + // CHECK-NEXT: store i8* %[[ADD_PTR]], i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: %[[ADD_PTR_RELOADED:.*]] = load i8*, i8** %[[BASE_ADDR]], align 8 + // CHECK-NEXT: ret i8* %[[ADD_PTR_RELOADED]] +#line 800 + --base; 
+ return base; +} + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGen/debug-prefix-map.c b/clang/test/CodeGen/debug-prefix-map.c index f755ba47a2913..d6032a658c2e2 100644 --- a/clang/test/CodeGen/debug-prefix-map.c +++ b/clang/test/CodeGen/debug-prefix-map.c @@ -16,7 +16,7 @@ void test_rewrite_includes() { vprintf("string", argp); } -// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\5C}}" +// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}" // CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{[/\\]}}{{.*}}", // On POSIX systems "Dir" should actually be empty, but on Windows we // can't recognize "/UNLIKELY_PATH" as being an absolute path. diff --git a/clang/test/CodeGen/sanitizer-module-constructor.c b/clang/test/CodeGen/sanitizer-module-constructor.c new file mode 100644 index 0000000000000..70d75e9aa7b4f --- /dev/null +++ b/clang/test/CodeGen/sanitizer-module-constructor.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=address -emit-llvm -O3 -fdebug-pass-manager -fexperimental-new-pass-manager -o - %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=thread -emit-llvm -O3 -fdebug-pass-manager -fexperimental-new-pass-manager -o - %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsanitize=memory -emit-llvm -O3 -fdebug-pass-manager -fexperimental-new-pass-manager -o - %s 2>&1 | FileCheck %s + +// This is a regression test for PR42877 + +typedef struct a *b; +struct a { + int c; +}; +int d; +b e; +static void f(b g) { + for (d = g->c;;) + ; +} +void h() { f(e); } + +// CHECK: Running pass: {{.*}}SanitizerPass on {{.*}}sanitizer-module-constructor.c +// CHECK-NOT: Running pass: LoopSimplifyPass on {{.*}}san.module_ctor +// CHECK: Running analysis: DominatorTreeAnalysis on {{.*}}san.module_ctor +// CHECK: Running pass: LoopSimplifyPass on {{.*}}san.module_ctor diff --git a/clang/test/CodeGen/string-literal.c b/clang/test/CodeGen/string-literal.c index c46018743620b..6634872821045 100644 --- a/clang/test/CodeGen/string-literal.c +++ b/clang/test/CodeGen/string-literal.c @@ -85,7 +85,7 @@ int main() { // CHECK-CXX11: private unnamed_addr constant [3 x i32] [i32 75, i32 76, i32 0], align 4 const wchar_t *l = LR"bar(KL)bar"; - // CHECK-CXX11: private unnamed_addr constant [9 x i8] c"abc\5Cndef\00", align 1 + // CHECK-CXX11: private unnamed_addr constant [9 x i8] c"abc\\ndef\00", align 1 const char *m = R"(abc\ndef)"; // CHECK-CXX11: private unnamed_addr constant [8 x i8] c"abc\0Adef\00", align 1 @@ -96,13 +96,13 @@ def)"; const char *q = R"(abc def)" "ghi"; - // CHECK-CXX11: private unnamed_addr constant [13 x i8] c"abc\5C\0A??=\0Adef\00", align 1 + // CHECK-CXX11: private unnamed_addr constant [13 x i8] c"abc\\\0A??=\0Adef\00", align 1 const char *r = R\ "(abc\ ??= def)"; - // CHECK-CXX11: private unnamed_addr constant [13 x i8] c"def\5C\0A??=\0Aabc\00", align 1 + // CHECK-CXX11: private unnamed_addr constant [13 x i8] c"def\\\0A??=\0Aabc\00", align 1 const char *s = u8R\ "(def\ ??= diff --git a/clang/test/CodeGen/ubsan-pointer-overflow.c b/clang/test/CodeGen/ubsan-pointer-overflow.c new file mode 100644 index 0000000000000..7934442ad9394 --- /dev/null +++ b/clang/test/CodeGen/ubsan-pointer-overflow.c @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -x c -triple x86_64-apple-darwin10 -w -emit-llvm -o - %s -fsanitize=pointer-overflow | FileCheck %s --check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -x c++ -triple x86_64-apple-darwin10 -w -emit-llvm -o - %s 
-fsanitize=pointer-overflow | FileCheck %s --check-prefixes=CHECK,CHECK-CPP + +#ifdef __cplusplus +extern "C" { +#endif + +// CHECK-LABEL: define void @fixed_len_array +void fixed_len_array(int k) { + // CHECK: getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARR:%.*]], i64 0, i64 [[IDXPROM:%.*]] + // CHECK-NEXT: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 40, i64 [[IDXPROM]]), !nosanitize + // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize + // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize + // CHECK-NEXT: [[BASE:%.*]] = ptrtoint [10 x [10 x i32]]* [[ARR]] to i64, !nosanitize + // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize + + // CHECK: getelementptr inbounds [10 x i32], [10 x i32]* {{.*}}, i64 0, i64 [[IDXPROM1:%.*]] + // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM1]]), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + int arr[10][10]; + arr[k][k]; +} + +// CHECK-LABEL: define void @variable_len_array +void variable_len_array(int n, int k) { + // CHECK: getelementptr inbounds i32, i32* {{.*}}, i64 [[IDXPROM:%.*]] + // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM]]), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + // CHECK: getelementptr inbounds i32, i32* {{.*}}, i64 [[IDXPROM1:%.*]] + // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM1]]), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + int arr[n][n]; + arr[k][k]; +} + +// CHECK-LABEL: define void @pointer_array +void pointer_array(int **arr, int k) { + // CHECK: @llvm.smul.with.overflow.i64(i64 8, i64 {{.*}}), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + // CHECK: @llvm.smul.with.overflow.i64(i64 4, i64 {{.*}}), !nosanitize + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + + arr[k][k]; +} + +// CHECK-LABEL: define void @pointer_array_unsigned_indices +void pointer_array_unsigned_indices(int **arr, unsigned k) { + // CHECK: icmp uge + // CHECK-NOT: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + // CHECK: icmp uge + // CHECK-NOT: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + arr[k][k]; +} + +// CHECK-LABEL: define void @pointer_array_mixed_indices +void pointer_array_mixed_indices(int **arr, int i, unsigned j) { + // CHECK: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + // CHECK-NOT: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + arr[i][j]; +} + +struct S1 { + int pad1; + union { + char leaf; + struct S1 *link; + } u; + struct S1 *arr; +}; + +// TODO: Currently, structure GEPs are not checked, so there are several +// potentially unsafe GEPs here which we don't instrument. 
+// +// CHECK-LABEL: define void @struct_index +void struct_index(struct S1 *p) { + // CHECK: getelementptr inbounds %struct.S1, %struct.S1* [[P:%.*]], i64 10 + // CHECK-NEXT: [[BASE:%.*]] = ptrtoint %struct.S1* [[P]] to i64, !nosanitize + // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], 240, !nosanitize + // CHECK: select + // CHECK: @__ubsan_handle_pointer_overflow{{.*}} i64 [[BASE]], i64 [[COMPGEP]]) {{.*}}, !nosanitize + + // CHECK-NOT: @__ubsan_handle_pointer_overflow + + p->arr[10].u.link->u.leaf; +} + +typedef void (*funcptr_t)(void); + +// CHECK-LABEL: define void @function_pointer_arith +void function_pointer_arith(funcptr_t *p, int k) { + // CHECK: add i64 {{.*}}, 8, !nosanitize + // CHECK-NOT: select + // CHECK: @__ubsan_handle_pointer_overflow{{.*}} + ++p; + + // CHECK: @llvm.smul.with.overflow.i64(i64 8, i64 {{.*}}), !nosanitize + // CHECK: select + // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + p + k; +} + +// CHECK-LABEL: define void @dont_emit_checks_for_no_op_GEPs +// CHECK-C: __ubsan_handle_pointer_overflow +// CHECK-CPP-NOT: __ubsan_handle_pointer_overflow +void dont_emit_checks_for_no_op_GEPs(char *p) { + &p[0]; + + int arr[10][10]; + &arr[0][0]; +} + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGen/ubsan-pointer-overflow.m b/clang/test/CodeGen/ubsan-pointer-overflow.m index 977e2458384b5..d48e22579a029 100644 --- a/clang/test/CodeGen/ubsan-pointer-overflow.m +++ b/clang/test/CodeGen/ubsan-pointer-overflow.m @@ -1,189 +1,5 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin10 -w -emit-llvm -o - %s -fsanitize=pointer-overflow | FileCheck %s -// CHECK-LABEL: define void @unary_arith -void unary_arith(char *p) { - // CHECK: [[BASE:%.*]] = ptrtoint i8* {{.*}} to i64, !nosanitize - // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], 1, !nosanitize - // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: br i1 [[POSVALID]]{{.*}}, !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize - ++p; - - // CHECK: ptrtoint i8* {{.*}} to i64, !nosanitize - // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 {{.*}}, -1, !nosanitize - // CHECK: [[NEGVALID:%.*]] = icmp ule i64 [[COMPGEP]], {{.*}}, !nosanitize - // CHECK-NOT: select - // CHECK: br i1 [[NEGVALID]]{{.*}}, !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - --p; - - // CHECK: icmp uge i64 - // CHECK-NOT: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - p++; - - // CHECK: icmp ule i64 - // CHECK-NOT: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - p--; -} - -// CHECK-LABEL: define void @binary_arith -void binary_arith(char *p, int i) { - // CHECK: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %{{.*}}), !nosanitize - // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize - // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize - // CHECK-NEXT: [[BASE:%.*]] = ptrtoint i8* {{.*}} to i64, !nosanitize - // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize - // CHECK-NEXT: [[OFFSETVALID:%.*]] = xor i1 [[SMULOFLOW]], true, !nosanitize - // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: [[POSOFFSET:%.*]] = icmp sge i64 [[SMULVAL]], 0, !nosanitize - // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: [[DIFFVALID:%.*]] = select i1 [[POSOFFSET]], 
i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize - // CHECK: [[VALID:%.*]] = and i1 [[DIFFVALID]], [[OFFSETVALID]], !nosanitize - // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize - p + i; - - // CHECK: [[OFFSET:%.*]] = sub i64 0, {{.*}} - // CHECK-NEXT: getelementptr inbounds {{.*}} [[OFFSET]] - // CHECK: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - p - i; -} - -// CHECK-LABEL: define void @binary_arith_unsigned -void binary_arith_unsigned(char *p, unsigned i) { - // CHECK: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 %{{.*}}), !nosanitize - // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize - // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize - // CHECK-NEXT: [[BASE:%.*]] = ptrtoint i8* {{.*}} to i64, !nosanitize - // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize - // CHECK-NEXT: [[OFFSETVALID:%.*]] = xor i1 [[SMULOFLOW]], true, !nosanitize - // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK: [[VALID:%.*]] = and i1 [[POSVALID]], [[OFFSETVALID]], !nosanitize - // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize - p + i; - - // CHECK: [[OFFSET:%.*]] = sub i64 0, {{.*}} - // CHECK-NEXT: getelementptr inbounds {{.*}} [[OFFSET]] - // CHECK: icmp ule i64 - // CHECK-NOT: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - p - i; -} - -// CHECK-LABEL: define void @fixed_len_array -void fixed_len_array(int k) { - // CHECK: getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARR:%.*]], i64 0, i64 [[IDXPROM:%.*]] - // CHECK-NEXT: [[SMUL:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 40, i64 [[IDXPROM]]), !nosanitize - // CHECK-NEXT: [[SMULOFLOW:%.*]] = extractvalue { i64, i1 } [[SMUL]], 1, !nosanitize - // CHECK-NEXT: [[SMULVAL:%.*]] = extractvalue { i64, i1 } [[SMUL]], 0, !nosanitize - // CHECK-NEXT: [[BASE:%.*]] = ptrtoint [10 x [10 x i32]]* [[ARR]] to i64, !nosanitize - // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], [[SMULVAL]], !nosanitize - // CHECK-NEXT: [[OFFSETVALID:%.*]] = xor i1 [[SMULOFLOW]], true, !nosanitize - // CHECK-NEXT: [[POSVALID:%.*]] = icmp uge i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: [[POSOFFSET:%.*]] = icmp sge i64 [[SMULVAL]], 0, !nosanitize - // CHECK-NEXT: [[NEGVALID:%.*]] = icmp ult i64 [[COMPGEP]], [[BASE]], !nosanitize - // CHECK-NEXT: [[DIFFVALID:%.*]] = select i1 [[POSOFFSET]], i1 [[POSVALID]], i1 [[NEGVALID]], !nosanitize - // CHECK: [[VALID:%.*]] = and i1 [[DIFFVALID]], [[OFFSETVALID]], !nosanitize - // CHECK-NEXT: br i1 [[VALID]]{{.*}}, !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}, i64 [[BASE]], i64 [[COMPGEP]]){{.*}}, !nosanitize - - // CHECK: getelementptr inbounds [10 x i32], [10 x i32]* {{.*}}, i64 0, i64 [[IDXPROM1:%.*]] - // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM1]]), !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - - int arr[10][10]; - arr[k][k]; -} - -// CHECK-LABEL: define void @variable_len_array -void variable_len_array(int n, int k) { - // CHECK: getelementptr inbounds i32, i32* {{.*}}, i64 [[IDXPROM:%.*]] - // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM]]), !nosanitize - // CHECK: call void 
@__ubsan_handle_pointer_overflow{{.*}} - - // CHECK: getelementptr inbounds i32, i32* {{.*}}, i64 [[IDXPROM1:%.*]] - // CHECK-NEXT: @llvm.smul.with.overflow.i64(i64 4, i64 [[IDXPROM1]]), !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - - int arr[n][n]; - arr[k][k]; -} - -// CHECK-LABEL: define void @pointer_array -void pointer_array(int **arr, int k) { - // CHECK: @llvm.smul.with.overflow.i64(i64 8, i64 {{.*}}), !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - - // CHECK: @llvm.smul.with.overflow.i64(i64 4, i64 {{.*}}), !nosanitize - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - - arr[k][k]; -} - -// CHECK-LABEL: define void @pointer_array_unsigned_indices -void pointer_array_unsigned_indices(int **arr, unsigned k) { - // CHECK: icmp uge - // CHECK-NOT: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - // CHECK: icmp uge - // CHECK-NOT: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - arr[k][k]; -} - -// CHECK-LABEL: define void @pointer_array_mixed_indices -void pointer_array_mixed_indices(int **arr, int i, unsigned j) { - // CHECK: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - // CHECK-NOT: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - arr[i][j]; -} - -struct S1 { - int pad1; - union { - char leaf; - struct S1 *link; - } u; - struct S1 *arr; -}; - -// TODO: Currently, structure GEPs are not checked, so there are several -// potentially unsafe GEPs here which we don't instrument. -// -// CHECK-LABEL: define void @struct_index -void struct_index(struct S1 *p) { - // CHECK: getelementptr inbounds %struct.S1, %struct.S1* [[P:%.*]], i64 10 - // CHECK-NEXT: [[BASE:%.*]] = ptrtoint %struct.S1* [[P]] to i64, !nosanitize - // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 [[BASE]], 240, !nosanitize - // CHECK: select - // CHECK: @__ubsan_handle_pointer_overflow{{.*}} i64 [[BASE]], i64 [[COMPGEP]]) {{.*}}, !nosanitize - - // CHECK-NOT: @__ubsan_handle_pointer_overflow - - p->arr[10].u.link->u.leaf; -} - -typedef void (*funcptr_t)(void); - -// CHECK-LABEL: define void @function_pointer_arith -void function_pointer_arith(funcptr_t *p, int k) { - // CHECK: add i64 {{.*}}, 8, !nosanitize - // CHECK-NOT: select - // CHECK: @__ubsan_handle_pointer_overflow{{.*}} - ++p; - - // CHECK: @llvm.smul.with.overflow.i64(i64 8, i64 {{.*}}), !nosanitize - // CHECK: select - // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} - p + k; -} - // CHECK-LABEL: define void @variable_len_array_arith void variable_len_array_arith(int n, int k) { int vla[n]; @@ -209,12 +25,3 @@ void objc_id(id *p) { // CHECK: @__ubsan_handle_pointer_overflow{{.*}} p++; } - -// CHECK-LABEL: define void @dont_emit_checks_for_no_op_GEPs -// CHECK-NOT: __ubsan_handle_pointer_overflow -void dont_emit_checks_for_no_op_GEPs(char *p) { - &p[0]; - - int arr[10][10]; - &arr[0][0]; -} diff --git a/clang/test/CodeGen/ubsan-strip-path-components.cpp b/clang/test/CodeGen/ubsan-strip-path-components.cpp index 7a95324d128be..7ab000785e1f8 100644 --- a/clang/test/CodeGen/ubsan-strip-path-components.cpp +++ b/clang/test/CodeGen/ubsan-strip-path-components.cpp @@ -10,13 +10,13 @@ // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -emit-llvm -fsanitize=unreachable -o - -fsanitize-undefined-strip-path-components=-2 | FileCheck %s -check-prefix=LAST-TWO // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -emit-llvm -fsanitize=unreachable -o - -fsanitize-undefined-strip-path-components=-1 | FileCheck %s 
-check-prefix=LAST-ONLY -// REGULAR: @[[SRC:[0-9.a-zA-Z_]+]] = private unnamed_addr constant [{{.*}} x i8] c"{{.*test(.|\\5C)CodeGen(.|\\5C)ubsan-strip-path-components\.cpp}}\00", align 1 +// REGULAR: @[[SRC:[0-9.a-zA-Z_]+]] = private unnamed_addr constant [{{.*}} x i8] c"{{.*test(.|\\\\)CodeGen(.|\\\\)ubsan-strip-path-components\.cpp}}\00", align 1 -// First path component: "/" or "$drive_letter:", then a name, or '\5C' on Windows -// REMOVE-FIRST-TWO: @[[STR:[0-9.a-zA-Z_]+]] = private unnamed_addr constant [{{.*}} x i8] c"{{(.:|/)([^\\/]*(/|\\5C))}}[[REST:.*ubsan-strip-path-components\.cpp]]\00", align 1 +// First path component: "/" or "$drive_letter:", then a name, or '\\' on Windows +// REMOVE-FIRST-TWO: @[[STR:[0-9.a-zA-Z_]+]] = private unnamed_addr constant [{{.*}} x i8] c"{{(.:|/)([^\\/]*(/|\\\\))}}[[REST:.*ubsan-strip-path-components\.cpp]]\00", align 1 // REMOVE-FIRST-TWO: @[[SRC:[0-9.a-zA-Z_]+]] = private unnamed_addr constant [{{.*}} x i8] c"[[REST]]\00", align 1 -// LAST-TWO: @[[SRC:[0-9.a-zA-Z_]+]] = private unnamed_addr constant [{{.*}} x i8] c"CodeGen{{/|\\5C}}ubsan-strip-path-components.cpp\00", align 1 +// LAST-TWO: @[[SRC:[0-9.a-zA-Z_]+]] = private unnamed_addr constant [{{.*}} x i8] c"CodeGen{{/|\\\\}}ubsan-strip-path-components.cpp\00", align 1 // LAST-ONLY: @[[SRC:[0-9.a-zA-Z_]+]] = private unnamed_addr constant [{{.*}} x i8] c"ubsan-strip-path-components.cpp\00", align 1 // CHECK: @[[STATIC_DATA:[0-9.a-zA-Z_]+]] = private unnamed_addr global { { [{{.*}} x i8]*, i32, i32 } } { { [{{.*}} x i8]*, i32, i32 } { [{{.*}} x i8]* @[[SRC]], i32 [[@LINE+6]], i32 3 } } diff --git a/clang/test/CodeGenCUDA/openmp-target.cu b/clang/test/CodeGenCUDA/openmp-target.cu new file mode 100644 index 0000000000000..869bde6072934 --- /dev/null +++ b/clang/test/CodeGenCUDA/openmp-target.cu @@ -0,0 +1,20 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm \ +// RUN: -fopenmp -fopenmp-version=50 -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm \ +// RUN: -fopenmp -fopenmp-version=50 -o - -x c++ %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=DEV %s + +// CHECK: declare{{.*}}@_Z7nohost1v() +// DEV-NOT: _Z7nohost1v +void nohost1() {} +#pragma omp declare target to(nohost1) device_type(nohost) + +// CHECK: declare{{.*}}@_Z7nohost2v() +// DEV-NOT: _Z7nohost2v +void nohost2() {nohost1();} +#pragma omp declare target to(nohost2) device_type(nohost) + diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp new file mode 100644 index 0000000000000..9dc8b90b5dcb7 --- /dev/null +++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -x c++ -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s +// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s + +#include <cstdint> + +struct S { + int x, y; +}; + +// CHECK-LABEL: define i64 @{{.*}}get_offset_of_y_naively{{.*}}( +uintptr_t get_offset_of_y_naively() { + // CHECK: [[ENTRY:.*]]: + // CHECK-NEXT: ret i64 ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) + // CHECK-NEXT: } + return ((uintptr_t)(&(((S *)nullptr)->y))); +} + +// CHECK-LABEL: define i64
@{{.*}}get_offset_of_y_via_builtin{{.*}}( +uintptr_t get_offset_of_y_via_builtin() { + // CHECK: [[ENTRY:.*]]: + // CHECK-NEXT: ret i64 4 + // CHECK-NEXT: } + return __builtin_offsetof(S, y); +} diff --git a/clang/test/CodeGenCXX/pragma-loop-predicate.cpp b/clang/test/CodeGenCXX/pragma-loop-predicate.cpp index ec2161d1772ec..33e4cf5df4f47 100644 --- a/clang/test/CodeGenCXX/pragma-loop-predicate.cpp +++ b/clang/test/CodeGenCXX/pragma-loop-predicate.cpp @@ -58,7 +58,6 @@ void test5(int *List, int Length) { List[i] = i * 2; } - // CHECK: ![[LOOP0]] = distinct !{![[LOOP0]], !3} // CHECK-NEXT: !3 = !{!"llvm.loop.vectorize.enable", i1 true} @@ -70,7 +69,7 @@ void test5(int *List, int Length) { // CHECK-NEXT: ![[LOOP3]] = distinct !{![[LOOP3]], !5, !3} -// CHECK-NEXT: ![[LOOP4]] = distinct !{![[LOOP4]], !10} +// CHECK-NEXT: ![[LOOP4]] = distinct !{![[LOOP4]], !3, !10} // CHECK-NEXT: !10 = !{!"llvm.loop.vectorize.width", i32 1} -// CHECK-NEXT: ![[LOOP5]] = distinct !{![[LOOP5]], !10} +// CHECK-NEXT: ![[LOOP5]] = distinct !{![[LOOP5]], !3, !10} diff --git a/clang/test/CodeGenCXX/pragma-loop.cpp b/clang/test/CodeGenCXX/pragma-loop.cpp index 32075f965cd78..6b44dff4030e7 100644 --- a/clang/test/CodeGenCXX/pragma-loop.cpp +++ b/clang/test/CodeGenCXX/pragma-loop.cpp @@ -158,23 +158,41 @@ void template_test(double *List, int Length) { for_template_constant_expression_test(List, Length); } +void vec_width_1(int *List, int Length) { +// CHECK-LABEL: @{{.*}}vec_width_1{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP_15:.*]] + + #pragma clang loop vectorize(enable) vectorize_width(1) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + +void width_1(int *List, int Length) { +// CHECK-LABEL: @{{.*}}width_1{{.*}}( +// CHECK: br label {{.*}}, !llvm.loop ![[LOOP_16:.*]] + + #pragma clang loop vectorize_width(1) + for (int i = 0; i < Length; i++) + List[i] = i * 2; +} + // CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[UNROLL_FULL:.*]]} // CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"} -// CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_8:.*]], ![[INTERLEAVE_4:.*]]} +// CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[VECTORIZE_ENABLE:.*]], ![[WIDTH_8:.*]], ![[INTERLEAVE_4:.*]]} // CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"} // CHECK: ![[DISTRIBUTE_DISABLE]] = !{!"llvm.loop.distribute.enable", i1 false} +// CHECK: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} // CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8} // CHECK: ![[INTERLEAVE_4]] = !{!"llvm.loop.interleave.count", i32 4} -// CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[INTERLEAVE_4:.*]], ![[INTENABLE_1:.*]], ![[FOLLOWUP_VECTOR_3:.*]]} -// CHECK: ![[INTENABLE_1]] = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3:.*]]} // CHECK: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_3:.*]]} // CHECK: ![[AFTER_VECTOR_3]] = distinct !{![[AFTER_VECTOR_3]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]} // CHECK: ![[ISVECTORIZED]] = !{!"llvm.loop.isvectorized"} // CHECK: ![[UNROLL_8]] = !{!"llvm.loop.unroll.count", i32 8} -// CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[WIDTH_2:.*]], ![[INTERLEAVE_2:.*]]} +// CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[VECTORIZE_ENABLE]], ![[WIDTH_2:.*]], ![[INTERLEAVE_2:.*]]} // CHECK: ![[WIDTH_2]] = 
!{!"llvm.loop.vectorize.width", i32 2} // CHECK: ![[INTERLEAVE_2]] = !{!"llvm.loop.interleave.count", i32 2} @@ -185,7 +203,7 @@ void template_test(double *List, int Length) { // CHECK: ![[FOLLOWUP_VECTOR_6]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_6:.*]]} // CHECK: ![[AFTER_VECTOR_6]] = distinct !{![[AFTER_VECTOR_6]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]} -// CHECK: ![[LOOP_7]] = distinct !{![[LOOP_7]], ![[WIDTH_5:.*]]} +// CHECK: ![[LOOP_7]] = distinct !{![[LOOP_7]], ![[VECTORIZE_ENABLE]], ![[WIDTH_5:.*]]} // CHECK: ![[WIDTH_5]] = !{!"llvm.loop.vectorize.width", i32 5} // CHECK: ![[LOOP_8]] = distinct !{![[LOOP_8]], ![[WIDTH_5:.*]]} @@ -213,5 +231,9 @@ void template_test(double *List, int Length) { // CHECK: ![[AFTER_VECTOR_13]] = distinct !{![[AFTER_VECTOR_13]], ![[ISVECTORIZED:.*]], ![[UNROLL_32:.*]]} // CHECK: ![[UNROLL_32]] = !{!"llvm.loop.unroll.count", i32 32} -// CHECK: ![[LOOP_14]] = distinct !{![[LOOP_14]], ![[WIDTH_10:.*]]} +// CHECK: ![[LOOP_14]] = distinct !{![[LOOP_14]], ![[VECTORIZE_ENABLE]], ![[WIDTH_10:.*]]} // CHECK: ![[WIDTH_10]] = !{!"llvm.loop.vectorize.width", i32 10} + +// CHECK: ![[LOOP_15]] = distinct !{![[LOOP_15]], ![[WIDTH_1]], ![[VECTORIZE_ENABLE]]} + +// CHECK-NEXT: ![[LOOP_16]] = distinct !{![[LOOP_16]], ![[VECTORIZE_ENABLE]], ![[WIDTH_1]]} diff --git a/clang/test/CodeGenCXX/vcall-visibility-metadata.cpp b/clang/test/CodeGenCXX/vcall-visibility-metadata.cpp new file mode 100644 index 0000000000000..7cf48395673cc --- /dev/null +++ b/clang/test/CodeGenCXX/vcall-visibility-metadata.cpp @@ -0,0 +1,88 @@ +// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -emit-llvm -fvirtual-function-elimination -fwhole-program-vtables -o - %s | FileCheck %s + + +// Anonymous namespace. +namespace { +// CHECK: @_ZTVN12_GLOBAL__N_11AE = {{.*}} !vcall_visibility [[VIS_TU:![0-9]+]] +struct A { + A() {} + virtual int f() { return 1; } +}; +} +void *construct_A() { + return new A(); +} + + +// Hidden visibility. +// CHECK: @_ZTV1B = {{.*}} !vcall_visibility [[VIS_DSO:![0-9]+]] +struct __attribute__((visibility("hidden"))) B { + B() {} + virtual int f() { return 1; } +}; +B *construct_B() { + return new B(); +} + + +// Default visibility. +// CHECK-NOT: @_ZTV1C = {{.*}} !vcall_visibility +struct __attribute__((visibility("default"))) C { + C() {} + virtual int f() { return 1; } +}; +C *construct_C() { + return new C(); +} + + +// Hidden visibility, public LTO visibility. +// CHECK-NOT: @_ZTV1D = {{.*}} !vcall_visibility +struct __attribute__((visibility("hidden"))) [[clang::lto_visibility_public]] D { + D() {} + virtual int f() { return 1; } +}; +D *construct_D() { + return new D(); +} + + +// Hidden visibility, but inherits from class with default visibility. +// CHECK-NOT: @_ZTV1E = {{.*}} !vcall_visibility +struct __attribute__((visibility("hidden"))) E : C { + E() {} + virtual int f() { return 1; } +}; +E *construct_E() { + return new E(); +} + + +// Anonymous namespace, but inherits from class with default visibility. +// CHECK-NOT: @_ZTVN12_GLOBAL__N_11FE = {{.*}} !vcall_visibility +namespace { +struct __attribute__((visibility("hidden"))) F : C { + F() {} + virtual int f() { return 1; } +}; +} +void *construct_F() { + return new F(); +} + + +// Anonymous namespace, but inherits from class with hidden visibility. 
+// CHECK: @_ZTVN12_GLOBAL__N_11GE = {{.*}} !vcall_visibility [[VIS_DSO:![0-9]+]] +namespace { +struct __attribute__((visibility("hidden"))) G : B { + G() {} + virtual int f() { return 1; } +}; +} +void *construct_G() { + return new G(); +} + + +// CHECK-DAG: [[VIS_DSO]] = !{i64 1} +// CHECK-DAG: [[VIS_TU]] = !{i64 2} diff --git a/clang/test/CodeGenCXX/virtual-function-elimination.cpp b/clang/test/CodeGenCXX/virtual-function-elimination.cpp new file mode 100644 index 0000000000000..a89e6ebceeaf9 --- /dev/null +++ b/clang/test/CodeGenCXX/virtual-function-elimination.cpp @@ -0,0 +1,75 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux -flto -flto-unit -fvirtual-function-elimination -fwhole-program-vtables -emit-llvm -o - %s | FileCheck %s + + +struct __attribute__((visibility("default"))) A { + virtual void foo(); +}; + +void test_1(A *p) { + // A has default visibility, so no need for type.checked.load. +// CHECK-LABEL: define void @_Z6test_1P1A +// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** {{%.+}}, i64 0 +// CHECK: [[FN_PTR:%.+]] = load void (%struct.A*)*, void (%struct.A*)** [[FN_PTR_ADDR]] +// CHECK: call void [[FN_PTR]]( + p->foo(); +} + + +struct __attribute__((visibility("hidden"))) [[clang::lto_visibility_public]] B { + virtual void foo(); +}; + +void test_2(B *p) { + // B has public LTO visibility, so no need for type.checked.load. +// CHECK-LABEL: define void @_Z6test_2P1B +// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr inbounds void (%struct.B*)*, void (%struct.B*)** {{%.+}}, i64 0 +// CHECK: [[FN_PTR:%.+]] = load void (%struct.B*)*, void (%struct.B*)** [[FN_PTR_ADDR]] +// CHECK: call void [[FN_PTR]]( + p->foo(); +} + + +struct __attribute__((visibility("hidden"))) C { + virtual void foo(); + virtual void bar(); +}; + +void test_3(C *p) { + // C has hidden visibility, so we generate type.checked.load to allow VFE. +// CHECK-LABEL: define void @_Z6test_3P1C +// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 0, metadata !"_ZTS1C") +// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 +// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* +// CHECK: call void [[FN_PTR]]( + p->foo(); +} + +void test_4(C *p) { + // When using type.checked.load, we pass the vtable offset to the intrinsic, + // rather than adding it to the pointer with a GEP. +// CHECK-LABEL: define void @_Z6test_4P1C +// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* {{%.+}}, i32 8, metadata !"_ZTS1C") +// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 +// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* +// CHECK: call void [[FN_PTR]]( + p->bar(); +} + +void test_5(C *p, void (C::*q)(void)) { + // We also use type.checked.load for the virtual side of member function + // pointer calls. We use a GEP to calculate the address to load from and pass + // 0 as the offset to the intrinsic, because we know that the load must be + // from exactly the point marked by one of the function-type metadatas (in + // this case "_ZTSM1CFvvE.virtual"). If we passed the offset from the member + // function pointer to the intrinsic, this information would be lost. No + // codegen changes on the non-virtual side. 
+// CHECK-LABEL: define void @_Z6test_5P1CMS_FvvE( +// CHECK: [[FN_PTR_ADDR:%.+]] = getelementptr i8, i8* %vtable, i64 {{%.+}} +// CHECK: [[LOAD:%.+]] = call { i8*, i1 } @llvm.type.checked.load(i8* [[FN_PTR_ADDR]], i32 0, metadata !"_ZTSM1CFvvE.virtual") +// CHECK: [[FN_PTR_I8:%.+]] = extractvalue { i8*, i1 } [[LOAD]], 0 +// CHECK: [[FN_PTR:%.+]] = bitcast i8* [[FN_PTR_I8]] to void (%struct.C*)* + +// CHECK: [[PHI:%.+]] = phi void (%struct.C*)* {{.*}}[ [[FN_PTR]], {{.*}} ] +// CHECK: call void [[PHI]]( + (p->*q)(); +} diff --git a/clang/test/CoverageMapping/abspath.cpp b/clang/test/CoverageMapping/abspath.cpp index 4b92a3773db0d..892dff8de0166 100644 --- a/clang/test/CoverageMapping/abspath.cpp +++ b/clang/test/CoverageMapping/abspath.cpp @@ -9,7 +9,7 @@ // RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -main-file-name abspath.cpp ../test/f1.c -o - | FileCheck -check-prefix=RELPATH %s // RELPATH: @__llvm_coverage_mapping = {{.*}}"\01 -// RELPATH: {{[/\\]}}{{.*}}{{[/\\][^/\\]*}}test{{[/\\][^/\\]*}}f1.c +// RELPATH: {{[/\\].*(/|\\\\)test(/|\\\\)f1}}.c // RELPATH: " void f1() {} diff --git a/clang/test/CoverageMapping/debug-dir.cpp b/clang/test/CoverageMapping/debug-dir.cpp new file mode 100644 index 0000000000000..657c9f28134ad --- /dev/null +++ b/clang/test/CoverageMapping/debug-dir.cpp @@ -0,0 +1,16 @@ +// %s expands to an absolute path, so to test relative paths we need to create a +// clean directory, put the source there, and cd into it. +// RUN: rm -rf %t +// RUN: mkdir -p %t/foo/bar/baz +// RUN: cp %s %t/foo/bar/baz/debug-dir.cpp +// RUN: cd %t/foo/bar + +// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -main-file-name debug-dir.cpp baz/debug-dir.cpp -o - | FileCheck -check-prefix=ABSOLUTE %s +// +// ABSOLUTE: @__llvm_coverage_mapping = {{.*"\\01.*foo.*bar.*baz.*debug-dir\.cpp}} + +// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -main-file-name debug-dir.cpp baz/debug-dir.cpp -fdebug-compilation-dir . 
-o - | FileCheck -check-prefix=RELATIVE %s +// +// RELATIVE: @__llvm_coverage_mapping = {{.*"\\01[^/]*baz.*debug-dir\.cpp}} + +void f1() {} diff --git a/clang/test/Driver/darwin-debug-flags.c b/clang/test/Driver/darwin-debug-flags.c index e37680e7f94a1..79a7f48350f5c 100644 --- a/clang/test/Driver/darwin-debug-flags.c +++ b/clang/test/Driver/darwin-debug-flags.c @@ -7,7 +7,7 @@ // CHECK: distinct !DICompileUnit( // CHECK-SAME: flags: -// CHECK-SAME: -I path\5C with\5C \5C\5Cspaces +// CHECK-SAME: -I path\\ with\\ \\\\spaces // CHECK-SAME: -g -Os // CHECK-SAME: -mmacosx-version-min=10.5.0 diff --git a/clang/test/Driver/fgnuc-version.c b/clang/test/Driver/fgnuc-version.c new file mode 100644 index 0000000000000..dea82bbaae0af --- /dev/null +++ b/clang/test/Driver/fgnuc-version.c @@ -0,0 +1,26 @@ +// +// Verify -fgnuc-version parsing +// + +// RUN: %clang -c %s -target i686-linux -### 2>&1 | FileCheck %s -check-prefix GNUC-DEFAULT +// GNUC-DEFAULT: "-fgnuc-version=4.2.1" + +// RUN: %clang -c %s -target i686-linux -fgnuc-version=100.99.99 -### 2>&1 | FileCheck %s -check-prefix GNUC-OVERRIDE +// GNUC-OVERRIDE: "-fgnuc-version=100.99.99" + +// RUN: %clang -c %s -target i686-linux -fgnuc-version=0 -### 2>&1 | FileCheck %s -check-prefix GNUC-DISABLE +// RUN: %clang -c %s -target i686-linux -fgnuc-version= -### 2>&1 | FileCheck %s -check-prefix GNUC-DISABLE +// GNUC-DISABLE-NOT: "-fgnuc-version= + +// RUN: not %clang -c %s -target i686-linux -fgnuc-version=100.100.10 2>&1 | FileCheck %s -check-prefix GNUC-INVALID +// RUN: not %clang -c %s -target i686-linux -fgnuc-version=100.10.100 2>&1 | FileCheck %s -check-prefix GNUC-INVALID +// RUN: not %clang -c %s -target i686-linux -fgnuc-version=-1.0.0 2>&1 | FileCheck %s -check-prefix GNUC-INVALID +// GNUC-INVALID: error: invalid value {{.*}} in '-fgnuc-version={{.*}}' + +// RUN: %clang -fgnuc-version=100.99.99 %s -dM -E -o - | FileCheck %s -check-prefix GNUC-LARGE +// GNUC-LARGE: #define __GNUC_MINOR__ 99 +// GNUC-LARGE: #define __GNUC_PATCHLEVEL__ 99 +// GNUC-LARGE: #define __GNUC__ 100 + +// RUN: %clang -fgnuc-version=100.99.99 -x c++ %s -dM -E -o - | FileCheck %s -check-prefix GXX-LARGE +// GXX-LARGE: #define __GNUG__ 100 diff --git a/clang/test/Driver/rewrite-legacy-objc.m b/clang/test/Driver/rewrite-legacy-objc.m index 6461aecfe5d9b..dc92dd4bf1073 100644 --- a/clang/test/Driver/rewrite-legacy-objc.m +++ b/clang/test/Driver/rewrite-legacy-objc.m @@ -3,11 +3,11 @@ // TEST0: clang{{.*}}" "-cc1" // TEST0: "-rewrite-objc" // FIXME: CHECK-NOT is broken somehow, it doesn't work here. Check adjacency instead. 
-// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fobjc-runtime=macosx-fragile" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" +// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" // TEST0: rewrite-legacy-objc.m" // RUN: %clang -no-canonical-prefixes -target i386-apple-macosx10.9.0 -rewrite-legacy-objc %s -o - -### 2>&1 | \ // RUN: FileCheck -check-prefix=TEST1 %s // RUN: %clang -no-canonical-prefixes -target i386-apple-macosx10.6.0 -rewrite-legacy-objc %s -o - -### 2>&1 | \ // RUN: FileCheck -check-prefix=TEST2 %s -// TEST1: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" -// TEST2: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" +// TEST1: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" +// TEST2: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" diff --git a/clang/test/Driver/rewrite-objc.m b/clang/test/Driver/rewrite-objc.m index 1c6dbcc329e2f..b04062992b7f4 100644 --- a/clang/test/Driver/rewrite-objc.m +++ b/clang/test/Driver/rewrite-objc.m @@ -3,4 +3,4 @@ // TEST0: clang{{.*}}" "-cc1" // TEST0: "-rewrite-objc" // FIXME: CHECK-NOT is broken somehow, it doesn't work here. Check adjacency instead. 
-// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fobjc-runtime=macosx" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" +// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-fblocks" "-fencode-extended-block-signature" "-fregister-global-dtors-with-atexit" "-fgnuc-version=4.2.1" "-fobjc-runtime=macosx" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" diff --git a/clang/test/Driver/virtual-function-elimination.cpp b/clang/test/Driver/virtual-function-elimination.cpp new file mode 100644 index 0000000000000..3a026bbb434e0 --- /dev/null +++ b/clang/test/Driver/virtual-function-elimination.cpp @@ -0,0 +1,11 @@ +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -### %s 2>&1 | FileCheck --check-prefix=BAD-LTO %s +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto=thin -### %s 2>&1 | FileCheck --check-prefix=BAD-LTO %s +// BAD-LTO: invalid argument '-fvirtual-function-elimination' only allowed with '-flto=full' + +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto -### %s 2>&1 | FileCheck --check-prefix=GOOD %s +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto=full -### %s 2>&1 | FileCheck --check-prefix=GOOD %s +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -flto -fwhole-program-vtables -### %s 2>&1 | FileCheck --check-prefix=GOOD %s +// GOOD: "-fvirtual-function-elimination" "-fwhole-program-vtables" + +// RUN: %clang -target x86_64-unknown-linux -fvirtual-function-elimination -fno-whole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=NO-WHOLE-PROGRAM-VTABLES %s +// NO-WHOLE-PROGRAM-VTABLES: invalid argument '-fno-whole-program-vtables' not allowed with '-fvirtual-function-elimination' diff --git a/clang/test/Frontend/gnu-inline.c b/clang/test/Frontend/gnu-inline.c index 75d4fe6516f13..ef99669313723 100644 --- a/clang/test/Frontend/gnu-inline.c +++ b/clang/test/Frontend/gnu-inline.c @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -std=c89 -fsyntax-only -x c -E -dM %s | FileCheck --check-prefix=GNU-INLINE %s -// RUN: %clang_cc1 -std=c99 -fsyntax-only -x c -E -dM %s | FileCheck --check-prefix=STDC-INLINE %s -// RUN: %clang_cc1 -std=c99 -fgnu89-inline -fsyntax-only -x c -E -dM %s | FileCheck --check-prefix=GNU-INLINE %s -// RUN: %clang_cc1 -fsyntax-only -x c++ -E -dM %s | FileCheck --check-prefix=GNU-INLINE %s -// RUN: not %clang_cc1 -fgnu89-inline -fsyntax-only -x c++ %s 2>&1 | FileCheck --check-prefix=CXX %s -// RUN: not %clang_cc1 -fgnu89-inline -fsyntax-only -x objective-c++ %s 2>&1 | FileCheck --check-prefix=OBJCXX %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=c89 -fsyntax-only -x c -E -dM %s | FileCheck --check-prefix=GNU-INLINE %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=c99 -fsyntax-only -x c -E -dM %s | FileCheck --check-prefix=STDC-INLINE %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=c99 -fgnu89-inline -fsyntax-only -x c -E -dM %s | FileCheck --check-prefix=GNU-INLINE %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -fsyntax-only -x c++ -E -dM %s | FileCheck --check-prefix=GNU-INLINE %s +// RUN: not %clang_cc1 -fgnu89-inline -fgnuc-version=4.2.1 -fsyntax-only -x c++ %s 2>&1 | FileCheck --check-prefix=CXX %s +// RUN: not %clang_cc1 -fgnu89-inline -fgnuc-version=4.2.1 -fsyntax-only -x 
objective-c++ %s 2>&1 | FileCheck --check-prefix=OBJCXX %s // CXX: '-fgnu89-inline' not allowed with 'C++' // OBJCXX: '-fgnu89-inline' not allowed with 'Objective-C++' diff --git a/clang/test/Headers/arm-neon-header.c b/clang/test/Headers/arm-neon-header.c index 251075ff354f7..f6362886010a5 100644 --- a/clang/test/Headers/arm-neon-header.c +++ b/clang/test/Headers/arm-neon-header.c @@ -20,7 +20,7 @@ // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++14 -xc++ %s // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++17 -xc++ %s -// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s -// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64-none-eabi -march=armv8.2-a+fp16fml+crypto+dotprod -std=c11 -xc -flax-vector-conversions=none %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64_be-none-eabi -march=armv8.2-a+fp16fml+crypto+dotprod -std=c11 -xc -flax-vector-conversions=none %s #include <arm_neon.h> diff --git a/clang/test/Headers/stdbool.cpp b/clang/test/Headers/stdbool.cpp index 0110a45b2dc6b..70a921fe743f9 100644 --- a/clang/test/Headers/stdbool.cpp +++ b/clang/test/Headers/stdbool.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -std=gnu++98 -E -dM %s | FileCheck --check-prefix=CHECK-GNU-COMPAT-98 %s -// RUN: %clang_cc1 -std=gnu++11 -E -dM %s | FileCheck --check-prefix=CHECK-GNU-COMPAT-11 %s -// RUN: %clang_cc1 -std=c++98 -E -dM %s | FileCheck --check-prefix=CHECK-CONFORMING %s -// RUN: %clang_cc1 -fsyntax-only -std=gnu++98 -verify -Weverything %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=gnu++98 -E -dM %s | FileCheck --check-prefix=CHECK-GNU-COMPAT-98 %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=gnu++11 -E -dM %s | FileCheck --check-prefix=CHECK-GNU-COMPAT-11 %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -std=c++98 -E -dM %s | FileCheck --check-prefix=CHECK-CONFORMING %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -fsyntax-only -std=gnu++98 -verify -Weverything %s #include <stdbool.h> #define zzz diff --git a/clang/test/OpenMP/declare_target_messages.cpp b/clang/test/OpenMP/declare_target_messages.cpp index f416eb6ac129f..cc6558debde6d 100644 --- a/clang/test/OpenMP/declare_target_messages.cpp +++ b/clang/test/OpenMP/declare_target_messages.cpp @@ -162,10 +162,10 @@ namespace { #pragma omp declare target link(x) // expected-error {{'x' must not appear in both clauses 'to' and 'link'}} void bazz() {} -#pragma omp declare target to(bazz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} +#pragma omp declare target to(bazz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} host5-note {{marked as 'device_type(nohost)' here}} void bazzz() {bazz();} #pragma omp declare target to(bazzz) device_type(nohost) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link' clauses expected}} -void any() {bazz();} +void any() {bazz();} // host5-error {{function with 'device_type(nohost)' is not available on host}} void host1() {bazz();} #pragma omp declare target to(host1) device_type(host) // omp45-error {{unexpected 'device_type' clause, only 'to' or 'link'
clauses expected}} dev5-note 2 {{marked as 'device_type(host)' here}} void host2() {bazz();} diff --git a/clang/test/OpenMP/declare_variant_ast_print.c b/clang/test/OpenMP/declare_variant_ast_print.c index a8a11bc53d3d5..971211c1e9444 100644 --- a/clang/test/OpenMP/declare_variant_ast_print.c +++ b/clang/test/OpenMP/declare_variant_ast_print.c @@ -11,7 +11,7 @@ int foo(void); #pragma omp declare variant(foo) match(implementation={vendor(llvm)}) #pragma omp declare variant(foo) match(implementation={vendor(llvm), xxx}) #pragma omp declare variant(foo) match(implementation={vendor(unknown)}) -#pragma omp declare variant(foo) match(implementation={vendor(score(5): ibm, xxx)}) +#pragma omp declare variant(foo) match(implementation={vendor(score(5): ibm, xxx, ibm)}) int bar(void); // CHECK: int foo(); diff --git a/clang/test/OpenMP/declare_variant_ast_print.cpp b/clang/test/OpenMP/declare_variant_ast_print.cpp index ce67b818f30cd..a026febfb523e 100644 --- a/clang/test/OpenMP/declare_variant_ast_print.cpp +++ b/clang/test/OpenMP/declare_variant_ast_print.cpp @@ -40,7 +40,7 @@ int bar(); #pragma omp declare variant(foofoo ) match(user = {condition()}) #pragma omp declare variant(foofoo ) match(implementation={vendor(llvm)}) #pragma omp declare variant(foofoo ) match(implementation={vendor(unknown)}) -#pragma omp declare variant(foofoo ) match(implementation={vendor(score(C+5): ibm, xxx)}) +#pragma omp declare variant(foofoo ) match(implementation={vendor(score(C+5): ibm, xxx, ibm)}) template T barbar(); diff --git a/clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp b/clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp index cf72e4c68b015..a8f6a2689b297 100644 --- a/clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp +++ b/clang/test/OpenMP/declare_variant_implementation_vendor_codegen.cpp @@ -8,11 +8,11 @@ // CHECK-DAG: @_ZN16SpecSpecialFuncs6MethodEv = {{.*}}alias i32 (%struct.SpecSpecialFuncs*), i32 (%struct.SpecSpecialFuncs*)* @_ZN16SpecSpecialFuncs7method_Ev // CHECK-DAG: @_ZN16SpecSpecialFuncs6methodEv = linkonce_odr {{.*}}alias i32 (%struct.SpecSpecialFuncs*), i32 (%struct.SpecSpecialFuncs*)* @_ZN16SpecSpecialFuncs7method_Ev // CHECK-DAG: @_ZN12SpecialFuncs6methodEv = linkonce_odr {{.*}}alias i32 (%struct.SpecialFuncs*), i32 (%struct.SpecialFuncs*)* @_ZN12SpecialFuncs7method_Ev -// CHECK-DAG: @_Z5prio_v = {{.*}}alias i32 (), i32 ()* @_Z4priov +// CHECK-DAG: @_Z5prio_v = {{.*}}alias i32 (), i32 ()* @_Z5prio1v // CHECK-DAG: @_ZL6prio1_v = internal alias i32 (), i32 ()* @_ZL5prio2v // CHECK-DAG: @_Z4callv = {{.*}}alias i32 (), i32 ()* @_Z4testv // CHECK-DAG: @_ZL9stat_usedv = internal alias i32 (), i32 ()* @_ZL10stat_used_v -// CHECK-DAG: @_ZN12SpecialFuncs6MethodEv = {{.*}}alias i32 (%struct.SpecialFuncs*), i32 (%struct.SpecialFuncs*)* @_ZN12SpecialFuncs7method_Ev +// CHECK-DAG: @_ZN12SpecialFuncs6MethodEv = {{.*}}alias i32 (%struct.SpecialFuncs*), i32 (%struct.SpecialFuncs*)* @_ZN12SpecialFuncs7method_Ev // CHECK-DAG: @fn_linkage = {{.*}}alias i32 (), i32 ()* @_Z18fn_linkage_variantv // CHECK-DAG: @_Z11fn_linkage1v = {{.*}}alias i32 (), i32 ()* @fn_linkage_variant1 // CHECK-DAG: declare {{.*}}i32 @_Z5bazzzv() diff --git a/clang/test/OpenMP/master_taskloop_ast_print.cpp b/clang/test/OpenMP/master_taskloop_ast_print.cpp new file mode 100644 index 0000000000000..55ed21b394c08 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_ast_print.cpp @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s 
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void foo() {} + +template +T tmain(T argc) { + T b = argc, c, d, e, f, g; + static T a; +// CHECK: static T a; +#pragma omp taskgroup allocate(d) task_reduction(+: d) +#pragma omp master taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+:g) in_reduction(+: d) allocate(d) + // CHECK-NEXT: #pragma omp taskgroup allocate(d) task_reduction(+: d) + // CHECK-NEXT: #pragma omp master taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+: g) in_reduction(+: d) allocate(d){{$}} + for (int i = 0; i < 2; ++i) + a = 2; +// CHECK-NEXT: for (int i = 0; i < 2; ++i) +// CHECK-NEXT: a = 2; +#pragma omp parallel +#pragma omp master taskloop private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) shared(g) if (c) final(d) mergeable priority(f) nogroup num_tasks(N) + for (int i = 0; i < 2; ++i) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int i = 0; i < 2; ++i) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + foo(); + // CHECK-NEXT: #pragma omp parallel + // CHECK-NEXT: #pragma omp master taskloop private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) shared(g) if(c) final(d) mergeable priority(f) nogroup num_tasks(N) + // CHECK-NEXT: for (int i = 0; i < 2; ++i) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int i = 0; i < 2; ++i) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: foo(); + return T(); +} + +// CHECK-LABEL: int main(int argc, char **argv) { +int main(int argc, char **argv) { + int b = argc, c, d, e, f, g; + static int a; +// CHECK: static int a; +#pragma omp taskgroup task_reduction(+: d) +#pragma omp master taskloop if(taskloop: a) default(none) shared(a) final(b) priority(5) num_tasks(argc) reduction(*: g) in_reduction(+:d) + // CHECK-NEXT: #pragma omp taskgroup task_reduction(+: d) + // CHECK-NEXT: #pragma omp master taskloop if(taskloop: a) default(none) shared(a) final(b) priority(5) num_tasks(argc) reduction(*: g) in_reduction(+: d) + for (int i = 0; i < 2; ++i) + a = 2; +// CHECK-NEXT: for (int i = 0; i < 2; ++i) +// CHECK-NEXT: a = 2; +#pragma omp parallel +#pragma omp master taskloop private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(2) shared(g) if(argc) mergeable priority(argc) grainsize(argc) reduction(max: a, e) + for (int i = 0; i < 10; ++i) + for (int j = 0; j < 10; ++j) + foo(); + // CHECK-NEXT: #pragma omp parallel + // CHECK-NEXT: #pragma omp master taskloop private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(2) shared(g) 
if(argc) mergeable priority(argc) grainsize(argc) reduction(max: a,e) + // CHECK-NEXT: for (int i = 0; i < 10; ++i) + // CHECK-NEXT: for (int j = 0; j < 10; ++j) + // CHECK-NEXT: foo(); + return (tmain(argc) + tmain(argv[0][0])); +} + +#endif diff --git a/clang/test/OpenMP/master_taskloop_codegen.cpp b/clang/test/OpenMP/master_taskloop_codegen.cpp new file mode 100644 index 0000000000000..9b97400f3ddd4 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_codegen.cpp @@ -0,0 +1,225 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-LABEL: @main +int main(int argc, char **argv) { +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[DEFLOC:@.+]]) +// CHECK: call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], +// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], +#pragma omp task + ; +// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// CHECK: [[THEN]] +// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 9, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK-NEXT: br label {{%?}}[[EXIT]] +// CHECK: [[EXIT]] +#pragma omp master taskloop priority(argc) + for (int i = 0; i < 10; ++i) + ; +// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: 
br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// CHECK: [[THEN]] +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 9, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64 +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null) +// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK-NEXT: br label {{%?}}[[EXIT]] +// CHECK: [[EXIT]] +#pragma omp master taskloop nogroup grainsize(argc) + for (int i = 0; i < 10; ++i) + ; +// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// CHECK: [[THEN]] +// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0 +// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32 +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 %{{.+}}, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 4, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]]) +// CHECK-NEXT: br label {{%?}}[[EXIT]] +// CHECK: [[EXIT]] + int i; +#pragma omp master taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4) + for (i = 0; i < argc; ++i) + for (int j = argc; j < argv[argc][argc]; ++j) + ; +} + +// CHECK: define internal i32 [[TASK1]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds 
[[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +// CHECK: define internal i32 [[TASK2]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +// CHECK: define internal i32 [[TASK3]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 
[[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]], +// CHECK: br label +// CHECK: ret i32 0 + +// CHECK-LABEL: @_ZN1SC2Ei +struct S { + int a; + S(int c) { +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[DEFLOC:@.+]]) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 %{{.+}}, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64 +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 [[NUM_TASKS]], i8* null) +#pragma omp master taskloop shared(c) num_tasks(a) + for (a = 0; a < c; ++a) + ; + } +} s(1); + +// CHECK: define internal i32 [[TASK4]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +#endif diff --git a/clang/test/OpenMP/master_taskloop_collapse_messages.cpp b/clang/test/OpenMP/master_taskloop_collapse_messages.cpp new file mode 100644 index 0000000000000..83e5b85c4745d --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_collapse_messages.cpp @@ -0,0 +1,110 @@ +// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized 
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized + +void foo() { +} + +#if __cplusplus >= 201103L +// expected-note@+2 4 {{declared here}} +#endif +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} + +template <class T, typename S, int N, int ST> // expected-note {{declared here}} +T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} + #pragma omp master taskloop collapse // expected-error {{expected '(' after 'collapse'}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp master taskloop collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp master taskloop collapse () // expected-error {{expected expression}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} + // expected-error@+2 2 {{expression is not an integral constant expression}} + // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} + #pragma omp master taskloop collapse (argc + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}} + #pragma omp master taskloop collapse (ST // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp master taskloop collapse (1)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp master taskloop collapse ((ST > 0) ? 
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp master taskloop', but found only 1}} + // expected-error@+6 2 {{directive '#pragma omp master taskloop' cannot contain more than one 'collapse' clause}} + // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+4 2 {{expression is not an integral constant expression}} +#if __cplusplus >= 201103L + // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif + #pragma omp master taskloop collapse (foobool(argc)), collapse (true), collapse (-5) + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp master taskloop collapse (S) // expected-error {{'S' does not refer to a value}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; +#if __cplusplus <= 199711L + // expected-error@+4 2 {{expression is not an integral constant expression}} +#else + // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} +#endif + #pragma omp master taskloop collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp master taskloop collapse (1) + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp master taskloop collapse (N) // expected-error {{argument to 'collapse' clause must be a strictly positive integer value}} + for (T i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp master taskloop collapse (2) // expected-note {{as specified in 'collapse' clause}} + foo(); // expected-error {{expected 2 for loops after '#pragma omp master taskloop'}} + return argc; +} + +int main(int argc, char **argv) { + #pragma omp master taskloop collapse // expected-error {{expected '(' after 'collapse'}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp master taskloop collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp master taskloop collapse () // expected-error {{expected expression}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp master taskloop collapse (4 // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{as specified in 'collapse' clause}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} + #pragma omp master taskloop collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} expected-note {{as specified in 'collapse' clause}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} + // expected-error@+4 {{expression is not an integral constant expression}} +#if __cplusplus >= 201103L + // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif + #pragma omp master taskloop collapse (foobool(1) > 0 ? 
1 : 2) + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + // expected-error@+6 {{expression is not an integral constant expression}} +#if __cplusplus >= 201103L + // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif + // expected-error@+2 2 {{directive '#pragma omp master taskloop' cannot contain more than one 'collapse' clause}} + // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} + #pragma omp master taskloop collapse (foobool(argc)), collapse (true), collapse (-5) + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp master taskloop collapse (S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; +#if __cplusplus <= 199711L + // expected-error@+4 {{expression is not an integral constant expression}} +#else + // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} +#endif + #pragma omp master taskloop collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + // expected-error@+3 {{statement after '#pragma omp master taskloop' must be a for loop}} + // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, -1, -2>' requested here}} + #pragma omp master taskloop collapse(collapse(tmain<int, char, -1, -2>(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}} + foo(); + #pragma omp master taskloop collapse (2) // expected-note {{as specified in 'collapse' clause}} + foo(); // expected-error {{expected 2 for loops after '#pragma omp master taskloop'}} + // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, 1, 0>' requested here}} + return tmain<int, char, 1, 0>(argc, argv); +} + diff --git a/clang/test/OpenMP/master_taskloop_final_messages.cpp b/clang/test/OpenMP/master_taskloop_final_messages.cpp new file mode 100644 index 0000000000000..caa507cf2918a --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_final_messages.cpp @@ -0,0 +1,94 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} + +template <class T, class S> // expected-note {{declared here}} +int tmain(T argc, S **argv) { + T z; +#pragma omp master taskloop final // expected-error {{expected '(' after 'final'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final() // expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc > 0 ? 
argv[1] : argv[2] + z) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(foobool(argc)), final(true) // expected-error {{directive '#pragma omp master taskloop' cannot contain more than one 'final' clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(S) // expected-error {{'S' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argv[1] = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc) + for (int i = 0; i < 10; ++i) + foo(); + + return 0; +} + +int main(int argc, char **argv) { + int z; +#pragma omp master taskloop final // expected-error {{expected '(' after 'final'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final() // expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc > 0 ? argv[1] : argv[2] - z) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(foobool(argc)), final(true) // expected-error {{directive '#pragma omp master taskloop' cannot contain more than one 'final' clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argv[1] = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop final(if (tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + + return tmain(argc, argv); +} diff --git a/clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp b/clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp new file mode 100644 index 0000000000000..6d78ea586d378 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp @@ -0,0 +1,538 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s +// 
RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics + +#ifndef ARRAY +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + S(const S &s, T t = T()) : f(s.f + t) {} + operator T() { return T(); } + ~S() {} +}; + +volatile double g; + +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } +// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } +// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] +// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type {{.*}}{ [2 x i32]*, i32, {{.*}}[2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}} +// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } +// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* } +// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } +// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } +template <typename T> +T tmain() { + S<T> ttt; + S<T> test(ttt); + T t_var __attribute__((aligned(128))) = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> var(3); +#pragma omp master taskloop firstprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global double + // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + // LAMBDA-LABEL: @main + // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( +// LAMBDA: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( +// LAMBDA-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// LAMBDA-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// LAMBDA: [[THEN]] +// LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 +// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = 
getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 +// LAMBDA: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 +// LAMBDA: [[G_VAL:%.+]] = load volatile double, double* [[G_ADDR_REF]] +// LAMBDA: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] + +// LAMBDA: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// LAMBDA: [[SIVAR_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 +// LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_ADDR_REF]] +// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] + +// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// LAMBDA: call {{.*}}void @__kmpc_end_master( +// LAMBDA-NEXT: br label {{%?}}[[EXIT]] +// LAMBDA: [[EXIT]] +// LAMBDA: ret +#pragma omp master taskloop firstprivate(g, sivar) + for (int i = 0; i < 10; ++i) { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: store double* %{{.+}}, double** %{{.+}}, + // LAMBDA: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1) + g = 1; + sivar = 11; + // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, + // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, + // LAMBDA: call void [[INNER_LAMBDA]](% + // LAMBDA: ret + [&]() { + g = 2; + sivar = 22; + }(); + } + }(); + return 0; +#elif defined(BLOCKS) + // BLOCKS: [[G:@.+]] = global double + // BLOCKS-LABEL: @main + // BLOCKS: call void {{%.+}}(i8 + ^{ + // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* + // BLOCKS: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( + // BLOCKS-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 + // BLOCKS-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] + // BLOCKS: [[THEN]] + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 + // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 + // BLOCKS: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0 + // BLOCKS: [[G_VAL:%.+]] = load volatile double, double* [[G_ADDR_REF]] + // BLOCKS: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] + + // BLOCKS: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 + // BLOCKS: [[SIVAR_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 + // BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_ADDR_REF]] + // BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] + // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 
%{{.+}}, i32 1, i32 0, i64 0, i8* null) + // BLOCKS: call {{.*}}void @__kmpc_end_master( + // BLOCKS-NEXT: br label {{%?}}[[EXIT]] + // BLOCKS: [[EXIT]] + // BLOCKS: ret +#pragma omp master taskloop firstprivate(g, sivar) + for (int i = 0; i < 10; ++i) { + // BLOCKS: define {{.+}} void {{@.+}}(i8* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store double 2.0{{.+}}, double* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS-NOT: [[ISVAR]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}* + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: ret + + // BLOCKS: store double* %{{.+}}, double** %{{.+}}, + // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}}, + // BLOCKS: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1) + g = 1; + sivar = 11; + // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: call void {{%.+}}(i8 + ^{ + g = 2; + sivar = 22; + }(); + } + }(); + return 0; +#else + S<double> ttt; + S<double> test(ttt); + int t_var = 0; + int vec[] = {1, 2}; + S<double> s_arr[] = {1, 2}; + S<double> var(3); +#pragma omp master taskloop firstprivate(var, t_var, s_arr, vec, s_arr, var, sivar) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + sivar = 33; + } + return tmain<int>(); +#endif +} + +// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0, +// CHECK: define i{{[0-9]+}} @main() +// CHECK: alloca [[S_DOUBLE_TY]], +// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]], + +// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( +// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// CHECK: [[THEN]] +// Store original variables in capture struct. +// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], +// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[T_VAR_VAL:%.+]] = load i32, i32* [[T_VAR_ADDR]], +// CHECK: store i32 [[T_VAR_VAL]], i32* [[T_VAR_REF]], +// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], +// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR]], [[S_DOUBLE_TY]]** [[VAR_REF]], +// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5 +// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* [[SIVAR]], +// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]], + +// Allocate task. 
+// Returns struct kmp_task_t { +// [[KMP_TASK_T]] task_data; +// [[KMP_TASK_MAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* + +// Fill kmp_task_t->shareds by copying from original capture argument. +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], +// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false) + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// Also copy address of private copy to the corresponding shareds reference. +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_MAIN_TY]]* + +// Constructors for s_arr and var. +// s_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3 +// CHECK: load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[S_ARR_ADDR_REF]], +// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%[^,]+]], +// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 4 +// CHECK: [[VAR_REF:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[VAR_ADDR_REF]], +// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]* {{.*}}[[VAR_REF]], + +// t_var; +// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1 +// CHECK: [[T_VAR:%.+]] = load i{{.+}}, i{{.+}}* [[T_VAR_ADDR_REF]], +// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], + +// vec; +// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( + +// sivar; +// CHECK: [[PRIVATE_SIVAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 4 +// CHECK: [[SIVAR_ADDR_REF:%.+]] = getelementptr 
inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 5 +// CHECK: [[SIVAR:%.+]] = load i{{.+}}, i{{.+}}* [[SIVAR_ADDR_REF]], +// CHECK: store i32 [[SIVAR]], i32* [[PRIVATE_SIVAR_REF]], + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) +// CHECK: call {{.*}}void @__kmpc_end_master( +// CHECK-NEXT: br label {{%?}}[[EXIT]] +// CHECK: [[EXIT]] + +// CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]() + +// No destructors must be called for private copies of s_arr and var. +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_DOUBLE_TY_DESTR:@.+]]([[S_DOUBLE_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias %0, [[S_DOUBLE_TY]]** noalias %1, i32** noalias %2, [2 x [[S_DOUBLE_TY]]]** noalias %3, [2 x i32]** noalias %4, i32** noalias %5) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, +// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], +// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4 +// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}}, +// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1) + +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, +// CHECK: 
[[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], + +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) + +// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + +// Privates actually are used. +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] +// CHECK-DAG: [[PRIV_SIVAR]] + +// CHECK: ret + +// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* %0, [[KMP_TASK_MAIN_TY]]* %1, i32 %2) +// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 +// CHECK: br i1 % + +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i32 1 +// CHECK: icmp eq [[S_DOUBLE_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call void [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call void [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// CHECK: alloca [[S_INT_TY]], +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128 +// 
CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]], + +// Store original variables in capture struct. +// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], +// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], +// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]], +// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: store [[S_INT_TY]]* [[VAR_ADDR]], [[S_INT_TY]]** [[VAR_REF]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T_TY]] task_data; +// [[KMP_TASK_TMAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* + +// Fill kmp_task_t->shareds by copying from original capture argument. +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], +// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false) + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_TMAIN_TY]]* + +// t_var; +// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1 +// CHECK: [[T_VAR_REF:%.+]] = load i{{.+}}*, i{{.+}}** [[T_VAR_ADDR_REF]], +// CHECK: [[T_VAR:%.+]] = load i{{.+}}, i{{.+}}* [[T_VAR_REF]], align 128 +// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], align 128 + +// vec; +// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0 +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( + +// Constructors for s_arr and var. 
+// a_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%[^,]+]], +// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]], + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) + +// No destructors must be called for private copies of s_arr and var. 
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias %0, i32** noalias %1, [2 x i32]** noalias %2, [2 x [[S_INT_TY]]]** noalias %3, [[S_INT_TY]]** noalias %4) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, +// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1) +// CHECK: alloca i32*, +// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + +// Privates actually are used. 
+// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] + +// CHECK: ret + +// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* %0, [[KMP_TASK_TMAIN_TY]]* %1, i32 %2) +// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 +// CHECK: br i1 % + +// CHECK: phi [[S_INT_TY]]* +// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* +// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i32 1 +// CHECK: icmp eq [[S_INT_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 +// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +#endif +#else +// ARRAY-LABEL: array_func +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &) {} + ~St() {} +}; + +void array_func(int n, float a[n], St s[2]) { +// ARRAY: call i8* @__kmpc_omp_task_alloc( +// ARRAY: call void @__kmpc_taskloop( +// ARRAY: store float** %{{.+}}, float*** %{{.+}}, +// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, +#pragma omp master taskloop firstprivate(a, s) + for (int i = 0; i < 10; ++i) + ; +} +#endif + diff --git a/clang/test/OpenMP/master_taskloop_firstprivate_messages.cpp b/clang/test/OpenMP/master_taskloop_firstprivate_messages.cpp new file mode 100644 index 0000000000000..0a4e7a88716c1 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_firstprivate_messages.cpp @@ -0,0 +1,335 @@ +// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized + +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +void 
xxx(int argc) { + int fp; // expected-note {{initialize the variable 'fp' to silence this warning}} +#pragma omp master taskloop firstprivate(fp) // expected-warning {{variable 'fp' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + +struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + +public: + S2() : a(0) {} + S2(const S2 &s2) : a(s2.a) {} + static float S2s; + static const float S2sc; +}; +const float S2::S2sc = 0; +const S2 b; +const S2 ba[5]; +class S3 { + int a; + S3 &operator=(const S3 &s3); + +public: + S3() : a(0) {} // expected-note 2 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}} + S3(S3 &s3) : a(s3.a) {} // expected-note 2 {{candidate constructor not viable: 1st argument ('const S3') would lose const qualifier}} +}; +const S3 c; +const S3 ca[5]; +extern const int f; +class S4 { + int a; + S4(); + S4(const S4 &s4); // expected-note 2 {{implicitly declared private here}} + +public: + S4(int v) : a(v) {} +}; +class S5 { + int a; + S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}} + +public: + S5() : a(0) {} + S5(int v) : a(v) {} +}; +class S6 { + int a; + S6() : a(0) {} + +public: + S6(const S6 &s6) : a(s6.a) {} + S6(int v) : a(v) {} +}; + +S3 h; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template <class I, class C> +int foomain(int argc, char **argv) { + I e(4); + C g(5); + int i, z; + int &j = i; +#pragma omp parallel +#pragma omp master taskloop firstprivate // expected-error {{expected '(' after 'firstprivate'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate() // expected-error {{expected expression}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(argc > 0 ? 
argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop allocate(omp_thread_mem_alloc: argc) firstprivate(argc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'master taskloop' directive}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(a, b) // expected-error {{firstprivate variable with incomplete type 'S1'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(z, e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(h) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel + { + int v = 0; + int i; +#pragma omp master taskloop firstprivate(i) + for (int k = 0; k < argc; ++k) { + i = k; + v += i; + } + } +#pragma omp parallel shared(i) +#pragma omp parallel private(i) +#pragma omp master taskloop firstprivate(j) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop firstprivate(i) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel private(i) +#pragma omp master taskloop firstprivate(i) // expected-note 2 {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error 2 {{loop iteration variable in the associated loop of 'omp master taskloop' directive may not be firstprivate, predetermined as private}} + foo(); +#pragma omp parallel reduction(+ : i) // expected-note {{defined as reduction}} +#pragma omp master taskloop firstprivate(i) // expected-note {{defined as firstprivate}} expected-error {{argument of a reduction clause of a parallel construct must not appear in a firstprivate clause on a task construct}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp master taskloop' directive may not be firstprivate, predetermined as private}} + foo(); + return 0; +} + +void bar(S4 a[2]) { +#pragma omp parallel +#pragma omp master taskloop firstprivate(a) + for (int i = 0; i < 2; ++i) + foo(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; + const int da[5] = {0}; + S4 e(4); + S5 g(5); + S3 m; + S6 n(2); + int i; + int &j = i; +#pragma omp parallel +#pragma omp master taskloop firstprivate // expected-error {{expected '(' after 'firstprivate'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp 
parallel +#pragma omp master taskloop firstprivate() // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(a, b, c, d, f) // expected-error {{firstprivate variable with incomplete type 'S1'}} expected-error {{no matching constructor for initialization of 'S3'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(argv[1]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(2 * 2) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(ba) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(ca) // expected-error {{no matching constructor for initialization of 'S3'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(da) // OK + for (i = 0; i < argc; ++i) + foo(); + int xa; +#pragma omp parallel +#pragma omp master taskloop firstprivate(xa) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(S2::S2s) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(S2::S2sc) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop safelen(5) // expected-error {{unexpected OpenMP clause 'safelen' in directive '#pragma omp master taskloop'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(m) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(h) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop private(xa), firstprivate(xa) // 
expected-error {{private variable cannot be firstprivate}} expected-note {{defined as private}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(i) // expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp master taskloop' directive may not be firstprivate, predetermined as private}} + foo(); +#pragma omp parallel shared(xa) +#pragma omp master taskloop firstprivate(xa) // OK: may be firstprivate + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(j) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(n) firstprivate(n) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel + { + int v = 0; + int i; +#pragma omp master taskloop firstprivate(i) + for (int k = 0; k < argc; ++k) { + i = k; + v += i; + } + } +#pragma omp parallel private(i) +#pragma omp master taskloop firstprivate(i) // expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp master taskloop' directive may not be firstprivate, predetermined as private}} + foo(); +#pragma omp parallel reduction(+ : i) // expected-note {{defined as reduction}} +#pragma omp master taskloop firstprivate(i) //expected-error {{argument of a reduction clause of a parallel construct must not appear in a firstprivate clause on a task construct}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp master taskloop firstprivate(i) //expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp master taskloop' directive may not be firstprivate, predetermined as private}} + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} + for (i = 0; i < argc; ++i) + foo(); + static int si; +#pragma omp master taskloop firstprivate(si) // OK + for (i = 0; i < argc; ++i) + si = i + 1; + + return foomain(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain' requested here}} +} + diff --git a/clang/test/OpenMP/master_taskloop_grainsize_messages.cpp b/clang/test/OpenMP/master_taskloop_grainsize_messages.cpp new file mode 100644 index 0000000000000..077dfb9976c53 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_grainsize_messages.cpp @@ -0,0 +1,103 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} + +template <class T, class S> // expected-note {{declared here}} +int tmain(T argc, S **argv) { + T z; + #pragma omp master taskloop grainsize // expected-error {{expected '(' after 'grainsize'}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize () // expected-error {{expected expression}} + for (int i = 0; i
< 10; ++i) + foo(); + #pragma omp master taskloop grainsize (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (argc)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (argc > 0 ? argv[1][0] : argv[2][argc] + z) + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (foobool(argc)), grainsize (true) // expected-error {{directive '#pragma omp master taskloop' cannot contain more than one 'grainsize' clause}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (S) // expected-error {{'S' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize(0) // expected-error {{argument to 'grainsize' clause must be a strictly positive integer value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize(-1) // expected-error {{argument to 'grainsize' clause must be a strictly positive integer value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize(argc) num_tasks(argc) // expected-error {{'num_tasks' and 'grainsize' clause are mutually exclusive and may not appear on the same directive}} expected-note {{'grainsize' clause is specified here}} + for (int i = 0; i < 10; ++i) + foo(); + + return 0; +} + +int main(int argc, char **argv) { + int z; + #pragma omp master taskloop grainsize // expected-error {{expected '(' after 'grainsize'}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize () // expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (argc)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (argc > 0 ? 
argv[1][0] : argv[2][argc] + z) + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (foobool(argc)), grainsize (true) // expected-error {{directive '#pragma omp master taskloop' cannot contain more than one 'grainsize' clause}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize (1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize(if(tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize(0) // expected-error {{argument to 'grainsize' clause must be a strictly positive integer value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize(-1) // expected-error {{argument to 'grainsize' clause must be a strictly positive integer value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop grainsize(argc) num_tasks(argc) // expected-error {{'num_tasks' and 'grainsize' clause are mutually exclusive and may not appear on the same directive}} expected-note {{'grainsize' clause is specified here}} + for (int i = 0; i < 10; ++i) + foo(); + + return tmain(argc, argv); +} diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp new file mode 100644 index 0000000000000..62c782dfe6931 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp @@ -0,0 +1,94 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* } + +struct S { + int a; + S() : a(0) {} + S(const S&) {} + S& operator=(const S&) {return *this;} + ~S() {} + friend S operator+(const S&a, const S&b) {return a;} +}; + + +int main(int argc, char **argv) { + int a; + float b; + S c[5]; + short d[argc]; +#pragma omp taskgroup task_reduction(+: a, b, argc) + { +#pragma omp taskgroup task_reduction(-:c, d) +#pragma omp parallel +#pragma omp master taskloop in_reduction(+:a) in_reduction(-:d) + for (int i = 0; i < 5; ++i) + a += d[a]; + } + return 0; +} + +// CHECK-LABEL: @main +// CHECK: void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* % +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], +// CHECK-NEXT: call void 
@__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* % +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]]) + +// CHECK: define internal void [[OMP_PARALLEL]]( +// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( +// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// CHECK: [[THEN]] +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* +// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 +// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 +// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]], +// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 +// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]], +// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: call {{.*}}void @__kmpc_end_master( +// CHECK-NEXT: br label {{%?}}[[EXIT]] +// CHECK: [[EXIT]] +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal {{.*}} [[OMP_TASK]]( +// CHECK: call void (i8*, ...) 
%{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]]) +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]], +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]], +// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* % +// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]]) +// CHECK: [[D_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]], +// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]]) +// CHECK: add nsw i32 +// CHECK: store i32 % +#endif diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_messages.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_messages.cpp new file mode 100644 index 0000000000000..b9fa587ca2ec6 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_in_reduction_messages.cpp @@ -0,0 +1,390 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized + +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +void foobar(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) +#pragma omp master taskloop in_reduction(+:ref) + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar1(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) +#pragma omp master taskloop in_reduction(-:ref) + for (int i = 0; i < 10; ++i) + foo(); +} + +#pragma omp declare reduction (red:int:omp_out += omp_in) + +void foobar2(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) // expected-note {{previously marked as task_reduction with different reduction operation}} +#pragma omp master taskloop in_reduction(red:ref) // expected-error{{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar3(int &ref) { +#pragma omp taskgroup task_reduction(red:ref) // expected-note {{previously marked as task_reduction with different reduction operation}} +#pragma omp master taskloop in_reduction(min:ref) // expected-error{{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar4(int &ref) { 
+#pragma omp master taskloop in_reduction(min:ref) // expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}} + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + static float S2s; // expected-note 2 {{static data member is predetermined as shared}} + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} +}; +const float S2::S2sc = 0; +S2 b; // expected-note 3 {{'b' defined here}} +const S2 ba[5]; // expected-note 2 {{'ba' defined here}} +class S3 { + int a; + +public: + int b; + S3() : a(0) {} + S3(const S3 &s3) : a(s3.a) {} + S3 operator+(const S3 &arg1) { return arg1; } +}; +int operator+(const S3 &arg1, const S3 &arg2) { return 5; } +S3 c; // expected-note 3 {{'c' defined here}} +const S3 ca[5]; // expected-note 2 {{'ca' defined here}} +extern const int f; // expected-note 4 {{'f' declared here}} +class S4 { + int a; + S4(); // expected-note {{implicitly declared private here}} + S4(const S4 &s4); + S4 &operator+(const S4 &arg) { return (*this); } + +public: + S4(int v) : a(v) {} +}; +S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; } +class S5 { + int a; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} + S5 &operator+(const S5 &arg); + +public: + S5(int v) : a(v) {} +}; +class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}} +#if __cplusplus >= 201103L // C++11 or later +// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}} +#endif + int a; + +public: + S6() : a(6) {} + operator int() { return 6; } +} o; + +S3 h, k; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template <class T> // expected-note {{declared here}} +T tmain(T argc) { + const T d = T(); // expected-note 4 {{'d' defined here}} + const T da[5] = {T()}; // expected-note 2 {{'da' defined here}} + T qa[5] = {T()}; + T i; + T &j = i; // expected-note 2 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const T &r = da[(int)i]; // expected-note 2 {{'r' defined here}} + T &q = qa[(int)i]; + T fl; +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp master taskloop in_reduction // expected-error {{expected '(' after 'in_reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp master taskloop in_reduction + // expected-error {{expected '(' after 'in_reduction'}} expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp master taskloop in_reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp master taskloop in_reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma
omp taskgroup task_reduction(+:argc) +#pragma omp master taskloop in_reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp master taskloop in_reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp master taskloop in_reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&:argc) // expected-error {{invalid operands to binary expression ('float' and 'float')}} +#pragma omp master taskloop in_reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(|:argc) // expected-error {{invalid operands to binary expression ('float' and 'float')}} +#pragma omp master taskloop in_reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&&:argc) +#pragma omp master taskloop in_reduction(&& : argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(^ : T) // expected-error {{'T' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:c) +#pragma omp master taskloop in_reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified variable cannot be in_reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}} expected-error 2 {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'in_reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + 
foo(); +#pragma omp master taskloop in_reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(+ : ba) // expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(* : ca) // expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(- : da) // expected-error {{const-qualified variable cannot be in_reduction}} expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:k) +#pragma omp master taskloop in_reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(+ : o) // expected-error 2 {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp master taskloop in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:p) +#pragma omp master taskloop in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'in_reduction' clause}} expected-note 2 {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(+ : r) // expected-error 2 {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp master taskloop in_reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:fl) +{ +#pragma omp master taskloop in_reduction(+ : fl) allocate(omp_thread_mem_alloc: fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'master taskloop' directive}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(*:fl) // expected-note 2 {{previously marked as task_reduction with different reduction operation}} +{ +#pragma omp master taskloop in_reduction(+ : fl) // expected-error 2 {{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} +} +#pragma omp parallel +#pragma omp for reduction(- : fl) + for (int i = 0; i < 10; ++i) +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp master taskloop in_reduction(+ : fl) + for (int j = 0; j < 10; ++j) + foo(); + + return T(); +} + +namespace A { +double x; +#pragma 
omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note 2 {{'d' defined here}} + const int da[5] = {0}; // expected-note {{'da' defined here}} + int qa[5] = {0}; + S4 e(4); + S5 g(5); + int i; + int &j = i; // expected-note {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const int &r = da[i]; // expected-note {{'r' defined here}} + int &q = qa[i]; + float fl; +#pragma omp master taskloop in_reduction // expected-error {{expected '(' after 'in_reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction + // expected-error {{expected '(' after 'in_reduction'}} expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(|:argc) +#pragma omp master taskloop in_reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(|| : argc > 0 ? 
argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(~ : argc) // expected-error {{expected unqualified-id}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&&:argc) +#pragma omp master taskloop in_reduction(&& : argc) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(^ : S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:c) +#pragma omp master taskloop in_reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified variable cannot be in_reduction}} expected-error {{'operator+' is a private member of 'S2'}} expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'in_reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(+ : ba) // expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(* : ca) // expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(- : da) // expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:k) +#pragma omp master taskloop in_reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(+ : o) // expected-error {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp master taskloop in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:p) +#pragma omp master taskloop in_reduction(+ : p), 
in_reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'in_reduction' clause}} expected-note {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop in_reduction(+ : r) // expected-error {{const-qualified variable cannot be in_reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp master taskloop in_reduction(max : j) // expected-error {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel +#pragma omp for private(fl) + for (int i = 0; i < 10; ++i) +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp master taskloop in_reduction(+ : fl) + for (int j = 0; j < 10; ++j) + foo(); +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp master taskloop in_reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); + static int m; +#pragma omp taskgroup task_reduction(+:m) +#pragma omp master taskloop in_reduction(+ : m) // OK + for (int i = 0; i < 10; ++i) + m++; + + return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} +} diff --git a/clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp b/clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp new file mode 100644 index 0000000000000..90a82e4330de6 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp @@ -0,0 +1,545 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLOOP -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LOOP %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLOOP -triple x86_64-apple-darwin10 -emit-llvm 
%s -o - | FileCheck -check-prefix=SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics + +#if !defined(ARRAY) && !defined(LOOP) +#ifndef HEADER +#define HEADER + +template +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + S(const S &s, T t = T()) : f(s.f + t) {} + operator T() { return T(); } + ~S() {} +}; + +volatile double g; + +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } +// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } +// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] +// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}}* } +// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } +// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* } +// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } +// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } +template +T tmain() { + S ttt; + S test; + T t_var __attribute__((aligned(128))) = T(); + T vec[] = {1, 2}; + S s_arr[] = {1, 2}; + S var(3); +#pragma omp master taskloop lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global double + // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + // LAMBDA-LABEL: @main + // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 + +// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) +// LAMBDA: ret +#pragma omp master taskloop lastprivate(g, sivar) + for (int i = 0; i < 10; ++i) { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: store double* %{{.+}}, double** %{{.+}}, + // LAMBDA: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1) + g = 1; + sivar = 11; + // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, + // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, + // LAMBDA: call void [[INNER_LAMBDA]](% + // LAMBDA: icmp ne i32 %{{.+}}, 0 + // LAMBDA: br i1 + // LAMBDA: load double, double* % + // LAMBDA: store volatile double % + // LAMBDA: load i32, i32* % + // LAMBDA: store i32 % + // LAMBDA: ret + [&]() { + g = 2; + sivar = 22; + }(); + } + }(); 
+ return 0; +#elif defined(BLOCKS) + // BLOCKS: [[G:@.+]] = global double + // BLOCKS-LABEL: @main + // BLOCKS: call void {{%.+}}(i8 + ^{ + // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 + // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) + // BLOCKS: ret +#pragma omp master taskloop lastprivate(g, sivar) + for (int i = 0; i < 10; ++i) { + // BLOCKS: define {{.+}} void {{@.+}}(i8* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store double 2.0{{.+}}, double* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS-NOT: [[ISVAR]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}* + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: ret + + // BLOCKS: store double* %{{.+}}, double** %{{.+}}, + // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}}, + // BLOCKS: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1) + g = 1; + sivar = 11; + // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: call void {{%.+}}(i8 + // BLOCKS: icmp ne i32 %{{.+}}, 0 + // BLOCKS: br i1 + // BLOCKS: load double, double* % + // BLOCKS: store volatile double % + // BLOCKS: load i32, i32* % + // BLOCKS: store i32 % + ^{ + g = 2; + sivar = 22; + }(); + } + }(); + return 0; +#else + S ttt; + S test; + int t_var = 0; + int vec[] = {1, 2}; + S s_arr[] = {1, 2}; + S var(3); +#pragma omp master taskloop lastprivate(var, t_var, s_arr, vec, s_arr, var, sivar) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + sivar = 33; + } + return tmain(); +#endif +} + +// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0, +// CHECK: define i{{[0-9]+}} @main() +// CHECK: alloca [[S_DOUBLE_TY]], +// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]]) + +// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( +// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// CHECK: [[THEN]] +// Store original variables in capture struct. 
+// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], +// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], +// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], +// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR]], [[S_DOUBLE_TY]]** [[VAR_REF]], +// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// CHECK: store i{{[0-9]+}}* [[SIVAR]], i{{[0-9]+}}** [[SIVAR_REF]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T]] task_data; +// [[KMP_TASK_MAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* + +// Fill kmp_task_t->shareds by copying from original capture argument. +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], +// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false) + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// Also copy address of private copy to the corresponding shareds reference. +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + +// Constructors for s_arr and var. +// s_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%[^,]+]]) +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]]) + +// t_var; +// vec; +// sivar; + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. 
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) +// CHECK: call {{.*}}void @__kmpc_end_master( +// CHECK-NEXT: br label {{%?}}[[EXIT]] +// CHECK: [[EXIT]] + +// CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]() + +// No destructors must be called for private copies of s_arr and var. +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_DOUBLE_TY_DESTR:@.+]]([[S_DOUBLE_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias %0, [[S_DOUBLE_TY]]** noalias %1, i32** noalias %2, [2 x [[S_DOUBLE_TY]]]** noalias %3, [2 x i32]** noalias %4, i32** noalias %5) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, +// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], +// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4 +// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}}, +// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1) + +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x 
[[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], + +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) + +// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + +// Privates actually are used. +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] +// CHECK-DAG: [[PRIV_SIVAR]] + +// CHECK: icmp ne i32 %{{.+}}, 0 +// CHECK-NEXT: br i1 +// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* +// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* % +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % +// CHECK: icmp eq [[S_DOUBLE_TY]]* % +// CHECK-NEXT: br i1 +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label +// CHECK: ret + +// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* %0, [[KMP_TASK_MAIN_TY]]* %1, i32 %2) +// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 8 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 +// CHECK: br label % + +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 1 +// CHECK: icmp eq [[S_DOUBLE_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* 
[[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// CHECK: alloca [[S_INT_TY]], +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128 +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) + +// Store original variables in capture struct. +// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], +// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], +// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]], +// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: store [[S_INT_TY]]* [[VAR_ADDR]], [[S_INT_TY]]** [[VAR_REF]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T_TY]] task_data; +// [[KMP_TASK_TMAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* + +// Fill kmp_task_t->shareds by copying from original capture argument. +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], +// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false) + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + +// t_var; +// vec; + +// Constructors for s_arr and var. 
+// a_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%[^,]+]]) +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]]) + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) + +// No destructors must be called for private copies of s_arr and var. +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias %0, i32** noalias %1, [2 x i32]** noalias %2, [2 x [[S_INT_TY]]]** noalias %3, [[S_INT_TY]]** noalias %4) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, +// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], +// CHECK: ret void + +// CHECK: define internal 
i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1) +// CHECK: alloca i32*, +// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + +// Privates actually are used. +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] + +// CHECK: icmp ne i32 %{{.+}}, 0 +// CHECK-NEXT: br i1 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* % +// CHECK: phi [[S_INT_TY]]* +// CHECK: phi [[S_INT_TY]]* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % +// CHECK: icmp eq [[S_INT_TY]]* % +// CHECK-NEXT: br i1 +// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* +// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} % +// CHECK: br label +// CHECK: ret + +// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* %0, [[KMP_TASK_TMAIN_TY]]* %1, i32 %2) +// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 8 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 +// CHECK: br label % + +// CHECK: phi [[S_INT_TY]]* +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 1 +// CHECK: icmp eq [[S_INT_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], 
[[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +#endif +#elif defined(ARRAY) +// ARRAY-LABEL: array_func +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &) {} + ~St() {} +}; + +void array_func(int n, float a[n], St s[2]) { +// ARRAY: call i8* @__kmpc_omp_task_alloc( +// ARRAY: call void @__kmpc_taskloop( +// ARRAY: store float** %{{.+}}, float*** %{{.+}}, +// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, +// ARRAY: icmp ne i32 %{{.+}}, 0 +// ARRAY: store float* %{{.+}}, float** %{{.+}}, +// ARRAY: store %struct.St* %{{.+}}, %struct.St** %{{.+}}, +#pragma omp master taskloop lastprivate(a, s) + for (int i = 0; i < 10; ++i) + ; +} +#else + +// LOOP-LABEL: loop +void loop() { +// LOOP: call i8* @__kmpc_omp_task_alloc( +// LOOP: call void @__kmpc_taskloop( + int i; +#pragma omp master taskloop lastprivate(i) + for (i = 0; i < 10; ++i) + ; +} +#endif + diff --git a/clang/test/OpenMP/master_taskloop_lastprivate_messages.cpp b/clang/test/OpenMP/master_taskloop_lastprivate_messages.cpp new file mode 100644 index 0000000000000..c8af6b21534f5 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_lastprivate_messages.cpp @@ -0,0 +1,299 @@ +// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized + +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + const S2 &operator =(const S2&) const; + S2 &operator =(const S2&); + static float S2s; // expected-note {{static data member is predetermined as shared}} + static const float S2sc; // expected-note {{'S2sc' declared here}} +}; +const float S2::S2sc = 0; +const S2 b; +const S2 ba[5]; +class S3 { + int a; + S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + +public: + S3() : a(0) {} + S3(S3 &s3) : a(s3.a) {} +}; +const S3 c; // expected-note {{'c' defined here}} +const S3 ca[5]; // expected-note {{'ca' defined here}} +extern const int f; // expected-note {{'f' declared here}} +class S4 { + int a; + S4(); // expected-note 3 {{implicitly declared private here}} + S4(const S4 &s4); + +public: + S4(int v) : a(v) {} +}; +class S5 { + int a; + S5() : a(0) {} // 
expected-note {{implicitly declared private here}} + +public: + S5(const S5 &s5) : a(s5.a) {} + S5(int v) : a(v) {} +}; +class S6 { + int a; + S6() : a(0) {} + +public: + S6(const S6 &s6) : a(s6.a) {} + S6(int v) : a(v) {} +}; + +S3 h; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template +int foomain(int argc, char **argv) { + I e(4); + I g(5); + int i, z; + int &j = i; +#pragma omp parallel +#pragma omp master taskloop lastprivate // expected-error {{expected '(' after 'lastprivate'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate() // expected-error {{expected expression}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(a, b) // expected-error {{lastprivate variable with incomplete type 'S1'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(z, e, g) // expected-error 2 {{calling a private constructor of class 'S4'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(h) // expected-error {{threadprivate or thread local variable cannot be lastprivate}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel + { + int v = 0; + int i; +#pragma omp master taskloop allocate(omp_thread_mem_alloc: i) lastprivate(i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'master taskloop' directive}} + for (int k = 0; k < argc; ++k) { + i = k; + v += i; + } + } +#pragma omp parallel shared(i) +#pragma omp parallel private(i) +#pragma omp master taskloop lastprivate(j) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel +#pragma omp master taskloop lastprivate(i) + for (int k = 0; k < argc; ++k) + ++k; + return 0; +} + +void bar(S4 a[2]) { +#pragma omp parallel 
+#pragma omp master taskloop lastprivate(a) + for (int i = 0; i < 2; ++i) + foo(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note {{'d' defined here}} + const int da[5] = {0}; // expected-note {{'da' defined here}} + S4 e(4); + S5 g(5); + S3 m; + S6 n(2); + int i, z; + int &j = i; +#pragma omp parallel +#pragma omp master taskloop lastprivate // expected-error {{expected '(' after 'lastprivate'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate() // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(argc, z) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(a, b, c, d, f) // expected-error {{lastprivate variable with incomplete type 'S1'}} expected-error 1 {{const-qualified variable without mutable fields cannot be lastprivate}} expected-error 2 {{const-qualified variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(argv[1]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(2 * 2) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(ba) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(ca) // expected-error {{const-qualified variable without mutable fields cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(da) // expected-error {{const-qualified variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); + int xa; +#pragma omp parallel +#pragma omp master taskloop lastprivate(xa) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(S2::S2s) // expected-error {{shared variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(S2::S2sc) // expected-error {{const-qualified variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop safelen(5) // expected-error {{unexpected OpenMP clause 'safelen' in directive '#pragma 
omp master taskloop'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(h) // expected-error {{threadprivate or thread local variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop private(xa), lastprivate(xa) // expected-error {{private variable cannot be lastprivate}} expected-note {{defined as private}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(i) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel private(xa) +#pragma omp master taskloop lastprivate(xa) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel reduction(+ : xa) +#pragma omp master taskloop lastprivate(xa) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(j) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel +#pragma omp master taskloop lastprivate(n) firstprivate(n) // OK + for (i = 0; i < argc; ++i) + foo(); + static int si; +#pragma omp master taskloop lastprivate(si) // OK + for (i = 0; i < argc; ++i) + si = i + 1; + return foomain(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain' requested here}} +} diff --git a/clang/test/OpenMP/master_taskloop_loop_messages.cpp b/clang/test/OpenMP/master_taskloop_loop_messages.cpp new file mode 100644 index 0000000000000..9b50439705c42 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_loop_messages.cpp @@ -0,0 +1,741 @@ +// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify=expected,omp4 %s -Wuninitialized +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify=expected,omp5 %s -Wuninitialized + +// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify=expected,omp4 %s -Wuninitialized +// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify=expected,omp5 %s -Wuninitialized + +class S { + int a; + S() : a(0) {} + +public: + S(int v) : a(v) {} + S(const S &s) : a(s.a) {} +}; + +static int sii; +// expected-note@+1 {{defined as threadprivate or thread local}} +#pragma omp threadprivate(sii) +static int globalii; + +// Currently, we cannot use "0" for global register variables. 
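// The iteration-space diagnostics exercised in test_iteration_spaces() below all
// follow from OpenMP's canonical loop form: the associated for-loop needs an init
// of the form 'var = lb' (or 'T var = lb'), a relational test of 'var' against a
// bound ('!=' additionally allowed under OpenMP 5.0), and an increment that adds
// or subtracts a loop-invariant integer step. A minimal accepted form, mirroring
// the first valid loop in the test (the helper name below is illustrative):
static void canonical_form_sketch(const float *a, const float *b, float *c) {
#pragma omp parallel
#pragma omp master taskloop
  for (int i = 0; i < 10; i += 1) // init / relational condition / constant additive step
    c[i] = a[i] + b[i];
}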
+// register int reg0 __asm__("0"); +int reg0; + +int test_iteration_spaces() { + const int N = 100; + float a[N], b[N], c[N]; + int ii, jj, kk; + float fii; + double dii; + register int reg; // expected-warning {{'register' storage class specifier is deprecated}} +#pragma omp parallel +#pragma omp master taskloop + for (int i = 0; i < 10; i += 1) { + c[i] = a[i] + b[i]; + } +#pragma omp parallel +#pragma omp master taskloop + for (char i = 0; i < 10; i++) { + c[i] = a[i] + b[i]; + } +#pragma omp parallel +#pragma omp master taskloop + for (char i = 0; i < 10; i += '\1') { + c[i] = a[i] + b[i]; + } +#pragma omp parallel +#pragma omp master taskloop + for (long long i = 0; i < 10; i++) { + c[i] = a[i] + b[i]; + } +#pragma omp parallel +// expected-error@+2 {{expression must have integral or unscoped enumeration type, not 'double'}} +#pragma omp master taskloop + for (long long i = 0; i < 10; i += 1.5) { + c[i] = a[i] + b[i]; + } +#pragma omp parallel +#pragma omp master taskloop + for (long long i = 0; i < 'z'; i += 1u) { + c[i] = a[i] + b[i]; + } +#pragma omp parallel +// expected-error@+2 {{variable must be of integer or random access iterator type}} +#pragma omp master taskloop + for (float fi = 0; fi < 10.0; fi++) { + c[(int)fi] = a[(int)fi] + b[(int)fi]; + } +#pragma omp parallel +// expected-error@+2 {{variable must be of integer or random access iterator type}} +#pragma omp master taskloop + for (double fi = 0; fi < 10.0; fi++) { + c[(int)fi] = a[(int)fi] + b[(int)fi]; + } +#pragma omp parallel +// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (int &ref = ii; ref < 10; ref++) { + } +#pragma omp parallel +// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (int i; i < 10; i++) + c[i] = a[i]; + +#pragma omp parallel +// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (int i = 0, j = 0; i < 10; ++i) + c[i] = a[i]; + +#pragma omp parallel +// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (; ii < 10; ++ii) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-warning@+3 {{expression result unused}} +// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (ii + 1; ii < 10; ++ii) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (c[ii] = 0; ii < 10; ++ii) + c[ii] = a[ii]; + +#pragma omp parallel +// Ok to skip parenthesises. 
+#pragma omp master taskloop + for (((ii)) = 0; ii < 10; ++ii) + c[ii] = a[ii]; + +#pragma omp parallel +// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} +#pragma omp master taskloop + for (int i = 0; i; i++) + c[i] = a[i]; + +#pragma omp parallel +// omp4-error@+3 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} omp5-error@+3 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}} +#pragma omp master taskloop + for (int i = 0; jj < kk; ii++) + c[i] = a[i]; + +#pragma omp parallel +// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} +#pragma omp master taskloop + for (int i = 0; !!i; i++) + c[i] = a[i]; + +// Ok +#pragma omp parallel +// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} +#pragma omp master taskloop + for (int i = 0; i != 1; i++) + c[i] = a[i]; + +#pragma omp parallel +// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} +#pragma omp master taskloop + for (int i = 0;; i++) + c[i] = a[i]; + +#pragma omp parallel +// Ok. +#pragma omp master taskloop + for (int i = 11; i > 10; i--) + c[i] = a[i]; + +#pragma omp parallel +// Ok. +#pragma omp master taskloop + for (int i = 0; i < 10; ++i) + c[i] = a[i]; + +#pragma omp parallel +// Ok. +#pragma omp master taskloop + for (ii = 0; ii < 10; ++ii) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}} +#pragma omp master taskloop + for (ii = 0; ii < 10; ++jj) + c[ii] = a[jj]; + +#pragma omp parallel +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}} +#pragma omp master taskloop + for (ii = 0; ii < 10; ++++ii) + c[ii] = a[ii]; + +#pragma omp parallel +// Ok but undefined behavior (in general, cannot check that incr +// is really loop-invariant). +#pragma omp master taskloop + for (ii = 0; ii < 10; ii = ii + ii) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-error@+2 {{expression must have integral or unscoped enumeration type, not 'float'}} +#pragma omp master taskloop + for (ii = 0; ii < 10; ii = ii + 1.0f) + c[ii] = a[ii]; + +#pragma omp parallel +// Ok - step was converted to integer type. 
+#pragma omp master taskloop + for (ii = 0; ii < 10; ii = ii + (int)1.1f) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}} +#pragma omp master taskloop + for (ii = 0; ii < 10; jj = ii + 2) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-warning@+3 {{relational comparison result unused}} +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}} +#pragma omp master taskloop + for (ii = 0; ii<10; jj> kk + 2) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}} +#pragma omp master taskloop + for (ii = 0; ii < 10;) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-warning@+3 {{expression result unused}} +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}} +#pragma omp master taskloop + for (ii = 0; ii < 10; !ii) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}} +#pragma omp master taskloop + for (ii = 0; ii < 10; ii ? ++ii : ++jj) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}} +#pragma omp master taskloop + for (ii = 0; ii < 10; ii = ii < 10) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (ii = 0; ii < 10; ii = ii + 0) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (ii = 0; ii < 10; ii = ii + (int)(0.8 - 0.45)) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (ii = 0; (ii) < 10; ii -= 25) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (ii = 0; (ii < 10); ii -= 0) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be negative due to this condition}} +// expected-error@+2 {{increment expression must cause 'ii' to decrease on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (ii = 0; ii > 10; (ii += 0)) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (ii = 0; ii < 10; (ii) = (1 - 1) + (ii)) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be negative due to this condition}} +// 
expected-error@+2 {{increment expression must cause 'ii' to decrease on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for ((ii = 0); ii > 10; (ii -= 0)) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (ii = 0; (ii < 10); (ii -= 0)) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-note@+2 {{defined as firstprivate}} +// expected-error@+2 {{loop iteration variable in the associated loop of 'omp master taskloop' directive may not be firstprivate, predetermined as private}} +#pragma omp master taskloop firstprivate(ii) + for (ii = 0; ii < 10; ii++) + c[ii] = a[ii]; + +#pragma omp parallel +// expected-error@+1 {{unexpected OpenMP clause 'linear' in directive '#pragma omp master taskloop'}} +#pragma omp master taskloop linear(ii) + for (ii = 0; ii < 10; ii++) + c[ii] = a[ii]; + +#pragma omp parallel +#pragma omp master taskloop private(ii) + for (ii = 0; ii < 10; ii++) + c[ii] = a[ii]; + +#pragma omp parallel +#pragma omp master taskloop lastprivate(ii) + for (ii = 0; ii < 10; ii++) + c[ii] = a[ii]; + +#pragma omp parallel + { +// expected-error@+2 {{loop iteration variable in the associated loop of 'omp master taskloop' directive may not be threadprivate or thread local, predetermined as private}} +#pragma omp master taskloop + for (sii = 0; sii < 10; sii += 1) + c[sii] = a[sii]; + } + +#pragma omp parallel + { +#pragma omp master taskloop + for (reg0 = 0; reg0 < 10; reg0 += 1) + c[reg0] = a[reg0]; + } + +#pragma omp parallel + { +#pragma omp master taskloop + for (reg = 0; reg < 10; reg += 1) + c[reg] = a[reg]; + } + +#pragma omp parallel + { +#pragma omp master taskloop + for (globalii = 0; globalii < 10; globalii += 1) + c[globalii] = a[globalii]; + } + +#pragma omp parallel + { +#pragma omp master taskloop collapse(2) + for (ii = 0; ii < 10; ii += 1) + for (globalii = 0; globalii < 10; globalii += 1) + c[globalii] += a[globalii] + ii; + } + +#pragma omp parallel +// omp4-error@+2 {{statement after '#pragma omp master taskloop' must be a for loop}} +#pragma omp master taskloop + for (auto &item : a) { + item = item + 1; + } + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'i' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (unsigned i = 9; i < 10; i--) { + c[i] = a[i] + b[i]; + } + + int(*lb)[4] = nullptr; +#pragma omp parallel +#pragma omp master taskloop + for (int(*p)[4] = lb; p < lb + 8; ++p) { + } + +#pragma omp parallel +// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (int a{0}; a < 10; ++a) { + } + + return 0; +} + +// Iterators allowed in openmp for-loops. 
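// What "iterators allowed in openmp for-loops" means in practice, judging from the
// GoodIter/Iter0/Iter1 cases below: the loop variable has to behave like a random
// access iterator. A condensed, standalone sketch of the operations these tests
// exercise (not meant to be folded into the test file, which defines its own minimal
// std shims below; the class and function names here are illustrative):
#include <iterator> // std::random_access_iterator_tag

class RandomAccessLike {
public:
  // Traits consulted through std::iterator_traits when computing the trip count.
  typedef int difference_type;
  typedef int value_type;
  typedef int *pointer;
  typedef int &reference;
  typedef std::random_access_iterator_tag iterator_category;

  RandomAccessLike() {}
  RandomAccessLike(const RandomAccessLike &) {}
  RandomAccessLike &operator++() { return *this; }        // used by '++I'
  bool operator<(RandomAccessLike) const { return true; } // relational loop condition
};
// A distance between the bounds and an advance-by-int are also needed (the Iter0
// case below is diagnosed precisely because it cannot be advanced by an int).
inline int operator-(RandomAccessLike, RandomAccessLike) { return 0; }
inline RandomAccessLike operator+(RandomAccessLike a, int) { return a; }

inline void iterate(RandomAccessLike begin, RandomAccessLike end) {
#pragma omp parallel
#pragma omp master taskloop
  for (RandomAccessLike I = begin; I < end; ++I)
    ;
}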
+namespace std { +struct random_access_iterator_tag {}; +template +struct iterator_traits { + typedef typename Iter::difference_type difference_type; + typedef typename Iter::iterator_category iterator_category; +}; +template +typename iterator_traits::difference_type +distance(Iter first, Iter last) { return first - last; } +} +class Iter0 { +public: + Iter0() {} + Iter0(const Iter0 &) {} + Iter0 operator++() { return *this; } + Iter0 operator--() { return *this; } + bool operator<(Iter0 a) { return true; } +}; +// expected-note@+2 {{candidate function not viable: no known conversion from 'GoodIter' to 'Iter0' for 1st argument}} +// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter1' to 'Iter0' for 1st argument}} +int operator-(Iter0 a, Iter0 b) { return 0; } +class Iter1 { +public: + Iter1(float f = 0.0f, double d = 0.0) {} + Iter1(const Iter1 &) {} + Iter1 operator++() { return *this; } + Iter1 operator--() { return *this; } + bool operator<(Iter1 a) { return true; } + bool operator>=(Iter1 a) { return false; } +}; +class GoodIter { +public: + GoodIter() {} + GoodIter(const GoodIter &) {} + GoodIter(int fst, int snd) {} + GoodIter &operator=(const GoodIter &that) { return *this; } + GoodIter &operator=(const Iter0 &that) { return *this; } + GoodIter &operator+=(int x) { return *this; } + GoodIter &operator-=(int x) { return *this; } + explicit GoodIter(void *) {} + GoodIter operator++() { return *this; } + GoodIter operator--() { return *this; } + bool operator!() { return true; } + bool operator<(GoodIter a) { return true; } + bool operator<=(GoodIter a) { return true; } + bool operator>=(GoodIter a) { return false; } + typedef int difference_type; + typedef std::random_access_iterator_tag iterator_category; +}; +// expected-note@+2 {{candidate function not viable: no known conversion from 'const Iter0' to 'GoodIter' for 2nd argument}} +// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter1' to 'GoodIter' for 1st argument}} +int operator-(GoodIter a, GoodIter b) { return 0; } +// expected-note@+1 3 {{candidate function not viable: requires single argument 'a', but 2 arguments were provided}} +GoodIter operator-(GoodIter a) { return a; } +// expected-note@+2 {{candidate function not viable: no known conversion from 'const Iter0' to 'int' for 2nd argument}} +// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter1' to 'GoodIter' for 1st argument}} +GoodIter operator-(GoodIter a, int v) { return GoodIter(); } +// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter0' to 'GoodIter' for 1st argument}} +GoodIter operator+(GoodIter a, int v) { return GoodIter(); } +// expected-note@+2 {{candidate function not viable: no known conversion from 'GoodIter' to 'int' for 1st argument}} +// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter1' to 'int' for 1st argument}} +GoodIter operator-(int v, GoodIter a) { return GoodIter(); } +// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter0' to 'int' for 1st argument}} +GoodIter operator+(int v, GoodIter a) { return GoodIter(); } + +int test_with_random_access_iterator() { + GoodIter begin, end; + Iter0 begin0, end0; +#pragma omp parallel +#pragma omp master taskloop + for (GoodIter I = begin; I < end; ++I) + ++I; +#pragma omp parallel +// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma 
omp master taskloop + for (GoodIter &I = begin; I < end; ++I) + ++I; +#pragma omp parallel +#pragma omp master taskloop + for (GoodIter I = begin; I >= end; --I) + ++I; +#pragma omp parallel +// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (GoodIter I(begin); I < end; ++I) + ++I; +#pragma omp parallel +// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (GoodIter I(nullptr); I < end; ++I) + ++I; +#pragma omp parallel +// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (GoodIter I(0); I < end; ++I) + ++I; +#pragma omp parallel +// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (GoodIter I(1, 2); I < end; ++I) + ++I; +#pragma omp parallel +#pragma omp master taskloop + for (begin = GoodIter(0); begin < end; ++begin) + ++begin; +// expected-error@+4 {{invalid operands to binary expression ('GoodIter' and 'const Iter0')}} +// expected-error@+3 {{could not calculate number of iterations calling 'operator-' with upper and lower loop bounds}} +#pragma omp parallel +#pragma omp master taskloop + for (begin = begin0; begin < end; ++begin) + ++begin; +#pragma omp parallel +// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (++begin; begin < end; ++begin) + ++begin; +#pragma omp parallel +#pragma omp master taskloop + for (begin = end; begin < end; ++begin) + ++begin; +#pragma omp parallel +// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'I'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'I'}} +#pragma omp master taskloop + for (GoodIter I = begin; I - I; ++I) + ++I; +#pragma omp parallel +// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'I'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'I'}} +#pragma omp master taskloop + for (GoodIter I = begin; begin < end; ++I) + ++I; +#pragma omp parallel +// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'I'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'I'}} +#pragma omp master taskloop + for (GoodIter I = begin; !I; ++I) + ++I; +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be negative due to this condition}} +// expected-error@+2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (GoodIter I = begin; I >= end; I = I + 1) + ++I; +#pragma omp parallel +#pragma omp master taskloop + for (GoodIter I = begin; I >= end; I = I - 1) + ++I; +#pragma omp parallel +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'I'}} +#pragma omp master taskloop + for (GoodIter I = begin; I >= end; I = -I) + ++I; +#pragma 
omp parallel +// expected-note@+3 {{loop step is expected to be negative due to this condition}} +// expected-error@+2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (GoodIter I = begin; I >= end; I = 2 + I) + ++I; +#pragma omp parallel +// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'I'}} +#pragma omp master taskloop + for (GoodIter I = begin; I >= end; I = 2 - I) + ++I; +// In the following example, we cannot update the loop variable using '+=' +// expected-error@+3 {{invalid operands to binary expression ('Iter0' and 'int')}} +#pragma omp parallel +#pragma omp master taskloop + for (Iter0 I = begin0; I < end0; ++I) + ++I; +#pragma omp parallel +// Initializer is constructor without params. +// expected-error@+3 {{invalid operands to binary expression ('Iter0' and 'int')}} +// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (Iter0 I; I < end0; ++I) + ++I; + Iter1 begin1, end1; +// expected-error@+4 {{invalid operands to binary expression ('Iter1' and 'Iter1')}} +// expected-error@+3 {{could not calculate number of iterations calling 'operator-' with upper and lower loop bounds}} +#pragma omp parallel +#pragma omp master taskloop + for (Iter1 I = begin1; I < end1; ++I) + ++I; +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be negative due to this condition}} +// expected-error@+2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (Iter1 I = begin1; I >= end1; ++I) + ++I; +#pragma omp parallel +// expected-error@+5 {{invalid operands to binary expression ('Iter1' and 'float')}} +// expected-error@+4 {{could not calculate number of iterations calling 'operator-' with upper and lower loop bounds}} +// Initializer is constructor with all default params. 
+// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}} +#pragma omp master taskloop + for (Iter1 I; I < end1; ++I) { + } + return 0; +} + +template +class TC { +public: + int dotest_lt(IT begin, IT end) { +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'I' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (IT I = begin; I < end; I = I + ST) { + ++I; + } +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be positive due to this condition}} +// expected-error@+2 {{increment expression must cause 'I' to increase on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (IT I = begin; I <= end; I += ST) { + ++I; + } +#pragma omp parallel +#pragma omp master taskloop + for (IT I = begin; I < end; ++I) { + ++I; + } + } + + static IT step() { + return IT(ST); + } +}; +template +int dotest_gt(IT begin, IT end) { +#pragma omp parallel +// expected-note@+3 2 {{loop step is expected to be negative due to this condition}} +// expected-error@+2 2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (IT I = begin; I >= end; I = I + ST) { + ++I; + } +#pragma omp parallel +// expected-note@+3 2 {{loop step is expected to be negative due to this condition}} +// expected-error@+2 2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (IT I = begin; I >= end; I += ST) { + ++I; + } + +#pragma omp parallel +// expected-note@+3 {{loop step is expected to be negative due to this condition}} +// expected-error@+2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}} +#pragma omp master taskloop + for (IT I = begin; I >= end; ++I) { + ++I; + } + +#pragma omp parallel +#pragma omp master taskloop + for (IT I = begin; I < end; I += TC::step()) { + ++I; + } +} + +void test_with_template() { + GoodIter begin, end; + TC t1; + TC t2; + t1.dotest_lt(begin, end); + t2.dotest_lt(begin, end); // expected-note {{in instantiation of member function 'TC::dotest_lt' requested here}} + dotest_gt(begin, end); // expected-note {{in instantiation of function template specialization 'dotest_gt' requested here}} + dotest_gt(0, 100); // expected-note {{in instantiation of function template specialization 'dotest_gt' requested here}} +} + +void test_loop_break() { + const int N = 100; + float a[N], b[N], c[N]; +#pragma omp parallel +#pragma omp master taskloop + for (int i = 0; i < 10; i++) { + c[i] = a[i] + b[i]; + for (int j = 0; j < 10; ++j) { + if (a[i] > b[j]) + break; // OK in nested loop + } + switch (i) { + case 1: + b[i]++; + break; + default: + break; + } + if (c[i] > 10) + break; // expected-error {{'break' statement cannot be used in OpenMP for loop}} + + if (c[i] > 11) + break; // expected-error {{'break' statement cannot be used in OpenMP for loop}} + } + +#pragma omp parallel +#pragma omp master taskloop + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + c[i] = a[i] + b[i]; + if (c[i] > 10) { + if (c[i] < 20) { + break; // OK + } + } + } + } +} + +void test_loop_eh() { + const int N = 100; + float a[N], b[N], c[N]; +#pragma omp parallel +#pragma omp master taskloop + for (int i = 0; i < 10; i++) { + c[i] = a[i] + b[i]; + try { + for (int j = 0; j < 10; ++j) { + if (a[i] > b[j]) 
+ throw a[i]; + } + throw a[i]; + } catch (float f) { + if (f > 0.1) + throw a[i]; + return; // expected-error {{cannot return from OpenMP region}} + } + switch (i) { + case 1: + b[i]++; + break; + default: + break; + } + for (int j = 0; j < 10; j++) { + if (c[i] > 10) + throw c[i]; + } + } + if (c[9] > 10) + throw c[9]; // OK + +#pragma omp parallel +#pragma omp master taskloop + for (int i = 0; i < 10; ++i) { + struct S { + void g() { throw 0; } + }; + } +} + +void test_loop_firstprivate_lastprivate() { + S s(4); +#pragma omp parallel +#pragma omp master taskloop lastprivate(s) firstprivate(s) + for (int i = 0; i < 16; ++i) + ; +} + diff --git a/clang/test/OpenMP/master_taskloop_misc_messages.c b/clang/test/OpenMP/master_taskloop_misc_messages.c new file mode 100644 index 0000000000000..17f2831b12270 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_misc_messages.c @@ -0,0 +1,382 @@ +// RUN: %clang_cc1 -fsyntax-only -fopenmp -triple x86_64-unknown-unknown -verify %s -Wuninitialized + +// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -triple x86_64-unknown-unknown -verify %s -Wuninitialized + +void xxx(int argc) { + int x; // expected-note {{initialize the variable 'x' to silence this warning}} +#pragma omp master taskloop + for (int i = 0; i < 10; ++i) + argc = x; // expected-warning {{variable 'x' is uninitialized when used here}} +} + +// expected-error@+1 {{unexpected OpenMP directive '#pragma omp master taskloop'}} +#pragma omp master taskloop + +// expected-error@+1 {{unexpected OpenMP directive '#pragma omp master taskloop'}} +#pragma omp master taskloop foo + +void test_no_clause() { + int i; +#pragma omp master taskloop + for (i = 0; i < 16; ++i) + ; + +// expected-error@+2 {{statement after '#pragma omp master taskloop' must be a for loop}} +#pragma omp master taskloop + ++i; +} + +void test_branch_protected_scope() { + int i = 0; +L1: + ++i; + + int x[24]; + +#pragma omp parallel +#pragma omp master taskloop + for (i = 0; i < 16; ++i) { + if (i == 5) + goto L1; // expected-error {{use of undeclared label 'L1'}} + else if (i == 6) + return; // expected-error {{cannot return from OpenMP region}} + else if (i == 7) + goto L2; + else if (i == 8) { + L2: + x[i]++; + } + } + + if (x[0] == 0) + goto L2; // expected-error {{use of undeclared label 'L2'}} + else if (x[1] == 1) + goto L1; +} + +void test_invalid_clause() { + int i; +#pragma omp parallel +// expected-warning@+1 {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} +#pragma omp master taskloop foo bar + for (i = 0; i < 16; ++i) + ; +// expected-error@+1 {{directive '#pragma omp master taskloop' cannot contain more than one 'nogroup' clause}} +#pragma omp master taskloop nogroup nogroup + for (i = 0; i < 16; ++i) + ; +} + +void test_non_identifiers() { + int i, x; + +#pragma omp parallel +// expected-warning@+1 {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} +#pragma omp master taskloop; + for (i = 0; i < 16; ++i) + ; +// expected-warning@+3 {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} +// expected-error@+2 {{unexpected OpenMP clause 'linear' in directive '#pragma omp master taskloop'}} +#pragma omp parallel +#pragma omp master taskloop linear(x); + for (i = 0; i < 16; ++i) + ; + +#pragma omp parallel +// expected-warning@+1 {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} +#pragma omp master taskloop private(x); + for (i = 0; i < 16; ++i) + ; + +#pragma omp parallel +// expected-warning@+1 {{extra tokens at the end of '#pragma 
omp master taskloop' are ignored}} +#pragma omp master taskloop, private(x); + for (i = 0; i < 16; ++i) + ; +} + +extern int foo(); + +void test_collapse() { + int i; +#pragma omp parallel +// expected-error@+1 {{expected '('}} +#pragma omp master taskloop collapse + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} +#pragma omp master taskloop collapse( + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop collapse() + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} +#pragma omp master taskloop collapse(, + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} +#pragma omp master taskloop collapse(, ) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-warning@+2 {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} +// expected-error@+1 {{expected '('}} +#pragma omp master taskloop collapse 4) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}} +#pragma omp master taskloop collapse(4 + for (i = 0; i < 16; ++i) + ; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} +#pragma omp parallel +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}} +#pragma omp master taskloop collapse(4, + for (i = 0; i < 16; ++i) + ; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} +#pragma omp parallel +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}} +#pragma omp master taskloop collapse(4, ) + for (i = 0; i < 16; ++i) + ; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} +#pragma omp parallel +// expected-note@+1 {{as specified in 'collapse' clause}} +#pragma omp master taskloop collapse(4) + for (i = 0; i < 16; ++i) + ; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} +#pragma omp parallel +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}} +#pragma omp master taskloop collapse(4 4) + for (i = 0; i < 16; ++i) + ; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} +#pragma omp parallel +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}} +#pragma omp master taskloop collapse(4, , 4) + for (i = 0; i < 16; ++i) + ; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} +#pragma omp parallel +#pragma omp master taskloop collapse(4) + for (int i1 = 0; i1 < 16; ++i1) + for (int i2 = 0; i2 < 16; ++i2) + for (int i3 = 0; i3 < 16; ++i3) + for (int i4 = 0; i4 < 16; ++i4) + foo(); +#pragma omp parallel +// expected-error@+2 {{expected ')'}} +// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 
'collapse' clause}} +#pragma omp master taskloop collapse(4, 8) + for (i = 0; i < 16; ++i) + ; // expected-error {{expected 4 for loops after '#pragma omp master taskloop', but found only 1}} +#pragma omp parallel +// expected-error@+1 {{expression is not an integer constant expression}} +#pragma omp master taskloop collapse(2.5) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expression is not an integer constant expression}} +#pragma omp master taskloop collapse(foo()) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} +#pragma omp master taskloop collapse(-5) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} +#pragma omp master taskloop collapse(0) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}} +#pragma omp master taskloop collapse(5 - 5) + for (i = 0; i < 16; ++i) + ; +} + +void test_private() { + int i; +#pragma omp parallel +// expected-error@+2 {{expected expression}} +// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} +#pragma omp master taskloop private( + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 2 {{expected expression}} +#pragma omp master taskloop private(, + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 2 {{expected expression}} +#pragma omp master taskloop private(, ) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop private() + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop private(int) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected variable name}} +#pragma omp master taskloop private(0) + for (i = 0; i < 16; ++i) + ; + + int x, y, z; +#pragma omp parallel +#pragma omp master taskloop private(x) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +#pragma omp master taskloop private(x, y) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +#pragma omp master taskloop private(x, y, z) + for (i = 0; i < 16; ++i) { + x = y * i + z; + } +} + +void test_lastprivate() { + int i; +#pragma omp parallel +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop lastprivate( + for (i = 0; i < 16; ++i) + ; + +#pragma omp parallel +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 2 {{expected expression}} +#pragma omp master taskloop lastprivate(, + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 2 {{expected expression}} +#pragma omp master taskloop lastprivate(, ) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop lastprivate() + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop lastprivate(int) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected variable name}} +#pragma omp master taskloop lastprivate(0) + for (i = 0; i < 16; ++i) + ; + + int x, y, z; 
+#pragma omp parallel +#pragma omp master taskloop lastprivate(x) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +#pragma omp master taskloop lastprivate(x, y) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +#pragma omp master taskloop lastprivate(x, y, z) + for (i = 0; i < 16; ++i) + ; +} + +void test_firstprivate() { + int i; +#pragma omp parallel +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop firstprivate( + for (i = 0; i < 16; ++i) + ; + +#pragma omp parallel +// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}} +// expected-error@+1 2 {{expected expression}} +#pragma omp master taskloop firstprivate(, + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 2 {{expected expression}} +#pragma omp master taskloop firstprivate(, ) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop firstprivate() + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected expression}} +#pragma omp master taskloop firstprivate(int) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +// expected-error@+1 {{expected variable name}} +#pragma omp master taskloop firstprivate(0) + for (i = 0; i < 16; ++i) + ; + + int x, y, z; +#pragma omp parallel +#pragma omp master taskloop lastprivate(x) firstprivate(x) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +#pragma omp master taskloop lastprivate(x, y) firstprivate(x, y) + for (i = 0; i < 16; ++i) + ; +#pragma omp parallel +#pragma omp master taskloop lastprivate(x, y, z) firstprivate(x, y, z) + for (i = 0; i < 16; ++i) + ; +} + +void test_loop_messages() { + float a[100], b[100], c[100]; +#pragma omp parallel +// expected-error@+2 {{variable must be of integer or pointer type}} +#pragma omp master taskloop + for (float fi = 0; fi < 10.0; fi++) { + c[(int)fi] = a[(int)fi] + b[(int)fi]; + } +#pragma omp parallel +// expected-error@+2 {{variable must be of integer or pointer type}} +#pragma omp master taskloop + for (double fi = 0; fi < 10.0; fi++) { + c[(int)fi] = a[(int)fi] + b[(int)fi]; + } + + // expected-warning@+2 {{OpenMP loop iteration variable cannot have more than 64 bits size and will be narrowed}} + #pragma omp master taskloop + for (__int128 ii = 0; ii < 10; ii++) { + c[ii] = a[ii] + b[ii]; + } +} + diff --git a/clang/test/OpenMP/master_taskloop_num_tasks_messages.cpp b/clang/test/OpenMP/master_taskloop_num_tasks_messages.cpp new file mode 100644 index 0000000000000..0675fc6632911 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_num_tasks_messages.cpp @@ -0,0 +1,103 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} + +template // expected-note {{declared here}} +int tmain(T argc, S **argv) { + T z; + #pragma omp master taskloop num_tasks // expected-error {{expected '(' after 'num_tasks'}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks () // expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master 
taskloop num_tasks (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (argc)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (argc > 0 ? argv[1][0] : argv[2][argc] + z) + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (foobool(argc)), num_tasks (true) // expected-error {{directive '#pragma omp master taskloop' cannot contain more than one 'num_tasks' clause}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (S) // expected-error {{'S' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks(0) // expected-error {{argument to 'num_tasks' clause must be a strictly positive integer value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks(-1) // expected-error {{argument to 'num_tasks' clause must be a strictly positive integer value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks(argc) grainsize(argc) // expected-error {{'grainsize' and 'num_tasks' clause are mutually exclusive and may not appear on the same directive}} expected-note {{'num_tasks' clause is specified here}} + for (int i = 0; i < 10; ++i) + foo(); + + return 0; +} + +int main(int argc, char **argv) { + int z; + #pragma omp master taskloop num_tasks // expected-error {{expected '(' after 'num_tasks'}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks () // expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (argc)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (argc > 0 ? 
argv[1][0] : argv[2][argc] - z) + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (foobool(argc)), num_tasks (true) // expected-error {{directive '#pragma omp master taskloop' cannot contain more than one 'num_tasks' clause}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks (1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks(if(tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks(0) // expected-error {{argument to 'num_tasks' clause must be a strictly positive integer value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks(-1) // expected-error {{argument to 'num_tasks' clause must be a strictly positive integer value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop num_tasks(argc) grainsize(argc) // expected-error {{'grainsize' and 'num_tasks' clause are mutually exclusive and may not appear on the same directive}} expected-note {{'num_tasks' clause is specified here}} + for (int i = 0; i < 10; ++i) + foo(); + + return tmain(argc, argv); +} diff --git a/clang/test/OpenMP/master_taskloop_priority_messages.cpp b/clang/test/OpenMP/master_taskloop_priority_messages.cpp new file mode 100644 index 0000000000000..63c3d5c2f32d2 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_priority_messages.cpp @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} + +template // expected-note {{declared here}} +int tmain(T argc, S **argv) { + T z; + #pragma omp master taskloop priority // expected-error {{expected '(' after 'priority'}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority () // expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (argc)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (argc > 0 ? 
argv[1][0] : argv[2][argc] + z) + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (foobool(argc)), priority (true) // expected-error {{directive '#pragma omp master taskloop' cannot contain more than one 'priority' clause}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (S) // expected-error {{'S' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority(0) + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority(-1) // expected-error {{argument to 'priority' clause must be a non-negative integer value}} + for (int i = 0; i < 10; ++i) + foo(); + + return 0; +} + +int main(int argc, char **argv) { + int z; + #pragma omp master taskloop priority // expected-error {{expected '(' after 'priority'}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority () // expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (argc)) // expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (argc > 0 ? argv[1][0] : argv[2][argc] - z) + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (foobool(argc)), priority (true) // expected-error {{directive '#pragma omp master taskloop' cannot contain more than one 'priority' clause}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority (1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority(if(tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority(0) + for (int i = 0; i < 10; ++i) + foo(); + #pragma omp master taskloop priority(-1) // expected-error {{argument to 'priority' clause must be a non-negative integer value}} + for (int i = 0; i < 10; ++i) + foo(); + + return tmain(argc, argv); +} diff --git a/clang/test/OpenMP/master_taskloop_private_codegen.cpp b/clang/test/OpenMP/master_taskloop_private_codegen.cpp new file mode 100644 index 0000000000000..5d3ad2a0ed94f --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_private_codegen.cpp @@ -0,0 +1,432 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t 
-verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics + +#ifndef ARRAY +#ifndef HEADER +#define HEADER + +template +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +volatile double g; + +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } +// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } +// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { i8 } +// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] +// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } +// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { i8 } +// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } +// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } +template +T tmain() { + S test; + T t_var __attribute__((aligned(128))) = T(); + T vec[] = {1, 2}; + S s_arr[] = {1, 2}; + S var(3); +#pragma omp master taskloop private(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global double + // LAMBDA-LABEL: @main + // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 +// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// LAMBDA: ret +#pragma omp master taskloop private(g, sivar) + for (int i = 0; i < 10; ++i) { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* 
[[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SIVAR_REF]] + + // LAMBDA: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1) + g = 1; + sivar = 2; + // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* %{{.+}}, + // LAMBDA: call void [[INNER_LAMBDA]](% + // LAMBDA: ret + [&]() { + g = 2; + sivar = 3; + }(); + } + }(); + return 0; +#elif defined(BLOCKS) + // BLOCKS: [[G:@.+]] = global double + // BLOCKS-LABEL: @main + // BLOCKS: call void {{%.+}}(i8 + ^{ + // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 + // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) + // BLOCKS: ret +#pragma omp master taskloop private(g, sivar) + for (int i = 0; i < 10; ++i) { + // BLOCKS: define {{.+}} void {{@.+}}(i8* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store double 2.0{{.+}}, double* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 4, i{{[0-9]+}}* + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: ret + + // BLOCKS: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1) + g = 1; + sivar = 3; + // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 3, i{{[0-9]+}}* %{{.+}}, + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: call void {{%.+}}(i8 + ^{ + g = 2; + sivar = 4; + }(); + } + }(); + return 0; +#else + S test; + int t_var = 0; + int vec[] = {1, 2}; + S s_arr[] = {1, 2}; + S var(3); +#pragma omp master taskloop private(var, t_var, s_arr, vec, s_arr, var, sivar) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + sivar = 8; + } +#pragma omp task + g+=1; + return tmain(); +#endif +} + +// CHECK: define i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_DOUBLE_TY_DEF_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]]) + +// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( +// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] +// CHECK: [[THEN]] +// Do not store original variables in capture struct. 
+// CHECK-NOT: getelementptr inbounds [[CAP_MAIN_TY]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T_TY]] task_data; +// [[KMP_TASK_MAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 120, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* + +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// Also copy address of private copy to the corresponding shareds reference. +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + +// Constructors for s_arr and var. +// a_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: call void [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%.+]]) +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call void [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF:%.+]]) + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) +// CHECK: call {{.*}}void @__kmpc_end_master( +// CHECK-NEXT: br label {{%?}}[[EXIT]] +// CHECK: [[EXIT]] +// CHECK: call i32 @__kmpc_omp_task([[LOC]], i32 [[GTID]], i8* + +// CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]() + +// No destructors must be called for private copies of s_arr and var. 
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_DOUBLE_TY_DESTR:@.+]]([[S_DOUBLE_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias %0, [[S_DOUBLE_TY]]** noalias %1, i32** noalias %2, [2 x [[S_DOUBLE_TY]]]** noalias %3, [2 x i32]** noalias %4, i32** noalias %5) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, +// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1) + +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) +// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + +// Privates actually are used. 
+// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] +// CHECK_DAG: [[PRIV_SIVAR]] + +// CHECK: ret + +// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* %0, [[KMP_TASK_MAIN_TY]]* %1, i32 %2) +// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 +// CHECK: br label % + +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: call {{.*}} [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 1 +// CHECK: icmp eq [[S_DOUBLE_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call void [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call void [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) + +// Do not store original variables in capture struct. +// CHECK-NOT: getelementptr inbounds [[CAP_TMAIN_TY]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T_TY]] task_data; +// [[KMP_TASK_TMAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 9, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* + +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). 
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + +// Constructors for s_arr and var. +// a_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%.+]]) +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF:%.+]]) + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) + +// No destructors must be called for private copies of s_arr and var. 
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias %0, i32** noalias %1, [2 x i32]** noalias %2, [2 x [[S_INT_TY]]]** noalias %3, [[S_INT_TY]]** noalias %4) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, +// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1) + +// CHECK: alloca i32*, +// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + +// Privates actually are used. 
+// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] + +// CHECK: ret + +// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* %0, [[KMP_TASK_TMAIN_TY]]* %1, i32 %2) +// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 +// CHECK: br label % + +// CHECK: phi [[S_INT_TY]]* +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 1 +// CHECK: icmp eq [[S_INT_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +#endif +#else +// ARRAY-LABEL: array_func +struct St { + int a, b; + St() : a(0), b(0) {} + St &operator=(const St &) { return *this; }; + ~St() {} +}; + +void array_func(int n, float a[n], St s[2]) { +// ARRAY: call i8* @__kmpc_omp_task_alloc( +// ARRAY: call void @__kmpc_taskloop( +// ARRAY: store float** %{{.+}}, float*** %{{.+}}, +// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, +#pragma omp master taskloop private(a, s) + for (int i = 0; i < 10; ++i) + ; +} +#endif + diff --git a/clang/test/OpenMP/master_taskloop_private_messages.cpp b/clang/test/OpenMP/master_taskloop_private_messages.cpp new file mode 100644 index 0000000000000..30220cb294e47 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_private_messages.cpp @@ -0,0 +1,259 @@ +// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized + +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +void foo() { +} + +bool foobool(int argc) { + 
  return argc;
+}
+
+struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
+extern S1 a;
+class S2 {
+  mutable int a;
+
+public:
+  S2() : a(0) {}
+};
+const S2 b;
+const S2 ba[5];
+class S3 {
+  int a;
+
+public:
+  S3() : a(0) {}
+};
+const S3 ca[5];
+class S4 {
+  int a;
+  S4(); // expected-note {{implicitly declared private here}}
+
+public:
+  S4(int v) : a(v) {
+#pragma omp master taskloop private(a) private(this->a)
+    for (int k = 0; k < v; ++k)
+      ++this->a;
+  }
+};
+class S5 {
+  int a;
+  S5() : a(0) {} // expected-note {{implicitly declared private here}}
+
+public:
+  S5(int v) : a(v) {}
+  S5 &operator=(S5 &s) {
+#pragma omp master taskloop private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}}
+    for (int k = 0; k < s.a; ++k)
+      ++s.a;
+    return *this;
+  }
+};
+
+template <typename T>
+class S6 {
+public:
+  T a;
+
+  S6() : a(0) {}
+  S6(T v) : a(v) {
+#pragma omp master taskloop private(a) private(this->a) allocate(omp_thread_mem_alloc: a) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'master taskloop' directive}}
+    for (int k = 0; k < v; ++k)
+      ++this->a;
+  }
+  S6 &operator=(S6 &s) {
+#pragma omp master taskloop private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}}
+    for (int k = 0; k < s.a; ++k)
+      ++s.a;
+    return *this;
+  }
+};
+
+template <typename T>
+class S7 : public T {
+  T a;
+  S7() : a(0) {}
+
+public:
+  S7(T v) : a(v) {
+#pragma omp master taskloop private(a) private(this->a) private(T::a)
+    for (int k = 0; k < a.a; ++k)
+      ++this->a.a;
+  }
+  S7 &operator=(S7 &s) {
+#pragma omp master taskloop private(a) private(this->a) private(s.a) private(s.T::a) // expected-error 2 {{expected variable name or data member of current class}}
+    for (int k = 0; k < s.a.a; ++k)
+      ++s.a.a;
+    return *this;
+  }
+};
+
+S3 h;
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template <class I, class C>
+int foomain(I argc, C **argv) {
+  I e(4);
+  I g(5);
+  int i, z;
+  int &j = i;
+#pragma omp master taskloop private // expected-error {{expected '(' after 'private'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private() // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(a, b) // expected-error {{private variable with incomplete type 'S1'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(e, g, z)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(h) // expected-error {{threadprivate or thread local variable cannot be private}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop shared(i)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+  {
+    int v = 0;
+    int i;
+#pragma omp master taskloop private(i)
+    for (int k = 0; k < argc; ++k) {
+      i = k;
+      v += i;
+    }
+  }
+#pragma omp parallel shared(i)
+#pragma omp parallel private(i)
+#pragma omp master taskloop private(j)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(i)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+  return 0;
+}
+
+void bar(S4 a[2]) {
+#pragma omp parallel
+#pragma omp master taskloop private(a)
+  for (int i = 0; i < 2; ++i)
+    foo();
+}
+
+namespace A {
+double x;
+#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
+}
+namespace B {
+using A::x;
+}
+
+int main(int argc, char **argv) {
+  S4 e(4);
+  S5 g(5);
+  S6<float> s6(0.0) , s6_0(1.0); // expected-note {{in instantiation of member function 'S6<float>::S6' requested here}}
+  S7<S6<float> > s7(0.0) , s7_0(1.0);
+  int i, z;
+  int &j = i;
+#pragma omp master taskloop private // expected-error {{expected '(' after 'private'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private() // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argc)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(a, b) // expected-error {{private variable with incomplete type 'S1'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(h) // expected-error {{threadprivate or thread local variable cannot be private}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(B::x) // expected-error {{threadprivate or thread local variable cannot be private}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop shared(i)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+  {
+    int i;
+#pragma omp master taskloop private(i)
+    for (int k = 0; k < argc; ++k)
+      ++k;
+  }
+#pragma omp parallel shared(i)
+#pragma omp parallel private(i)
+#pragma omp master taskloop private(j)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp master taskloop private(i, z)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+  static int si;
+#pragma omp master taskloop private(si) // OK
+  for(int k = 0; k < argc; ++k)
+    si = k + 1;
+
+  s6 = s6_0; // expected-note {{in instantiation of member function 'S6<float>::operator=' requested here}}
+  s7 = s7_0; // expected-note {{in instantiation of member function 'S7<S6<float> >::operator=' requested here}}
+  return foomain<int, char>(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
+}
+
diff --git a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
new file mode 100644
index 0000000000000..59ac23f5e84f7
--- /dev/null
+++ b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
@@ -0,0 +1,236 @@
+// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s
+
+// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+struct S {
+  float a;
+  S() : a(0.0f) {}
+  ~S() {}
+};
+
+#pragma omp declare reduction(+:S:omp_out.a += omp_in.a) initializer(omp_priv = omp_orig)
+
+float g;
+
+int a;
+#pragma omp threadprivate(a)
+int main (int argc, char *argv[])
+{
+int i, n;
+float a[100], b[100], sum, e[argc + 100];
+S c[100];
+float &d = g;
+
+/* Some initializations */
+n = 100;
+for (i=0; i < n; i++)
+  a[i] = b[i] = i * 1.0;
+sum = 0.0;
+
+#pragma omp master taskloop reduction(+:sum, c[:n], d, e)
+  for (i=0; i < n; i++) {
+    sum = sum + (a[i] * b[i]);
+    c[i].a = i*i;
+    d += i*i;
+    e[i] = i;
+  }
+
+}
+
+// CHECK-LABEL: @main(
+// CHECK: [[RETVAL:%.*]] = alloca i32,
+// CHECK: [[ARGC_ADDR:%.*]] = alloca i32,
+// CHECK: [[ARGV_ADDR:%.*]] = alloca i8**,
+// CHECK: [[I:%.*]] = alloca i32,
+// CHECK: [[N:%.*]] = alloca i32,
+// CHECK: [[A:%.*]] = alloca [100 x float],
+// CHECK:
[[B:%.*]] = alloca [100 x float], +// CHECK: [[SUM:%.*]] = alloca float, +// CHECK: [[SAVED_STACK:%.*]] = alloca i8*, +// CHECK: [[C:%.*]] = alloca [100 x %struct.S], +// CHECK: [[D:%.*]] = alloca float*, +// CHECK: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], +// CHECK: [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t], +// CHECK: alloca i32, +// CHECK: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, +// CHECK: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, +// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* +// CHECK: store i32 0, i32* [[RETVAL]], +// CHECK: store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]], +// CHECK: store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]], +// CHECK: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], +// CHECK: [[ADD:%.*]] = add nsw i32 [[TMP1]], 100 +// CHECK: [[TMP2:%.*]] = zext i32 [[ADD]] to i64 +// CHECK: [[VLA:%.+]] = alloca float, i64 % + +// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( +// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 +// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:[^,]+]] +// CHECK: [[THEN]] +// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* +// CHECK-DAG: [[TMP21:%.*]] = bitcast float* [[SUM]] to i8* +// CHECK-DAG: store i8* [[TMP21]], i8** [[TMP20:%[^,]+]], +// CHECK-DAG: [[TMP20]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.+]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0 +// CHECK-DAG: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK-DAG: store i64 4, i64* [[TMP22]], +// CHECK-DAG: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]], +// CHECK-DAG: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK-DAG: store i8* null, i8** [[TMP24]], +// CHECK-DAG: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]], +// CHECK-DAG: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK-DAG: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8* +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false) +// CHECK-DAG: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0 +// CHECK-DAG: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, % +// CHECK-DAG: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]] +// CHECK-DAG: [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK-DAG: store i8* [[TMP31]], i8** [[TMP28:%[^,]+]], +// CHECK-DAG: [[TMP28]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0 +// CHECK-DAG: [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 +// CHECK-DAG: [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64 +// CHECK-DAG: [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]] +// CHECK-DAG: [[TMP35:%.*]] = sdiv exact i64 [[TMP34]], ptrtoint (float* getelementptr (float, float* null, i32 1) to 
i64) +// CHECK-DAG: [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1 +// CHECK-DAG: [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK-DAG: store i64 [[TMP37]], i64* [[TMP38:%[^,]+]], +// CHECK-DAG: [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1 +// CHECK-DAG: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]], +// CHECK-DAG: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3 +// CHECK-DAG: store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]], +// CHECK-DAG: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]], +// CHECK-DAG: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5 +// CHECK-DAG: store i32 1, i32* [[TMP42]], +// CHECK-DAG: [[TMP44:%.*]] = load float*, float** [[D]], +// CHECK-DAG: [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8* +// CHECK-DAG: store i8* [[TMP45]], i8** [[TMP43:%[^,]+]], +// CHECK-DAG: [[TMP43]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0 +// CHECK-DAG: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 +// CHECK-DAG: store i64 4, i64* [[TMP46]], +// CHECK-DAG: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]], +// CHECK-DAG: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 +// CHECK-DAG: store i8* null, i8** [[TMP48]], +// CHECK-DAG: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]], +// CHECK-DAG: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 +// CHECK-DAG: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8* +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false) +// CHECK-DAG: [[TMP53:%.*]] = bitcast float* [[VLA]] to i8* +// CHECK-DAG: store i8* [[TMP53]], i8** [[TMP52:%[^,]+]], +// CHECK-DAG: [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0 +// CHECK-DAG: [[TMP54:%.*]] = mul nuw i64 [[TMP2]], 4 +// CHECK-DAG: [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK-DAG: store i64 [[TMP54]], i64* [[TMP56:%[^,]+]], +// CHECK-DAG: [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1 +// CHECK-DAG: [[TMP57:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]], +// CHECK-DAG: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3 +// CHECK-DAG: store i8* null, i8** [[TMP58]], +// CHECK-DAG: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]], +// CHECK-DAG: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5 +// CHECK-DAG: store i32 1, i32* [[TMP60]], +// CHECK-DAG: [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 +// CHECK-DAG: [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64 +// CHECK: [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK: [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0]], i32 4, i8* [[TMP61]]) +// CHECK: [[TMP63:%.*]] = load i32, i32* [[N]], +// CHECK: store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]], +// CHECK: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], +// CHECK: [[SUB:%.*]] = sub nsw i32 [[TMP64]], 0 +// CHECK: [[SUB10:%.*]] = sub nsw i32 [[SUB]], 1 +// CHECK: [[ADD11:%.*]] = add nsw i32 [[SUB10]], 1 +// CHECK: [[DIV:%.*]] = sdiv i32 [[ADD11]], 1 +// CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 64, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) +// CHECK: call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* +// CHECK: call {{.*}}void @__kmpc_end_master( +// CHECK-NEXT: br label {{%?}}[[EXIT]] +// CHECK: [[EXIT]] + +// CHECK: ret i32 + +// CHECK: define internal void @[[RED_INIT1]](i8* %0) +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void @[[RED_COMB1]](i8* %0, i8* %1) +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + +// CHECK: define internal void @[[RED_INIT2]](i8* %0) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: [[ORIG_PTR_ADDR:%.+]] = call i8* @__kmpc_threadprivate_cached( +// CHECK: [[ORIG_PTR_REF:%.+]] = bitcast i8* [[ORIG_PTR_ADDR]] to i8** +// CHECK: load i8*, i8** [[ORIG_PTR_REF]], +// CHECK: call void [[OMP_INIT1:@.+]]( +// CHECK: ret void + +// CHECK: define internal void [[OMP_COMB1:@.+]](%struct.S* noalias %0, %struct.S* noalias %1) +// CHECK: fadd float % + +// CHECK: define internal void 
[[OMP_INIT1]](%struct.S* noalias %0, %struct.S* noalias %1) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( + +// CHECK: define internal void @[[RED_FINI2]](i8* %0) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call void @ +// CHECK: ret void + +// CHECK: define internal void @[[RED_COMB2]](i8* %0, i8* %1) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call void [[OMP_COMB1]]( +// CHECK: ret void + +// CHECK: define internal void @[[RED_INIT3]](i8* %0) +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void @[[RED_COMB3]](i8* %0, i8* %1) +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + +// CHECK: define internal void @[[RED_INIT4]](i8* %0) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: store float 0.000000e+00, float* % +// CHECK: ret void + +// CHECK: define internal void @[[RED_COMB4]](i8* %0, i8* %1) +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: fadd float % +// CHECK: store float %{{.+}}, float* % +// CHECK: ret void + +// CHECK-NOT: call i8* @__kmpc_threadprivate_cached( +// CHECK: call i8* @__kmpc_task_reduction_get_th_data( +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call i8* @__kmpc_task_reduction_get_th_data( +// CHECK-NOT: call i8* @__kmpc_threadprivate_cached( +// CHECK: call i8* @__kmpc_task_reduction_get_th_data( +// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: call i8* @__kmpc_task_reduction_get_th_data( +// CHECK-NOT: call i8* @__kmpc_threadprivate_cached( + +// CHECK-DAG: distinct !DISubprogram(linkageName: "[[TASK]]", scope: ! +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT1]]" +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB1]]" +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT2]]" +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_FINI2]]" +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB2]]" +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT3]]" +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB3]]" +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT4]]" +// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB4]]" diff --git a/clang/test/OpenMP/master_taskloop_reduction_messages.cpp b/clang/test/OpenMP/master_taskloop_reduction_messages.cpp new file mode 100644 index 0000000000000..58be898a9f1e4 --- /dev/null +++ b/clang/test/OpenMP/master_taskloop_reduction_messages.cpp @@ -0,0 +1,352 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized +// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized + +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +void 
xxx(int argc) { + int fp; // expected-note {{initialize the variable 'fp' to silence this warning}} +#pragma omp master taskloop reduction(+:fp) // expected-warning {{variable 'fp' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +void foobar(int &ref) { +#pragma omp master taskloop reduction(+:ref) + for (int i = 0; i < 10; ++i) + foo(); +} + +struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}} + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + static float S2s; // expected-note 2 {{static data member is predetermined as shared}} + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} +}; +const float S2::S2sc = 0; +S2 b; // expected-note 3 {{'b' defined here}} +const S2 ba[5]; // expected-note 2 {{'ba' defined here}} +class S3 { + int a; + +public: + int b; + S3() : a(0) {} + S3(const S3 &s3) : a(s3.a) {} + S3 operator+(const S3 &arg1) { return arg1; } +}; +int operator+(const S3 &arg1, const S3 &arg2) { return 5; } +S3 c; // expected-note 3 {{'c' defined here}} +const S3 ca[5]; // expected-note 2 {{'ca' defined here}} +extern const int f; // expected-note 4 {{'f' declared here}} +class S4 { + int a; + S4(); // expected-note {{implicitly declared private here}} + S4(const S4 &s4); + S4 &operator+(const S4 &arg) { return (*this); } + +public: + S4(int v) : a(v) {} +}; +S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; } +class S5 { + int a:32; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} + S5 &operator+(const S5 &arg); + +public: + S5(int v) : a(v) {} +}; +class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}} +#if __cplusplus >= 201103L // C++11 or later +// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}} +#endif + int a; + +public: + S6() : a(6) {} + operator int() { return 6; } +} o; + +struct S7 { + int a: 32; + S7() { +#pragma omp master taskloop reduction(+:a) // expected-error {{expected addressable reduction item for the task-based directives}} + for (int i = 0; i < 10; ++i) + ++a; + } +}; + +S3 h, k; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template <class T> // expected-note {{declared here}} +T tmain(T argc) { + const T d = T(); // expected-note 4 {{'d' defined here}} + const T da[5] = {T()}; // expected-note 2 {{'da' defined here}} + T qa[5] = {T()}; + T i, z; + T &j = i; // expected-note 4 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const T &r = da[(int)i]; // expected-note 2 {{'r' defined here}} + T &q = qa[(int)i]; // expected-note 2 {{'q' defined here}} + T fl; +#pragma omp master taskloop reduction // expected-error {{expected '(' after 'reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected 
')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(&& : argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(^ : T) // expected-error {{'T' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : z, a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified variable cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : 
ba) // expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(* : ca) // expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(- : da) // expected-error {{const-qualified variable cannot be reduction}} expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : o) // expected-error 2 {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop private(i), reduction(+ : j), reduction(+ : q) // expected-error 4 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp master taskloop reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : p), reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'reduction' clause}} expected-note 2 {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : r) // expected-error 2 {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp master taskloop reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(fl) +#pragma omp master taskloop reduction(+ : fl) allocate(omp_thread_mem_alloc: fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'master taskloop' directive}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel reduction(* : fl) +#pragma omp master taskloop reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); + + return T(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note 2 {{'d' defined here}} + const int da[5] = {0}; // expected-note {{'da' defined here}} + int qa[5] = {0}; + S4 e(4); + S5 g(5); + int i, z; + int &j = i; // expected-note 2 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const int &r = da[i]; // expected-note {{'r' defined here}} + int &q = qa[i]; // expected-note {{'q' defined here}} + float fl; +#pragma omp master taskloop reduction // expected-error {{expected '(' after 'reduction'}} + for (int i = 0; i < 10; ++i) + 
foo(); +#pragma omp master taskloop reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp master taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(|| : argc > 0 ? 
argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(~ : argc) // expected-error {{expected unqualified-id}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(&& : argc, z) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(^ : S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified variable cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : ba) // expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(* : ca) // expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(- : da) // expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : o) // expected-error {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop private(i), reduction(+ : j), reduction(+ : q) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp master taskloop reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : p), reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'reduction' clause}} expected-note 
{{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp master taskloop reduction(+ : r) // expected-error {{const-qualified variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp master taskloop reduction(max : j) // expected-error {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(fl) +#pragma omp master taskloop reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel reduction(* : fl) +#pragma omp master taskloop reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); + static int m; +#pragma omp master taskloop reduction(+ : m) // OK + for (int i = 0; i < 10; ++i) + m++; +#pragma omp master taskloop nogroup reduction(+ : m) // expected-error {{'reduction' clause cannot be used with 'nogroup' clause}} + for (int i = 0; i < 10; ++i) + m++; + + return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}} +} diff --git a/clang/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp b/clang/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp new file mode 100644 index 0000000000000..04870f0845982 --- /dev/null +++ b/clang/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp @@ -0,0 +1,158 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=50 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-version=50 | FileCheck %s --implicit-check-not='ret i32 {{1|81|84}}' +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -fopenmp-version=50 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - -fopenmp-version=50 | FileCheck %s --implicit-check-not='ret i32 {{1|81|84}}' +// expected-no-diagnostics + +// CHECK-NOT: ret i32 {{1|81|84}} +// CHECK-DAG: define {{.*}}i32 @_Z3barv() +// CHECK-DAG: define {{.*}}i32 @_ZN16SpecSpecialFuncs6MethodEv(%struct.SpecSpecialFuncs* %{{.+}}) +// CHECK-DAG: define {{.*}}i32 @_ZN12SpecialFuncs6MethodEv(%struct.SpecialFuncs* %{{.+}}) +// CHECK-DAG: define linkonce_odr {{.*}}i32 @_ZN16SpecSpecialFuncs6methodEv(%struct.SpecSpecialFuncs* %{{.+}}) +// CHECK-DAG: define linkonce_odr {{.*}}i32 @_ZN12SpecialFuncs6methodEv(%struct.SpecialFuncs* %{{.+}}) +// CHECK-DAG: define {{.*}}i32 @_Z5prio_v() +// CHECK-DAG: define internal i32 @_ZL6prio1_v() +// CHECK-DAG: define {{.*}}i32 @_Z4callv() +// CHECK-DAG: define internal i32 @_ZL9stat_usedv() +// CHECK-DAG: define {{.*}}i32 @fn_linkage() +// CHECK-DAG: define {{.*}}i32 @_Z11fn_linkage1v() + +// CHECK-DAG: ret i32 2 +// CHECK-DAG: ret i32 3 +// CHECK-DAG: ret i32 4 +// CHECK-DAG: ret i32 5 +// CHECK-DAG: ret i32 6 +// CHECK-DAG: ret i32 7 +// CHECK-DAG: ret i32 82 +// CHECK-DAG: ret i32 83 +// 
CHECK-DAG: ret i32 85 +// CHECK-DAG: ret i32 86 +// CHECK-DAG: ret i32 87 + +// Outputs for function members +// CHECK-DAG: ret i32 6 +// CHECK-DAG: ret i32 7 +// CHECK-NOT: ret i32 {{1|81|84}} + +#ifndef HEADER +#define HEADER + +int foo() { return 2; } +int bazzz(); +int test(); +static int stat_unused_(); +static int stat_used_(); + +#pragma omp declare target + +#pragma omp declare variant(foo) match(implementation = {vendor(llvm)}) +int bar() { return 1; } + +#pragma omp declare variant(bazzz) match(implementation = {vendor(llvm)}) +int baz() { return 1; } + +#pragma omp declare variant(test) match(implementation = {vendor(llvm)}) +int call() { return 1; } + +#pragma omp declare variant(stat_unused_) match(implementation = {vendor(llvm)}) +static int stat_unused() { return 1; } + +#pragma omp declare variant(stat_used_) match(implementation = {vendor(llvm)}) +static int stat_used() { return 1; } + +#pragma omp end declare target + +int main() { + int res; +#pragma omp target map(from \ + : res) + res = bar() + baz() + call(); + return res; +} + +int test() { return 3; } +static int stat_unused_() { return 4; } +static int stat_used_() { return 5; } + +#pragma omp declare target + +struct SpecialFuncs { + void vd() {} + SpecialFuncs(); + ~SpecialFuncs(); + + int method_() { return 6; } +#pragma omp declare variant(SpecialFuncs::method_) \ + match(implementation = {vendor(llvm)}) + int method() { return 1; } +#pragma omp declare variant(SpecialFuncs::method_) \ + match(implementation = {vendor(llvm)}) + int Method(); +} s; + +int SpecialFuncs::Method() { return 1; } + +struct SpecSpecialFuncs { + void vd() {} + SpecSpecialFuncs(); + ~SpecSpecialFuncs(); + + int method_(); +#pragma omp declare variant(SpecSpecialFuncs::method_) \ + match(implementation = {vendor(llvm)}) + int method() { return 1; } +#pragma omp declare variant(SpecSpecialFuncs::method_) \ + match(implementation = {vendor(llvm)}) + int Method(); +} s1; + +#pragma omp end declare target + +int SpecSpecialFuncs::method_() { return 7; } +int SpecSpecialFuncs::Method() { return 1; } + +int prio() { return 81; } +int prio1() { return 82; } +static int prio2() { return 83; } +static int prio3() { return 84; } +static int prio4() { return 84; } +int fn_linkage_variant() { return 85; } +extern "C" int fn_linkage_variant1() { return 86; } +int fn_variant2() { return 1; } + +#pragma omp declare target + +void xxx() { + (void)s.method(); + (void)s1.method(); +} + +#pragma omp declare variant(prio) match(implementation = {vendor(llvm)}) +#pragma omp declare variant(prio1) match(implementation = {vendor(score(1) \ + : llvm)}) +int prio_() { return 1; } + +#pragma omp declare variant(prio4) match(implementation = {vendor(score(3) \ + : llvm)}) +#pragma omp declare variant(prio2) match(implementation = {vendor(score(5) \ + : llvm)}) +#pragma omp declare variant(prio3) match(implementation = {vendor(score(1) \ + : llvm)}) +static int prio1_() { return 1; } + +int int_fn() { return prio1_(); } + +extern "C" { +#pragma omp declare variant(fn_linkage_variant) match(implementation = {vendor(llvm)}) +int fn_linkage() { return 1; } +} + +#pragma omp declare variant(fn_linkage_variant1) match(implementation = {vendor(llvm)}) +int fn_linkage1() { return 1; } + +#pragma omp declare variant(fn_variant2) match(implementation = {vendor(llvm, ibm)}) +int fn2() { return 87; } + +#pragma omp end declare target + +#endif // HEADER diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index 0f5e43bfb0463..18972de353482 100644 --- 
a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -9,18 +9,18 @@ // BLOCKS:#define __block __attribute__((__blocks__(byref))) // // -// RUN: %clang_cc1 -x c++ -std=c++2a -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX2A %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=c++2a -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX2A %s // -// CXX2A:#define __GNUG__ {{.*}} +// CXX2A:#define __GNUG__ 4 // CXX2A:#define __GXX_EXPERIMENTAL_CXX0X__ 1 // CXX2A:#define __GXX_RTTI 1 // CXX2A:#define __GXX_WEAK__ 1 // CXX2A:#define __cplusplus 201707L // CXX2A:#define __private_extern__ extern // -// RUN: %clang_cc1 -x c++ -std=c++1z -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX1Z %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=c++1z -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX1Z %s // -// CXX1Z:#define __GNUG__ {{.*}} +// CXX1Z:#define __GNUG__ 4 // CXX1Z:#define __GXX_EXPERIMENTAL_CXX0X__ 1 // CXX1Z:#define __GXX_RTTI 1 // CXX1Z:#define __GXX_WEAK__ 1 @@ -28,9 +28,9 @@ // CXX1Z:#define __private_extern__ extern // // -// RUN: %clang_cc1 -x c++ -std=c++1y -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX1Y %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=c++1y -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX1Y %s // -// CXX1Y:#define __GNUG__ {{.*}} +// CXX1Y:#define __GNUG__ 4 // CXX1Y:#define __GXX_EXPERIMENTAL_CXX0X__ 1 // CXX1Y:#define __GXX_RTTI 1 // CXX1Y:#define __GXX_WEAK__ 1 @@ -38,9 +38,9 @@ // CXX1Y:#define __private_extern__ extern // // -// RUN: %clang_cc1 -x c++ -std=c++11 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX11 %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=c++11 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX11 %s // -// CXX11:#define __GNUG__ {{.*}} +// CXX11:#define __GNUG__ 4 // CXX11:#define __GXX_EXPERIMENTAL_CXX0X__ 1 // CXX11:#define __GXX_RTTI 1 // CXX11:#define __GXX_WEAK__ 1 @@ -48,9 +48,9 @@ // CXX11:#define __private_extern__ extern // // -// RUN: %clang_cc1 -x c++ -std=c++98 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX98 %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=c++98 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix CXX98 %s // -// CXX98:#define __GNUG__ {{.*}} +// CXX98:#define __GNUG__ 4 // CXX98:#define __GXX_RTTI 1 // CXX98:#define __GXX_WEAK__ 1 // CXX98:#define __cplusplus 199711L @@ -87,7 +87,7 @@ // C11-NOT: __cplusplus // // -// RUN: %clang_cc1 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix COMMON %s +// RUN: %clang_cc1 -fgnuc-version=4.2.1 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix COMMON %s // // COMMON:#define __CONSTANT_CFSTRINGS__ 1 // COMMON:#define __FINITE_MATH_ONLY__ 0 @@ -119,41 +119,41 @@ // RUN: %clang_cc1 -ffreestanding -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix FREESTANDING %s // FREESTANDING:#define __STDC_HOSTED__ 0 // -// RUN: %clang_cc1 -x c++ -std=gnu++2a -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX2A %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=gnu++2a -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX2A %s // -// GXX2A:#define __GNUG__ {{.*}} +// GXX2A:#define __GNUG__ 4 // GXX2A:#define __GXX_WEAK__ 1 // GXX2A:#define __cplusplus 201707L // GXX2A:#define __private_extern__ extern // // -// RUN: %clang_cc1 -x c++ -std=gnu++1z -E -dM < /dev/null | FileCheck -match-full-lines 
-check-prefix GXX1Z %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=gnu++1z -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX1Z %s // -// GXX1Z:#define __GNUG__ {{.*}} +// GXX1Z:#define __GNUG__ 4 // GXX1Z:#define __GXX_WEAK__ 1 // GXX1Z:#define __cplusplus 201703L // GXX1Z:#define __private_extern__ extern // // -// RUN: %clang_cc1 -x c++ -std=gnu++1y -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX1Y %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=gnu++1y -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX1Y %s // -// GXX1Y:#define __GNUG__ {{.*}} +// GXX1Y:#define __GNUG__ 4 // GXX1Y:#define __GXX_WEAK__ 1 // GXX1Y:#define __cplusplus 201402L // GXX1Y:#define __private_extern__ extern // // -// RUN: %clang_cc1 -x c++ -std=gnu++11 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX11 %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=gnu++11 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX11 %s // -// GXX11:#define __GNUG__ {{.*}} +// GXX11:#define __GNUG__ 4 // GXX11:#define __GXX_WEAK__ 1 // GXX11:#define __cplusplus 201103L // GXX11:#define __private_extern__ extern // // -// RUN: %clang_cc1 -x c++ -std=gnu++98 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX98 %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=gnu++98 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GXX98 %s // -// GXX98:#define __GNUG__ {{.*}} +// GXX98:#define __GNUG__ 4 // GXX98:#define __GXX_WEAK__ 1 // GXX98:#define __cplusplus 199711L // GXX98:#define __private_extern__ extern @@ -2845,9 +2845,9 @@ // I386:#define __i386__ 1 // I386:#define i386 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-ALIGN32 %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-CXX -check-prefix I386-LINUX-ALIGN32 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-pc-linux-gnu -target-cpu pentium4 -malign-double < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-ALIGN64 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-ALIGN32 %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=i386-pc-linux-gnu -target-cpu pentium4 < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-CXX -check-prefix I386-LINUX-ALIGN32 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=i386-pc-linux-gnu -target-cpu pentium4 -malign-double < /dev/null | FileCheck -match-full-lines -check-prefix I386-LINUX -check-prefix I386-LINUX-ALIGN64 %s // // I386-LINUX-NOT:#define _LP64 // I386-LINUX:#define __BIGGEST_ALIGNMENT__ 16 @@ -3047,9 +3047,9 @@ // I386-LINUX:#define __i386__ 1 // I386-LINUX:#define i386 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=i386-netbsd -target-cpu i486 < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=i386-netbsd -target-cpu i486 < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD -check-prefix I386-NETBSD-CXX %s -// RUN: %clang_cc1 -E -dM 
-ffreestanding -triple=i386-netbsd -target-cpu i486 -malign-double < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=i386-netbsd -target-cpu i486 < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=i386-netbsd -target-cpu i486 < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD -check-prefix I386-NETBSD-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=i386-netbsd -target-cpu i486 -malign-double < /dev/null | FileCheck -match-full-lines -check-prefix I386-NETBSD %s // // // I386-NETBSD-NOT:#define _LP64 @@ -3263,8 +3263,8 @@ // I386-DECLSPEC: #define __declspec{{.*}} // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=mips-none-none < /dev/null | FileCheck -match-full-lines -check-prefix MIPS32BE -check-prefix MIPS32BE-C %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=mips-none-none < /dev/null | FileCheck -match-full-lines -check-prefix MIPS32BE -check-prefix MIPS32BE-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=mips-none-none < /dev/null | FileCheck -match-full-lines -check-prefix MIPS32BE -check-prefix MIPS32BE-C %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=mips-none-none < /dev/null | FileCheck -match-full-lines -check-prefix MIPS32BE -check-prefix MIPS32BE-CXX %s // // MIPS32BE:#define MIPSEB 1 // MIPS32BE:#define _ABIO32 1 @@ -3682,10 +3682,10 @@ // MIPS32EL:#define _mips 1 // MIPS32EL:#define mips 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 \ // RUN: -triple=mips64-none-none -target-abi n32 < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefix MIPSN32BE -check-prefix MIPSN32BE-C %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding \ +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 \ // RUN: -triple=mips64-none-none -target-abi n32 < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefix MIPSN32BE -check-prefix MIPSN32BE-CXX %s // @@ -3993,7 +3993,7 @@ // MIPSN32BE: #define _mips 1 // MIPSN32BE: #define mips 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 \ // RUN: -triple=mips64el-none-none -target-abi n32 < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefix MIPSN32EL %s // @@ -4300,8 +4300,8 @@ // MIPSN32EL: #define _mips 1 // MIPSN32EL: #define mips 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=mips64-none-none < /dev/null | FileCheck -match-full-lines -check-prefix MIPS64BE %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=mips64-none-none < /dev/null | FileCheck -match-full-lines -check-prefix MIPS64BE -check-prefix MIPS64BE-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=mips64-none-none < /dev/null | FileCheck -match-full-lines -check-prefix MIPS64BE %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=mips64-none-none < /dev/null | FileCheck -match-full-lines -check-prefix MIPS64BE -check-prefix MIPS64BE-CXX %s // // MIPS64BE:#define MIPSEB 1 // MIPS64BE:#define _ABI64 3 @@ -6838,13 +6838,13 @@ // PPC64-LINUX:#define __ppc64__ 1 // PPC64-LINUX:#define __ppc__ 1 -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s 
-// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-unknown-linux-gnu -target-abi elfv1-qpx < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-unknown-linux-gnu -target-abi elfv2 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64le-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64le-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64le-unknown-linux-gnu -target-abi elfv2 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv1-qpx < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64-unknown-linux-gnu -target-abi elfv2 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu -target-abi elfv1 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv1 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=powerpc64le-unknown-linux-gnu -target-abi elfv2 < /dev/null | FileCheck -match-full-lines -check-prefix PPC64-ELFv2 %s // PPC64-ELFv1:#define _CALL_ELF 1 // PPC64-ELFv2:#define _CALL_ELF 2 // @@ -7980,12 +7980,12 @@ // S390X:#define __s390__ 1 // S390X:#define __s390x__ 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=sparc-none-none < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-DEFAULT %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=sparc-rtems-elf < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-DEFAULT %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=sparc-none-netbsd < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-NETOPENBSD %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=sparc-none-openbsd < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-NETOPENBSD %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=sparc-none-none < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-DEFAULT -check-prefix SPARC-DEFAULT-CXX %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=sparc-none-openbsd < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-NETOPENBSD -check-prefix SPARC-NETOPENBSD-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding 
-fgnuc-version=4.2.1 -triple=sparc-none-none < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-DEFAULT %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=sparc-rtems-elf < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-DEFAULT %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=sparc-none-netbsd < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-NETOPENBSD %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=sparc-none-openbsd < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-NETOPENBSD %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=sparc-none-none < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-DEFAULT -check-prefix SPARC-DEFAULT-CXX %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=sparc-none-openbsd < /dev/null | FileCheck -match-full-lines -check-prefix SPARC -check-prefix SPARC-NETOPENBSD -check-prefix SPARC-NETOPENBSD-CXX %s // // SPARC-NOT:#define _LP64 // SPARC:#define __BIGGEST_ALIGNMENT__ 8 @@ -8185,8 +8185,8 @@ // SPARC:#define __sparcv8 1 // SPARC:#define sparc 1 -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=tce-none-none < /dev/null | FileCheck -match-full-lines -check-prefix TCE %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=tce-none-none < /dev/null | FileCheck -match-full-lines -check-prefix TCE -check-prefix TCE-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=tce-none-none < /dev/null | FileCheck -match-full-lines -check-prefix TCE %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=tce-none-none < /dev/null | FileCheck -match-full-lines -check-prefix TCE -check-prefix TCE-CXX %s // // TCE-NOT:#define _LP64 // TCE:#define __BIGGEST_ALIGNMENT__ 4 @@ -8354,8 +8354,8 @@ // TCE:#define __tce__ 1 // TCE:#define tce 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=x86_64-none-none < /dev/null | FileCheck -match-full-lines -check-prefix X86_64 %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=x86_64-none-none < /dev/null | FileCheck -match-full-lines -check-prefix X86_64 -check-prefix X86_64-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-none-none < /dev/null | FileCheck -match-full-lines -check-prefix X86_64 %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-none-none < /dev/null | FileCheck -match-full-lines -check-prefix X86_64 -check-prefix X86_64-CXX %s // // X86_64:#define _LP64 1 // X86_64-NOT:#define _LP32 1 @@ -8562,8 +8562,8 @@ // RUN: %clang -xc - -E -dM -mcmodel=medium --target=i386-unknown-linux < /dev/null | FileCheck -match-full-lines -check-prefix X86_MEDIUM %s // X86_MEDIUM:#define __code_model_medium_ 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=x86_64-none-none-gnux32 < /dev/null | FileCheck -match-full-lines -check-prefix X32 %s -// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=x86_64-none-none-gnux32 < /dev/null | FileCheck -match-full-lines -check-prefix X32 -check-prefix X32-CXX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-none-none-gnux32 < /dev/null | FileCheck -match-full-lines -check-prefix X32 %s +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-none-none-gnux32 < /dev/null | FileCheck -match-full-lines 
-check-prefix X32 -check-prefix X32-CXX %s // // X32:#define _ILP32 1 // X32-NOT:#define _LP64 1 @@ -8759,7 +8759,7 @@ // X32:#define __x86_64 1 // X32:#define __x86_64__ 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=x86_64-unknown-cloudabi < /dev/null | FileCheck -match-full-lines -check-prefix X86_64-CLOUDABI %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-unknown-cloudabi < /dev/null | FileCheck -match-full-lines -check-prefix X86_64-CLOUDABI %s // // X86_64-CLOUDABI:#define _LP64 1 // X86_64-CLOUDABI:#define __ATOMIC_ACQUIRE 2 @@ -9064,7 +9064,7 @@ // X86_64-CLOUDABI:#define __x86_64 1 // X86_64-CLOUDABI:#define __x86_64__ 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=x86_64-pc-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix X86_64-LINUX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-pc-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix X86_64-LINUX %s // // X86_64-LINUX:#define _LP64 1 // X86_64-LINUX:#define __BIGGEST_ALIGNMENT__ 16 @@ -9277,7 +9277,7 @@ // X86_64-FREEBSD:#define __LDBL_DECIMAL_DIG__ 21 // X86_64-FREEBSD:#define __STDC_MB_MIGHT_NEQ_WC__ 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=x86_64-netbsd < /dev/null | FileCheck -match-full-lines -check-prefix X86_64-NETBSD %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-netbsd < /dev/null | FileCheck -match-full-lines -check-prefix X86_64-NETBSD %s // // X86_64-NETBSD:#define _LP64 1 // X86_64-NETBSD:#define __BIGGEST_ALIGNMENT__ 16 @@ -9481,7 +9481,7 @@ // X86_64-NETBSD:#define __x86_64 1 // X86_64-NETBSD:#define __x86_64__ 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=x86_64-scei-ps4 < /dev/null | FileCheck -match-full-lines -check-prefix PS4 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-scei-ps4 < /dev/null | FileCheck -match-full-lines -check-prefix PS4 %s // // PS4:#define _LP64 1 // PS4:#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ @@ -9641,7 +9641,7 @@ // RUN: %clang_cc1 -x objective-c -triple i386-unknown-freebsd -fobjc-runtime=gnustep-2.5 -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix GNUSTEP2 %s // GNUSTEP2:#define __OBJC_GNUSTEP_RUNTIME_ABI__ 20 // -// RUN: %clang_cc1 -x c++ -std=c++98 -fno-rtti -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix NORTTI %s +// RUN: %clang_cc1 -x c++ -fgnuc-version=4.2.1 -std=c++98 -fno-rtti -E -dM < /dev/null | FileCheck -match-full-lines -check-prefix NORTTI %s // NORTTI: #define __GXX_ABI_VERSION {{.*}} // NORTTI-NOT:#define __GXX_RTTI // NORTTI:#define __STDC__ 1 @@ -9700,16 +9700,16 @@ // XCORE:#define __LITTLE_ENDIAN__ 1 // XCORE:#define __XS1B__ 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm32-unknown-unknown \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=wasm32-unknown-unknown \ // RUN: < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY32 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-unknown-unknown \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=wasm64-unknown-unknown \ // RUN: < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY64 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm32-wasi \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=wasm32-wasi \ // RUN: < /dev/null \ // RUN: | FileCheck -match-full-lines 
-check-prefixes=WEBASSEMBLY,WEBASSEMBLY32,WEBASSEMBLY-WASI %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=wasm64-wasi \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=wasm64-wasi \ // RUN: < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefixes=WEBASSEMBLY,WEBASSEMBLY64,WEBASSEMBLY-WASI %s // @@ -10087,7 +10087,7 @@ // RUN: %clang_cc1 -E -dM -ffreestanding -triple x86_64-windows-cygnus < /dev/null | FileCheck -match-full-lines -check-prefix CYGWIN-X64 %s // CYGWIN-X64: #define __USER_LABEL_PREFIX__ -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=avr \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=avr \ // RUN: < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefix=AVR %s // @@ -10295,10 +10295,10 @@ // MSVC-X64:#define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16ULL // RUN: %clang_cc1 -E -dM -ffreestanding \ -// RUN: -triple=aarch64-apple-ios9 < /dev/null \ +// RUN: -fgnuc-version=4.2.1 -triple=aarch64-apple-ios9 < /dev/null \ // RUN: | FileCheck -check-prefix=DARWIN %s // RUN: %clang_cc1 -E -dM -ffreestanding \ -// RUN: -triple=aarch64-apple-macosx10.12 < /dev/null \ +// RUN: -fgnuc-version=4.2.1 -triple=aarch64-apple-macosx10.12 < /dev/null \ // RUN: | FileCheck -check-prefix=DARWIN %s // DARWIN:#define __STDC_NO_THREADS__ 1 @@ -10364,11 +10364,11 @@ // ARM-DARWIN-BAREMETAL-64: #define __PTRDIFF_TYPE__ long int // ARM-DARWIN-BAREMETAL-64: #define __SIZE_TYPE__ long unsigned int -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv32 < /dev/null \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=riscv32 < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefix=RISCV32 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv32-unknown-linux < /dev/null \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=riscv32-unknown-linux < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefixes=RISCV32,RISCV32-LINUX %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv32 \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=riscv32 \ // RUN: -fforce-enable-int128 < /dev/null | FileCheck -match-full-lines \ // RUN: -check-prefixes=RISCV32,RISCV32-INT128 %s // RISCV32: #define _ILP32 1 @@ -10575,9 +10575,9 @@ // RISCV32-LINUX: #define linux 1 // RISCV32-LINUX: #define unix 1 -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv64 < /dev/null \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=riscv64 < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefix=RISCV64 %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=riscv64-unknown-linux < /dev/null \ +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=riscv64-unknown-linux < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefixes=RISCV64,RISCV64-LINUX %s // RISCV64: #define _LP64 1 // RISCV64: #define __ATOMIC_ACQUIRE 2 diff --git a/clang/test/Profile/gcc-flag-compatibility.c b/clang/test/Profile/gcc-flag-compatibility.c index a378af9d34648..e3594b0ebc71c 100644 --- a/clang/test/Profile/gcc-flag-compatibility.c +++ b/clang/test/Profile/gcc-flag-compatibility.c @@ -15,7 +15,7 @@ // Check that -fprofile-generate=/path/to generates /path/to/default.profraw // RUN: %clang %s -c -S -o - -emit-llvm -fprofile-generate=/path/to -fno-experimental-new-pass-manager | FileCheck -check-prefixes=PROFILE-GEN,PROFILE-GEN-EQ %s // RxUN: %clang %s -c -S -o - -emit-llvm -fprofile-generate=/path/to -fexperimental-new-pass-manager | FileCheck 
-check-prefixes=PROFILE-GEN,PROFILE-GEN-EQ %s -// PROFILE-GEN-EQ: constant [{{.*}} x i8] c"/path/to{{/|\\5C}}{{.*}}\00" +// PROFILE-GEN-EQ: constant [{{.*}} x i8] c"/path/to{{/|\\\\}}{{.*}}\00" // Check that -fprofile-use=some/path reads some/path/default.profdata // This uses Clang FE format profile. diff --git a/clang/test/Sema/atomic-ops.c b/clang/test/Sema/atomic-ops.c index 638c30f278ce1..0340e23348943 100644 --- a/clang/test/Sema/atomic-ops.c +++ b/clang/test/Sema/atomic-ops.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -verify -ffreestanding -fsyntax-only -triple=i686-linux-gnu -std=c11 +// RUN: %clang_cc1 %s -verify -fgnuc-version=4.2.1 -ffreestanding -fsyntax-only -triple=i686-linux-gnu -std=c11 // Basic parsing/Sema tests for __c11_atomic_* diff --git a/clang/test/SemaCUDA/call-device-fn-from-host.cu b/clang/test/SemaCUDA/call-device-fn-from-host.cu index ba1ce86020453..5d506d65ea58c 100644 --- a/clang/test/SemaCUDA/call-device-fn-from-host.cu +++ b/clang/test/SemaCUDA/call-device-fn-from-host.cu @@ -1,5 +1,7 @@ // RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ // RUN: -verify -verify-ignore-unexpected=note +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ +// RUN: -verify -verify-ignore-unexpected=note -fopenmp // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. diff --git a/clang/test/SemaCUDA/constexpr-ctor.cu b/clang/test/SemaCUDA/constexpr-ctor.cu new file mode 100644 index 0000000000000..20107b335dd49 --- /dev/null +++ b/clang/test/SemaCUDA/constexpr-ctor.cu @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only \ +// RUN: -fcuda-is-device -verify=dev %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify=host %s + +// host-no-diagnostics + +#include "Inputs/cuda.h" + +struct A { + A(); // dev-note 2{{'A' declared here}} +}; + +template <class T> struct B { + T a; + constexpr B() = default; // dev-error 2{{reference to __host__ function 'A' in __host__ __device__ function}} +}; + +__host__ void f() { B<A> x; } +__device__ void f() { B<A> x; } // dev-note{{called by 'f'}} + +struct foo { + __host__ foo() { B<A> x; } + __device__ foo() { B<A> x; } // dev-note{{called by 'foo'}} +}; + +__host__ void g() { foo x; } +__device__ void g() { foo x; } // dev-note{{called by 'g'}} + +struct bar { + __host__ bar() { B<A> x; } + __device__ bar() { B<A> x; } // no error since no instantiation of bar +}; diff --git a/clang/test/SemaCUDA/host-device-constexpr.cu b/clang/test/SemaCUDA/host-device-constexpr.cu index 6625d722c194a..6d81034fd3403 100644 --- a/clang/test/SemaCUDA/host-device-constexpr.cu +++ b/clang/test/SemaCUDA/host-device-constexpr.cu @@ -1,5 +1,10 @@ // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -fcuda-is-device +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s \ +// RUN: -fcuda-is-device +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs \ +// RUN: -fopenmp %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs \ +// RUN: -fopenmp %s -fcuda-is-device #include "Inputs/cuda.h" diff --git a/clang/test/SemaCUDA/openmp-static-func.cu b/clang/test/SemaCUDA/openmp-static-func.cu new file mode 100644 index 0000000000000..f1f0c488749e1 --- /dev/null +++ b/clang/test/SemaCUDA/openmp-static-func.cu @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple 
x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify -fopenmp %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only \ +// RUN: -verify -fopenmp -x hip %s +// expected-no-diagnostics + +// Tests there is no assertion in Sema::markKnownEmitted when fopenmp is used +// with CUDA/HIP host compilation. + +static void f() {} + +static void g() { f(); } + +static void h() { g(); } diff --git a/clang/test/SemaCUDA/openmp-target.cu b/clang/test/SemaCUDA/openmp-target.cu new file mode 100644 index 0000000000000..2775dc1e2c5b8 --- /dev/null +++ b/clang/test/SemaCUDA/openmp-target.cu @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -triple x86_64 -verify=expected,dev \ +// RUN: -verify-ignore-unexpected=note \ +// RUN: -fopenmp -fopenmp-version=50 -o - %s +// RUN: %clang_cc1 -triple x86_64 -verify -verify-ignore-unexpected=note\ +// RUN: -fopenmp -fopenmp-version=50 -o - -x c++ %s +// RUN: %clang_cc1 -triple x86_64 -verify=dev -verify-ignore-unexpected=note\ +// RUN: -fcuda-is-device -o - %s + +#if __CUDA__ +#include "Inputs/cuda.h" +__device__ void cu_devf(); +#endif + +void bazz() {} +#pragma omp declare target to(bazz) device_type(nohost) +void bazzz() {bazz();} +#pragma omp declare target to(bazzz) device_type(nohost) +void any() {bazz();} // expected-error {{function with 'device_type(nohost)' is not available on host}} +void host1() {bazz();} +#pragma omp declare target to(host1) device_type(host) +void host2() {bazz();} +#pragma omp declare target to(host2) +void device() {host1();} +#pragma omp declare target to(device) device_type(nohost) +void host3() {host1();} +#pragma omp declare target to(host3) + +#pragma omp declare target +void any1() {any();} +void any2() {host1();} +void any3() {device();} // expected-error {{function with 'device_type(nohost)' is not available on host}} +void any4() {any2();} +#pragma omp end declare target + +void any5() {any();} +void any6() {host1();} +void any7() {device();} // expected-error {{function with 'device_type(nohost)' is not available on host}} +void any8() {any2();} + +#if __CUDA__ +void cu_hostf() { cu_devf(); } // dev-error {{no matching function for call to 'cu_devf'}} +__device__ void cu_devf2() { cu_hostf(); } // dev-error{{no matching function for call to 'cu_hostf'}} +#endif diff --git a/clang/test/SemaCXX/attr-nonnull.cpp b/clang/test/SemaCXX/attr-nonnull.cpp index 764e8d84081b3..21eedcf376d5b 100644 --- a/clang/test/SemaCXX/attr-nonnull.cpp +++ b/clang/test/SemaCXX/attr-nonnull.cpp @@ -77,10 +77,11 @@ constexpr int i3 = f3(&c, 0); //expected-error {{constant expression}} expected- constexpr int i32 = f3(0, &c); __attribute__((nonnull(4))) __attribute__((nonnull)) //expected-error {{out of bounds}} -constexpr int f4(const int*, const int*) { +constexpr int f4(const int*, const int*, int) { return 0; } -constexpr int i4 = f4(&c, 0); //expected-error {{constant expression}} expected-note {{null passed}} -constexpr int i42 = f4(0, &c); //expected-error {{constant expression}} expected-note {{null passed}} +constexpr int i4 = f4(&c, 0, 0); //expected-error {{constant expression}} expected-note {{null passed}} +constexpr int i42 = f4(0, &c, 1); //expected-error {{constant expression}} expected-note {{null passed}} +constexpr int i43 = f4(&c, &c, 0); } \ No newline at end of file diff --git a/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp b/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp index 23582f2e3026e..3647526ff0af7 100644 --- a/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp +++ b/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp 
@@ -166,3 +166,13 @@ constexpr bool construct_after_lifetime() { return true; } static_assert(construct_after_lifetime()); // expected-error {{}} expected-note {{in call}} + +constexpr bool construct_after_lifetime_2() { + struct A { struct B {} b; }; + A a; + a.~A(); + std::construct_at(&a.b); // expected-note {{in call}} + // expected-note@#new {{construction of subobject of object outside its lifetime is not allowed in a constant expression}} + return true; +} +static_assert(construct_after_lifetime_2()); // expected-error {{}} expected-note {{in call}} diff --git a/clang/test/SemaTemplate/temp_arg_nontype.cpp b/clang/test/SemaTemplate/temp_arg_nontype.cpp index 06eb5e0d78495..330a954e0ddd9 100644 --- a/clang/test/SemaTemplate/temp_arg_nontype.cpp +++ b/clang/test/SemaTemplate/temp_arg_nontype.cpp @@ -493,3 +493,16 @@ namespace instantiation_dependent { template int &g(...); int &rg = g(0); } + +namespace complete_array_from_incomplete { + template <typename T, const char *const (&)[static_cast<int>(T::kNum)]> + class Base {}; + template <typename T, const char *const (&kStrs)[]> + class Derived : public Base<T, kStrs> {}; + + struct T { + static const int kNum = 3; + }; + extern const char *const kStrs[3] = {}; + Derived<T, kStrs> d; +} diff --git a/clang/test/Tooling/clang-check-offload.cpp b/clang/test/Tooling/clang-check-offload.cpp new file mode 100644 index 0000000000000..154bc043113e4 --- /dev/null +++ b/clang/test/Tooling/clang-check-offload.cpp @@ -0,0 +1,4 @@ +// RUN: not clang-check "%s" -- -c -x hip -nogpulib 2>&1 | FileCheck %s + +// CHECK: C++ requires +invalid; diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp index acf6cd1e537b4..9e4f32da884fe 100644 --- a/clang/tools/driver/cc1_main.cpp +++ b/clang/tools/driver/cc1_main.cpp @@ -269,8 +269,6 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) { // FIXME(ibiryukov): make profilerOutput flush in destructor instead.
profilerOutput->flush(); llvm::timeTraceProfilerCleanup(); - - llvm::errs() << "Time trace json-file dumped to " << Path.str() << "\n"; } } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 5e77808a74948..1aa16c28d7877 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2047,6 +2047,7 @@ class EnqueueVisitor : public ConstStmtVisitor { void VisitOMPTeamsDirective(const OMPTeamsDirective *D); void VisitOMPTaskLoopDirective(const OMPTaskLoopDirective *D); void VisitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective *D); + void VisitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective *D); void VisitOMPDistributeDirective(const OMPDistributeDirective *D); void VisitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective *D); @@ -2891,6 +2892,11 @@ void EnqueueVisitor::VisitOMPTaskLoopSimdDirective( VisitOMPLoopDirective(D); } +void EnqueueVisitor::VisitOMPMasterTaskLoopDirective( + const OMPMasterTaskLoopDirective *D) { + VisitOMPLoopDirective(D); +} + void EnqueueVisitor::VisitOMPDistributeDirective( const OMPDistributeDirective *D) { VisitOMPLoopDirective(D); @@ -5463,6 +5469,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPTaskLoopDirective"); case CXCursor_OMPTaskLoopSimdDirective: return cxstring::createRef("OMPTaskLoopSimdDirective"); + case CXCursor_OMPMasterTaskLoopDirective: + return cxstring::createRef("OMPMasterTaskLoopDirective"); case CXCursor_OMPDistributeDirective: return cxstring::createRef("OMPDistributeDirective"); case CXCursor_OMPDistributeParallelForDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 422c2dacf755d..ae55041b109ea 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -671,6 +671,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPTaskLoopSimdDirectiveClass: K = CXCursor_OMPTaskLoopSimdDirective; break; + case Stmt::OMPMasterTaskLoopDirectiveClass: + K = CXCursor_OMPMasterTaskLoopDirective; + break; case Stmt::OMPDistributeDirectiveClass: K = CXCursor_OMPDistributeDirective; break; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index dde065ad818c7..b0d7f08892e43 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -14723,6 +14723,12 @@ TEST_F(FormatTest, NotCastRPaen) { verifyFormat("auto operator delete(int &) final"); } -} // end namespace -} // end namespace format -} // end namespace clang +TEST_F(FormatTest, STLWhileNotDefineChed) { + verifyFormat("#if defined(while)\n" + "#define while EMIT WARNING C4005\n" + "#endif // while"); +} + +} // namespace +} // namespace format +} // namespace clang diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt index 2b35302c7b1fa..5cef154926aeb 100644 --- a/clang/unittests/Tooling/CMakeLists.txt +++ b/clang/unittests/Tooling/CMakeLists.txt @@ -75,6 +75,7 @@ clang_target_link_libraries(ToolingTests clangToolingCore clangToolingInclusions clangToolingRefactoring + clangTransformer ) target_link_libraries(ToolingTests diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp index 58ce63cbd750f..29a20a9f18d79 100644 --- a/clang/unittests/Tooling/RangeSelectorTest.cpp +++ b/clang/unittests/Tooling/RangeSelectorTest.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/RangeSelector.h" +#include "clang/Tooling/Transformer/RangeSelector.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Tooling/FixIt.h" diff --git a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp index 2bf50ffad5850..9b5e7bf3ba8da 100644 --- a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp +++ b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/SourceCodeBuilders.h" +#include "clang/Tooling/Transformer/SourceCodeBuilders.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Tooling/Tooling.h" diff --git a/clang/unittests/Tooling/SourceCodeTest.cpp b/clang/unittests/Tooling/SourceCodeTest.cpp index e3da9bf14b62e..eb28d7cf27d60 100644 --- a/clang/unittests/Tooling/SourceCodeTest.cpp +++ b/clang/unittests/Tooling/SourceCodeTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/SourceCode.h" +#include "clang/Tooling/Transformer/SourceCode.h" #include "TestVisitor.h" #include "clang/Basic/Diagnostic.h" #include "llvm/Testing/Support/Annotations.h" diff --git a/clang/unittests/Tooling/StencilTest.cpp b/clang/unittests/Tooling/StencilTest.cpp index 0bd3d90f83fe6..2b9aa68e34c97 100644 --- a/clang/unittests/Tooling/StencilTest.cpp +++ b/clang/unittests/Tooling/StencilTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/Stencil.h" +#include "clang/Tooling/Transformer/Stencil.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Tooling/FixIt.h" #include "clang/Tooling/Tooling.h" diff --git a/clang/unittests/Tooling/TransformerTest.cpp b/clang/unittests/Tooling/TransformerTest.cpp index 5d55182f8273b..feae0c649aae5 100644 --- a/clang/unittests/Tooling/TransformerTest.cpp +++ b/clang/unittests/Tooling/TransformerTest.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Refactoring/Transformer.h" +#include "clang/Tooling/Transformer/Transformer.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Tooling/Refactoring/RangeSelector.h" +#include "clang/Tooling/Transformer/RangeSelector.h" #include "clang/Tooling/Tooling.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 283747dd3b68c..2cf7d90291100 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -1413,7 +1413,7 @@ void Intrinsic::emitBodyAsBuiltinCall() { if (T.getNumVectors() > 1) { // Check if an explicit cast is needed. 
std::string Cast; - if (T.isChar() || T.isPoly() || !T.isSigned()) { + if (LocalCK == ClassB) { Type T2 = T; T2.makeOneVector(); T2.makeInteger(8, /*Signed=*/true); @@ -1445,6 +1445,9 @@ void Intrinsic::emitBodyAsBuiltinCall() { if (CastToType.isVector() && LocalCK == ClassB) { CastToType.makeInteger(8, true); Arg = "(" + CastToType.str() + ")" + Arg; + } else if (CastToType.isVector() && LocalCK == ClassI) { + CastToType.makeSigned(); + Arg = "(" + CastToType.str() + ")" + Arg; } S += Arg + ", "; diff --git a/compiler-rt/lib/asan/asan_errors.h b/compiler-rt/lib/asan/asan_errors.h index b84f56c18535d..a7fda2fd9f5d6 100644 --- a/compiler-rt/lib/asan/asan_errors.h +++ b/compiler-rt/lib/asan/asan_errors.h @@ -48,7 +48,8 @@ struct ErrorDeadlySignal : ErrorBase { scariness.Scare(10, "stack-overflow"); } else if (!signal.is_memory_access) { scariness.Scare(10, "signal"); - } else if (signal.addr < GetPageSizeCached()) { + } else if (signal.is_true_faulting_addr && + signal.addr < GetPageSizeCached()) { scariness.Scare(10, "null-deref"); } else if (signal.addr == signal.pc) { scariness.Scare(60, "wild-jump"); diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index df533e877d37a..451a4c1731674 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -273,9 +273,9 @@ void Fuzzer::InterruptCallback() { NO_SANITIZE_MEMORY void Fuzzer::AlarmCallback() { assert(Options.UnitTimeoutSec > 0); - // In Windows Alarm callback is executed by a different thread. + // In Windows and Fuchsia, Alarm callback is executed by a different thread. // NetBSD's current behavior needs this change too. -#if !LIBFUZZER_WINDOWS && !LIBFUZZER_NETBSD +#if !LIBFUZZER_WINDOWS && !LIBFUZZER_NETBSD && !LIBFUZZER_FUCHSIA if (!InFuzzingThread()) return; #endif diff --git a/compiler-rt/lib/interception/tests/CMakeLists.txt b/compiler-rt/lib/interception/tests/CMakeLists.txt index b2418110fba77..bad67325c5b10 100644 --- a/compiler-rt/lib/interception/tests/CMakeLists.txt +++ b/compiler-rt/lib/interception/tests/CMakeLists.txt @@ -32,7 +32,10 @@ else() endif() if(MSVC) list(APPEND INTERCEPTION_TEST_CFLAGS_COMMON -gcodeview) - list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON -Wl,-largeaddressaware) + list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON + -Wl,-largeaddressaware + -Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames + ) endif() list(APPEND INTERCEPTION_TEST_LINK_FLAGS_COMMON -g) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index ad056df387d5a..87b8f02b5b730 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -881,6 +881,11 @@ struct SignalContext { bool is_memory_access; enum WriteFlag { UNKNOWN, READ, WRITE } write_flag; + // In some cases the kernel cannot provide the true faulting address; `addr` + // will be zero then. This field allows to distinguish between these cases + // and dereferences of null. 
+ bool is_true_faulting_addr; + // VS2013 doesn't implement unrestricted unions, so we need a trivial default // constructor SignalContext() = default; @@ -893,7 +898,8 @@ struct SignalContext { context(context), addr(GetAddress()), is_memory_access(IsMemoryAccess()), - write_flag(GetWriteFlag()) { + write_flag(GetWriteFlag()), + is_true_faulting_addr(IsTrueFaultingAddress()) { InitPcSpBp(); } @@ -914,6 +920,7 @@ struct SignalContext { uptr GetAddress() const; WriteFlag GetWriteFlag() const; bool IsMemoryAccess() const; + bool IsTrueFaultingAddress() const; }; void InitializePlatformEarly(); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 7cae94559075e..50e3558b52e87 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -9608,6 +9608,21 @@ INTERCEPTOR(char *, crypt_r, char *key, char *salt, void *data) { #define INIT_CRYPT_R #endif +#if SANITIZER_INTERCEPT_GETENTROPY +INTERCEPTOR(int, getentropy, void *buf, SIZE_T buflen) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, getentropy, buf, buflen); + int r = REAL(getentropy)(buf, buflen); + if (r == 0) { + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, buflen); + } + return r; +} +#define INIT_GETENTROPY COMMON_INTERCEPT_FUNCTION(getentropy) +#else +#define INIT_GETENTROPY +#endif + static void InitializeCommonInterceptors() { #if SI_POSIX static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1]; @@ -9908,6 +9923,7 @@ static void InitializeCommonInterceptors() { INIT_GETRANDOM; INIT_CRYPT; INIT_CRYPT_R; + INIT_GETENTROPY; INIT___PRINTF_CHK; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index d23009075c6fc..0b53da6c349f2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1849,6 +1849,12 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { #endif } +bool SignalContext::IsTrueFaultingAddress() const { + auto si = static_cast(siginfo); + // SIGSEGV signals without a true fault address have si_code set to 128. + return si->si_signo == SIGSEGV && si->si_code != 128; +} + void SignalContext::DumpAllRegisters(void *context) { // FIXME: Implement this. } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index 8eb1dfbdea6f6..ea4bd02aa92e4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -754,6 +754,12 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { #endif } +bool SignalContext::IsTrueFaultingAddress() const { + auto si = static_cast(siginfo); + // "Real" SIGSEGV codes (e.g., SEGV_MAPERR, SEGV_MAPERR) are non-zero. 
+ return si->si_signo == SIGSEGV && si->si_code != 0; +} + static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { ucontext_t *ucontext = (ucontext_t*)context; # if defined(__aarch64__) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 54a1699f5c44e..61a6b82ef8184 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -569,9 +569,11 @@ #define SANITIZER_INTERCEPT_CRYPT (SI_POSIX && !SI_ANDROID) #define SANITIZER_INTERCEPT_CRYPT_R (SI_LINUX && !SI_ANDROID) -#define SANITIZER_INTERCEPT_GETRANDOM (SI_LINUX && __GLIBC_PREREQ(2, 25)) +#define SANITIZER_INTERCEPT_GETRANDOM \ + ((SI_LINUX && __GLIBC_PREREQ(2, 25)) || SI_FREEBSD) #define SANITIZER_INTERCEPT___CXA_ATEXIT SI_NETBSD #define SANITIZER_INTERCEPT_ATEXIT SI_NETBSD #define SANITIZER_INTERCEPT_PTHREAD_ATFORK SI_NETBSD +#define SANITIZER_INTERCEPT_GETENTROPY SI_FREEBSD #endif // #ifndef SANITIZER_PLATFORM_INTERCEPTORS_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp index 7063de257a9f0..a032787114bb9 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp @@ -229,8 +229,6 @@ void SetPrintfAndReportCallback(void (*callback)(const char *)) { // Can be overriden in frontend. #if SANITIZER_GO && defined(TSAN_EXTERNAL_HOOKS) // Implementation must be defined in frontend. -// TODO(morehouse): Remove OnPrint after migrating Go to __sanitizer_on_print. -extern "C" void OnPrint(const char *str); extern "C" void __sanitizer_on_print(const char *str); #else SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_on_print, const char *str) { @@ -239,10 +237,6 @@ SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_on_print, const char *str) { #endif static void CallPrintfAndReportCallback(const char *str) { -#if SANITIZER_GO && defined(TSAN_EXTERNAL_HOOKS) - // TODO(morehouse): Remove OnPrint after migrating Go to __sanitizer_on_print. 
- OnPrint(str); -#endif __sanitizer_on_print(str); if (PrintfAndReportCallback) PrintfAndReportCallback(str); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_suppressions.h b/compiler-rt/lib/sanitizer_common/sanitizer_suppressions.h index f9da7af7e6ab3..2d88b1f72fa6d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_suppressions.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_suppressions.h @@ -42,7 +42,7 @@ class SuppressionContext { void GetMatched(InternalMmapVector *matched); private: - static const int kMaxSuppressionTypes = 32; + static const int kMaxSuppressionTypes = 64; const char **const suppression_types_; const int suppression_types_num_; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp index d6699f3ed6f68..fe9ea1a820057 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp @@ -191,9 +191,14 @@ static void ReportDeadlySignalImpl(const SignalContext &sig, u32 tid, SanitizerCommonDecorator d; Printf("%s", d.Warning()); const char *description = sig.Describe(); - Report("ERROR: %s: %s on unknown address %p (pc %p bp %p sp %p T%d)\n", - SanitizerToolName, description, (void *)sig.addr, (void *)sig.pc, - (void *)sig.bp, (void *)sig.sp, tid); + if (sig.is_memory_access && !sig.is_true_faulting_addr) + Report("ERROR: %s: %s on unknown address (pc %p bp %p sp %p T%d)\n", + SanitizerToolName, description, (void *)sig.pc, (void *)sig.bp, + (void *)sig.sp, tid); + else + Report("ERROR: %s: %s on unknown address %p (pc %p bp %p sp %p T%d)\n", + SanitizerToolName, description, (void *)sig.addr, (void *)sig.pc, + (void *)sig.bp, (void *)sig.sp, tid); Printf("%s", d.Default()); if (sig.pc < GetPageSizeCached()) Report("Hint: pc points to the zero page.\n"); @@ -203,7 +208,11 @@ static void ReportDeadlySignalImpl(const SignalContext &sig, u32 tid, ? "WRITE" : (sig.write_flag == SignalContext::READ ? "READ" : "UNKNOWN"); Report("The signal is caused by a %s memory access.\n", access_type); - if (sig.addr < GetPageSizeCached()) + if (!sig.is_true_faulting_addr) + Report("Hint: this fault was caused by a dereference of a high value " + "address (see registers below). Dissassemble the provided pc " + "to learn which register value was used.\n"); + else if (sig.addr < GetPageSizeCached()) Report("Hint: address points to the zero page.\n"); } MaybeReportNonExecRegion(sig.pc); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index ce2a4314ab9ed..36dde49d87083 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -945,6 +945,11 @@ bool SignalContext::IsMemoryAccess() const { return GetWriteFlag() != SignalContext::UNKNOWN; } +bool SignalContext::IsTrueFaultingAddress() const { + // FIXME: Provide real implementation for this. See Linux and Mac variants. 
+ return IsMemoryAccess(); +} + SignalContext::WriteFlag SignalContext::GetWriteFlag() const { EXCEPTION_RECORD *exception_record = (EXCEPTION_RECORD *)siginfo; // The contents of this array are documented at diff --git a/compiler-rt/lib/ubsan/ubsan_checks.inc b/compiler-rt/lib/ubsan/ubsan_checks.inc index 7e7216c5b4ab7..33a8dfcde0269 100644 --- a/compiler-rt/lib/ubsan/ubsan_checks.inc +++ b/compiler-rt/lib/ubsan/ubsan_checks.inc @@ -18,6 +18,11 @@ UBSAN_CHECK(GenericUB, "undefined-behavior", "undefined") UBSAN_CHECK(NullPointerUse, "null-pointer-use", "null") +UBSAN_CHECK(NullptrWithOffset, "nullptr-with-offset", "pointer-overflow") +UBSAN_CHECK(NullptrWithNonZeroOffset, "nullptr-with-nonzero-offset", + "pointer-overflow") +UBSAN_CHECK(NullptrAfterNonZeroOffset, "nullptr-after-nonzero-offset", + "pointer-overflow") UBSAN_CHECK(PointerOverflow, "pointer-overflow", "pointer-overflow") UBSAN_CHECK(MisalignedPointerUse, "misaligned-pointer-use", "alignment") UBSAN_CHECK(AlignmentAssumption, "alignment-assumption", "alignment") diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index e832581f9dcd5..3f9da75a12a8d 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -691,14 +691,33 @@ static void handlePointerOverflowImpl(PointerOverflowData *Data, ValueHandle Result, ReportOptions Opts) { SourceLocation Loc = Data->Loc.acquire(); - ErrorType ET = ErrorType::PointerOverflow; + ErrorType ET; + + if (Base == 0 && Result == 0) + ET = ErrorType::NullptrWithOffset; + else if (Base == 0 && Result != 0) + ET = ErrorType::NullptrWithNonZeroOffset; + else if (Base != 0 && Result == 0) + ET = ErrorType::NullptrAfterNonZeroOffset; + else + ET = ErrorType::PointerOverflow; if (ignoreReport(Loc, Opts, ET)) return; ScopedReport R(Opts, Loc, ET); - if ((sptr(Base) >= 0) == (sptr(Result) >= 0)) { + if (ET == ErrorType::NullptrWithOffset) { + Diag(Loc, DL_Error, ET, "applying zero offset to null pointer"); + } else if (ET == ErrorType::NullptrWithNonZeroOffset) { + Diag(Loc, DL_Error, ET, "applying non-zero offset %0 to null pointer") + << Result; + } else if (ET == ErrorType::NullptrAfterNonZeroOffset) { + Diag( + Loc, DL_Error, ET, + "applying non-zero offset to non-null pointer %0 produced null pointer") + << (void *)Base; + } else if ((sptr(Base) >= 0) == (sptr(Result) >= 0)) { if (Base > Result) Diag(Loc, DL_Error, ET, "addition of unsigned offset to %0 overflowed to %1") diff --git a/compiler-rt/test/asan/TestCases/Posix/high-address-dereference.c b/compiler-rt/test/asan/TestCases/Posix/high-address-dereference.c new file mode 100644 index 0000000000000..78503302891b5 --- /dev/null +++ b/compiler-rt/test/asan/TestCases/Posix/high-address-dereference.c @@ -0,0 +1,50 @@ +// On x86_64, the kernel does not provide the faulting address for dereferences +// of addresses greater than the 48-bit hardware addressable range, i.e., +// `siginfo.si_addr` is zero in ASan's SEGV signal handler. This test checks +// that ASan does not misrepresent such cases as "NULL dereferences". 
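For reference, a minimal standalone sketch of the kernel behavior the comment above describes, not part of the patch: it assumes Linux/x86_64 with glibc and no sanitizer, and the handler name and the expected si_code value (SI_KERNEL, numerically 128, matching the check added to sanitizer_linux.cpp earlier in this patch) are illustrative assumptions.

#define _POSIX_C_SOURCE 200809L
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Prints what the kernel reports for the fault; fprintf is not
   async-signal-safe, which is acceptable for a one-shot demo. */
static void segv_handler(int sig, siginfo_t *si, void *ctx) {
  (void)sig; (void)ctx;
  fprintf(stderr, "si_addr=%p si_code=%d\n", si->si_addr, si->si_code);
  _Exit(0);
}

int main(void) {
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sa.sa_flags = SA_SIGINFO;
  sa.sa_sigaction = segv_handler;
  sigaction(SIGSEGV, &sa, NULL);
  /* Non-canonical address: expected to print si_addr=(nil) si_code=128 on
     Linux/x86_64. A plain null dereference would instead report
     si_code=SEGV_MAPERR with the (near-)zero faulting address. */
  volatile uint64_t *p = (volatile uint64_t *)0x4141414141414141ULL;
  return (int)*p;
}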
+ +// REQUIRES: x86_64-target-arch +// RUN: %clang_asan %s -o %t +// RUN: export %env_asan_opts=print_scariness=1 +// RUN: not %run %t 0x0000000000000000 2>&1 | FileCheck %s --check-prefixes=ZERO,HINT-PAGE0 +// RUN: not %run %t 0x0000000000000FFF 2>&1 | FileCheck %s --check-prefixes=LOW1,HINT-PAGE0 +// RUN: not %run %t 0x0000000000001000 2>&1 | FileCheck %s --check-prefixes=LOW2,HINT-NONE +// RUN: not %run %t 0x4141414141414141 2>&1 | FileCheck %s --check-prefixes=HIGH,HINT-HIGHADDR +// RUN: not %run %t 0xFFFFFFFFFFFFFFFF 2>&1 | FileCheck %s --check-prefixes=MAX,HINT-HIGHADDR + +#include +#include + +int main(int argc, const char *argv[]) { + const char *hex = argv[1]; + uint64_t *addr = (uint64_t *)strtoull(hex, NULL, 16); + uint64_t x = *addr; // segmentation fault + return x; +} + +// ZERO: SEGV on unknown address 0x000000000000 (pc +// LOW1: SEGV on unknown address 0x000000000fff (pc +// LOW2: SEGV on unknown address 0x000000001000 (pc +// HIGH: SEGV on unknown address (pc +// MAX: SEGV on unknown address (pc + +// HINT-PAGE0-NOT: Hint: this fault was caused by a dereference of a high value address +// HINT-PAGE0: Hint: address points to the zero page. + +// HINT-NONE-NOT: Hint: this fault was caused by a dereference of a high value address +// HINT-NONE-NOT: Hint: address points to the zero page. + +// HINT-HIGHADDR: Hint: this fault was caused by a dereference of a high value address +// HINT-HIGHADDR-NOT: Hint: address points to the zero page. + +// ZERO: SCARINESS: 10 (null-deref) +// LOW1: SCARINESS: 10 (null-deref) +// LOW2: SCARINESS: 20 (wild-addr-read) +// HIGH: SCARINESS: 20 (wild-addr-read) +// MAX: SCARINESS: 20 (wild-addr-read) + +// TODO: Currently, register values are only printed on Mac. Once this changes, +// remove the 'TODO_' prefix in the following lines. +// TODO_HIGH,TODO_MAX: Register values: +// TODO_HIGH: = 0x4141414141414141 +// TODO_MAX: = 0xffffffffffffffff diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/getrandom.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/getrandom.cpp deleted file mode 100644 index 08337f537d10f..0000000000000 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/getrandom.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clangxx -O2 %s -o %t && %run %t -// UNSUPPORTED: android -// - -#include - -#if !defined(__GLIBC_PREREQ) -#define __GLIBC_PREREQ(a, b) 0 -#endif - -#if __GLIBC_PREREQ(2, 25) -#include -#endif - -int main() { - char buf[16]; - ssize_t n = 1; -#if __GLIBC_PREREQ(2, 25) - n = getrandom(buf, sizeof(buf), 0); -#endif - return (int)(n <= 0); -} diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp index f7af8aa850f0b..efc43c6ee31e6 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_trap_handler.cpp @@ -1,5 +1,8 @@ // RUN: %clangxx -O1 %s -o %t && %env_tool_opts=handle_sigtrap=1 %run %t 2>&1 | FileCheck %s +// __builtin_debugtrap() does not raise SIGTRAP on these platforms. +// UNSUPPORTED: s390 + #include #include #include @@ -26,6 +29,8 @@ int main() { assert(a.sa_flags & SA_SIGINFO); in_handler = 1; + // Check that signal handler is not postponed by sanitizer. + // Don't use raise here as it calls any signal handler immediately. 
__builtin_debugtrap(); in_handler = 0; diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/crypt.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/crypt.cpp index 7927c6b6ab45f..17ab6965b20b8 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/crypt.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/crypt.cpp @@ -6,6 +6,7 @@ #include #include #include +#include int main (int argc, char** argv) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/getrandom.c b/compiler-rt/test/sanitizer_common/TestCases/Posix/getrandom.c new file mode 100644 index 0000000000000..ba615a720527b --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/getrandom.c @@ -0,0 +1,26 @@ +// RUN: %clang -O2 %s -o %t && %run %t +// UNSUPPORTED: android, netbsd, darwin, solaris +// + +#include + +#if !defined(__GLIBC_PREREQ) +#define __GLIBC_PREREQ(a, b) 0 +#endif + +#if (defined(__linux__) && __GLIBC_PREREQ(2, 25)) || defined(__FreeBSD__) +#define HAS_GETRANDOM +#endif + +#if defined(HAS_GETRANDOM) +#include +#endif + +int main() { + char buf[16]; + ssize_t n = 1; +#if defined(HAS_GETRANDOM) + n = getrandom(buf, sizeof(buf), 0); +#endif + return (int)(n <= 0); +} diff --git a/compiler-rt/test/ubsan/TestCases/Pointer/index-overflow.cpp b/compiler-rt/test/ubsan/TestCases/Pointer/index-overflow.cpp index f9b1fea08c940..0c082ddd4b9ef 100644 --- a/compiler-rt/test/ubsan/TestCases/Pointer/index-overflow.cpp +++ b/compiler-rt/test/ubsan/TestCases/Pointer/index-overflow.cpp @@ -1,7 +1,9 @@ // RUN: %clangxx -fsanitize=pointer-overflow %s -o %t -// RUN: %run %t 1 2>&1 | FileCheck %s --check-prefix=ERR -// RUN: %run %t 0 2>&1 | FileCheck %s --check-prefix=SAFE -// RUN: %run %t -1 2>&1 | FileCheck %s --check-prefix=SAFE +// RUN: %run %t 2 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=ERR2 +// RUN: %run %t 1 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=ERR1 +// RUN: %run %t 0 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=SAFE +// RUN: %run %t -1 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=SAFE +// RUN: %run %t -2 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=SAFE #include #include @@ -9,11 +11,18 @@ int main(int argc, char *argv[]) { // SAFE-NOT: runtime error - // ERR: runtime error: pointer index expression with base {{.*}} overflowed to + // ERR2: runtime error: pointer index expression with base {{.*}} overflowed to + // ERR2: runtime error: pointer index expression with base {{.*}} overflowed to + // ERR1: runtime error: applying non-zero offset to non-null pointer 0x{{.*}} produced null pointer + // ERR1: runtime error: applying non-zero offset to non-null pointer 0x{{.*}} produced null pointer char *p = (char *)(UINTPTR_MAX); printf("%p\n", p + atoi(argv[1])); + char *q = (char *)(UINTPTR_MAX); + + printf("%p\n", p - (-atoi(argv[1]))); + return 0; } diff --git a/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-constants.cpp b/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-constants.cpp new file mode 100644 index 0000000000000..2c3c156592be7 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-constants.cpp @@ -0,0 +1,29 @@ +// RUN: %clang -x c -fsanitize=pointer-overflow -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-C --implicit-check-not="error:" +// RUN: %clang -x c -fsanitize=pointer-overflow -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-C 
--implicit-check-not="error:" +// RUN: %clang -x c -fsanitize=pointer-overflow -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-C --implicit-check-not="error:" +// RUN: %clang -x c -fsanitize=pointer-overflow -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-C --implicit-check-not="error:" + +// RUN: %clangxx -fsanitize=pointer-overflow -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-CPP --implicit-check-not="error:" +// RUN: %clangxx -fsanitize=pointer-overflow -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-CPP --implicit-check-not="error:" +// RUN: %clangxx -fsanitize=pointer-overflow -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-CPP --implicit-check-not="error:" +// RUN: %clangxx -fsanitize=pointer-overflow -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-CPP --implicit-check-not="error:" + +#include + +int main(int argc, char *argv[]) { + char *base, *result; + + base = (char *)0; + result = base + 0; + // CHECK-C: {{.*}}.cpp:[[@LINE-1]]:17: runtime error: applying zero offset to null pointer + + base = (char *)0; + result = base + 1; + // CHECK: {{.*}}.cpp:[[@LINE-1]]:17: runtime error: applying non-zero offset 1 to null pointer + + base = (char *)1; + result = base - 1; + // CHECK: {{.*}}.cpp:[[@LINE-1]]:17: runtime error: applying non-zero offset to non-null pointer 0x{{.*}} produced null pointer + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-summary.cpp b/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-summary.cpp new file mode 100644 index 0000000000000..1f819747202d2 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-summary.cpp @@ -0,0 +1,32 @@ +// RUN: %clang -x c -fsanitize=pointer-overflow %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK-NOTYPE,CHECK-NOTYPE-C +// RUN: %env_ubsan_opts=report_error_type=1 %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK-TYPE,CHECK-TYPE-C + +// RUN: %clangxx -fsanitize=pointer-overflow %s -o %t +// RUN: %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK-NOTYPE,CHECK-NOTYPE-CPP +// RUN: %env_ubsan_opts=report_error_type=1 %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK-TYPE,CHECK-TYPE-CPP + +// REQUIRES: !ubsan-standalone && !ubsan-standalone-static + +#include + +int main(int argc, char *argv[]) { + char *base, *result; + + base = (char *)0; + result = base + 0; + // CHECK-NOTYPE-C: SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior {{.*}}summary.cpp:[[@LINE-1]]:17 + // CHECK-TYPE-C: SUMMARY: UndefinedBehaviorSanitizer: nullptr-with-offset {{.*}}summary.cpp:[[@LINE-2]]:17 + + base = (char *)0; + result = base + 1; + // CHECK-NOTYPE: SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior {{.*}}summary.cpp:[[@LINE-1]]:17 + // CHECK-TYPE: SUMMARY: UndefinedBehaviorSanitizer: nullptr-with-nonzero-offset {{.*}}summary.cpp:[[@LINE-2]]:17 + + base = (char *)1; + result = base - 1; + // CHECK-NOTYPE: SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior {{.*}}summary.cpp:[[@LINE-1]]:17 + // CHECK-TYPE: SUMMARY: UndefinedBehaviorSanitizer: nullptr-after-nonzero-offset {{.*}}summary.cpp:[[@LINE-2]]:17 + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-variable.cpp b/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-variable.cpp new file mode 100644 index 0000000000000..ec376c7f5313e --- /dev/null +++ 
b/compiler-rt/test/ubsan/TestCases/Pointer/nullptr-and-nonzero-offset-variable.cpp @@ -0,0 +1,44 @@ +// RUN: %clang -x c -fsanitize=pointer-overflow -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB-C +// RUN: %clang -x c -fsanitize=pointer-overflow -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB-C +// RUN: %clang -x c -fsanitize=pointer-overflow -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB-C +// RUN: %clang -x c -fsanitize=pointer-overflow -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB-C + +// RUN: %clang -x c++ -fsanitize=pointer-overflow -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK +// RUN: %clang -x c++ -fsanitize=pointer-overflow -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK +// RUN: %clang -x c++ -fsanitize=pointer-overflow -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK +// RUN: %clang -x c++ -fsanitize=pointer-overflow -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK + +// RUN: %clang -x c -fsanitize=pointer-overflow -O0 %s -o %t && %run %t I_AM_UB 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB +// RUN: %clang -x c -fsanitize=pointer-overflow -O1 %s -o %t && %run %t I_AM_UB 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB +// RUN: %clang -x c -fsanitize=pointer-overflow -O2 %s -o %t && %run %t I_AM_UB 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB +// RUN: %clang -x c -fsanitize=pointer-overflow -O3 %s -o %t && %run %t I_AM_UB 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB + +// RUN: %clang -x c++ -fsanitize=pointer-overflow -O0 %s -o %t && %run %t I_AM_UB 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB +// RUN: %clang -x c++ -fsanitize=pointer-overflow -O1 %s -o %t && %run %t I_AM_UB 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB +// RUN: %clang -x c++ -fsanitize=pointer-overflow -O2 %s -o %t && %run %t I_AM_UB 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB +// RUN: %clang -x c++ -fsanitize=pointer-overflow -O3 %s -o %t && %run %t I_AM_UB 2>&1 | FileCheck %s --implicit-check-not="error:" --check-prefix=CHECK-UB + +#include +#include + +// Just so deduplication doesn't do anything. +static char *getelementpointer_inbounds_v0(char *base, unsigned long offset) { + // Potentially UB. 
+ return base + offset; +} + +int main(int argc, char *argv[]) { + char *base; + unsigned long offset; + + printf("Dummy\n"); + // CHECK: Dummy + + base = (char *)0; + offset = argc - 1; + (void)getelementpointer_inbounds_v0(base, offset); + // CHECK-UB: {{.*}}.cpp:[[@LINE-13]]:15: runtime error: applying non-zero offset 1 to null pointer + // CHECK-UB-C: {{.*}}.cpp:[[@LINE-14]]:15: runtime error: applying zero offset to null pointer + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/Pointer/unsigned-index-expression.cpp b/compiler-rt/test/ubsan/TestCases/Pointer/unsigned-index-expression.cpp index 5a1432625a54c..e4494f33455d5 100644 --- a/compiler-rt/test/ubsan/TestCases/Pointer/unsigned-index-expression.cpp +++ b/compiler-rt/test/ubsan/TestCases/Pointer/unsigned-index-expression.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -std=c++11 -fsanitize=pointer-overflow %s -o %t -// RUN: %run %t 2>&1 | FileCheck %s +// RUN: %run %t 2>&1 | FileCheck %s --implicit-check-not="error:" int main(int argc, char *argv[]) { char c; @@ -12,7 +12,7 @@ int main(int argc, char *argv[]) { // CHECK: unsigned-index-expression.cpp:[[@LINE+1]]:16: runtime error: subtraction of unsigned offset from 0x{{.*}} overflowed to 0x{{.*}} char *q1 = p - neg_1; - // CHECK: unsigned-index-expression.cpp:[[@LINE+2]]:16: runtime error: pointer index expression with base 0x{{0*}} overflowed to 0x{{.*}} + // CHECK: unsigned-index-expression.cpp:[[@LINE+2]]:16: runtime error: applying non-zero offset {{.*}} to null pointer char *n = nullptr; char *q2 = n - 1ULL; diff --git a/compiler-rt/test/ubsan_minimal/TestCases/nullptr-and-nonzero-offset.c b/compiler-rt/test/ubsan_minimal/TestCases/nullptr-and-nonzero-offset.c new file mode 100644 index 0000000000000..ba930341b4a98 --- /dev/null +++ b/compiler-rt/test/ubsan_minimal/TestCases/nullptr-and-nonzero-offset.c @@ -0,0 +1,22 @@ +// RUN: %clang -fsanitize=pointer-overflow %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-C --implicit-check-not="pointer-overflow" +// RUN: %clangxx -fsanitize=pointer-overflow %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-CPP --implicit-check-not="pointer-overflow" + +#include + +int main(int argc, char *argv[]) { + char *base, *result; + + base = (char *)0; + result = base + 0; + // CHECK-C: pointer-overflow + + base = (char *)0; + result = base + 1; + // CHECK: pointer-overflow + + base = (char *)1; + result = base - 1; + // CHECK: pointer-overflow + + return 0; +} diff --git a/libc/src/string/strcat/strcat_test.cpp b/libc/src/string/strcat/strcat_test.cpp index 26bcae2373e71..3b8a7a7e4472b 100644 --- a/libc/src/string/strcat/strcat_test.cpp +++ b/libc/src/string/strcat/strcat_test.cpp @@ -13,7 +13,7 @@ TEST(StrCatTest, EmptyDest) { std::string abc = "abc"; - char *dest = new char[4]; + char dest[4]; dest[0] = '\0'; @@ -21,13 +21,11 @@ TEST(StrCatTest, EmptyDest) { ASSERT_EQ(dest, result); ASSERT_EQ(std::string(dest), abc); ASSERT_EQ(std::string(dest).size(), abc.size()); - - delete[] dest; } TEST(StrCatTest, NonEmptyDest) { std::string abc = "abc"; - char *dest = new char[4]; + char dest[7]; dest[0] = 'x'; dest[1] = 'y'; @@ -38,6 +36,4 @@ TEST(StrCatTest, NonEmptyDest) { ASSERT_EQ(dest, result); ASSERT_EQ(std::string(dest), std::string("xyz") + abc); ASSERT_EQ(std::string(dest).size(), abc.size() + 3); - - delete[] dest; } diff --git a/libc/src/string/strcpy/strcpy_test.cpp b/libc/src/string/strcpy/strcpy_test.cpp index 48f55f246492e..e68ea5103dbb6 100644 --- a/libc/src/string/strcpy/strcpy_test.cpp +++ 
b/libc/src/string/strcpy/strcpy_test.cpp @@ -13,19 +13,17 @@ TEST(StrCpyTest, EmptyDest) { std::string abc = "abc"; - char *dest = new char[4]; + char dest[4]; char *result = __llvm_libc::strcpy(dest, abc.c_str()); ASSERT_EQ(dest, result); ASSERT_EQ(std::string(dest), abc); ASSERT_EQ(std::string(dest).size(), abc.size()); - - delete[] dest; } TEST(StrCpyTest, OffsetDest) { std::string abc = "abc"; - char *dest = new char[7]; + char dest[7]; dest[0] = 'x'; dest[1] = 'y'; @@ -35,6 +33,4 @@ TEST(StrCpyTest, OffsetDest) { ASSERT_EQ(dest + 3, result); ASSERT_EQ(std::string(dest), std::string("xyz") + abc); ASSERT_EQ(std::string(dest).size(), abc.size() + 3); - - delete[] dest; } diff --git a/libcxx/cmake/Modules/DefineLinkerScript.cmake b/libcxx/cmake/Modules/DefineLinkerScript.cmake index 213ab5d47a695..2e68121f6187e 100644 --- a/libcxx/cmake/Modules/DefineLinkerScript.cmake +++ b/libcxx/cmake/Modules/DefineLinkerScript.cmake @@ -31,7 +31,9 @@ function(define_linker_script target) set(link_libraries) if (interface_libs) foreach(lib IN LISTS interface_libs) - if (TARGET "${lib}") + if (TARGET "${lib}" OR + (${lib} MATCHES "cxxabi(_static|_shared)?" AND HAVE_LIBCXXABI) OR + (${lib} MATCHES "unwind(_static|_shared)?" AND HAVE_LIBUNWIND)) list(APPEND link_libraries "${CMAKE_LINK_LIBRARY_FLAG}$") else() list(APPEND link_libraries "${CMAKE_LINK_LIBRARY_FLAG}${lib}") diff --git a/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp index f955135209c53..0a8db58ccea73 100644 --- a/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp @@ -75,23 +75,22 @@ void test_basic() { void duplicate_keys_test() { typedef std::map, test_allocator > > Map; - typedef test_alloc_base AllocBase; { - LIBCPP_ASSERT(AllocBase::alloc_count == 0); + LIBCPP_ASSERT(test_alloc_base::alloc_count == 0); Map s = {{1, 0}, {2, 0}, {3, 0}}; - LIBCPP_ASSERT(AllocBase::alloc_count == 3); + LIBCPP_ASSERT(test_alloc_base::alloc_count == 3); s = {{4, 0}, {4, 0}, {4, 0}, {4, 0}}; - LIBCPP_ASSERT(AllocBase::alloc_count == 1); + LIBCPP_ASSERT(test_alloc_base::alloc_count == 1); assert(s.size() == 1); assert(s.begin()->first == 4); } - LIBCPP_ASSERT(AllocBase::alloc_count == 0); + LIBCPP_ASSERT(test_alloc_base::alloc_count == 0); } int main(int, char**) { - test_basic(); - duplicate_keys_test(); + test_basic(); + duplicate_keys_test(); return 0; } diff --git a/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp index 3762446467e2a..7efe3fc27813c 100644 --- a/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp +++ b/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp @@ -57,17 +57,16 @@ void basic_test() { void duplicate_keys_test() { typedef std::set, test_allocator > Set; - typedef test_alloc_base AllocBase; { - LIBCPP_ASSERT(AllocBase::alloc_count == 0); + LIBCPP_ASSERT(test_alloc_base::alloc_count == 0); Set s = {1, 2, 3}; - LIBCPP_ASSERT(AllocBase::alloc_count == 3); + LIBCPP_ASSERT(test_alloc_base::alloc_count == 3); s = {4, 4, 4, 4, 4}; - LIBCPP_ASSERT(AllocBase::alloc_count == 1); + LIBCPP_ASSERT(test_alloc_base::alloc_count == 1); assert(s.size() == 1); assert(*s.begin() == 4); } - 
LIBCPP_ASSERT(AllocBase::alloc_count == 0); + LIBCPP_ASSERT(test_alloc_base::alloc_count == 0); } int main(int, char**) { diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp index 0923597dcc994..503477ba82fac 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp @@ -87,22 +87,22 @@ int main(int, char**) { const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; - std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash()); - ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to>); + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to>); assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); } { const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; - std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>()); - ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>>); + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>>); assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); } { const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; - std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>(), test_allocator(0, 41)); - ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>, test_allocator>); + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>(), test_allocator(0, 41)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>, test_allocator>); assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); assert(m.get_allocator().get_id() == 41); } @@ -122,7 +122,7 @@ int main(int, char**) } { - std::unordered_map, std::equal_to<>, test_allocator> source; + std::unordered_map, std::equal_to<>, test_allocator> source; test_allocator a(0, 42); std::unordered_map m(source, a); ASSERT_SAME_TYPE(decltype(m), decltype(source)); @@ -131,7 +131,7 @@ int main(int, char**) } { - std::unordered_map, std::equal_to<>, test_allocator> source; + std::unordered_map, std::equal_to<>, test_allocator> source; test_allocator a(0, 43); std::unordered_map m{source, a}; // braces instead of parens ASSERT_SAME_TYPE(decltype(m), decltype(source)); @@ -152,20 +152,20 @@ int main(int, char**) } { - std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash()); - ASSERT_SAME_TYPE(decltype(m), std::unordered_map>); + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map>); assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); } { - std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), std::equal_to<>()); - ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>>); + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>>); assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), 
std::end(expected_m))); } { - std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 44)); - ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>, test_allocator>); + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 44)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>, test_allocator>); assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); assert(m.get_allocator().get_id() == 44); } @@ -180,8 +180,8 @@ int main(int, char**) { const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; - std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 46)); - ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 46)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); assert(m.get_allocator().get_id() == 46); } @@ -194,8 +194,8 @@ int main(int, char**) } { - std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), test_allocator(0, 48)); - ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), test_allocator(0, 48)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); assert(m.get_allocator().get_id() == 48); } diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp index 683d201976414..52693c44d888b 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp @@ -78,37 +78,37 @@ int main(int, char**) { const int arr[] = { 1, 2, 1, INT_MAX, 3 }; - std::unordered_multiset s(std::begin(arr), std::end(arr), 42, std::hash()); + std::unordered_multiset s(std::begin(arr), std::end(arr), 42, std::hash()); - ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); } { const int arr[] = { 1, 2, 1, INT_MAX, 3 }; - std::unordered_multiset s(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 40)); + std::unordered_multiset s(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 40)); - ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to, test_allocator>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to, test_allocator>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); assert(s.get_allocator().get_id() == 40); } { - std::unordered_multiset, std::equal_to<>, test_allocator> source; + std::unordered_multiset, std::equal_to<>, test_allocator> source; std::unordered_multiset s(source); ASSERT_SAME_TYPE(decltype(s), decltype(source)); assert(s.size() == 0); } { - std::unordered_multiset, std::equal_to<>, 
test_allocator> source; + std::unordered_multiset, std::equal_to<>, test_allocator> source; std::unordered_multiset s{source}; // braces instead of parens ASSERT_SAME_TYPE(decltype(s), decltype(source)); assert(s.size() == 0); } { - std::unordered_multiset, std::equal_to<>, test_allocator> source; + std::unordered_multiset, std::equal_to<>, test_allocator> source; std::unordered_multiset s(source, test_allocator(0, 41)); ASSERT_SAME_TYPE(decltype(s), decltype(source)); assert(s.size() == 0); @@ -116,7 +116,7 @@ int main(int, char**) } { - std::unordered_multiset, std::equal_to<>, test_allocator> source; + std::unordered_multiset, std::equal_to<>, test_allocator> source; std::unordered_multiset s{source, test_allocator(0, 42)}; // braces instead of parens ASSERT_SAME_TYPE(decltype(s), decltype(source)); assert(s.size() == 0); @@ -138,23 +138,23 @@ int main(int, char**) } { - std::unordered_multiset s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash()); + std::unordered_multiset s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash()); - ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); } { - std::unordered_multiset s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), std::equal_to<>()); + std::unordered_multiset s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), std::equal_to<>()); - ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to<>>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to<>>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); } { - std::unordered_multiset s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 43)); + std::unordered_multiset s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 43)); - ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to<>, test_allocator>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to<>, test_allocator>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); assert(s.get_allocator().get_id() == 43); } @@ -170,9 +170,9 @@ int main(int, char**) { const int arr[] = { 1, 2, 1, INT_MAX, 3 }; - std::unordered_multiset s(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 44)); + std::unordered_multiset s(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 44)); - ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to, test_allocator>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to, test_allocator>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); assert(s.get_allocator().get_id() == 44); } @@ -186,9 +186,9 @@ int main(int, char**) } { - std::unordered_multiset s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), test_allocator(0, 42)); + std::unordered_multiset s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), test_allocator(0, 42)); - ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to, test_allocator>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_multiset, std::equal_to, test_allocator>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); assert(s.get_allocator().get_id() == 42); } diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp 
index 95bc08293f537..150441a93d64c 100644 --- a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp @@ -78,37 +78,37 @@ int main(int, char**) { const int arr[] = { 1, 2, 1, INT_MAX, 3 }; - std::unordered_set s(std::begin(arr), std::end(arr), 42, std::hash()); + std::unordered_set s(std::begin(arr), std::end(arr), 42, std::hash()); - ASSERT_SAME_TYPE(decltype(s), std::unordered_set>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_set>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); } { const int arr[] = { 1, 2, 1, INT_MAX, 3 }; - std::unordered_set s(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 40)); + std::unordered_set s(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 40)); - ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to, test_allocator>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to, test_allocator>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); assert(s.get_allocator().get_id() == 40); } { - std::unordered_set, std::equal_to<>, test_allocator> source; + std::unordered_set, std::equal_to<>, test_allocator> source; std::unordered_set s(source); ASSERT_SAME_TYPE(decltype(s), decltype(source)); assert(s.size() == 0); } { - std::unordered_set, std::equal_to<>, test_allocator> source; + std::unordered_set, std::equal_to<>, test_allocator> source; std::unordered_set s{source}; // braces instead of parens ASSERT_SAME_TYPE(decltype(s), decltype(source)); assert(s.size() == 0); } { - std::unordered_set, std::equal_to<>, test_allocator> source; + std::unordered_set, std::equal_to<>, test_allocator> source; std::unordered_set s(source, test_allocator(0, 41)); ASSERT_SAME_TYPE(decltype(s), decltype(source)); assert(s.size() == 0); @@ -116,7 +116,7 @@ int main(int, char**) } { - std::unordered_set, std::equal_to<>, test_allocator> source; + std::unordered_set, std::equal_to<>, test_allocator> source; std::unordered_set s{source, test_allocator(0, 42)}; // braces instead of parens ASSERT_SAME_TYPE(decltype(s), decltype(source)); assert(s.size() == 0); @@ -138,23 +138,23 @@ int main(int, char**) } { - std::unordered_set s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash()); + std::unordered_set s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash()); - ASSERT_SAME_TYPE(decltype(s), std::unordered_set>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_set>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); } { - std::unordered_set s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), std::equal_to<>()); + std::unordered_set s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), std::equal_to<>()); - ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to<>>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to<>>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); } { - std::unordered_set s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 43)); + std::unordered_set s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 43)); - ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to<>, test_allocator>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to<>, test_allocator>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); 
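For reference, a self-contained sketch of the C++17 class template argument deduction these unordered-container tests exercise, not part of the patch; the function name and the std::hash<short> hasher are illustrative choices, and the arguments in the original tests may differ.

#include <functional>
#include <iterator>
#include <type_traits>
#include <unordered_set>

int deduction_sketch() {
  const int arr[] = {1, 2, 1, 3};
  // The key type is deduced from the iterator's value type; the supplied
  // hasher type is kept as written, and the predicate and allocator fall
  // back to their defaults for that key type.
  std::unordered_set s(std::begin(arr), std::end(arr), 42, std::hash<short>());
  static_assert(std::is_same_v<decltype(s),
                               std::unordered_set<int, std::hash<short>,
                                                  std::equal_to<int>,
                                                  std::allocator<int>>>);
  return static_cast<int>(s.size());
}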
assert(s.get_allocator().get_id() == 43); } @@ -170,9 +170,9 @@ int main(int, char**) { const int arr[] = { 1, 2, 1, INT_MAX, 3 }; - std::unordered_set s(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 44)); + std::unordered_set s(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 44)); - ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to, test_allocator>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to, test_allocator>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); assert(s.get_allocator().get_id() == 44); } @@ -186,9 +186,9 @@ int main(int, char**) } { - std::unordered_set s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), test_allocator(0, 42)); + std::unordered_set s({ 1, 2, 1, INT_MAX, 3 }, 42, std::hash(), test_allocator(0, 42)); - ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to, test_allocator>); + ASSERT_SAME_TYPE(decltype(s), std::unordered_set, std::equal_to, test_allocator>); assert(std::is_permutation(s.begin(), s.end(), std::begin(expected_s), std::end(expected_s))); assert(s.get_allocator().get_id() == 42); } diff --git a/libcxx/test/std/numerics/c.math/abs.pass.cpp b/libcxx/test/std/numerics/c.math/abs.pass.cpp index bdca9a616ec8b..3196f225f308d 100644 --- a/libcxx/test/std/numerics/c.math/abs.pass.cpp +++ b/libcxx/test/std/numerics/c.math/abs.pass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "test_macros.h" @@ -75,4 +76,3 @@ int main(int, char**) return 0; } - diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp index aeb9a192ca307..2c5d2408e2b47 100644 --- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp @@ -41,7 +41,7 @@ void fp_test() constexpr T maxV = std::numeric_limits::max(); constexpr T minV = std::numeric_limits::min(); - + // Things that can be compared exactly static_assert((std::midpoint(T(0), T(0)) == T(0)), ""); static_assert((std::midpoint(T(2), T(4)) == T(3)), ""); @@ -58,7 +58,7 @@ void fp_test() assert((fptest_close_pct(std::midpoint(T(0.1), T(0.4)), T(0.25), pct))); assert((fptest_close_pct(std::midpoint(T(11.2345), T(14.5432)), T(12.88885), pct))); - + // From e to pi assert((fptest_close_pct(std::midpoint(T(2.71828182845904523536028747135266249775724709369995), T(3.14159265358979323846264338327950288419716939937510)), @@ -86,7 +86,7 @@ void fp_test() // TODO // Check two values "close to each other" - T d1 = 3.14; + T d1 = T(3.14); T d0 = std::nextafter(d1, T(2)); T d2 = std::nextafter(d1, T(5)); assert(d0 < d1); // sanity checking diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/underlying_type.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/underlying_type.pass.cpp index fbbab50049d4f..3e08855d6adb9 100644 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/underlying_type.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/underlying_type.pass.cpp @@ -42,7 +42,7 @@ void check() ASSERT_SAME_TYPE(Expected, typename std::underlying_type::type); #if TEST_STD_VER > 11 ASSERT_SAME_TYPE(Expected, typename std::underlying_type_t); -#endif +#endif } enum E { V = INT_MIN }; @@ -79,7 +79,9 @@ int main(int, char**) // SFINAE-able underlying_type #if TEST_STD_VER > 17 static_assert( 
has_type_member::value, ""); +#ifdef TEST_UNSIGNED_UNDERLYING_TYPE static_assert( has_type_member::value, ""); +#endif // TEST_UNSIGNED_UNDERLYING_TYPE static_assert( has_type_member::value, ""); static_assert(!has_type_member::value, ""); diff --git a/libcxx/test/support/any_helpers.h b/libcxx/test/support/any_helpers.h index eb9a4c149fbc0..099bcec4289c5 100644 --- a/libcxx/test/support/any_helpers.h +++ b/libcxx/test/support/any_helpers.h @@ -26,7 +26,7 @@ namespace std { namespace experimental {} } template struct IsSmallObject : public std::integral_constant::value % std::alignment_of::value == 0 && std::is_nothrow_move_constructible::value diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 210f43fd6c88c..39d9fbab63d50 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -135,7 +135,7 @@ class NullChunk : public NonSectionChunk { static std::vector> binImports(const std::vector &imports) { // Group DLL-imported symbols by DLL name because that's how - // symbols are layed out in the import descriptor table. + // symbols are laid out in the import descriptor table. auto less = [](const std::string &a, const std::string &b) { return config->dllOrder[a] < config->dllOrder[b]; }; diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index c41c49cc319c4..6c7d70ee8dcba 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -17,11 +17,12 @@ #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/Support/Path.h" -using namespace lld; -using namespace lld::coff; using namespace llvm; using namespace llvm::codeview; +namespace lld { +namespace coff { + namespace { // The TypeServerSource class represents a PDB type server, a file referenced by // OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ @@ -96,27 +97,25 @@ TpiSource::TpiSource(TpiKind k, const ObjFile *f) : kind(k), file(f) { GC.push_back(std::unique_ptr(this)); } -TpiSource *lld::coff::makeTpiSource(const ObjFile *f) { +TpiSource *makeTpiSource(const ObjFile *f) { return new TpiSource(TpiSource::Regular, f); } -TpiSource *lld::coff::makeUseTypeServerSource(const ObjFile *f, +TpiSource *makeUseTypeServerSource(const ObjFile *f, const TypeServer2Record *ts) { TypeServerSource::enqueue(f, *ts); return new UseTypeServerSource(f, ts); } -TpiSource *lld::coff::makePrecompSource(const ObjFile *f) { +TpiSource *makePrecompSource(const ObjFile *f) { return new PrecompSource(f); } -TpiSource *lld::coff::makeUsePrecompSource(const ObjFile *f, +TpiSource *makeUsePrecompSource(const ObjFile *f, const PrecompRecord *precomp) { return new UsePrecompSource(f, precomp); } -namespace lld { -namespace coff { template <> const PrecompRecord &retrieveDependencyInfo(const TpiSource *source) { assert(source->kind == TpiSource::UsingPCH); @@ -128,8 +127,6 @@ const TypeServer2Record &retrieveDependencyInfo(const TpiSource *source) { assert(source->kind == TpiSource::UsingPDB); return ((const UseTypeServerSource *)source)->typeServerDependency; } -} // namespace coff -} // namespace lld std::map> TypeServerSource::instances; @@ -210,8 +207,7 @@ TypeServerSource::findFromFile(const ObjFile *dependentFile) { // FIXME: Temporary interface until PDBLinker::maybeMergeTypeServerPDB() is // moved here. -Expected -lld::coff::findTypeServerSource(const ObjFile *f) { +Expected findTypeServerSource(const ObjFile *f) { Expected ts = TypeServerSource::findFromFile(f); if (!ts) return ts.takeError(); @@ -239,7 +235,7 @@ void TypeServerSource::enqueue(const ObjFile *dependentFile, // will be merged in. 
NOTE - a PDB load failure is not a link error: some // debug info will simply be missing from the final PDB - that is the default // accepted behavior. -void lld::coff::loadTypeServerSource(llvm::MemoryBufferRef m) { +void loadTypeServerSource(llvm::MemoryBufferRef m) { std::string path = normalizePdbPath(m.getBufferIdentifier()); Expected ts = TypeServerSource::getInstance(m); @@ -266,3 +262,6 @@ Expected TypeServerSource::getInstance(MemoryBufferRef m) { return info.takeError(); return new TypeServerSource(m, session.release()); } + +} // namespace coff +} // namespace lld diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 4bd9dc6ce3879..30967a39b4caa 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -718,8 +718,7 @@ static std::string getImplibPath() { return out.str(); } -// -// The import name is caculated as the following: +// The import name is calculated as follows: // // | LIBRARY w/ ext | LIBRARY w/o ext | no LIBRARY // -----+----------------+---------------------+------------------ diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index abd71931cb463..c821569e34579 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -77,7 +77,7 @@ class ICF { // section is insignificant to the user program and the behaviour matches that // of the Visual C++ linker. bool ICF::isEligible(SectionChunk *c) { - // Non-comdat chunks, dead chunks, and writable chunks are not elegible. + // Non-comdat chunks, dead chunks, and writable chunks are not eligible. bool writable = c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE; if (!c->isCOMDAT() || !c->live || writable) return false; @@ -274,7 +274,7 @@ void ICF::run(ArrayRef vec) { for (Symbol *b : sc->symbols()) if (auto *sym = dyn_cast_or_null(b)) hash += sym->getChunk()->eqClass[cnt % 2]; - // Set MSB to 1 to avoid collisions with non-hash classs. + // Set MSB to 1 to avoid collisions with non-hash classes. sc->eqClass[(cnt + 1) % 2] = hash | (1U << 31); }); } @@ -297,7 +297,7 @@ void ICF::run(ArrayRef vec) { log("ICF needed " + Twine(cnt) + " iterations"); - // Merge sections in the same classs. + // Merge sections in the same classes. forEachClass([&](size_t begin, size_t end) { if (end - begin == 1) return; diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 3f763563b2b3f..d1ef201401642 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -47,6 +47,24 @@ using llvm::Triple; using llvm::support::ulittle32_t; namespace lld { + +// Returns the last element of a path, which is supposed to be a filename. +static StringRef getBasename(StringRef path) { + return sys::path::filename(path, sys::path::Style::windows); +} + +// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". +std::string toString(const coff::InputFile *file) { + if (!file) + return ""; + if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind) + return file->getName(); + + return (getBasename(file->parentName) + "(" + getBasename(file->getName()) + + ")") + .str(); +} + namespace coff { std::vector ObjFile::instances; @@ -599,7 +617,7 @@ Optional ObjFile::createDefined( // Comdat handling. // A comdat symbol consists of two symbol table entries. // The first symbol entry has the name of the section (e.g. .text), fixed - // values for the other fields, and one auxilliary record. + // values for the other fields, and one auxiliary record. // The second symbol entry has the name of the comdat symbol, called the // "comdat leader". 
// When this function is called for the first symbol entry of a comdat, @@ -669,7 +687,7 @@ ArrayRef ObjFile::getDebugSection(StringRef secName) { return {}; } -// OBJ files systematically store critical informations in a .debug$S stream, +// OBJ files systematically store critical information in a .debug$S stream, // even if the TU was compiled with no debug info. At least two records are // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is @@ -908,22 +926,6 @@ std::string replaceThinLTOSuffix(StringRef path) { return (path + repl).str(); return path; } + } // namespace coff } // namespace lld - -// Returns the last element of a path, which is supposed to be a filename. -static StringRef getBasename(StringRef path) { - return sys::path::filename(path, sys::path::Style::windows); -} - -// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". -std::string lld::toString(const coff::InputFile *file) { - if (!file) - return ""; - if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind) - return file->getName(); - - return (getBasename(file->parentName) + "(" + getBasename(file->getName()) + - ")") - .str(); -} diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp index 7aa3b17e24d5c..1c21236dce2b1 100644 --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -39,8 +39,8 @@ using namespace llvm; using namespace llvm::object; -using namespace lld; -using namespace lld::coff; +namespace lld { +namespace coff { // Creates an empty file to and returns a raw_fd_ostream to write to it. static std::unique_ptr openFile(StringRef file) { @@ -206,3 +206,6 @@ std::vector BitcodeCompiler::compile() { return ret; } + +} // namespace coff +} // namespace lld diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp index 70017d34c8bcf..0fea60aab99b5 100644 --- a/lld/COFF/MapFile.cpp +++ b/lld/COFF/MapFile.cpp @@ -29,8 +29,8 @@ using namespace llvm; using namespace llvm::object; -using namespace lld; -using namespace lld::coff; +namespace lld { +namespace coff { using SymbolMapTy = DenseMap>; @@ -87,7 +87,7 @@ getSymbolStrings(ArrayRef syms) { return ret; } -void coff::writeMapFile(ArrayRef outputSections) { +void writeMapFile(ArrayRef outputSections) { if (config->mapFile.empty()) return; @@ -122,3 +122,6 @@ void coff::writeMapFile(ArrayRef outputSections) { } } } + +} // namespace coff +} // namespace lld diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp index a22e609148a0d..270cdaab4d9cf 100644 --- a/lld/COFF/MinGW.cpp +++ b/lld/COFF/MinGW.cpp @@ -13,11 +13,12 @@ #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -using namespace lld; -using namespace lld::coff; using namespace llvm; using namespace llvm::COFF; +namespace lld { +namespace coff { + AutoExporter::AutoExporter() { excludeLibs = { "libgcc", @@ -55,7 +56,7 @@ AutoExporter::AutoExporter() { // C++ symbols "__rtti_", "__builtin_", - // Artifical symbols such as .refptr + // Artificial symbols such as .refptr ".", }; @@ -146,7 +147,7 @@ bool AutoExporter::shouldExport(Defined *sym) const { return !excludeObjects.count(fileName); } -void coff::writeDefFile(StringRef name) { +void writeDefFile(StringRef name) { std::error_code ec; raw_fd_ostream os(name, ec, sys::fs::OF_None); if (ec) @@ -164,3 +165,6 @@ void coff::writeDefFile(StringRef name) { os << "\n"; } } + +} // namespace coff +} // namespace lld diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index d0f8f3d382de5..400def8e60b6c 100644 --- 
a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -59,13 +59,14 @@ #include "llvm/Support/ScopedPrinter.h" #include -using namespace lld; -using namespace lld::coff; using namespace llvm; using namespace llvm::codeview; using llvm::object::coff_section; +namespace lld { +namespace coff { + static ExitOnError exitOnErr; static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root()); @@ -513,16 +514,15 @@ static bool equals_path(StringRef path1, StringRef path2) { return path1.equals(path2); #endif } - // Find by name an OBJ provided on the command line -static ObjFile *findObjByName(StringRef fileNameOnly) { - SmallString<128> currentPath; - +static ObjFile *findObjWithPrecompSignature(StringRef fileNameOnly, + uint32_t precompSignature) { for (ObjFile *f : ObjFile::instances) { StringRef currentFileName = sys::path::filename(f->getName()); - // Compare based solely on the file name (link.exe behavior) - if (equals_path(currentFileName, fileNameOnly)) + if (f->pchSignature.hasValue() && + f->pchSignature.getValue() == precompSignature && + equals_path(fileNameOnly, currentFileName)) return f; } return nullptr; @@ -559,22 +559,15 @@ Expected PDBLinker::aquirePrecompObj(ObjFile *file) { // link.exe requires that a precompiled headers object must always be provided // on the command-line, even if that's not necessary. - auto precompFile = findObjByName(precompFileName); + auto precompFile = + findObjWithPrecompSignature(precompFileName, precomp.Signature); if (!precompFile) return createFileError( - precompFileName.str(), - make_error(pdb::pdb_error_code::external_cmdline_ref)); + precomp.getPrecompFilePath().str(), + make_error(pdb::pdb_error_code::no_matching_pch)); addObjFile(precompFile, &indexMap); - if (!precompFile->pchSignature) - fatal(precompFile->getName() + " is not a precompiled headers object"); - - if (precomp.getSignature() != precompFile->pchSignature.getValueOr(0)) - return createFileError( - precomp.getPrecompFilePath().str(), - make_error(pdb::pdb_error_code::signature_out_of_date)); - return indexMap; } @@ -1597,7 +1590,7 @@ void PDBLinker::addImportFilesToPDB(ArrayRef outputSections) { } // Creates a PDB file. -void coff::createPDB(SymbolTable *symtab, +void createPDB(SymbolTable *symtab, ArrayRef outputSections, ArrayRef sectionTable, llvm::codeview::DebugInfo *buildId) { @@ -1798,7 +1791,7 @@ static bool findLineTable(const SectionChunk *c, uint32_t addr, // Use CodeView line tables to resolve a file and line number for the given // offset into the given chunk and return them, or {"", 0} if a line table was // not found. 
-std::pair coff::getFileLineCodeView(const SectionChunk *c, +std::pair getFileLineCodeView(const SectionChunk *c, uint32_t addr) { ExitOnError exitOnErr; @@ -1833,3 +1826,6 @@ std::pair coff::getFileLineCodeView(const SectionChunk *c, StringRef filename = exitOnErr(getFileName(cVStrTab, checksums, *nameIndex)); return {filename, *lineNumber}; } + +} // namespace coff +} // namespace lld diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 838736162c88a..9729c6938ec8a 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -40,8 +40,9 @@ using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::coff; + +namespace lld { +namespace coff { /* To re-generate DOSProgram: $ cat > /tmp/DOSProgram.asm @@ -285,9 +286,6 @@ class Writer { }; } // anonymous namespace -namespace lld { -namespace coff { - static Timer codeLayoutTimer("Code Layout", Timer::root()); static Timer diskCommitTimer("Commit Output File", Timer::root()); @@ -333,9 +331,6 @@ void OutputSection::addContributingPartialSection(PartialSection *sec) { contribSections.push_back(sec); } -} // namespace coff -} // namespace lld - // Check whether the target address S is in range from a relocation // of type relType at address P. static bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) { @@ -1945,3 +1940,6 @@ PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) { return it->second; return nullptr; } + +} // namespace coff +} // namespace lld diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 616718b69d813..96257a4c76247 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1915,9 +1915,10 @@ template void LinkerDriver::link(opt::InputArgList &args) { // Replace common symbols with regular symbols. replaceCommonSymbols(); - // Do size optimizations: garbage collection, merging of SHF_MERGE sections - // and identical code folding. + // Split SHF_MERGE and .eh_frame sections into pieces in preparation for garbage collection. splitSections(); + + // Garbage collection and removal of shared symbols from unused shared objects. markLive(); demoteSharedSymbols(); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index f8887b111c4b6..b0389ccf100af 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -483,7 +483,8 @@ StringRef ObjFile::getShtGroupSignature(ArrayRef sections, return signature; } -template bool ObjFile::shouldMerge(const Elf_Shdr &sec) { +template +bool ObjFile::shouldMerge(const Elf_Shdr &sec, StringRef name) { // On a regular link we don't merge sections if -O0 (default is -O1). This // sometimes makes the linker significantly faster, although the output will // be bigger. 
@@ -515,14 +516,16 @@ template bool ObjFile::shouldMerge(const Elf_Shdr &sec) { if (entSize == 0) return false; if (sec.sh_size % entSize) - fatal(toString(this) + - ": SHF_MERGE section size must be a multiple of sh_entsize"); + fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" + + Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" + + Twine(entSize) + ")"); uint64_t flags = sec.sh_flags; if (!(flags & SHF_MERGE)) return false; if (flags & SHF_WRITE) - fatal(toString(this) + ": writable SHF_MERGE section is not supported"); + fatal(toString(this) + ":(" + name + + "): writable SHF_MERGE section is not supported"); return true; } @@ -1033,7 +1036,7 @@ InputSectionBase *ObjFile::createInputSection(const Elf_Shdr &sec) { if (name == ".eh_frame" && !config->relocatable) return make(*this, sec, name); - if (shouldMerge(sec)) + if (shouldMerge(sec, name)) return make(*this, sec, name); return make(*this, sec, name); } diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 3c777ceacf77a..1c78654d00fc6 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -259,7 +259,7 @@ template class ObjFile : public ELFFileBase { InputSectionBase *createInputSection(const Elf_Shdr &sec); StringRef getSectionName(const Elf_Shdr &sec); - bool shouldMerge(const Elf_Shdr &sec); + bool shouldMerge(const Elf_Shdr &sec, StringRef name); // Each ELF symbol contains a section index which the symbol belongs to. // However, because the number of bits dedicated for that is limited, a diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index c79e12ee93823..024e0cfec27b5 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -91,6 +91,9 @@ TargetInfo *getTarget() { } template static ErrorPlace getErrPlace(const uint8_t *loc) { + if (!Out::bufferStart) + return {}; + for (InputSectionBase *d : inputSections) { auto *isec = cast(d); if (!isec->getParent()) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index f981e6ebee204..a5f64496eae91 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -103,9 +103,9 @@ opt::InputArgList MinGWOptTable::parse(ArrayRef argv) { opt::InputArgList args = this->ParseArgs(vec, missingIndex, missingCount); if (missingCount) - fatal(StringRef(args.getArgString(missingIndex)) + ": missing argument"); + error(StringRef(args.getArgString(missingIndex)) + ": missing argument"); for (auto *arg : args.filtered(OPT_UNKNOWN)) - fatal("unknown argument: " + arg->getAsString(args)); + error("unknown argument: " + arg->getAsString(args)); return args; } @@ -125,25 +125,49 @@ searchLibrary(StringRef name, ArrayRef searchPaths, bool bStatic) { for (StringRef dir : searchPaths) if (Optional s = findFile(dir, name.substr(1))) return *s; - fatal("unable to find library -l" + name); + error("unable to find library -l" + name); + return ""; } for (StringRef dir : searchPaths) { - if (!bStatic) + if (!bStatic) { if (Optional s = findFile(dir, "lib" + name + ".dll.a")) return *s; + if (Optional s = findFile(dir, name + ".dll.a")) + return *s; + } if (Optional s = findFile(dir, "lib" + name + ".a")) return *s; + if (!bStatic) { + if (Optional s = findFile(dir, name + ".lib")) + return *s; + if (Optional s = findFile(dir, "lib" + name + ".dll")) { + error("lld doesn't support linking directly against " + *s + + ", use an import library"); + return ""; + } + if (Optional s = findFile(dir, name + ".dll")) { + error("lld doesn't support linking directly against " + *s + + ", use an import library"); + return ""; + } + } } - fatal("unable to find library 
-l" + name); + error("unable to find library -l" + name); + return ""; } // Convert Unix-ish command line arguments to Windows-ish ones and // then call coff::link. bool mingw::link(ArrayRef argsArr, raw_ostream &diag) { + enableColors(diag.has_colors()); + MinGWOptTable parser; opt::InputArgList args = parser.parse(argsArr.slice(1)); + if (errorCount()) + return false; + if (args.hasArg(OPT_help)) { printHelp(argsArr[0]); return true; @@ -164,8 +188,10 @@ bool mingw::link(ArrayRef argsArr, raw_ostream &diag) { if (args.hasArg(OPT_version)) return true; - if (!args.hasArg(OPT_INPUT) && !args.hasArg(OPT_l)) - fatal("no input files"); + if (!args.hasArg(OPT_INPUT) && !args.hasArg(OPT_l)) { + error("no input files"); + return false; + } std::vector linkArgs; auto add = [&](const Twine &s) { linkArgs.push_back(s.str()); }; @@ -271,7 +297,7 @@ bool mingw::link(ArrayRef argsArr, raw_ostream &diag) { else if (s == "safe" || s == "none") add("-opt:noicf"); else - fatal("unknown parameter: --icf=" + s); + error("unknown parameter: --icf=" + s); } else { add("-opt:noicf"); } @@ -287,7 +313,7 @@ bool mingw::link(ArrayRef argsArr, raw_ostream &diag) { else if (s == "arm64pe") add("-machine:arm64"); else - fatal("unknown parameter: -m" + s); + error("unknown parameter: -m" + s); } for (auto *a : args.filtered(OPT_mllvm)) @@ -342,6 +368,9 @@ bool mingw::link(ArrayRef argsArr, raw_ostream &diag) { } } + if (errorCount()) + return false; + if (args.hasArg(OPT_verbose) || args.hasArg(OPT__HASH_HASH_HASH)) outs() << llvm::join(linkArgs, " ") << "\n"; diff --git a/lld/test/COFF/Inputs/precompa/precomp.obj b/lld/test/COFF/Inputs/precompa/precomp.obj new file mode 100644 index 0000000000000..27709f567d2d9 Binary files /dev/null and b/lld/test/COFF/Inputs/precompa/precomp.obj differ diff --git a/lld/test/COFF/Inputs/precompa/useprecomp.obj b/lld/test/COFF/Inputs/precompa/useprecomp.obj new file mode 100644 index 0000000000000..c0275ca4b15a2 Binary files /dev/null and b/lld/test/COFF/Inputs/precompa/useprecomp.obj differ diff --git a/lld/test/COFF/Inputs/precompb/precomp.obj b/lld/test/COFF/Inputs/precompb/precomp.obj new file mode 100644 index 0000000000000..416c934302955 Binary files /dev/null and b/lld/test/COFF/Inputs/precompb/precomp.obj differ diff --git a/lld/test/COFF/Inputs/precompb/useprecomp.obj b/lld/test/COFF/Inputs/precompb/useprecomp.obj new file mode 100644 index 0000000000000..2bee9054070e1 Binary files /dev/null and b/lld/test/COFF/Inputs/precompb/useprecomp.obj differ diff --git a/lld/test/COFF/precomp-link-samename.test b/lld/test/COFF/precomp-link-samename.test new file mode 100644 index 0000000000000..f44abf289d867 --- /dev/null +++ b/lld/test/COFF/precomp-link-samename.test @@ -0,0 +1,36 @@ +RUN: lld-link %S/Inputs/precompb/useprecomp.obj %S/Inputs/precompa/precomp.obj %S/Inputs/precompb/precomp.obj \ +RUN: %S/Inputs/precompa/useprecomp.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe \ +RUN: /summary | FileCheck %s -check-prefix SUMMARY + +RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s + + +CHECK: Types (TPI Stream) +CHECK-NOT: LF_PRECOMP +CHECK-NOT: LF_ENDPRECOMP + + +SUMMARY: Summary +SUMMARY-NEXT: -------------------------------------------------------------------------------- +SUMMARY-NEXT: 4 Input OBJ files (expanded from all cmd-line inputs) +SUMMARY-NEXT: 0 PDB type server dependencies +SUMMARY-NEXT: 2 Precomp OBJ dependencies + +// precompa/precomp.cpp +#include "precomp.h" + +// precompa/useprecomp.cpp +#include "precomp.h" +int main(int argc, char **argv) { 
return 0; } + +// precompa/precomp.h +int precompa_symbol = 42; + +// precompb/precomp.cpp +#include "precomp.h" + +// precompb/useprecomp.cpp +#include "precomp.h" + +// precompb/precomp.h +int precompb_symbol = 142; \ No newline at end of file diff --git a/lld/test/COFF/precomp-link.test b/lld/test/COFF/precomp-link.test index e0ab3006867a2..f94f8c204e9dd 100644 --- a/lld/test/COFF/precomp-link.test +++ b/lld/test/COFF/precomp-link.test @@ -9,10 +9,10 @@ RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/pr RUN: not lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE-MISSING-PRECOMPOBJ FAILURE: warning: Cannot use debug info for '{{.*}}precomp-invalid.obj' [LNK4099] -FAILURE-NEXT: failed to load reference '{{.*}}precomp.obj': The signature does not match; the file(s) might be out of date. +FAILURE-NEXT: failed to load reference '{{.*}}precomp.obj': No matching precompiled header could be located. FAILURE-MISSING-PRECOMPOBJ: warning: Cannot use debug info for '{{.*}}precomp-a.obj' [LNK4099] -FAILURE-MISSING-PRECOMPOBJ-NEXT: failed to load reference '{{.*}}precomp.obj': The path to this file must be provided on the command-line +FAILURE-MISSING-PRECOMPOBJ-NEXT: failed to load reference '{{.*}}precomp.obj': No matching precompiled header could be located. CHECK: Types (TPI Stream) CHECK-NOT: LF_PRECOMP diff --git a/lld/test/ELF/compressed-debug-conflict.s b/lld/test/ELF/compressed-debug-conflict.s index d1113cb312bdf..1601a9d8e3146 100644 --- a/lld/test/ELF/compressed-debug-conflict.s +++ b/lld/test/ELF/compressed-debug-conflict.s @@ -13,9 +13,9 @@ # OBJ-NEXT: ] # ERROR: error: duplicate symbol: main -# ERROR-NEXT: >>> defined at reduced.c:2 (/tmp/reduced.c:2) +# ERROR-NEXT: >>> defined at reduced.c:2 ({{[/\\]}}tmp{{[/\\]}}reduced.c:2) # ERROR-NEXT: >>> -# ERROR-NEXT: >>> defined at reduced.c:2 (/tmp/reduced.c:2) +# ERROR-NEXT: >>> defined at reduced.c:2 ({{[/\\]}}tmp{{[/\\]}}reduced.c:2) # ERROR-NEXT: >>> .text diff --git a/lld/test/ELF/invalid/merge-invalid-size.s b/lld/test/ELF/invalid/merge-invalid-size.s index b16889a538a5c..71c3f98e75529 100644 --- a/lld/test/ELF/invalid/merge-invalid-size.s +++ b/lld/test/ELF/invalid/merge-invalid-size.s @@ -1,7 +1,7 @@ // REQUIRES: x86 // RUN: llvm-mc %s -o %t.o -filetype=obj -triple=x86_64-pc-linux // RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s -// CHECK: SHF_MERGE section size must be a multiple of sh_entsize +// CHECK: merge-invalid-size.s.tmp.o:(.foo): SHF_MERGE section size (2) must be a multiple of sh_entsize (4) .section .foo,"aM",@progbits,4 .short 42 diff --git a/lld/test/ELF/writable-merge.s b/lld/test/ELF/invalid/merge-writable.s similarity index 67% rename from lld/test/ELF/writable-merge.s rename to lld/test/ELF/invalid/merge-writable.s index 91a7e07d7ce53..0c5fe92481da0 100644 --- a/lld/test/ELF/writable-merge.s +++ b/lld/test/ELF/invalid/merge-writable.s @@ -1,7 +1,7 @@ // REQUIRES: x86 // RUN: llvm-mc %s -o %t.o -filetype=obj -triple=x86_64-pc-linux // RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s -// CHECK: writable SHF_MERGE section is not supported +// CHECK: merge-writable.s.tmp.o:(.foo): writable SHF_MERGE section is not supported .section .foo,"awM",@progbits,4 .quad 0 diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index b16c814d8a3b5..cf3a223adb922 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -214,3 +214,12 @@ 
HELP: --enable-auto-import RUN: ld.lld -### -m i386pep foo.o -delayload user32.dll --delayload shell32.dll | FileCheck -check-prefix DELAYLOAD %s RUN: ld.lld -### -m i386pep foo.o -delayload=user32.dll --delayload=shell32.dll | FileCheck -check-prefix DELAYLOAD %s DELAYLOAD: -delayload:user32.dll -delayload:shell32.dll + +RUN: not ld.lld -m i386pep -entry 2>&1 | FileCheck -check-prefix MISSING_ARG %s +MISSING_ARG: error: -entry: missing argument + +RUN: not ld.lld -m i386pep --foo 2>&1 | FileCheck -check-prefix UNKNOWN_ARG %s +UNKNOWN_ARG: error: unknown argument: --foo + +RUN: not ld.lld -m i386pep 2>&1 | FileCheck -check-prefix NO_INPUT_FILES %s +NO_INPUT_FILES: error: no input files diff --git a/lld/test/MinGW/lib.test b/lld/test/MinGW/lib.test index 56104d6d907a2..a2bd091bc691e 100644 --- a/lld/test/MinGW/lib.test +++ b/lld/test/MinGW/lib.test @@ -7,6 +7,13 @@ RUN: echo > %t/lib/libfoo.dll.a RUN: ld.lld -### -m i386pep -lfoo -L%t/lib | FileCheck -check-prefix=LIB2 %s LIB2: libfoo.dll.a +RUN: not ld.lld -### -m i386pep -l:barefilename -L%t/lib 2>&1 | FileCheck -check-prefix=LIB-LITERAL-FAIL %s +LIB-LITERAL-FAIL: unable to find library -l:barefilename + +RUN: echo > %t/lib/barefilename +RUN: ld.lld -### -m i386pep -l:barefilename -L%t/lib 2>&1 | FileCheck -check-prefix=LIB-LITERAL %s +LIB-LITERAL: barefilename + RUN: not ld.lld -### -m i386pep -Bstatic -lfoo -L%t/lib 2>&1 | FileCheck -check-prefix=LIB3 %s LIB3: unable to find library -lfoo @@ -19,3 +26,16 @@ RUN: echo > %t/lib/libbar.a RUN: ld.lld -### -m i386pep -Bstatic -lfoo -Bdynamic -lbar -L%t/lib | FileCheck -check-prefix=LIB5 %s LIB5: libfoo.a LIB5-SAME: libbar.dll.a + +RUN: echo > %t/lib/noprefix.dll.a +RUN: echo > %t/lib/msvcstyle.lib +RUN: ld.lld -### -m i386pep -L%t/lib -lnoprefix -lmsvcstyle | FileCheck -check-prefix=OTHERSTYLES %s +OTHERSTYLES: noprefix.dll.a +OTHERSTYLES-SAME: msvcstyle.lib + +RUN: echo > %t/lib/libnoimplib.dll +RUN: echo > %t/lib/noprefix_noimplib.dll +RUN: not ld.lld -### -m i386pep -L%t/lib -lnoimplib 2>&1 | FileCheck -check-prefix=UNSUPPORTED-DLL1 %s +RUN: not ld.lld -### -m i386pep -L%t/lib -lnoprefix_noimplib 2>&1 | FileCheck -check-prefix=UNSUPPORTED-DLL2 %s +UNSUPPORTED-DLL1: lld doesn't support linking directly against {{.*}}libnoimplib.dll, use an import library +UNSUPPORTED-DLL2: lld doesn't support linking directly against {{.*}}noprefix_noimplib.dll, use an import library diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 34801c1b50e12..d795221200b0b 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -37,10 +37,9 @@ using namespace llvm::object; using namespace llvm::sys; using namespace llvm::wasm; -using namespace lld; -using namespace lld::wasm; - -Configuration *lld::wasm::config; +namespace lld { +namespace wasm { +Configuration *config; namespace { @@ -79,8 +78,7 @@ class LinkerDriver { }; } // anonymous namespace -bool lld::wasm::link(ArrayRef args, bool canExitEarly, - raw_ostream &error) { +bool link(ArrayRef args, bool canExitEarly, raw_ostream &error) { errorHandler().logName = args::getFilenameWithoutExe(args[0]); errorHandler().errorOS = &error; errorHandler().errorLimitExceededMsg = @@ -787,3 +785,6 @@ void LinkerDriver::link(ArrayRef argsArr) { // Write the result to the file. 
writeResult(); } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp index 9bd75df80f572..99a393e8ef959 100644 --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -19,10 +19,9 @@ using namespace llvm; using namespace llvm::wasm; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::wasm; -StringRef lld::relocTypeToString(uint8_t relocType) { +namespace lld { +StringRef relocTypeToString(uint8_t relocType) { switch (relocType) { #define WASM_RELOC(NAME, REL) \ case REL: \ @@ -33,10 +32,11 @@ StringRef lld::relocTypeToString(uint8_t relocType) { llvm_unreachable("unknown reloc type"); } -std::string lld::toString(const InputChunk *c) { +std::string toString(const wasm::InputChunk *c) { return (toString(c->file) + ":(" + c->getName() + ")").str(); } +namespace wasm { StringRef InputChunk::getComdatName() const { uint32_t index = getComdat(); if (index == UINT32_MAX) @@ -346,3 +346,6 @@ void InputSegment::generateRelocationCode(raw_ostream &os) const { writeUleb128(os, 0, "offset"); } } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index 198ac1c41a77b..ca7546b97331d 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -22,16 +22,27 @@ #define DEBUG_TYPE "lld" -using namespace lld; -using namespace lld::wasm; - using namespace llvm; using namespace llvm::object; using namespace llvm::wasm; -std::unique_ptr lld::wasm::tar; +namespace lld { + +// Returns a string in the format of "foo.o" or "foo.a(bar.o)". +std::string toString(const wasm::InputFile *file) { + if (!file) + return ""; + + if (file->archiveName.empty()) + return file->getName(); + + return (file->archiveName + "(" + file->getName() + ")").str(); +} -Optional lld::wasm::readFile(StringRef path) { +namespace wasm { +std::unique_ptr tar; + +Optional readFile(StringRef path) { log("Loading: " + path); auto mbOrErr = MemoryBuffer::getFile(path); @@ -48,7 +59,7 @@ Optional lld::wasm::readFile(StringRef path) { return mbref; } -InputFile *lld::wasm::createObjectFile(MemoryBufferRef mb, +InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName) { file_magic magic = identify_magic(mb.getBuffer()); if (magic == file_magic::wasm_object) { @@ -542,13 +553,5 @@ void BitcodeFile::parse() { symbols.push_back(createBitcodeSymbol(keptComdats, objSym, *this)); } -// Returns a string in the format of "foo.o" or "foo.a(bar.o)". 
-std::string lld::toString(const wasm::InputFile *file) { - if (!file) - return ""; - - if (file->archiveName.empty()) - return file->getName(); - - return (file->archiveName + "(" + file->getName() + ")").str(); -} +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp index 98584d94cbd52..de234365dabf0 100644 --- a/lld/wasm/LTO.cpp +++ b/lld/wasm/LTO.cpp @@ -36,9 +36,9 @@ #include using namespace llvm; -using namespace lld; -using namespace lld::wasm; +namespace lld { +namespace wasm { static std::unique_ptr createLTO() { lto::Config c; c.Options = initTargetOptionsFromCodeGenFlags(); @@ -165,3 +165,6 @@ std::vector BitcodeCompiler::compile() { return ret; } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp index 98267d1cd8de7..6906f86f7150a 100644 --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -31,38 +31,52 @@ using namespace llvm; using namespace llvm::wasm; -void lld::wasm::markLive() { - if (!config->gcSections) - return; +namespace lld { +namespace wasm { - LLVM_DEBUG(dbgs() << "markLive\n"); - SmallVector q; - - std::function enqueue = [&](Symbol *sym) { - if (!sym || sym->isLive()) - return; - LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n"); - sym->markLive(); - if (InputChunk *chunk = sym->getChunk()) - q.push_back(chunk); - - // The ctor functions are all referenced by the synthetic callCtors - // function. However, this function does not contain relocations so we - // have to manually mark the ctors as live if callCtors itself is live. - if (sym == WasmSym::callCtors) { - if (config->isPic) - enqueue(WasmSym::applyRelocs); - for (const ObjFile *obj : symtab->objectFiles) { - const WasmLinkingData &l = obj->getWasmObj()->linkingData(); - for (const WasmInitFunc &f : l.InitFunctions) { - auto* initSym = obj->getFunctionSymbol(f.Symbol); - if (!initSym->isDiscarded()) - enqueue(initSym); - } +namespace { + +class MarkLive { +public: + void run(); + +private: + void enqueue(Symbol *sym); + void markSymbol(Symbol *sym); + void mark(); + + // A list of chunks to visit. + SmallVector queue; +}; + +} // namespace + +void MarkLive::enqueue(Symbol *sym) { + if (!sym || sym->isLive()) + return; + LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n"); + sym->markLive(); + if (InputChunk *chunk = sym->getChunk()) + queue.push_back(chunk); + + // The ctor functions are all referenced by the synthetic callCtors + // function. However, this function does not contain relocations so we + // have to manually mark the ctors as live if callCtors itself is live. + if (sym == WasmSym::callCtors) { + if (config->isPic) + enqueue(WasmSym::applyRelocs); + for (const ObjFile *obj : symtab->objectFiles) { + const WasmLinkingData &l = obj->getWasmObj()->linkingData(); + for (const WasmInitFunc &f : l.InitFunctions) { + auto* initSym = obj->getFunctionSymbol(f.Symbol); + if (!initSym->isDiscarded()) + enqueue(initSym); } } - }; + } +} +void MarkLive::run() { // Add GC root symbols. if (!config->entry.empty()) enqueue(symtab->find(config->entry)); @@ -87,9 +101,13 @@ void lld::wasm::markLive() { if (config->sharedMemory && !config->shared) enqueue(WasmSym::initMemory); + mark(); +} + +void MarkLive::mark() { // Follow relocations to mark all reachable chunks. 
- while (!q.empty()) { - InputChunk *c = q.pop_back_val(); + while (!queue.empty()) { + InputChunk *c = queue.pop_back_val(); for (const WasmRelocation reloc : c->getRelocations()) { if (reloc.Type == R_WASM_TYPE_INDEX_LEB) @@ -113,6 +131,16 @@ void lld::wasm::markLive() { enqueue(sym); } } +} + +void markLive() { + if (!config->gcSections) + return; + + LLVM_DEBUG(dbgs() << "markLive\n"); + + MarkLive marker; + marker.run(); // Report garbage-collected sections. if (config->printGcSections) { @@ -138,3 +166,6 @@ void lld::wasm::markLive() { message("removing unused section " + toString(g)); } } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp index c019d89e447ba..469a51a0f0685 100644 --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -20,9 +20,17 @@ using namespace llvm; using namespace llvm::wasm; -using namespace lld; -using namespace lld::wasm; +namespace lld { + +// Returns a string, e.g. "FUNCTION(.text)". +std::string toString(const wasm::OutputSection &sec) { + if (!sec.name.empty()) + return (sec.getSectionName() + "(" + sec.name + ")").str(); + return sec.getSectionName(); +} + +namespace wasm { static StringRef sectionTypeToString(uint32_t sectionType) { switch (sectionType) { case WASM_SEC_CUSTOM: @@ -58,13 +66,6 @@ static StringRef sectionTypeToString(uint32_t sectionType) { } } -// Returns a string, e.g. "FUNCTION(.text)". -std::string lld::toString(const OutputSection &sec) { - if (!sec.name.empty()) - return (sec.getSectionName() + "(" + sec.name + ")").str(); - return sec.getSectionName(); -} - StringRef OutputSection::getSectionName() const { return sectionTypeToString(type); } @@ -248,3 +249,6 @@ void CustomSection::writeRelocations(raw_ostream &os) const { for (const InputSection *s : inputSections) s->writeRelocations(os); } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index 018fea9a9c898..006a6662cafce 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -14,9 +14,8 @@ using namespace llvm; using namespace llvm::wasm; -using namespace lld; -using namespace lld::wasm; - +namespace lld { +namespace wasm { static bool requiresGOTAccess(const Symbol *sym) { return config->isPic && !sym->isHidden() && !sym->isLocal(); } @@ -54,7 +53,7 @@ static void addGOTEntry(Symbol *sym) { out.globalSec->addStaticGOTEntry(sym); } -void lld::wasm::scanRelocations(InputChunk *chunk) { +void scanRelocations(InputChunk *chunk) { if (!chunk->live) return; ObjFile *file = chunk->file; @@ -113,3 +112,6 @@ void lld::wasm::scanRelocations(InputChunk *chunk) { } } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp index b53ddaf575013..92069eab54da2 100644 --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -21,10 +21,10 @@ using namespace llvm; using namespace llvm::wasm; using namespace llvm::object; -using namespace lld; -using namespace lld::wasm; -SymbolTable *lld::wasm::symtab; +namespace lld { +namespace wasm { +SymbolTable *symtab; void SymbolTable::addFile(InputFile *file) { log("Processing: " + toString(file)); @@ -692,3 +692,6 @@ void SymbolTable::handleSymbolVariants() { } } } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index d36e33adf2d07..ae3736021c6b7 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -21,9 +21,45 @@ using namespace llvm; using namespace llvm::wasm; -using namespace 
lld; -using namespace lld::wasm; +namespace lld { +std::string toString(const wasm::Symbol &sym) { + return maybeDemangleSymbol(sym.getName()); +} + +std::string maybeDemangleSymbol(StringRef name) { + if (wasm::config->demangle) + return demangleItanium(name); + return name; +} + +std::string toString(wasm::Symbol::Kind kind) { + switch (kind) { + case wasm::Symbol::DefinedFunctionKind: + return "DefinedFunction"; + case wasm::Symbol::DefinedDataKind: + return "DefinedData"; + case wasm::Symbol::DefinedGlobalKind: + return "DefinedGlobal"; + case wasm::Symbol::DefinedEventKind: + return "DefinedEvent"; + case wasm::Symbol::UndefinedFunctionKind: + return "UndefinedFunction"; + case wasm::Symbol::UndefinedDataKind: + return "UndefinedData"; + case wasm::Symbol::UndefinedGlobalKind: + return "UndefinedGlobal"; + case wasm::Symbol::LazyKind: + return "LazyKind"; + case wasm::Symbol::SectionKind: + return "SectionKind"; + case wasm::Symbol::OutputSectionKind: + return "OutputSectionKind"; + } + llvm_unreachable("invalid symbol kind"); +} + +namespace wasm { DefinedFunction *WasmSym::callCtors; DefinedFunction *WasmSym::initMemory; DefinedFunction *WasmSym::applyRelocs; @@ -298,49 +334,12 @@ const OutputSectionSymbol *SectionSymbol::getOutputSectionSymbol() const { void LazySymbol::fetch() { cast(file)->addMember(&archiveSymbol); } -std::string lld::toString(const wasm::Symbol &sym) { - return lld::maybeDemangleSymbol(sym.getName()); -} - -std::string lld::maybeDemangleSymbol(StringRef name) { - if (config->demangle) - return demangleItanium(name); - return name; -} - -std::string lld::toString(wasm::Symbol::Kind kind) { - switch (kind) { - case wasm::Symbol::DefinedFunctionKind: - return "DefinedFunction"; - case wasm::Symbol::DefinedDataKind: - return "DefinedData"; - case wasm::Symbol::DefinedGlobalKind: - return "DefinedGlobal"; - case wasm::Symbol::DefinedEventKind: - return "DefinedEvent"; - case wasm::Symbol::UndefinedFunctionKind: - return "UndefinedFunction"; - case wasm::Symbol::UndefinedDataKind: - return "UndefinedData"; - case wasm::Symbol::UndefinedGlobalKind: - return "UndefinedGlobal"; - case wasm::Symbol::LazyKind: - return "LazyKind"; - case wasm::Symbol::SectionKind: - return "SectionKind"; - case wasm::Symbol::OutputSectionKind: - return "OutputSectionKind"; - } - llvm_unreachable("invalid symbol kind"); -} - - -void lld::wasm::printTraceSymbolUndefined(StringRef name, const InputFile* file) { +void printTraceSymbolUndefined(StringRef name, const InputFile* file) { message(toString(file) + ": reference to " + name); } // Print out a log message for --trace-symbol. 
-void lld::wasm::printTraceSymbol(Symbol *sym) { +void printTraceSymbol(Symbol *sym) { // Undefined symbols are traced via printTraceSymbolUndefined if (sym->isUndefined()) return; @@ -354,5 +353,8 @@ void lld::wasm::printTraceSymbol(Symbol *sym) { message(toString(sym->getFile()) + s + sym->getName()); } -const char *lld::wasm::defaultModule = "env"; -const char *lld::wasm::functionTableName = "__indirect_function_table"; +const char *defaultModule = "env"; +const char *functionTableName = "__indirect_function_table"; + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 7c3ea321232fc..b00bd0ce9d062 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -22,10 +22,10 @@ using namespace llvm; using namespace llvm::wasm; -using namespace lld; -using namespace lld::wasm; +namespace lld { +namespace wasm { -OutStruct lld::wasm::out; +OutStruct out; namespace { @@ -567,3 +567,6 @@ void RelocSection::writeBody() { writeUleb128(bodyOutputStream, count, "reloc count"); sec->writeRelocations(bodyOutputStream); } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index d841d21b79825..479b44b53fbee 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -39,9 +39,9 @@ using namespace llvm; using namespace llvm::wasm; -using namespace lld; -using namespace lld::wasm; +namespace lld { +namespace wasm { static constexpr int stackAlignment = 16; namespace { @@ -1088,4 +1088,7 @@ void Writer::createHeader() { fileSize += header.size(); } -void lld::wasm::writeResult() { Writer().run(); } +void writeResult() { Writer().run(); } + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/WriterUtils.cpp b/lld/wasm/WriterUtils.cpp index bad221241be53..00e92a6ddba30 100644 --- a/lld/wasm/WriterUtils.cpp +++ b/lld/wasm/WriterUtils.cpp @@ -18,50 +18,94 @@ using namespace llvm; using namespace llvm::wasm; namespace lld { +std::string toString(ValType type) { + switch (type) { + case ValType::I32: + return "i32"; + case ValType::I64: + return "i64"; + case ValType::F32: + return "f32"; + case ValType::F64: + return "f64"; + case ValType::V128: + return "v128"; + case ValType::EXNREF: + return "exnref"; + } + llvm_unreachable("Invalid wasm::ValType"); +} + +std::string toString(const WasmSignature &sig) { + SmallString<128> s("("); + for (ValType type : sig.Params) { + if (s.size() != 1) + s += ", "; + s += toString(type); + } + s += ") -> "; + if (sig.Returns.empty()) + s += "void"; + else + s += toString(sig.Returns[0]); + return s.str(); +} -void wasm::debugWrite(uint64_t offset, const Twine &msg) { +std::string toString(const WasmGlobalType &type) { + return (type.Mutable ? 
"var " : "const ") + + toString(static_cast(type.Type)); +} + +std::string toString(const WasmEventType &type) { + if (type.Attribute == WASM_EVENT_ATTRIBUTE_EXCEPTION) + return "exception"; + return "unknown"; +} + +namespace wasm { +void debugWrite(uint64_t offset, const Twine &msg) { LLVM_DEBUG(dbgs() << format(" | %08lld: ", offset) << msg << "\n"); } -void wasm::writeUleb128(raw_ostream &os, uint32_t number, const Twine &msg) { +void writeUleb128(raw_ostream &os, uint32_t number, const Twine &msg) { debugWrite(os.tell(), msg + "[" + utohexstr(number) + "]"); encodeULEB128(number, os); } -void wasm::writeSleb128(raw_ostream &os, int32_t number, const Twine &msg) { +void writeSleb128(raw_ostream &os, int32_t number, const Twine &msg) { debugWrite(os.tell(), msg + "[" + utohexstr(number) + "]"); encodeSLEB128(number, os); } -void wasm::writeBytes(raw_ostream &os, const char *bytes, size_t count, +void writeBytes(raw_ostream &os, const char *bytes, size_t count, const Twine &msg) { debugWrite(os.tell(), msg + " [data[" + Twine(count) + "]]"); os.write(bytes, count); } -void wasm::writeStr(raw_ostream &os, StringRef string, const Twine &msg) { +void writeStr(raw_ostream &os, StringRef string, const Twine &msg) { debugWrite(os.tell(), msg + " [str[" + Twine(string.size()) + "]: " + string + "]"); encodeULEB128(string.size(), os); os.write(string.data(), string.size()); } -void wasm::writeU8(raw_ostream &os, uint8_t byte, const Twine &msg) { +void writeU8(raw_ostream &os, uint8_t byte, const Twine &msg) { debugWrite(os.tell(), msg + " [0x" + utohexstr(byte) + "]"); os << byte; } -void wasm::writeU32(raw_ostream &os, uint32_t number, const Twine &msg) { +void writeU32(raw_ostream &os, uint32_t number, const Twine &msg) { debugWrite(os.tell(), msg + "[0x" + utohexstr(number) + "]"); support::endian::write(os, number, support::little); } -void wasm::writeValueType(raw_ostream &os, ValType type, const Twine &msg) { +void writeValueType(raw_ostream &os, ValType type, const Twine &msg) { writeU8(os, static_cast(type), msg + "[type: " + toString(type) + "]"); } -void wasm::writeSig(raw_ostream &os, const WasmSignature &sig) { +void writeSig(raw_ostream &os, const WasmSignature &sig) { writeU8(os, WASM_TYPE_FUNC, "signature type"); writeUleb128(os, sig.Params.size(), "param Count"); for (ValType paramType : sig.Params) { @@ -73,22 +117,22 @@ void wasm::writeSig(raw_ostream &os, const WasmSignature &sig) { } } -void wasm::writeI32Const(raw_ostream &os, int32_t number, const Twine &msg) { +void writeI32Const(raw_ostream &os, int32_t number, const Twine &msg) { writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); writeSleb128(os, number, msg); } -void wasm::writeI64Const(raw_ostream &os, int32_t number, const Twine &msg) { +void writeI64Const(raw_ostream &os, int32_t number, const Twine &msg) { writeU8(os, WASM_OPCODE_I64_CONST, "i64.const"); writeSleb128(os, number, msg); } -void wasm::writeMemArg(raw_ostream &os, uint32_t alignment, uint32_t offset) { +void writeMemArg(raw_ostream &os, uint32_t alignment, uint32_t offset) { writeUleb128(os, alignment, "alignment"); writeUleb128(os, offset, "offset"); } -void wasm::writeInitExpr(raw_ostream &os, const WasmInitExpr &initExpr) { +void writeInitExpr(raw_ostream &os, const WasmInitExpr &initExpr) { writeU8(os, initExpr.Opcode, "opcode"); switch (initExpr.Opcode) { case WASM_OPCODE_I32_CONST: @@ -106,39 +150,39 @@ void wasm::writeInitExpr(raw_ostream &os, const WasmInitExpr &initExpr) { writeU8(os, WASM_OPCODE_END, "opcode:end"); } -void 
wasm::writeLimits(raw_ostream &os, const WasmLimits &limits) { +void writeLimits(raw_ostream &os, const WasmLimits &limits) { writeU8(os, limits.Flags, "limits flags"); writeUleb128(os, limits.Initial, "limits initial"); if (limits.Flags & WASM_LIMITS_FLAG_HAS_MAX) writeUleb128(os, limits.Maximum, "limits max"); } -void wasm::writeGlobalType(raw_ostream &os, const WasmGlobalType &type) { +void writeGlobalType(raw_ostream &os, const WasmGlobalType &type) { // TODO: Update WasmGlobalType to use ValType and remove this cast. writeValueType(os, ValType(type.Type), "global type"); writeU8(os, type.Mutable, "global mutable"); } -void wasm::writeGlobal(raw_ostream &os, const WasmGlobal &global) { +void writeGlobal(raw_ostream &os, const WasmGlobal &global) { writeGlobalType(os, global.Type); writeInitExpr(os, global.InitExpr); } -void wasm::writeEventType(raw_ostream &os, const WasmEventType &type) { +void writeEventType(raw_ostream &os, const WasmEventType &type) { writeUleb128(os, type.Attribute, "event attribute"); writeUleb128(os, type.SigIndex, "sig index"); } -void wasm::writeEvent(raw_ostream &os, const WasmEvent &event) { +void writeEvent(raw_ostream &os, const WasmEvent &event) { writeEventType(os, event.Type); } -void wasm::writeTableType(raw_ostream &os, const llvm::wasm::WasmTable &type) { +void writeTableType(raw_ostream &os, const llvm::wasm::WasmTable &type) { writeU8(os, WASM_TYPE_FUNCREF, "table type"); writeLimits(os, type.Limits); } -void wasm::writeImport(raw_ostream &os, const WasmImport &import) { +void writeImport(raw_ostream &os, const WasmImport &import) { writeStr(os, import.Module, "import module name"); writeStr(os, import.Field, "import field name"); writeU8(os, import.Kind, "import kind"); @@ -163,7 +207,7 @@ void wasm::writeImport(raw_ostream &os, const WasmImport &import) { } } -void wasm::writeExport(raw_ostream &os, const WasmExport &export_) { +void writeExport(raw_ostream &os, const WasmExport &export_) { writeStr(os, export_.Name, "export name"); writeU8(os, export_.Kind, "export kind"); switch (export_.Kind) { @@ -183,48 +227,6 @@ void wasm::writeExport(raw_ostream &os, const WasmExport &export_) { fatal("unsupported export type: " + Twine(export_.Kind)); } } -} // namespace lld - -std::string lld::toString(ValType type) { - switch (type) { - case ValType::I32: - return "i32"; - case ValType::I64: - return "i64"; - case ValType::F32: - return "f32"; - case ValType::F64: - return "f64"; - case ValType::V128: - return "v128"; - case ValType::EXNREF: - return "exnref"; - } - llvm_unreachable("Invalid wasm::ValType"); -} - -std::string lld::toString(const WasmSignature &sig) { - SmallString<128> s("("); - for (ValType type : sig.Params) { - if (s.size() != 1) - s += ", "; - s += toString(type); - } - s += ") -> "; - if (sig.Returns.empty()) - s += "void"; - else - s += toString(sig.Returns[0]); - return s.str(); -} - -std::string lld::toString(const WasmGlobalType &type) { - return (type.Mutable ? 
"var " : "const ") + - toString(static_cast(type.Type)); -} -std::string lld::toString(const WasmEventType &type) { - if (type.Attribute == WASM_EVENT_ATTRIBUTE_EXCEPTION) - return "exception"; - return "unknown"; -} +} // namespace wasm +} // namespace lld diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 58a7ef1a9a785..3df95e2f06c9f 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -190,72 +190,73 @@ if(LLDB_INCLUDE_TESTS) endif() if (NOT LLDB_DISABLE_PYTHON) - if(NOT LLDB_BUILD_FRAMEWORK) - set(use_python_wrapper_from_src_dir -m) - endif() - if(LLDB_USE_SYSTEM_SIX) - set(use_six_py_from_system --useSystemSix) - endif() - get_target_property(lldb_scripts_dir swig_wrapper BINARY_DIR) - get_target_property(liblldb_build_dir liblldb LIBRARY_OUTPUT_DIRECTORY) - - if(LLDB_BUILD_FRAMEWORK) - set(lldb_python_build_path "${liblldb_build_dir}/LLDB.framework/Resources/Python/lldb") - else() - set(lldb_python_build_path "${CMAKE_BINARY_DIR}/${LLDB_PYTHON_RELATIVE_PATH}/lldb") - endif() + if(NOT LLDB_BUILD_FRAMEWORK) + set(use_python_wrapper_from_src_dir -m) + endif() + if(LLDB_USE_SYSTEM_SIX) + set(use_six_py_from_system --useSystemSix) + endif() + get_target_property(lldb_scripts_dir swig_wrapper BINARY_DIR) + get_target_property(liblldb_build_dir liblldb LIBRARY_OUTPUT_DIRECTORY) - # Add a Post-Build Event to copy over Python files and create the symlink - # to liblldb.so for the Python API(hardlink on Windows). - add_custom_target(finish_swig ALL - COMMAND - ${PYTHON_EXECUTABLE} ${LLDB_SOURCE_DIR}/scripts/finishSwigWrapperClasses.py - --srcRoot=${LLDB_SOURCE_DIR} - --targetDir=${liblldb_build_dir} - --cfgBldDir=${lldb_scripts_dir} - --prefix=${CMAKE_BINARY_DIR} - --cmakeBuildConfiguration=${CMAKE_CFG_INTDIR} - --lldbLibDir=lib${LLVM_LIBDIR_SUFFIX} - --lldbPythonPath=${lldb_python_build_path} - ${use_python_wrapper_from_src_dir} - ${use_six_py_from_system} - VERBATIM - DEPENDS ${LLDB_SOURCE_DIR}/scripts/finishSwigWrapperClasses.py - DEPENDS ${lldb_scripts_dir}/lldb.py - COMMENT "Python script sym-linking LLDB Python API") - - add_dependencies(finish_swig swig_wrapper liblldb lldb-argdumper) - set_target_properties(finish_swig swig_wrapper PROPERTIES FOLDER "lldb misc") - - # Ensure we do the python post-build step when building lldb. - add_dependencies(lldb finish_swig) - - if(NOT LLDB_BUILD_FRAMEWORK) - # Install the LLDB python module - add_custom_target(lldb-python-scripts) - add_dependencies(lldb-python-scripts finish_swig) - install(DIRECTORY ${CMAKE_BINARY_DIR}/${LLDB_PYTHON_RELATIVE_PATH}/ - DESTINATION ${LLDB_PYTHON_RELATIVE_PATH} - COMPONENT lldb-python-scripts) - if (NOT LLVM_ENABLE_IDE) - add_llvm_install_targets(install-lldb-python-scripts - COMPONENT lldb-python-scripts - DEPENDS lldb-python-scripts) - endif() + if(LLDB_BUILD_FRAMEWORK) + set(lldb_python_build_path "${liblldb_build_dir}/LLDB.framework/Resources/Python/lldb") + else() + set(lldb_python_build_path "${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${LLDB_PYTHON_RELATIVE_PATH}/lldb") + endif() + get_filename_component(lldb_python_build_path ${lldb_python_build_path} ABSOLUTE) + + # Add a Post-Build Event to copy over Python files and create the symlink + # to liblldb.so for the Python API(hardlink on Windows). 
+ add_custom_target(finish_swig ALL + COMMAND + ${PYTHON_EXECUTABLE} ${LLDB_SOURCE_DIR}/scripts/finishSwigWrapperClasses.py + --srcRoot=${LLDB_SOURCE_DIR} + --targetDir=${liblldb_build_dir} + --cfgBldDir=${lldb_scripts_dir} + --prefix=${CMAKE_BINARY_DIR} + --cmakeBuildConfiguration=${CMAKE_CFG_INTDIR} + --lldbLibDir=lib${LLVM_LIBDIR_SUFFIX} + --lldbPythonPath=${lldb_python_build_path} + ${use_python_wrapper_from_src_dir} + ${use_six_py_from_system} + VERBATIM + DEPENDS ${LLDB_SOURCE_DIR}/scripts/finishSwigWrapperClasses.py + DEPENDS ${lldb_scripts_dir}/lldb.py + COMMENT "Python script sym-linking LLDB Python API") + + add_dependencies(finish_swig swig_wrapper liblldb lldb-argdumper) + set_target_properties(finish_swig swig_wrapper PROPERTIES FOLDER "lldb misc") + + # Ensure we do the python post-build step when building lldb. + add_dependencies(lldb finish_swig) + + if(NOT LLDB_BUILD_FRAMEWORK) + # Install the LLDB python module + add_custom_target(lldb-python-scripts) + add_dependencies(lldb-python-scripts finish_swig) + install(DIRECTORY ${CMAKE_BINARY_DIR}/${LLDB_PYTHON_RELATIVE_PATH}/ + DESTINATION ${LLDB_PYTHON_RELATIVE_PATH} + COMPONENT lldb-python-scripts) + if (NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-lldb-python-scripts + COMPONENT lldb-python-scripts + DEPENDS lldb-python-scripts) endif() + endif() - # Add a Post-Build Event to copy the custom Python DLL to the lldb binaries dir so that Windows can find it when launching - # lldb.exe or any other executables that were linked with liblldb. - if (WIN32 AND NOT "${PYTHON_DLL}" STREQUAL "") - # When using the Visual Studio CMake generator the lldb binaries end up in Release/bin, Debug/bin etc. - file(TO_NATIVE_PATH "${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin" LLDB_BIN_DIR) - file(TO_NATIVE_PATH "${PYTHON_DLL}" PYTHON_DLL_NATIVE_PATH) - add_custom_command( - TARGET finish_swig - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_DLL_NATIVE_PATH} ${LLDB_BIN_DIR} VERBATIM - COMMENT "Copying Python DLL to LLDB binaries directory.") - endif () + # Add a Post-Build Event to copy the custom Python DLL to the lldb binaries dir so that Windows can find it when launching + # lldb.exe or any other executables that were linked with liblldb. + if (WIN32 AND NOT "${PYTHON_DLL}" STREQUAL "") + # When using the Visual Studio CMake generator the lldb binaries end up in Release/bin, Debug/bin etc. + file(TO_NATIVE_PATH "${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin" LLDB_BIN_DIR) + file(TO_NATIVE_PATH "${PYTHON_DLL}" PYTHON_DLL_NATIVE_PATH) + add_custom_command( + TARGET finish_swig + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_DLL_NATIVE_PATH} ${LLDB_BIN_DIR} VERBATIM + COMMENT "Copying Python DLL to LLDB binaries directory.") + endif () endif () if(LLDB_BUILT_STANDALONE AND NOT LLVM_ENABLE_IDE) diff --git a/lldb/docs/resources/test.rst b/lldb/docs/resources/test.rst index 2305aefaa8445..df1f7129e0ac8 100644 --- a/lldb/docs/resources/test.rst +++ b/lldb/docs/resources/test.rst @@ -3,18 +3,22 @@ Testing The LLDB test suite consists of three different kinds of test: -* Python scripts located under ``lldb/packages/Python/lldbsuite``. - These are written using python's unittest2 testing framework. - -* Unit tests, located under ``lldb/unittests``. These are written in C++, +* Unit test. These are located under ``lldb/unittests`` and are written in C++ using googletest. +* Integration tests that test the debugger through the SB API. 
These are + located under ``lldb/packages/Python/lldbsuite`` and are written in Python + using ``dotest`` (LLDB's custom testing framework on top of unittest2). +* Integration tests that test the debugger through the command line. These are + located under ``lldb/test/Shell`` and are written in a shell-style format + using FileCheck to verify the debugger's output. -* LIT tests, located under ``lldb/lit``. These use the LLVM Integrated Tester. - -Many of the tests are accompanied by a C (C++, ObjC, etc.) source file. Each test -first compiles the source file and then uses LLDB to debug the resulting executable. +All three test suites use the `LLVM Integrated Tester +`_ (lit) as their test driver. The +test suites can be run as a whole or separately. -The tests verify both the LLDB command line interface and the scripting API. +Many of the tests are accompanied by a C (C++, ObjC, etc.) source file. Each +test first compiles the source file and then uses LLDB to debug the resulting +executable. .. contents:: :local: @@ -54,23 +58,69 @@ built with a custom version of clang, do: Note that multiple ``-A`` and ``-C`` flags can be specified to ``LLDB_TEST_USER_ARGS``. +Running a Single Test Suite +--------------------------- + +Each test suite can be run separately, similar to running the whole test suite +with ``check-lldb``. + +* Use ``check-lldb-unit`` to run just the unit tests. +* Use ``check-lldb-api`` to run just the SB API tests. +* Use ``check-lldb-shell`` to run just the shell tests. + +You can run specific subdirectories by appending the directory name to the +target. For example, to run all the tests in ``ObjectFile``, you can use the +target ``check-lldb-shell-objectfile``. However, because the unit tests and API +tests don't actually live under ``lldb/test``, this convenience is only +available for the shell tests. + +Running a Single Test +--------------------- + +The recommended way to run a single test is by invoking the lit driver with a +filter. This ensures that the test is run with the same configuration as when +run as part of a test suite. + +:: + + > ./bin/llvm-lit -sv lldb/test --filter + + +Because lit automatically scans a directory for tests, it's also possible to +pass a subdirectory to run a specific subset of the tests. + +:: + + > ./bin/llvm-lit -sv tools/lldb/test/Shell/Commands/CommandScriptImmediateOutput + + +For the SB API tests it is possible to forward arguments to ``dotest.py`` by +passing ``--param`` to lit and setting a value for ``dotest-args``. + +:: -Running a Specific Test or Set of Tests: Python ------------------------------------------------ + > ./bin/llvm-lit -sv tools/lldb/test --param dotest-args='-C gcc' + + +Below is an overview of running individual tests in the unit and API test suites +without going through the lit driver. + +Running a Specific Test or Set of Tests: API Tests +-------------------------------------------------- In addition to running all the LLDB test suites with the ``check-lldb`` CMake target above, it is possible to run individual LLDB tests. If you have a CMake build you can use the ``lldb-dotest`` binary, which is a wrapper around -``dotest.py`` that passes all the arguments configured by CMake. +``dotest.py`` that passes all the arguments configured by CMake. -Alternatively, -you can use ``dotest.py`` directly, if you want to run a test one-off with a -different configuration. +Alternatively, you can use ``dotest.py`` directly, if you want to run a test +one-off with a different configuration.
For example, to run the test cases defined in TestInferiorCrashing.py, run: :: - > lldb-dotest -p TestInferiorCrashing.py + > ./bin/lldb-dotest -p TestInferiorCrashing.py :: @@ -83,7 +133,7 @@ off), all tests in that directory will be executed: :: - > lldb-dotest functionalities/data-formatter + > ./bin/lldb-dotest functionalities/data-formatter :: @@ -115,32 +165,6 @@ To run a specific test, pass a filter, for example: > ./tools/lldb/unittests/Host/HostTests --gtest_filter=SocketTest.DomainListenConnectAccept -Running a Specific Test or Set of Tests: LIT --------------------------------------------- - -LIT automatically scans a directory for tests. To run a subset of the LIT tests, pass it a -subdirectory, for example: - -:: - - > ./bin/llvm-lit -sv tools/lldb/lit/Commands/CommandScriptImmediateOutput - - -LIT can also filter based on test name. - -:: - - > ./bin/llvm-lit -sv tools/lldb/lit --filter CommandScriptImmediateOutput - - -It is also possible to forward arguments to dotest.py by passing ``--param`` to -lit and setting a value for ``dotest-args``. - -:: - - > ./bin/llvm-lit -sv tools/lldb/lit --param dotest-args='-C gcc' - - Running the Test Suite Remotely ------------------------------- diff --git a/lldb/include/lldb/API/SBCommandReturnObject.h b/lldb/include/lldb/API/SBCommandReturnObject.h index 6aed32089ce95..e3fbacf85afc1 100644 --- a/lldb/include/lldb/API/SBCommandReturnObject.h +++ b/lldb/include/lldb/API/SBCommandReturnObject.h @@ -44,13 +44,21 @@ class LLDB_API SBCommandReturnObject { const char *GetError(); - size_t PutOutput(FILE *fh); + size_t PutOutput(FILE *fh); // DEPRECATED + + size_t PutOutput(SBFile file); + + size_t PutOutput(FileSP file); size_t GetOutputSize(); size_t GetErrorSize(); - size_t PutError(FILE *fh); + size_t PutError(FILE *fh); // DEPRECATED + + size_t PutError(SBFile file); + + size_t PutError(FileSP file); void Clear(); @@ -68,14 +76,21 @@ class LLDB_API SBCommandReturnObject { bool GetDescription(lldb::SBStream &description); - // deprecated, these two functions do not take ownership of file handle - void SetImmediateOutputFile(FILE *fh); + void SetImmediateOutputFile(FILE *fh); // DEPRECATED + + void SetImmediateErrorFile(FILE *fh); // DEPRECATED + + void SetImmediateOutputFile(FILE *fh, bool transfer_ownership); // DEPRECATED + + void SetImmediateErrorFile(FILE *fh, bool transfer_ownership); // DEPRECATED + + void SetImmediateOutputFile(SBFile file); - void SetImmediateErrorFile(FILE *fh); + void SetImmediateErrorFile(SBFile file); - void SetImmediateOutputFile(FILE *fh, bool transfer_ownership); + void SetImmediateOutputFile(FileSP file); - void SetImmediateErrorFile(FILE *fh, bool transfer_ownership); + void SetImmediateErrorFile(FileSP file); void PutCString(const char *string, int len = -1); diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 1f36999c3c471..1c564fc1fae09 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -94,6 +94,12 @@ class LLDB_API SBDebugger { SBError SetErrorFile(SBFile file); + SBError SetInputFile(FileSP file); + + SBError SetOutputFile(FileSP file); + + SBError SetErrorFile(FileSP file); + SBFile GetInputFile(); SBFile GetOutputFile(); diff --git a/lldb/include/lldb/API/SBFile.h b/lldb/include/lldb/API/SBFile.h index a5fc2cd41210f..4c4b2e0c1aeb5 100644 --- a/lldb/include/lldb/API/SBFile.h +++ b/lldb/include/lldb/API/SBFile.h @@ -15,9 +15,11 @@ namespace lldb { class LLDB_API SBFile { friend class SBDebugger; + friend class 
SBCommandReturnObject; public: SBFile(); + SBFile(FileSP file_sp); SBFile(FILE *file, bool transfer_ownership); SBFile(int fd, const char *mode, bool transfer_ownership); ~SBFile(); @@ -33,7 +35,6 @@ class LLDB_API SBFile { private: FileSP m_opaque_sp; - SBFile(FileSP file_sp); }; } // namespace lldb diff --git a/lldb/include/lldb/Breakpoint/BreakpointResolverAddress.h b/lldb/include/lldb/Breakpoint/BreakpointResolverAddress.h index 949a788282b96..3df89641c7111 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointResolverAddress.h +++ b/lldb/include/lldb/Breakpoint/BreakpointResolverAddress.h @@ -41,8 +41,8 @@ class BreakpointResolverAddress : public BreakpointResolver { ModuleList &modules) override; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override; diff --git a/lldb/include/lldb/Breakpoint/BreakpointResolverFileLine.h b/lldb/include/lldb/Breakpoint/BreakpointResolverFileLine.h index f146a704ca54e..9ca48ecf0dc03 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointResolverFileLine.h +++ b/lldb/include/lldb/Breakpoint/BreakpointResolverFileLine.h @@ -35,8 +35,8 @@ class BreakpointResolverFileLine : public BreakpointResolver { ~BreakpointResolverFileLine() override; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override; diff --git a/lldb/include/lldb/Breakpoint/BreakpointResolverFileRegex.h b/lldb/include/lldb/Breakpoint/BreakpointResolverFileRegex.h index 52321e4f82a09..df4c13ed59e27 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointResolverFileRegex.h +++ b/lldb/include/lldb/Breakpoint/BreakpointResolverFileRegex.h @@ -37,8 +37,8 @@ class BreakpointResolverFileRegex : public BreakpointResolver { ~BreakpointResolverFileRegex() override; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override; diff --git a/lldb/include/lldb/Breakpoint/BreakpointResolverName.h b/lldb/include/lldb/Breakpoint/BreakpointResolverName.h index 45a102e633797..196d88db848c7 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointResolverName.h +++ b/lldb/include/lldb/Breakpoint/BreakpointResolverName.h @@ -58,8 +58,8 @@ class BreakpointResolverName : public BreakpointResolver { ~BreakpointResolverName() override; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override; diff --git a/lldb/include/lldb/Breakpoint/BreakpointResolverScripted.h b/lldb/include/lldb/Breakpoint/BreakpointResolverScripted.h index 980bb4693d033..89a7d03ce93f7 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointResolverScripted.h +++ b/lldb/include/lldb/Breakpoint/BreakpointResolverScripted.h @@ -26,8 +26,7 @@ class BreakpointResolverScripted : public BreakpointResolver { BreakpointResolverScripted(Breakpoint *bkpt, const llvm::StringRef class_name, lldb::SearchDepth depth, - StructuredDataImpl *args_data, - ScriptInterpreter &script_interp); + StructuredDataImpl *args_data); ~BreakpointResolverScripted() override; @@ -39,8 +38,8 @@ class BreakpointResolverScripted : public 
BreakpointResolver { StructuredData::ObjectSP SerializeToStructuredData() override; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override; diff --git a/lldb/include/lldb/Core/AddressResolverFileLine.h b/lldb/include/lldb/Core/AddressResolverFileLine.h index b98e7d4c63380..efbe3de1f2948 100644 --- a/lldb/include/lldb/Core/AddressResolverFileLine.h +++ b/lldb/include/lldb/Core/AddressResolverFileLine.h @@ -34,8 +34,8 @@ class AddressResolverFileLine : public AddressResolver { ~AddressResolverFileLine() override; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override; diff --git a/lldb/include/lldb/Core/AddressResolverName.h b/lldb/include/lldb/Core/AddressResolverName.h index 6f74c0f9e3e50..8a039f9e1d921 100644 --- a/lldb/include/lldb/Core/AddressResolverName.h +++ b/lldb/include/lldb/Core/AddressResolverName.h @@ -39,8 +39,8 @@ class AddressResolverName : public AddressResolver { ~AddressResolverName() override; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override; diff --git a/lldb/include/lldb/Core/FileLineResolver.h b/lldb/include/lldb/Core/FileLineResolver.h index 1967ed58cd3b3..d6525b71bfdf1 100644 --- a/lldb/include/lldb/Core/FileLineResolver.h +++ b/lldb/include/lldb/Core/FileLineResolver.h @@ -37,8 +37,8 @@ class FileLineResolver : public Searcher { ~FileLineResolver() override; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override; diff --git a/lldb/include/lldb/Core/SearchFilter.h b/lldb/include/lldb/Core/SearchFilter.h index f38690c95f523..6823daf9e3ed8 100644 --- a/lldb/include/lldb/Core/SearchFilter.h +++ b/lldb/include/lldb/Core/SearchFilter.h @@ -52,8 +52,8 @@ class Searcher { virtual ~Searcher(); virtual CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool complete) = 0; + SymbolContext &context, + Address *addr) = 0; virtual lldb::SearchDepth GetDepth() = 0; diff --git a/lldb/include/lldb/Expression/DiagnosticManager.h b/lldb/include/lldb/Expression/DiagnosticManager.h index 91fe8a4b9b4c1..e5aecce08727f 100644 --- a/lldb/include/lldb/Expression/DiagnosticManager.h +++ b/lldb/include/lldb/Expression/DiagnosticManager.h @@ -12,6 +12,7 @@ #include "lldb/lldb-defines.h" #include "lldb/lldb-types.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include @@ -87,7 +88,7 @@ class Diagnostic { uint32_t m_compiler_id; // Compiler-specific diagnostic ID }; -typedef std::vector DiagnosticList; +typedef std::vector> DiagnosticList; class DiagnosticManager { public: @@ -96,33 +97,24 @@ class DiagnosticManager { m_fixed_expression.clear(); } - // The diagnostic manager holds a list of diagnostics, which are owned by the - // manager. 
const DiagnosticList &Diagnostics() { return m_diagnostics; } - ~DiagnosticManager() { - for (Diagnostic *diag : m_diagnostics) { - delete diag; - } - } - bool HasFixIts() const { - for (Diagnostic *diag : m_diagnostics) { - if (diag->HasFixIts()) - return true; - } - return false; + return llvm::any_of(m_diagnostics, + [](const std::unique_ptr &diag) { + return diag->HasFixIts(); + }); } void AddDiagnostic(llvm::StringRef message, DiagnosticSeverity severity, DiagnosticOrigin origin, uint32_t compiler_id = LLDB_INVALID_COMPILER_ID) { - m_diagnostics.push_back( - new Diagnostic(message, severity, origin, compiler_id)); + m_diagnostics.emplace_back( + std::make_unique(message, severity, origin, compiler_id)); } - void AddDiagnostic(Diagnostic *diagnostic) { - m_diagnostics.push_back(diagnostic); + void AddDiagnostic(std::unique_ptr diagnostic) { + m_diagnostics.push_back(std::move(diagnostic)); } size_t Printf(DiagnosticSeverity severity, const char *format, ...) diff --git a/lldb/include/lldb/Interpreter/CommandCompletions.h b/lldb/include/lldb/Interpreter/CommandCompletions.h index 43f2c33404a1f..275cc7e7c145d 100644 --- a/lldb/include/lldb/Interpreter/CommandCompletions.h +++ b/lldb/include/lldb/Interpreter/CommandCompletions.h @@ -103,7 +103,7 @@ class CommandCompletions { ~Completer() override; CallbackReturn SearchCallback(SearchFilter &filter, SymbolContext &context, - Address *addr, bool complete) override = 0; + Address *addr) override = 0; lldb::SearchDepth GetDepth() override = 0; @@ -127,8 +127,7 @@ class CommandCompletions { Searcher::CallbackReturn SearchCallback(SearchFilter &filter, SymbolContext &context, - Address *addr, - bool complete) override; + Address *addr) override; void DoCompletion(SearchFilter *filter) override; @@ -151,8 +150,7 @@ class CommandCompletions { Searcher::CallbackReturn SearchCallback(SearchFilter &filter, SymbolContext &context, - Address *addr, - bool complete) override; + Address *addr) override; void DoCompletion(SearchFilter *filter) override; @@ -173,8 +171,7 @@ class CommandCompletions { Searcher::CallbackReturn SearchCallback(SearchFilter &filter, SymbolContext &context, - Address *addr, - bool complete) override; + Address *addr) override; void DoCompletion(SearchFilter *filter) override; diff --git a/lldb/include/lldb/Interpreter/CommandReturnObject.h b/lldb/include/lldb/Interpreter/CommandReturnObject.h index 670002dada837..61e57fb798a1d 100644 --- a/lldb/include/lldb/Interpreter/CommandReturnObject.h +++ b/lldb/include/lldb/Interpreter/CommandReturnObject.h @@ -62,13 +62,13 @@ class CommandReturnObject { return m_err_stream; } - void SetImmediateOutputFile(FILE *fh, bool transfer_fh_ownership = false) { - lldb::StreamSP stream_sp(new StreamFile(fh, transfer_fh_ownership)); + void SetImmediateOutputFile(lldb::FileSP file_sp) { + lldb::StreamSP stream_sp(new StreamFile(file_sp)); m_out_stream.SetStreamAtIndex(eImmediateStreamIndex, stream_sp); } - void SetImmediateErrorFile(FILE *fh, bool transfer_fh_ownership = false) { - lldb::StreamSP stream_sp(new StreamFile(fh, transfer_fh_ownership)); + void SetImmediateErrorFile(lldb::FileSP file_sp) { + lldb::StreamSP stream_sp(new StreamFile(file_sp)); m_err_stream.SetStreamAtIndex(eImmediateStreamIndex, stream_sp); } diff --git a/lldb/include/lldb/Symbol/CallFrameInfo.h b/lldb/include/lldb/Symbol/CallFrameInfo.h new file mode 100644 index 0000000000000..765ddb41ab0cc --- /dev/null +++ b/lldb/include/lldb/Symbol/CallFrameInfo.h @@ -0,0 +1,28 @@ +//===-- CallFrameInfo.h 
-----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef liblldb_CallFrameInfo_h_ +#define liblldb_CallFrameInfo_h_ + +#include "lldb/Core/Address.h" + +namespace lldb_private { + +class CallFrameInfo { +public: + virtual ~CallFrameInfo() = default; + + virtual bool GetAddressRange(Address addr, AddressRange &range) = 0; + + virtual bool GetUnwindPlan(const Address &addr, UnwindPlan &unwind_plan) = 0; + virtual bool GetUnwindPlan(const AddressRange &range, UnwindPlan &unwind_plan) = 0; +}; + +} // namespace lldb_private + +#endif // liblldb_CallFrameInfo_h_ diff --git a/lldb/include/lldb/Symbol/FuncUnwinders.h b/lldb/include/lldb/Symbol/FuncUnwinders.h index cc767d4e1e82f..c49f6b0fa942a 100644 --- a/lldb/include/lldb/Symbol/FuncUnwinders.h +++ b/lldb/include/lldb/Symbol/FuncUnwinders.h @@ -76,6 +76,11 @@ class FuncUnwinders { lldb::UnwindPlanSP GetAssemblyUnwindPlan(Target &target, Thread &thread); + lldb::UnwindPlanSP GetObjectFileUnwindPlan(Target &target); + + lldb::UnwindPlanSP GetObjectFileAugmentedUnwindPlan(Target &target, + Thread &thread); + lldb::UnwindPlanSP GetEHFrameUnwindPlan(Target &target); lldb::UnwindPlanSP GetEHFrameAugmentedUnwindPlan(Target &target, @@ -113,10 +118,12 @@ class FuncUnwinders { std::recursive_mutex m_mutex; lldb::UnwindPlanSP m_unwind_plan_assembly_sp; + lldb::UnwindPlanSP m_unwind_plan_object_file_sp; lldb::UnwindPlanSP m_unwind_plan_eh_frame_sp; lldb::UnwindPlanSP m_unwind_plan_debug_frame_sp; // augmented by assembly inspection so it's valid everywhere + lldb::UnwindPlanSP m_unwind_plan_object_file_augmented_sp; lldb::UnwindPlanSP m_unwind_plan_eh_frame_augmented_sp; lldb::UnwindPlanSP m_unwind_plan_debug_frame_augmented_sp; @@ -130,7 +137,9 @@ class FuncUnwinders { // Fetching the UnwindPlans can be expensive - if we've already attempted to // get one & failed, don't try again. bool m_tried_unwind_plan_assembly : 1, m_tried_unwind_plan_eh_frame : 1, + m_tried_unwind_plan_object_file : 1, m_tried_unwind_plan_debug_frame : 1, + m_tried_unwind_plan_object_file_augmented : 1, m_tried_unwind_plan_eh_frame_augmented : 1, m_tried_unwind_plan_debug_frame_augmented : 1, m_tried_unwind_plan_compact_unwind : 1, diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index 7a0acd8e3040a..4d04f23a82869 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -656,6 +656,9 @@ class ObjectFile : public std::enable_shared_from_this, /// \return virtual std::vector GetLoadableData(Target &target); + /// Creates a plugin-specific call frame info + virtual std::unique_ptr CreateCallFrameInfo(); + protected: // Member variables. 
FileSpec m_file; diff --git a/lldb/include/lldb/Symbol/UnwindTable.h b/lldb/include/lldb/Symbol/UnwindTable.h index b4d7f0661d5b1..c1dc519c4b20c 100644 --- a/lldb/include/lldb/Symbol/UnwindTable.h +++ b/lldb/include/lldb/Symbol/UnwindTable.h @@ -27,6 +27,8 @@ class UnwindTable { ~UnwindTable(); + lldb_private::CallFrameInfo *GetObjectFileUnwindInfo(); + lldb_private::DWARFCallFrameInfo *GetEHFrameInfo(); lldb_private::DWARFCallFrameInfo *GetDebugFrameInfo(); @@ -71,6 +73,7 @@ class UnwindTable { bool m_initialized; // delay some initialization until ObjectFile is set up std::mutex m_mutex; + std::unique_ptr m_object_file_unwind_up; std::unique_ptr m_eh_frame_up; std::unique_ptr m_debug_frame_up; std::unique_ptr m_compact_unwind_up; diff --git a/lldb/include/lldb/Utility/ProcessInfo.h b/lldb/include/lldb/Utility/ProcessInfo.h index 7be25bb2ef9ac..9188bf3b70900 100644 --- a/lldb/include/lldb/Utility/ProcessInfo.h +++ b/lldb/include/lldb/Utility/ProcessInfo.h @@ -223,8 +223,20 @@ class ProcessInstanceInfoMatch { m_name_match_type = name_match_type; } + /// Return true iff the architecture in this object matches arch_spec. + bool ArchitectureMatches(const ArchSpec &arch_spec) const; + + /// Return true iff the process name in this object matches process_name. bool NameMatches(const char *process_name) const; + /// Return true iff the process ID and parent process IDs in this object match + /// the ones in proc_info. + bool ProcessIDsMatch(const ProcessInstanceInfo &proc_info) const; + + /// Return true iff the (both effective and real) user and group IDs in this + /// object match the ones in proc_info. + bool UserIDsMatch(const ProcessInstanceInfo &proc_info) const; + bool Matches(const ProcessInstanceInfo &proc_info) const; bool MatchAllProcesses() const; diff --git a/lldb/include/lldb/Utility/Reproducer.h b/lldb/include/lldb/Utility/Reproducer.h index 1ad7638eec433..7fde9c3e58b5e 100644 --- a/lldb/include/lldb/Utility/Reproducer.h +++ b/lldb/include/lldb/Utility/Reproducer.h @@ -313,6 +313,9 @@ class Reproducer { FileSpec GetReproducerPath() const; + bool IsCapturing() { return static_cast(m_generator); }; + bool IsReplaying() { return static_cast(m_loader); }; + protected: llvm::Error SetCapture(llvm::Optional root); llvm::Error SetReplay(llvm::Optional root); diff --git a/lldb/include/lldb/Utility/ReproducerInstrumentation.h b/lldb/include/lldb/Utility/ReproducerInstrumentation.h index 6053fafb198d8..75d66045758fd 100644 --- a/lldb/include/lldb/Utility/ReproducerInstrumentation.h +++ b/lldb/include/lldb/Utility/ReproducerInstrumentation.h @@ -238,9 +238,12 @@ struct ReferenceTag {}; struct ValueTag {}; struct FundamentalPointerTag {}; struct FundamentalReferenceTag {}; +struct NotImplementedTag {}; /// Return the deserialization tag for the given type T. 
-template struct serializer_tag { typedef ValueTag type; }; +template struct serializer_tag { + typedef typename std::conditional::value, ValueTag, NotImplementedTag>::type type; +}; template struct serializer_tag { typedef typename std::conditional::value, @@ -304,6 +307,11 @@ class Deserializer { } private: + template T Read(NotImplementedTag) { + m_buffer = m_buffer.drop_front(sizeof(T)); + return T(); + } + template T Read(ValueTag) { assert(HasData(sizeof(T))); T t; diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 35f8928debeaf..8315fbb273463 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -44,6 +44,7 @@ class BreakpointSiteList; class BroadcastEventSpec; class Broadcaster; class BroadcasterManager; +class CallFrameInfo; class ClangASTContext; class ClangASTImporter; class ClangASTMetadata; diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/Makefile b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/Makefile new file mode 100644 index 0000000000000..533ff707023a5 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/Makefile @@ -0,0 +1,9 @@ +# We don't have any standard include directories, so we can't +# parse the test_common.h header we usually inject as it includes +# system headers. +NO_TEST_COMMON_H := 1 + +CXXFLAGS_EXTRAS = -I $(SRCDIR)/root/usr/include/c++/v1/ -I $(SRCDIR)/root/usr/include/ -nostdinc -nostdinc++ -DENABLE_STD_CONTENT=1 +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/TestEmptyStdModule.py b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/TestEmptyStdModule.py new file mode 100644 index 0000000000000..94af997298f90 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/TestEmptyStdModule.py @@ -0,0 +1,35 @@ +""" +Test that LLDB doesn't crash if the std module we load is empty. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import os + +class ImportStdModule(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipIf(compiler=no_match("clang")) + def test(self): + self.build() + + sysroot = os.path.join(os.getcwd(), "root") + + # Set the sysroot. + self.runCmd("platform select --sysroot '" + sysroot + "' host", CURRENT_EXECUTABLE_SET) + + lldbutil.run_to_source_breakpoint(self, + "// Set break point at this line.", lldb.SBFileSpec("main.cpp")) + + self.runCmd("settings set target.import-std-module true") + self.runCmd("log enable lldb expr") + + # Use the typedef that is only defined in our 'empty' module. If this fails, then LLDB + # somehow figured out the correct define for the header and compiled the right + # standard module that actually contains the std::vector template. + self.expect("expr MissingContent var = 3; var", substrs=['$0 = 3']) + # Try to access our mock std::vector. This should fail but not crash LLDB as the + # std::vector template should be missing from the std module. 
+ self.expect("expr (size_t)v.size()", substrs=["Couldn't lookup symbols"], error=True) diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/main.cpp b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/main.cpp new file mode 100644 index 0000000000000..b01fe1a785361 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/main.cpp @@ -0,0 +1,8 @@ +#include + +int main(int argc, char **argv) { + // Makes sure we have the mock libc headers in the debug information. + libc_struct s; + std::vector v; + return 0; // Set break point at this line. +} diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/root/usr/include/c++/v1/algorithm b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/root/usr/include/c++/v1/algorithm new file mode 100644 index 0000000000000..a77c3d867396d --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/root/usr/include/c++/v1/algorithm @@ -0,0 +1,22 @@ +// This is only defined when building, but LLDB is missing this flag when loading the standard +// library module so the actual contents of the module are missing. +#ifdef ENABLE_STD_CONTENT + +#include "libc_header.h" + +namespace std { + inline namespace __1 { + // Pretend to be a std::vector template we need to instantiate + // in LLDB. + template + struct vector { T i; int size() { return 2; } }; + } +} +#else +// Unused typedef we can use to check that we actually loaded +// an empty module. Will be missing if LLDB somehow can get the +// ENABLE_STD_CONTENT define right and break this test silently +// (as with the define the module isn't empty anymore and this +// test always succeeds). +typedef int MissingContent; +#endif // ENABLE_STD_CONTENT diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/include/module.modulemap b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/root/usr/include/c++/v1/module.modulemap similarity index 100% rename from lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/include/module.modulemap rename to lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/root/usr/include/c++/v1/module.modulemap diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/root/usr/include/libc_header.h b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/root/usr/include/libc_header.h new file mode 100644 index 0000000000000..47525c9db3467 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/empty-module/root/usr/include/libc_header.h @@ -0,0 +1 @@ +struct libc_struct {}; diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/Makefile b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/Makefile index c1790dfbf767c..4915cdae87641 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/Makefile +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/Makefile @@ -3,7 +3,7 @@ # system headers. 
NO_TEST_COMMON_H := 1 -CXXFLAGS_EXTRAS = -I $(SRCDIR)/root/usr/include/c++/include/ -I $(SRCDIR)/root/usr/include/ -nostdinc -nostdinc++ +CXXFLAGS_EXTRAS = -I $(SRCDIR)/root/usr/include/c++/v1/ -I $(SRCDIR)/root/usr/include/ -nostdinc -nostdinc++ CXX_SOURCES := main.cpp include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/TestStdModuleSysroot.py b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/TestStdModuleSysroot.py index fbf00cddcfa92..90072fc3fdf62 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/TestStdModuleSysroot.py +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/TestStdModuleSysroot.py @@ -27,4 +27,6 @@ def test(self): # Call our custom function in our sysroot std module. # If this gives us the correct result, then we used the sysroot. - self.expect("expr std::myabs(-42)", substrs=['(int) $0 = 42']) + # We rely on the default argument of -123 to make sure we actually have the C++ module. + # (We don't have default arguments in the debug information). + self.expect("expr std::myabs()", substrs=['(int) $0 = 123']) diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/main.cpp b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/main.cpp index 2fbc76b9a7656..c01fadc5d8e47 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/main.cpp +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/main.cpp @@ -2,5 +2,6 @@ int main(int argc, char **argv) { libc_struct s; + std::vector v; return 0; // Set break point at this line. } diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/include/algorithm b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/include/algorithm deleted file mode 100644 index e8cbcca8e846b..0000000000000 --- a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/include/algorithm +++ /dev/null @@ -1,7 +0,0 @@ -#include "libc_header.h" - -namespace std { - int myabs(int i) { - return i < 0 ? -i : i; - } -} diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/v1/algorithm b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/v1/algorithm new file mode 100644 index 0000000000000..43f7becdbeb60 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/v1/algorithm @@ -0,0 +1,11 @@ +#include "libc_header.h" + +namespace std { + // Makes sure we get a support file for this header. + struct vector { int i; }; + + inline int myabs(int i = -123) { + double nil; + return i < 0 ? 
-i : i; + } +} diff --git a/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/v1/module.modulemap b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/v1/module.modulemap new file mode 100644 index 0000000000000..0eb48492a65d9 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/expression/import-std-module/sysroot/root/usr/include/c++/v1/module.modulemap @@ -0,0 +1,3 @@ +module std { + module "algorithm" { header "algorithm" export * } +} diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py index 8bd17feacb130..494af7e2acd4c 100644 --- a/lldb/packages/Python/lldbsuite/test/configuration.py +++ b/lldb/packages/Python/lldbsuite/test/configuration.py @@ -107,7 +107,9 @@ test_build_dir = None # The clang module cache directory used by lldb. -module_cache_dir = None +lldb_module_cache_dir = None +# The clang module cache directory used by clang. +clang_module_cache_dir = None # The only directory to scan for tests. If multiple test directories are # specified, and an exclusive test subdirectory is specified, the latter option diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 652a02e5ed65c..7b5414ee32dae 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -426,11 +426,18 @@ def parseOptionsAndInitTestdirs(): configuration.lldb_platform_working_dir = args.lldb_platform_working_dir if args.test_build_dir: configuration.test_build_dir = args.test_build_dir - if args.module_cache_dir: - configuration.module_cache_dir = args.module_cache_dir + if args.lldb_module_cache_dir: + configuration.lldb_module_cache_dir = args.lldb_module_cache_dir else: - configuration.module_cache_dir = os.path.join(configuration.test_build_dir, - 'module-cache-lldb') + configuration.lldb_module_cache_dir = os.path.join( + configuration.test_build_dir, 'module-cache-lldb') + if args.clang_module_cache_dir: + configuration.clang_module_cache_dir = args.clang_module_cache_dir + else: + configuration.clang_module_cache_dir = os.path.join( + configuration.test_build_dir, 'module-cache-clang') + + os.environ['CLANG_MODULE_CACHE_DIR'] = configuration.clang_module_cache_dir # Gather all the dirs passed on the command line. if len(args.args) > 0: diff --git a/lldb/packages/Python/lldbsuite/test/dotest_args.py b/lldb/packages/Python/lldbsuite/test/dotest_args.py index 4922f27c7bf21..fd77c7dc88f1e 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest_args.py +++ b/lldb/packages/Python/lldbsuite/test/dotest_args.py @@ -150,10 +150,15 @@ def create_parser(): default='lldb-test-build.noindex', help='The root build directory for the tests. It will be removed before running.') group.add_argument( - '--module-cache-dir', - dest='module_cache_dir', + '--lldb-module-cache-dir', + dest='lldb_module_cache_dir', metavar='The clang module cache directory used by LLDB', - help='The clang module cache directory used by LLDB. This is not the one used by the makefiles. Defaults to /module-cache-lldb.') + help='The clang module cache directory used by LLDB. Defaults to /module-cache-lldb.') + group.add_argument( + '--clang-module-cache-dir', + dest='clang_module_cache_dir', + metavar='The clang module cache directory used by Clang', + help='The clang module cache directory used in the Make files by Clang while building tests. 
Defaults to /module-cache-clang.') # Configuration options group = parser.add_argument_group('Remote platform options') diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/TestBreakpointSerialization.py b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/TestBreakpointSerialization.py index 086203ec54a88..7a04fa3c28807 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/TestBreakpointSerialization.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/TestBreakpointSerialization.py @@ -47,7 +47,12 @@ def test_name_filters(self): self.build() self.setup_targets_and_cleanup() self.do_check_names() - + + def test_scripted_extra_args(self): + self.build() + self.setup_targets_and_cleanup() + self.do_check_extra_args() + def setup_targets_and_cleanup(self): def cleanup (): self.RemoveTempFile(self.bkpts_file_path) @@ -289,3 +294,85 @@ def do_check_names(self): error = self.copy_target.BreakpointsCreateFromFile(self.bkpts_file_spec, names_list, copy_bps) self.assertTrue(error.Success(), "Failed reading breakpoints from file: %s"%(error.GetCString())) self.assertTrue(copy_bps.GetSize() == 1, "Found the matching breakpoint.") + + def do_check_extra_args(self): + + import side_effect + interp = self.dbg.GetCommandInterpreter() + error = lldb.SBError() + + script_name = os.path.join(self.getSourceDir(), "resolver.py") + + command = "command script import " + script_name + result = lldb.SBCommandReturnObject() + interp.HandleCommand(command, result) + self.assertTrue(result.Succeeded(), "com scr imp failed: %s"%(result.GetError())) + + # First make sure a scripted breakpoint with no args works: + bkpt = self.orig_target.BreakpointCreateFromScript("resolver.Resolver", lldb.SBStructuredData(), + lldb.SBFileSpecList(), lldb.SBFileSpecList()) + self.assertTrue(bkpt.IsValid(), "Bkpt is valid") + write_bps = lldb.SBBreakpointList(self.orig_target) + + error = self.orig_target.BreakpointsWriteToFile(self.bkpts_file_spec, write_bps) + self.assertTrue(error.Success(), "Failed writing breakpoints: %s"%(error.GetCString())) + + side_effect.g_extra_args = None + copy_bps = lldb.SBBreakpointList(self.copy_target) + error = self.copy_target.BreakpointsCreateFromFile(self.bkpts_file_spec, copy_bps) + self.assertTrue(error.Success(), "Failed reading breakpoints: %s"%(error.GetCString())) + + self.assertEqual(copy_bps.GetSize(), 1, "Got one breakpoint from file.") + no_keys = lldb.SBStringList() + side_effect.g_extra_args.GetKeys(no_keys) + self.assertEqual(no_keys.GetSize(), 0, "Should have no keys") + + self.orig_target.DeleteAllBreakpoints() + self.copy_target.DeleteAllBreakpoints() + + # Now try one with extra args: + + extra_args = lldb.SBStructuredData() + stream = lldb.SBStream() + stream.Print('{"first_arg" : "first_value", "second_arg" : "second_value"}') + extra_args.SetFromJSON(stream) + self.assertTrue(extra_args.IsValid(), "SBStructuredData is valid.") + + bkpt = self.orig_target.BreakpointCreateFromScript("resolver.Resolver", + extra_args, lldb.SBFileSpecList(), lldb.SBFileSpecList()) + self.assertTrue(bkpt.IsValid(), "Bkpt is valid") + write_bps = lldb.SBBreakpointList(self.orig_target) + + error = self.orig_target.BreakpointsWriteToFile(self.bkpts_file_spec, write_bps) + self.assertTrue(error.Success(), "Failed writing breakpoints: %s"%(error.GetCString())) + + orig_extra_args = side_effect.g_extra_args + self.assertTrue(orig_extra_args.IsValid(), "Extra args originally valid") + + 
orig_keys = lldb.SBStringList() + orig_extra_args.GetKeys(orig_keys) + self.assertEqual(2, orig_keys.GetSize(), "Should have two keys") + + side_effect.g_extra_args = None + + copy_bps = lldb.SBBreakpointList(self.copy_target) + error = self.copy_target.BreakpointsCreateFromFile(self.bkpts_file_spec, copy_bps) + self.assertTrue(error.Success(), "Failed reading breakpoints: %s"%(error.GetCString())) + + self.assertEqual(copy_bps.GetSize(), 1, "Got one breakpoint from file.") + + copy_extra_args = side_effect.g_extra_args + copy_keys = lldb.SBStringList() + copy_extra_args.GetKeys(copy_keys) + self.assertEqual(2, copy_keys.GetSize(), "Copy should have two keys") + + for idx in range(0, orig_keys.GetSize()): + key = orig_keys.GetStringAtIndex(idx) + copy_value = copy_extra_args.GetValueForKey(key).GetStringValue(100) + + if key == "first_arg": + self.assertEqual(copy_value, "first_value") + elif key == "second_arg": + self.assertEqual(copy_value, "second_value") + else: + self.Fail("Unknown key: %s"%(key)) diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/resolver.py b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/resolver.py new file mode 100644 index 0000000000000..c3a5af596d21a --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/resolver.py @@ -0,0 +1,17 @@ +import lldb +import side_effect + +class Resolver: + """This resolver class is just so I can read out the extra_args.""" + + def __init__(self, bkpt, extra_args, dict): + self.bkpt = bkpt + side_effect.g_extra_args = extra_args + + def __callback__(self, sym_ctx): + """Doesn't actually do anything.""" + return + + def get_short_help(self): + return "I am a python breakpoint resolver that does nothing" + diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/side_effect.py b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/side_effect.py new file mode 100644 index 0000000000000..868901cfce86d --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/functionalities/breakpoint/serialize/side_effect.py @@ -0,0 +1 @@ +g_extra_args = None diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/gdbclientutils.py index fad41f8a83a40..621279a0a9994 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/gdb_remote_client/gdbclientutils.py @@ -296,7 +296,7 @@ def _run(self): try: # accept() is stubborn and won't fail even when the socket is # shutdown, so we'll use a timeout - self._socket.settimeout(2.0) + self._socket.settimeout(20.0) client, client_addr = self._socket.accept() self._client = client # The connected client inherits its timeout from self._socket, diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/Makefile index 44093101a5076..1b231a5958d26 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/Makefile @@ -1,4 +1,9 @@ OBJC_SOURCES := main.m -LD_EXTRAS := -lobjc -framework Foundation -framework AppKit +ifeq ($(findstring MacOSX.platform,$(shell xcrun --show-sdk-path)),MacOSX.platform) +UI_FRAMEWORK = AppKit +else +UI_FRAMEWORK = UIKit +endif +LD_EXTRAS = -lobjc 
-framework Foundation -framework $(UI_FRAMEWORK) include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/TestMTCSimple.py b/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/TestMTCSimple.py index 0b4c0d1e0fa0d..e530c47d2d39b 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/TestMTCSimple.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/TestMTCSimple.py @@ -15,19 +15,12 @@ class MTCSimpleTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # Test file depends on AppKit which is not present on iOS etc. def test(self): self.mtc_dylib_path = findMainThreadCheckerDylib() - if self.mtc_dylib_path == "": - self.skipTest("This test requires libMainThreadChecker.dylib.") - + self.assertTrue(self.mtc_dylib_path != "") self.build() self.mtc_tests() - def setUp(self): - # Call super's setUp(). - TestBase.setUp(self) - @skipIf(archs=['i386']) def mtc_tests(self): # Load the test @@ -41,7 +34,11 @@ def mtc_tests(self): thread = process.GetSelectedThread() frame = thread.GetSelectedFrame() - self.expect("thread info", substrs=['stop reason = -[NSView superview] must be used from main thread only']) + view = "NSView" if lldbplatformutil.getPlatform() == "macosx" else "UIView" + + self.expect("thread info", + substrs=['stop reason = -[' + view + + ' superview] must be used from main thread only']) self.expect( "thread info -s", @@ -51,7 +48,7 @@ def mtc_tests(self): json_line = '\n'.join(output_lines[2:]) data = json.loads(json_line) self.assertEqual(data["instrumentation_class"], "MainThreadChecker") - self.assertEqual(data["api_name"], "-[NSView superview]") - self.assertEqual(data["class_name"], "NSView") + self.assertEqual(data["api_name"], "-[" + view + " superview]") + self.assertEqual(data["class_name"], view) self.assertEqual(data["selector"], "superview") - self.assertEqual(data["description"], "-[NSView superview] must be used from main thread only") + self.assertEqual(data["description"], "-[" + view + " superview] must be used from main thread only") diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/main.m b/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/main.m index 651347cf74eeb..a967dee469201 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/main.m +++ b/lldb/packages/Python/lldbsuite/test/functionalities/mtc/simple/main.m @@ -1,8 +1,14 @@ #import +#if __has_include() #import +#define XXView NSView +#else +#import +#define XXView UIView +#endif int main() { - NSView *view = [[NSView alloc] init]; + XXView *view = [[XXView alloc] init]; dispatch_group_t g = dispatch_group_create(); dispatch_group_enter(g); [NSThread detachNewThreadWithBlock:^{ diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py b/lldb/packages/Python/lldbsuite/test/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py index 74241a5fe60c9..3752cfd9c9b4b 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py +++ b/lldb/packages/Python/lldbsuite/test/functionalities/postmortem/minidump-new/TestMiniDumpUUID.py @@ -42,7 +42,7 @@ def get_minidump_modules(self, yaml_file): def test_zero_uuid_modules(self): """ Test multiple modules having a MINIDUMP_MODULE.CvRecord that is valid, - but contains a PDB70 value whose age is zero and whose UUID values are + but contains a PDB70 value whose age is zero 
and whose UUID values are all zero. Prior to a fix all such modules would be duplicated to the first one since the UUIDs claimed to be valid and all zeroes. Now we ensure that the UUID is not valid for each module and that we have @@ -56,7 +56,7 @@ def test_zero_uuid_modules(self): def test_uuid_modules_no_age(self): """ Test multiple modules having a MINIDUMP_MODULE.CvRecord that is valid, - and contains a PDB70 value whose age is zero and whose UUID values are + and contains a PDB70 value whose age is zero and whose UUID values are valid. Ensure we decode the UUID and don't include the age field in the UUID. """ modules = self.get_minidump_modules("linux-arm-uuids-no-age.yaml") @@ -68,7 +68,7 @@ def test_uuid_modules_no_age(self): def test_uuid_modules_no_age_apple(self): """ Test multiple modules having a MINIDUMP_MODULE.CvRecord that is valid, - and contains a PDB70 value whose age is zero and whose UUID values are + and contains a PDB70 value whose age is zero and whose UUID values are valid. Ensure we decode the UUID and don't include the age field in the UUID. Also ensure that the first uint32_t is byte swapped, along with the next two uint16_t values. Breakpad incorrectly byte swaps these values when it @@ -83,7 +83,7 @@ def test_uuid_modules_no_age_apple(self): def test_uuid_modules_with_age(self): """ Test multiple modules having a MINIDUMP_MODULE.CvRecord that is valid, - and contains a PDB70 value whose age is valid and whose UUID values are + and contains a PDB70 value whose age is valid and whose UUID values are valid. Ensure we decode the UUID and include the age field in the UUID. """ modules = self.get_minidump_modules("linux-arm-uuids-with-age.yaml") @@ -121,13 +121,31 @@ def test_uuid_modules_elf_build_id_zero(self): self.verify_module(modules[0], "/not/exist/a", None) self.verify_module(modules[1], "/not/exist/b", None) + def test_uuid_modules_elf_build_id_same(self): + """ + Test multiple modules having a MINIDUMP_MODULE.CvRecord that is + valid, and contains a ELF build ID whose value is the same. There + is an assert in the PlaceholderObjectFile that was firing when we + encountered this which was crashing the process that was checking + if PlaceholderObjectFile.m_base was the same as the address this + fake module was being loaded at. We need to ensure we don't crash + in such cases and that we add both modules even though they have + the same UUID. + """ + modules = self.get_minidump_modules("linux-arm-same-uuids.yaml") + self.assertEqual(2, len(modules)) + self.verify_module(modules[0], "/file/does/not/exist/a", + '11223344-1122-3344-1122-334411223344-11223344') + self.verify_module(modules[1], "/file/does/not/exist/b", + '11223344-1122-3344-1122-334411223344-11223344') + @expectedFailureAll(oslist=["windows"]) def test_partial_uuid_match(self): """ Breakpad has been known to create minidump files using CvRecord in each module whose signature is set to PDB70 where the UUID only contains the - first 16 bytes of a 20 byte ELF build ID. Code was added to - ProcessMinidump.cpp to deal with this and allows partial UUID matching. + first 16 bytes of a 20 byte ELF build ID. Code was added to + ProcessMinidump.cpp to deal with this and allows partial UUID matching. 
This test verifies that if we have a minidump with a 16 byte UUID, that we are able to associate a symbol file with a 20 byte UUID only if the @@ -141,16 +159,16 @@ def test_partial_uuid_match(self): self.dbg.HandleCommand(cmd) modules = self.get_minidump_modules("linux-arm-partial-uuids-match.yaml") self.assertEqual(1, len(modules)) - self.verify_module(modules[0], so_path, + self.verify_module(modules[0], so_path, "7295E17C-6668-9E05-CBB5-DEE5003865D5-5267C116") def test_partial_uuid_mismatch(self): """ Breakpad has been known to create minidump files using CvRecord in each module whose signature is set to PDB70 where the UUID only contains the - first 16 bytes of a 20 byte ELF build ID. Code was added to - ProcessMinidump.cpp to deal with this and allows partial UUID matching. - + first 16 bytes of a 20 byte ELF build ID. Code was added to + ProcessMinidump.cpp to deal with this and allows partial UUID matching. + This test verifies that if we have a minidump with a 16 byte UUID, that we are not able to associate a symbol file with a 20 byte UUID only if any of the first 16 bytes do not match. In this case we will see the UUID @@ -163,7 +181,7 @@ def test_partial_uuid_mismatch(self): modules = self.get_minidump_modules("linux-arm-partial-uuids-mismatch.yaml") self.assertEqual(1, len(modules)) self.verify_module(modules[0], - "/invalid/path/on/current/system/libuuidmismatch.so", + "/invalid/path/on/current/system/libuuidmismatch.so", "7295E17C-6668-9E05-CBB5-DEE5003865D5") def test_relative_module_name(self): diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/postmortem/minidump-new/linux-arm-same-uuids.yaml b/lldb/packages/Python/lldbsuite/test/functionalities/postmortem/minidump-new/linux-arm-same-uuids.yaml new file mode 100644 index 0000000000000..21c5220e415db --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/functionalities/postmortem/minidump-new/linux-arm-same-uuids.yaml @@ -0,0 +1,21 @@ +--- !minidump +Streams: + - Type: SystemInfo + Processor Arch: AMD64 + Platform ID: Linux + CSD Version: '15E216' + CPU: + Vendor ID: GenuineIntel + Version Info: 0x00000000 + Feature Info: 0x00000000 + - Type: ModuleList + Modules: + - Base of Image: 0x0000000000001000 + Size of Image: 0x00001000 + Module Name: '/file/does/not/exist/a' + CodeView Record: '52534453112233441122334411223344112233441122334411' + - Base of Image: 0x0000000000003000 + Size of Image: 0x00001000 + Module Name: '/file/does/not/exist/b' + CodeView Record: '52534453112233441122334411223344112233441122334411' +... diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/Makefile b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/Makefile new file mode 100644 index 0000000000000..4c053a09c7562 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/Makefile @@ -0,0 +1,7 @@ +# There is no guaranteed order in which the linker will order these +# files, so we just have a lot of them to make it unlikely that we hit +# the right one first by pure luck. 
+ +CXX_SOURCES := main.cpp a.cpp b.cpp c.cpp d.cpp e.cpp f.cpp g.cpp + +include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/TestCPPAccelerator.py b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/TestCPPAccelerator.py new file mode 100644 index 0000000000000..3705e95c60034 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/TestCPPAccelerator.py @@ -0,0 +1,31 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class CPPAcceleratorTableTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipUnlessDarwin + @skipIf(debug_info=no_match(["dwarf"])) + def test(self): + """Test that type lookups fail early (performance)""" + self.build() + logfile = self.getBuildArtifact('dwarf.log') + self.expect('log enable dwarf lookups -f' + logfile) + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint( + self, 'break here', lldb.SBFileSpec('main.cpp')) + # Pick one from the middle of the list to have a high chance + # of it not being in the first file looked at. + self.expect('frame variable inner_d') + + log = open(logfile, 'r') + n = 0 + for line in log: + if re.findall(r'[abcdefg]\.o: FindByNameAndTag\(\)', line): + self.assertTrue("d.o" in line) + n += 1 + + self.assertEqual(n, 1, log) diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/a.cpp b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/a.cpp new file mode 100644 index 0000000000000..d9f758e199138 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/a.cpp @@ -0,0 +1,2 @@ +#include "source.h" +CLASS(A) diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/b.cpp b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/b.cpp new file mode 100644 index 0000000000000..a0cdffa14f17c --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/b.cpp @@ -0,0 +1,2 @@ +#include "source.h" +CLASS(B) diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/c.cpp b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/c.cpp new file mode 100644 index 0000000000000..1bd7172b771e1 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/c.cpp @@ -0,0 +1,2 @@ +#include "source.h" +CLASS(C) diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/d.cpp b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/d.cpp new file mode 100644 index 0000000000000..e43c2ad05aafa --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/d.cpp @@ -0,0 +1,2 @@ +#include "source.h" +CLASS(D) diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/e.cpp b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/e.cpp new file mode 100644 index 0000000000000..a3008f71f653e --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/e.cpp @@ -0,0 +1,2 @@ +#include "source.h" +CLASS(E) diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/f.cpp b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/f.cpp new file mode 100644 index 0000000000000..77df296183e89 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/f.cpp @@ -0,0 +1,2 @@ +#include "source.h" +CLASS(F) diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/g.cpp 
b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/g.cpp new file mode 100644 index 0000000000000..e1446918891cf --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/g.cpp @@ -0,0 +1,2 @@ +#include "source.h" +CLASS(G) diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/main.cpp b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/main.cpp new file mode 100644 index 0000000000000..b7eb252bad83c --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/main.cpp @@ -0,0 +1,28 @@ +#define CLASS(NAME) \ + class NAME { \ + public: \ + struct Inner; \ + Inner *i = nullptr; \ + }; \ +NAME::Inner &getInner##NAME(); + +CLASS(A) +CLASS(B) +CLASS(C) +CLASS(D) +CLASS(E) +CLASS(F) +CLASS(G) + +int main() +{ + A::Inner &inner_a = getInnerA(); + B::Inner &inner_b = getInnerB(); + C::Inner &inner_c = getInnerC(); + D::Inner &inner_d = getInnerD(); + E::Inner &inner_e = getInnerE(); + F::Inner &inner_f = getInnerF(); + G::Inner &inner_g = getInnerG(); + + return 0; // break here +} diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/source.h b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/source.h new file mode 100644 index 0000000000000..214e7dada2e96 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/accelerator-table/source.h @@ -0,0 +1,12 @@ +#define CLASS(NAME) \ + class NAME { \ + public: \ + class Inner { \ + int j = #NAME[0]; \ + }; \ + Inner *i = nullptr; \ + }; \ + \ + static NAME::Inner inner; \ + static NAME obj; \ + NAME::Inner &getInner##NAME() { return inner; } diff --git a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py index 6e382d28c1edb..56b9febbd7297 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py +++ b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py @@ -17,6 +17,7 @@ # LLDB modules from . import configuration import lldb +import lldbsuite.test.lldbplatform as lldbplatform def check_first_register_readable(test_case): @@ -145,6 +146,9 @@ def findMainThreadCheckerDylib(): if not platformIsDarwin(): return "" + if getPlatform() in lldbplatform.translate(lldbplatform.darwin_embedded): + return "/Developer/usr/lib/libMainThreadChecker.dylib" + with os.popen('xcode-select -p') as output: xcode_developer_path = output.read().strip() mtc_dylib_path = '%s/usr/lib/libMainThreadChecker.dylib' % xcode_developer_path diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index 313823b47f601..34e6aa8f460d4 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -696,7 +696,7 @@ def setUpCommands(cls): "settings set plugin.process.gdb-remote.packet-timeout 60", 'settings set symbols.clang-modules-cache-path "{}"'.format( - configuration.module_cache_dir), + configuration.lldb_module_cache_dir), "settings set use-color false", ] # Make sure that a sanitizer LLDB's environment doesn't get passed on. 
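For reference, the log-scanning check that TestCPPAccelerator.py performs above can be read as the following standalone sketch; the function name and its default argument are illustrative and not part of the patch, and the assumed log line format ("<file>.o: FindByNameAndTag()") is the one matched by the test's regex. With a working DWARF accelerator table, resolving `inner_d` should consult only d.o, so exactly one matching lookup line is expected.

import re

def count_find_by_name_hits(log_path, wanted_object="d.o"):
    """Count per-object-file FindByNameAndTag() lookups in a 'dwarf lookups' log.

    Mirrors the assertion in TestCPPAccelerator.py: with accelerator tables,
    the lookup for inner_d should only ever touch d.o.
    """
    hits = 0
    with open(log_path, "r") as log:
        for line in log:
            if re.findall(r"[abcdefg]\.o: FindByNameAndTag\(\)", line):
                # Every lookup that does happen must be in the expected file.
                assert wanted_object in line
                hits += 1
    return hits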
diff --git a/lldb/packages/Python/lldbsuite/test/macosx/indirect_symbol/Makefile b/lldb/packages/Python/lldbsuite/test/macosx/indirect_symbol/Makefile index 5bea8dc4ab515..929ed58f75757 100644 --- a/lldb/packages/Python/lldbsuite/test/macosx/indirect_symbol/Makefile +++ b/lldb/packages/Python/lldbsuite/test/macosx/indirect_symbol/Makefile @@ -1,38 +1,16 @@ -include Makefile.rules - -LIB_PREFIX := lib - -ifeq "$(OS)" "Darwin" - CFLAGS += -arch $(ARCH) - DS := dsymutil - LD_FLAGS := -dynamiclib - LIB_INDIRECT := $(LIB_PREFIX)indirect.dylib - LIB_REEXPORT := $(LIB_PREFIX)reexport.dylib - EXEC_PATH := "@executable_path" - EXEC_PATH_INDIRECT := -install_name $(EXEC_PATH)/$(LIB_INDIRECT) - EXEC_PATH_REEXPORT := -install_name $(EXEC_PATH)/$(LIB_REEXPORT) -endif - -all: a.out $(LIB_INDIRECT) $(LIB_REEXPORT) +C_SOURCES := main.c +LD_EXTRAS := -L. -lindirect -lreexport -a.out: main.o $(LIB_INDIRECT) $(LIB_REEXPORT) - $(CC) $(CFLAGS) -o a.out main.o -L. $(LIB_INDIRECT) $(LIB_REEXPORT) +.PHONY: build-libindirect build-libreepxoprt +all: build-libindirect build-libreepxoprt a.out -main.o: $(SRCDIR)/main.c - $(CC) $(CFLAGS) -c $(SRCDIR)/main.c - -$(LIB_INDIRECT): indirect.o - $(CC) $(CFLAGS) $(LD_FLAGS) $(EXEC_PATH_INDIRECT) -o $(LIB_INDIRECT) indirect.o - if [ "$(OS)" = "Darwin" ]; then dsymutil $(LIB_INDIRECT); fi - -indirect.o: $(SRCDIR)/indirect.c - $(CC) $(CFLAGS) -c $(SRCDIR)/indirect.c +include Makefile.rules -$(LIB_REEXPORT): reexport.o $(LIB_INDIRECT) - $(CC) $(CFLAGS) $(LD_FLAGS) $(EXEC_PATH_REEXPORT) -o $(LIB_REEXPORT) reexport.o -L. -lindirect -Wl,-alias_list,$(SRCDIR)/alias.list - if [ "$(OS)" = "Darwin" ]; then dsymutil $(LIB_REEXPORT); fi +build-libindirect: indirect.c + $(MAKE) -f $(MAKEFILE_RULES) \ + DYLIB_C_SOURCES=indirect.c DYLIB_NAME=indirect DYLIB_ONLY=YES -reexport.o: $(SRCDIR)/reexport.c - $(CC) $(CFLAGS) -c $(SRCDIR)/reexport.c -clean:: - rm -rf $(wildcard *.o *~ *.dylib *.so a.out *.dSYM) +build-libreepxoprt: reexport.c + $(MAKE) -f $(MAKEFILE_RULES) \ + DYLIB_C_SOURCES=reexport.c DYLIB_NAME=reexport DYLIB_ONLY=YES \ + LD_EXTRAS="-L. -lindirect -Wl,-alias_list,$(SRCDIR)/alias.list" diff --git a/lldb/packages/Python/lldbsuite/test/macosx/indirect_symbol/TestIndirectSymbols.py b/lldb/packages/Python/lldbsuite/test/macosx/indirect_symbol/TestIndirectSymbols.py index 4aa0681f8dc9a..e529db11a0c20 100644 --- a/lldb/packages/Python/lldbsuite/test/macosx/indirect_symbol/TestIndirectSymbols.py +++ b/lldb/packages/Python/lldbsuite/test/macosx/indirect_symbol/TestIndirectSymbols.py @@ -20,6 +20,7 @@ def setUp(self): self.main_source = "main.c" @skipUnlessDarwin + @expectedFailureAll(oslist=no_match(["macosx"]), bugnumber="rdar://55952764") @add_test_categories(['pyapi']) def test_with_python_api(self): """Test stepping and setting breakpoints in indirect and re-exported symbols.""" @@ -61,8 +62,7 @@ def test_with_python_api(self): # indirect function. 
thread.StepInto() curr_function = thread.GetFrameAtIndex(0).GetFunctionName() - self.assertTrue( - curr_function == "call_through_indirect_hidden", + self.assertEqual(curr_function, "call_through_indirect_hidden", "Stepped into indirect symbols.") # Now set a breakpoint using the indirect symbol name, and make sure we diff --git a/lldb/packages/Python/lldbsuite/test/macosx/macabi/Makefile b/lldb/packages/Python/lldbsuite/test/macosx/macabi/Makefile index 286c77452c577..61e3337e1d9ec 100644 --- a/lldb/packages/Python/lldbsuite/test/macosx/macabi/Makefile +++ b/lldb/packages/Python/lldbsuite/test/macosx/macabi/Makefile @@ -8,7 +8,7 @@ CFLAGS_EXTRAS := -target $(TRIPLE) all: libfoo.dylib a.out -libfoo.dylib: foo.c \ +libfoo.dylib: foo.c $(MAKE) -f $(MAKEFILE_RULES) \ DYLIB_ONLY=YES DYLIB_NAME=foo DYLIB_C_SOURCES=foo.c diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index 0a0ac3f2da89c..2ad4a486c7439 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -171,15 +171,8 @@ endif # o cxx_compiler # o cxx_linker #---------------------------------------------------------------------- -CC ?= clang -ifeq "$(CC)" "cc" - ifneq "$(shell which clang)" "" - CC = clang - else ifneq "$(shell which clang-3.5)" "" - CC = clang-3.5 - else ifneq "$(shell which gcc)" "" - CC = gcc - endif +ifeq "$(CC)" "" +$(error "C compiler is not specified. Please run tests through lldb-dotest or lit") endif #---------------------------------------------------------------------- @@ -313,14 +306,6 @@ ifeq "$(MAKE_DWO)" "YES" CFLAGS += -gsplit-dwarf endif -# Use a shared module cache when building in the default test build directory. -CLANG_MODULE_CACHE_DIR := $(shell echo "$(BUILDDIR)" | sed $(QUOTE)s/lldb-test-build.noindex.*/lldb-test-build.noindex\/module-cache-clang/$(QUOTE)) - -ifeq "$(findstring lldb-test-build.noindex, $(BUILDDIR))" "" -CLANG_MODULE_CACHE_DIR := $(BUILDDIR)/module-cache -$(warning failed to set the shared clang module cache dir) -endif - MODULE_BASE_FLAGS := -fmodules -gmodules -fmodules-cache-path=$(CLANG_MODULE_CACHE_DIR) MANDATORY_MODULE_BUILD_CFLAGS := $(MODULE_BASE_FLAGS) -gmodules # Build flags for building with C++ modules. @@ -609,6 +594,9 @@ ifneq "$(DYLIB_NAME)" "" ifeq "$(DYLIB_ONLY)" "" $(EXE) : $(OBJECTS) $(ARCHIVE_NAME) $(DYLIB_FILENAME) $(LD) $(OBJECTS) $(ARCHIVE_NAME) -L. -l$(DYLIB_NAME) $(LDFLAGS) -o "$(EXE)" +ifneq "$(CODESIGN)" "" + $(CODESIGN) -s - "$(EXE)" +endif else EXE = $(DYLIB_FILENAME) endif diff --git a/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py b/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py index e6bc9a0838097..aede03da14ca9 100644 --- a/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py +++ b/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py @@ -104,6 +104,33 @@ def getCCSpec(compiler): else: return "" +def getDsymutilSpec(): + """ + Helper function to return the key-value string to specify the dsymutil + used for the make system. + """ + if "DSYMUTIL" in os.environ: + return "DSYMUTIL={}".format(os.environ["DSYMUTIL"]) + return ""; + +def getSDKRootSpec(): + """ + Helper function to return the key-value string to specify the SDK root + used for the make system. 
+ """ + if "SDKROOT" in os.environ: + return "SDKROOT={}".format(os.environ["SDKROOT"]) + return ""; + +def getModuleCacheSpec(): + """ + Helper function to return the key-value string to specify the clang + module cache used for the make system. + """ + if "CLANG_MODULE_CACHE_DIR" in os.environ: + return "CLANG_MODULE_CACHE_DIR={}".format( + os.environ["CLANG_MODULE_CACHE_DIR"]) + return ""; def getCmdLine(d): """ @@ -145,8 +172,14 @@ def buildDefault( testname=None): """Build the binaries the default way.""" commands = [] - commands.append(getMake(testdir, testname) + ["all", getArchSpec(architecture), - getCCSpec(compiler), getCmdLine(dictionary)]) + commands.append(getMake(testdir, testname) + + ["all", + getArchSpec(architecture), + getCCSpec(compiler), + getDsymutilSpec(), + getSDKRootSpec(), + getModuleCacheSpec(), + getCmdLine(dictionary)]) runBuildCommands(commands, sender=sender) @@ -164,8 +197,13 @@ def buildDwarf( """Build the binaries with dwarf debug info.""" commands = [] commands.append(getMake(testdir, testname) + - ["MAKE_DSYM=NO", getArchSpec(architecture), - getCCSpec(compiler), getCmdLine(dictionary)]) + ["MAKE_DSYM=NO", + getArchSpec(architecture), + getCCSpec(compiler), + getDsymutilSpec(), + getSDKRootSpec(), + getModuleCacheSpec(), + getCmdLine(dictionary)]) runBuildCommands(commands, sender=sender) # True signifies that we can handle building dwarf. @@ -182,9 +220,13 @@ def buildDwo( """Build the binaries with dwarf debug info.""" commands = [] commands.append(getMake(testdir, testname) + - ["MAKE_DSYM=NO", "MAKE_DWO=YES", + ["MAKE_DSYM=NO", + "MAKE_DWO=YES", getArchSpec(architecture), getCCSpec(compiler), + getDsymutilSpec(), + getSDKRootSpec(), + getModuleCacheSpec(), getCmdLine(dictionary)]) runBuildCommands(commands, sender=sender) @@ -206,6 +248,9 @@ def buildGModules( "MAKE_GMODULES=YES", getArchSpec(architecture), getCCSpec(compiler), + getDsymutilSpec(), + getSDKRootSpec(), + getModuleCacheSpec(), getCmdLine(dictionary)]) lldbtest.system(commands, sender=sender) diff --git a/lldb/packages/Python/lldbsuite/test/python_api/file_handle/TestFileHandle.py b/lldb/packages/Python/lldbsuite/test/python_api/file_handle/TestFileHandle.py index 3231fdfe6887f..a977ec959658a 100644 --- a/lldb/packages/Python/lldbsuite/test/python_api/file_handle/TestFileHandle.py +++ b/lldb/packages/Python/lldbsuite/test/python_api/file_handle/TestFileHandle.py @@ -12,9 +12,53 @@ import lldb from lldbsuite.test import lldbtest -from lldbsuite.test.decorators import ( - add_test_categories, no_debug_info_test, skipIf) - +from lldbsuite.test.decorators import * + +class OhNoe(Exception): + pass + +class BadIO(io.TextIOBase): + @property + def closed(self): + return False + def writable(self): + return True + def readable(self): + return True + def write(self, s): + raise OhNoe('OH NOE') + def read(self, n): + raise OhNoe("OH NOE") + def flush(self): + raise OhNoe('OH NOE') + +# This class will raise an exception while it's being +# converted into a C++ object by swig +class ReallyBadIO(io.TextIOBase): + def fileno(self): + return 999 + def writable(self): + raise OhNoe("OH NOE!!!") + +class MutableBool(): + def __init__(self, value): + self.value = value + def set(self, value): + self.value = bool(value) + def __bool__(self): + return self.value + +class FlushTestIO(io.StringIO): + def __init__(self, mutable_flushed, mutable_closed): + super(FlushTestIO, self).__init__() + self.mut_flushed = mutable_flushed + self.mut_closed = mutable_closed + def close(self): + self.mut_closed.set(True) 
+ return super(FlushTestIO, self).close() + def flush(self): + self.mut_flushed.set(True) + return super(FlushTestIO, self).flush() @contextmanager def replace_stdout(new): @@ -36,6 +80,7 @@ def i(): class FileHandleTestCase(lldbtest.TestBase): + NO_DEBUG_INFO_TESTCASE = True mydir = lldbtest.Base.compute_mydir(__file__) # The way this class interacts with the debugger is different @@ -84,7 +129,8 @@ def handleCmd(self, cmd, check=True, collect_result=True): @add_test_categories(['pyapi']) - @no_debug_info_test + @skipIfWindows # FIXME pre-existing bug, should be fixed + # when we delete the FILE* typemaps. def test_legacy_file_out_script(self): with open(self.out_filename, 'w') as f: self.debugger.SetOutputFileHandle(f, False) @@ -100,7 +146,6 @@ def test_legacy_file_out_script(self): @add_test_categories(['pyapi']) - @no_debug_info_test def test_legacy_file_out(self): with open(self.out_filename, 'w') as f: self.debugger.SetOutputFileHandle(f, False) @@ -110,12 +155,15 @@ def test_legacy_file_out(self): self.assertIn('deadbeef', f.read()) @add_test_categories(['pyapi']) - @no_debug_info_test + @skipIfWindows # FIXME pre-existing bug, should be fixed + # when we delete the FILE* typemaps. def test_legacy_file_err_with_get(self): with open(self.out_filename, 'w') as f: self.debugger.SetErrorFileHandle(f, False) self.handleCmd('lolwut', check=False, collect_result=False) - self.debugger.GetErrorFileHandle().write('FOOBAR\n') + f2 = self.debugger.GetErrorFileHandle() + f2.write('FOOBAR\n') + f2.flush() lldb.SBDebugger.Destroy(self.debugger) with open(self.out_filename, 'r') as f: errors = f.read() @@ -124,7 +172,6 @@ def test_legacy_file_err_with_get(self): @add_test_categories(['pyapi']) - @no_debug_info_test def test_legacy_file_err(self): with open(self.out_filename, 'w') as f: self.debugger.SetErrorFileHandle(f, False) @@ -135,7 +182,16 @@ def test_legacy_file_err(self): @add_test_categories(['pyapi']) - @no_debug_info_test + def test_legacy_file_error(self): + debugger = self.debugger + with open(self.out_filename, 'w') as f: + debugger.SetErrorFileHandle(f, False) + self.handleCmd('lolwut', check=False, collect_result=False) + with open(self.out_filename, 'r') as f: + errors = f.read() + self.assertTrue(re.search(r'error:.*lolwut', errors)) + + @add_test_categories(['pyapi']) def test_sbfile_type_errors(self): sbf = lldb.SBFile() self.assertRaises(TypeError, sbf.Write, None) @@ -146,8 +202,7 @@ def test_sbfile_type_errors(self): @add_test_categories(['pyapi']) - @no_debug_info_test - def test_sbfile_write(self): + def test_sbfile_write_fileno(self): with open(self.out_filename, 'w') as f: sbf = lldb.SBFile(f.fileno(), "w", False) self.assertTrue(sbf.IsValid()) @@ -161,8 +216,20 @@ def test_sbfile_write(self): @add_test_categories(['pyapi']) - @no_debug_info_test - def test_sbfile_read(self): + def test_sbfile_write(self): + with open(self.out_filename, 'w') as f: + sbf = lldb.SBFile(f) + e, n = sbf.Write(b'FOO\n') + self.assertTrue(e.Success()) + self.assertEqual(n, 4) + sbf.Close() + self.assertTrue(f.closed) + with open(self.out_filename, 'r') as f: + self.assertEqual(f.read().strip(), 'FOO') + + + @add_test_categories(['pyapi']) + def test_sbfile_read_fileno(self): with open(self.out_filename, 'w') as f: f.write('FOO') with open(self.out_filename, 'r') as f: @@ -175,7 +242,21 @@ def test_sbfile_read(self): @add_test_categories(['pyapi']) - @no_debug_info_test + def test_sbfile_read(self): + with open(self.out_filename, 'w') as f: + f.write('foo') + with open(self.out_filename, 'r') 
as f: + sbf = lldb.SBFile(f) + buf = bytearray(100) + e, n = sbf.Read(buf) + self.assertTrue(e.Success()) + self.assertEqual(n, 3) + self.assertEqual(buf[:n], b'foo') + sbf.Close() + self.assertTrue(f.closed) + + + @add_test_categories(['pyapi']) def test_fileno_out(self): with open(self.out_filename, 'w') as f: sbf = lldb.SBFile(f.fileno(), "w", False) @@ -189,7 +270,6 @@ def test_fileno_out(self): @add_test_categories(['pyapi']) - @no_debug_info_test def test_fileno_help(self): with open(self.out_filename, 'w') as f: sbf = lldb.SBFile(f.fileno(), "w", False) @@ -201,7 +281,17 @@ def test_fileno_help(self): @add_test_categories(['pyapi']) - @no_debug_info_test + def test_help(self): + debugger = self.debugger + with open(self.out_filename, 'w') as f: + status = debugger.SetOutputFile(lldb.SBFile(f)) + self.assertTrue(status.Success()) + self.handleCmd("help help", check=False, collect_result=False) + with open(self.out_filename, 'r') as f: + self.assertIn('Show a list of all debugger commands', f.read()) + + + @add_test_categories(['pyapi']) def test_immediate(self): with open(self.out_filename, 'w') as f: ret = lldb.SBCommandReturnObject() @@ -210,9 +300,7 @@ def test_immediate(self): interpreter.HandleCommand("help help", ret) # make sure the file wasn't closed early. f.write("\nQUUX\n") - ret = None # call destructor and flush streams - with open(self.out_filename, 'r') as f: output = f.read() self.assertTrue(re.search(r'Show a list of all debugger commands', output)) @@ -220,7 +308,37 @@ def test_immediate(self): @add_test_categories(['pyapi']) - @no_debug_info_test + @skipIf(py_version=['<', (3,)]) + def test_immediate_string(self): + f = io.StringIO() + ret = lldb.SBCommandReturnObject() + ret.SetImmediateOutputFile(f) + interpreter = self.debugger.GetCommandInterpreter() + interpreter.HandleCommand("help help", ret) + # make sure the file wasn't closed early. 
+ f.write("\nQUUX\n") + ret = None # call destructor and flush streams + output = f.getvalue() + self.assertTrue(re.search(r'Show a list of all debugger commands', output)) + self.assertTrue(re.search(r'QUUX', output)) + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_immediate_sbfile_string(self): + f = io.StringIO() + ret = lldb.SBCommandReturnObject() + ret.SetImmediateOutputFile(lldb.SBFile(f)) + interpreter = self.debugger.GetCommandInterpreter() + interpreter.HandleCommand("help help", ret) + output = f.getvalue() + ret = None # call destructor and flush streams + # sbfile default constructor doesn't borrow the file + self.assertTrue(f.closed) + self.assertTrue(re.search(r'Show a list of all debugger commands', output)) + + + @add_test_categories(['pyapi']) def test_fileno_inout(self): with open(self.in_filename, 'w') as f: f.write("help help\n") @@ -244,7 +362,76 @@ def test_fileno_inout(self): @add_test_categories(['pyapi']) - @no_debug_info_test + def test_inout(self): + with open(self.in_filename, 'w') as f: + f.write("help help\n") + with open(self.out_filename, 'w') as outf, \ + open(self.in_filename, 'r') as inf: + status = self.debugger.SetOutputFile(lldb.SBFile(outf)) + self.assertTrue(status.Success()) + status = self.debugger.SetInputFile(lldb.SBFile(inf)) + self.assertTrue(status.Success()) + opts = lldb.SBCommandInterpreterRunOptions() + self.debugger.RunCommandInterpreter(True, False, opts, 0, False, False) + self.debugger.GetOutputFile().Flush() + with open(self.out_filename, 'r') as f: + output = f.read() + self.assertIn('Show a list of all debugger commands', output) + + + @add_test_categories(['pyapi']) + def test_binary_inout(self): + debugger = self.debugger + with open(self.in_filename, 'w') as f: + f.write("help help\n") + with open(self.out_filename, 'wb') as outf, \ + open(self.in_filename, 'rb') as inf: + status = debugger.SetOutputFile(lldb.SBFile(outf)) + self.assertTrue(status.Success()) + status = debugger.SetInputFile(lldb.SBFile(inf)) + self.assertTrue(status.Success()) + opts = lldb.SBCommandInterpreterRunOptions() + debugger.RunCommandInterpreter(True, False, opts, 0, False, False) + debugger.GetOutputFile().Flush() + with open(self.out_filename, 'r') as f: + output = f.read() + self.assertIn('Show a list of all debugger commands', output) + + + @add_test_categories(['pyapi']) + @expectedFailureAll() # FIXME IOHandler still using FILE* + def test_string_inout(self): + inf = io.StringIO("help help\n") + outf = io.StringIO() + status = self.debugger.SetOutputFile(lldb.SBFile(outf)) + self.assertTrue(status.Success()) + status = self.debugger.SetInputFile(lldb.SBFile(inf)) + self.assertTrue(status.Success()) + opts = lldb.SBCommandInterpreterRunOptions() + self.debugger.RunCommandInterpreter(True, False, opts, 0, False, False) + self.debugger.GetOutputFile().Flush() + output = outf.getvalue() + self.assertIn('Show a list of all debugger commands', output) + + + @add_test_categories(['pyapi']) + @expectedFailureAll() # FIXME IOHandler still using FILE* + def test_bytes_inout(self): + inf = io.BytesIO(b"help help\nhelp b\n") + outf = io.BytesIO() + status = self.debugger.SetOutputFile(lldb.SBFile(outf)) + self.assertTrue(status.Success()) + status = self.debugger.SetInputFile(lldb.SBFile(inf)) + self.assertTrue(status.Success()) + opts = lldb.SBCommandInterpreterRunOptions() + self.debugger.RunCommandInterpreter(True, False, opts, 0, False, False) + self.debugger.GetOutputFile().Flush() + output = outf.getvalue() + 
self.assertIn(b'Show a list of all debugger commands', output) + self.assertIn(b'Set a breakpoint', output) + + + @add_test_categories(['pyapi']) def test_fileno_error(self): with open(self.out_filename, 'w') as f: @@ -263,7 +450,6 @@ def test_fileno_error(self): #FIXME This shouldn't fail for python2 either. @add_test_categories(['pyapi']) - @no_debug_info_test @skipIf(py_version=['<', (3,)]) def test_replace_stdout(self): f = io.StringIO() @@ -272,3 +458,370 @@ def test_replace_stdout(self): self.handleCmd('script sys.stdout.write("lol")', collect_result=False, check=False) self.assertEqual(sys.stdout, f) + + + @add_test_categories(['pyapi']) + @expectedFailureAll() #FIXME bug in ScriptInterpreterPython + def test_replace_stdout_with_nonfile(self): + debugger = self.debugger + f = io.StringIO() + with replace_stdout(f): + class Nothing(): + pass + with replace_stdout(Nothing): + self.assertEqual(sys.stdout, Nothing) + self.handleCmd('script sys.stdout.write("lol")', + check=False, collect_result=False) + self.assertEqual(sys.stdout, Nothing) + sys.stdout.write(u"FOO") + self.assertEqual(f.getvalue(), "FOO") + + + @add_test_categories(['pyapi']) + def test_sbfile_write_borrowed(self): + with open(self.out_filename, 'w') as f: + sbf = lldb.SBFile.Create(f, borrow=True) + e, n = sbf.Write(b'FOO') + self.assertTrue(e.Success()) + self.assertEqual(n, 3) + sbf.Close() + self.assertFalse(f.closed) + f.write('BAR\n') + with open(self.out_filename, 'r') as f: + self.assertEqual(f.read().strip(), 'FOOBAR') + + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_sbfile_write_forced(self): + with open(self.out_filename, 'w') as f: + written = MutableBool(False) + orig_write = f.write + def mywrite(x): + written.set(True) + return orig_write(x) + f.write = mywrite + sbf = lldb.SBFile.Create(f, force_io_methods=True) + e, n = sbf.Write(b'FOO') + self.assertTrue(written) + self.assertTrue(e.Success()) + self.assertEqual(n, 3) + sbf.Close() + self.assertTrue(f.closed) + with open(self.out_filename, 'r') as f: + self.assertEqual(f.read().strip(), 'FOO') + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_sbfile_write_forced_borrowed(self): + with open(self.out_filename, 'w') as f: + written = MutableBool(False) + orig_write = f.write + def mywrite(x): + written.set(True) + return orig_write(x) + f.write = mywrite + sbf = lldb.SBFile.Create(f, borrow=True, force_io_methods=True) + e, n = sbf.Write(b'FOO') + self.assertTrue(written) + self.assertTrue(e.Success()) + self.assertEqual(n, 3) + sbf.Close() + self.assertFalse(f.closed) + with open(self.out_filename, 'r') as f: + self.assertEqual(f.read().strip(), 'FOO') + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_sbfile_write_string(self): + f = io.StringIO() + sbf = lldb.SBFile(f) + e, n = sbf.Write(b'FOO') + self.assertEqual(f.getvalue().strip(), "FOO") + self.assertTrue(e.Success()) + self.assertEqual(n, 3) + sbf.Close() + self.assertTrue(f.closed) + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_string_out(self): + f = io.StringIO() + status = self.debugger.SetOutputFile(f) + self.assertTrue(status.Success()) + self.handleCmd("script 'foobar'") + self.assertEqual(f.getvalue().strip(), "'foobar'") + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_string_error(self): + f = io.StringIO() + debugger = self.debugger + status = debugger.SetErrorFile(f) + self.assertTrue(status.Success()) + 
self.handleCmd('lolwut', check=False, collect_result=False) + errors = f.getvalue() + self.assertTrue(re.search(r'error:.*lolwut', errors)) + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_sbfile_write_bytes(self): + f = io.BytesIO() + sbf = lldb.SBFile(f) + e, n = sbf.Write(b'FOO') + self.assertEqual(f.getvalue().strip(), b"FOO") + self.assertTrue(e.Success()) + self.assertEqual(n, 3) + sbf.Close() + self.assertTrue(f.closed) + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_sbfile_read_string(self): + f = io.StringIO('zork') + sbf = lldb.SBFile(f) + buf = bytearray(100) + e, n = sbf.Read(buf) + self.assertTrue(e.Success()) + self.assertEqual(buf[:n], b'zork') + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_sbfile_read_string_one_byte(self): + f = io.StringIO('z') + sbf = lldb.SBFile(f) + buf = bytearray(1) + e, n = sbf.Read(buf) + self.assertTrue(e.Fail()) + self.assertEqual(n, 0) + self.assertEqual(e.GetCString(), "can't read less than 6 bytes from a utf8 text stream") + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_sbfile_read_bytes(self): + f = io.BytesIO(b'zork') + sbf = lldb.SBFile(f) + buf = bytearray(100) + e, n = sbf.Read(buf) + self.assertTrue(e.Success()) + self.assertEqual(buf[:n], b'zork') + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_sbfile_out(self): + with open(self.out_filename, 'w') as f: + sbf = lldb.SBFile(f) + status = self.debugger.SetOutputFile(sbf) + self.assertTrue(status.Success()) + self.handleCmd('script 2+2') + with open(self.out_filename, 'r') as f: + self.assertEqual(f.read().strip(), '4') + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_file_out(self): + with open(self.out_filename, 'w') as f: + status = self.debugger.SetOutputFile(f) + self.assertTrue(status.Success()) + self.handleCmd('script 2+2') + with open(self.out_filename, 'r') as f: + self.assertEqual(f.read().strip(), '4') + + + @add_test_categories(['pyapi']) + def test_sbfile_error(self): + with open(self.out_filename, 'w') as f: + sbf = lldb.SBFile(f) + status = self.debugger.SetErrorFile(sbf) + self.assertTrue(status.Success()) + self.handleCmd('lolwut', check=False, collect_result=False) + with open(self.out_filename, 'r') as f: + errors = f.read() + self.assertTrue(re.search(r'error:.*lolwut', errors)) + + + @add_test_categories(['pyapi']) + def test_file_error(self): + with open(self.out_filename, 'w') as f: + status = self.debugger.SetErrorFile(f) + self.assertTrue(status.Success()) + self.handleCmd('lolwut', check=False, collect_result=False) + with open(self.out_filename, 'r') as f: + errors = f.read() + self.assertTrue(re.search(r'error:.*lolwut', errors)) + + + @add_test_categories(['pyapi']) + def test_exceptions(self): + self.assertRaises(Exception, lldb.SBFile, None) + self.assertRaises(Exception, lldb.SBFile, "ham sandwich") + if sys.version_info[0] < 3: + self.assertRaises(Exception, lldb.SBFile, ReallyBadIO()) + else: + self.assertRaises(OhNoe, lldb.SBFile, ReallyBadIO()) + error, n = lldb.SBFile(BadIO()).Write(b"FOO") + self.assertEqual(n, 0) + self.assertTrue(error.Fail()) + self.assertIn('OH NOE', error.GetCString()) + error, n = lldb.SBFile(BadIO()).Read(bytearray(100)) + self.assertEqual(n, 0) + self.assertTrue(error.Fail()) + self.assertIn('OH NOE', error.GetCString()) + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def 
test_exceptions_logged(self): + messages = list() + self.debugger.SetLoggingCallback(messages.append) + self.handleCmd('log enable lldb script') + self.debugger.SetOutputFile(lldb.SBFile(BadIO())) + self.handleCmd('script 1+1') + self.assertTrue(any('OH NOE' in msg for msg in messages)) + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_flush(self): + flushed = MutableBool(False) + closed = MutableBool(False) + f = FlushTestIO(flushed, closed) + self.assertFalse(flushed) + self.assertFalse(closed) + sbf = lldb.SBFile(f) + self.assertFalse(flushed) + self.assertFalse(closed) + sbf = None + self.assertFalse(flushed) + self.assertTrue(closed) + self.assertTrue(f.closed) + + flushed = MutableBool(False) + closed = MutableBool(False) + f = FlushTestIO(flushed, closed) + self.assertFalse(flushed) + self.assertFalse(closed) + sbf = lldb.SBFile.Create(f, borrow=True) + self.assertFalse(flushed) + self.assertFalse(closed) + sbf = None + self.assertTrue(flushed) + self.assertFalse(closed) + self.assertFalse(f.closed) + + + @add_test_categories(['pyapi']) + def test_fileno_flush(self): + with open(self.out_filename, 'w') as f: + f.write("foo") + sbf = lldb.SBFile(f) + sbf.Write(b'bar') + sbf = None + self.assertTrue(f.closed) + with open(self.out_filename, 'r') as f: + self.assertEqual(f.read(), 'foobar') + + with open(self.out_filename, 'w+') as f: + f.write("foo") + sbf = lldb.SBFile.Create(f, borrow=True) + sbf.Write(b'bar') + sbf = None + self.assertFalse(f.closed) + f.seek(0) + self.assertEqual(f.read(), 'foobar') + + + @add_test_categories(['pyapi']) + def test_close(self): + debugger = self.debugger + with open(self.out_filename, 'w') as f: + status = debugger.SetOutputFile(f) + self.assertTrue(status.Success()) + self.handleCmd("help help", check=False, collect_result=False) + # make sure the file wasn't closed early. + f.write("\nZAP\n") + lldb.SBDebugger.Destroy(debugger) + # check that output file was closed when debugger was destroyed. 
+ with self.assertRaises(ValueError): + f.write("\nQUUX\n") + with open(self.out_filename, 'r') as f: + output = f.read() + self.assertTrue(re.search(r'Show a list of all debugger commands', output)) + self.assertTrue(re.search(r'ZAP', output)) + + + @add_test_categories(['pyapi']) + @skipIf(py_version=['<', (3,)]) + def test_stdout(self): + f = io.StringIO() + status = self.debugger.SetOutputFile(f) + self.assertTrue(status.Success()) + self.handleCmd(r"script sys.stdout.write('foobar\n')") + self.assertEqual(f.getvalue().strip().split(), ["foobar", "7"]) + + + @add_test_categories(['pyapi']) + @expectedFailureAll() # FIXME implement SBFile::GetFile + @skipIf(py_version=['<', (3,)]) + def test_identity(self): + + f = io.StringIO() + sbf = lldb.SBFile(f) + self.assertTrue(f is sbf.GetFile()) + sbf.Close() + self.assertTrue(f.closed) + + f = io.StringIO() + sbf = lldb.SBFile.Create(f, borrow=True) + self.assertTrue(f is sbf.GetFile()) + sbf.Close() + self.assertFalse(f.closed) + + with open(self.out_filename, 'w') as f: + sbf = lldb.SBFile(f) + self.assertTrue(f is sbf.GetFile()) + sbf.Close() + self.assertTrue(f.closed) + + with open(self.out_filename, 'w') as f: + sbf = lldb.SBFile.Create(f, borrow=True) + self.assertFalse(f is sbf.GetFile()) + sbf.Write(b"foobar\n") + self.assertEqual(f.fileno(), sbf.GetFile().fileno()) + sbf.Close() + self.assertFalse(f.closed) + + with open(self.out_filename, 'r') as f: + self.assertEqual("foobar", f.read().strip()) + + with open(self.out_filename, 'wb') as f: + sbf = lldb.SBFile.Create(f, borrow=True, force_io_methods=True) + self.assertTrue(f is sbf.GetFile()) + sbf.Write(b"foobar\n") + self.assertEqual(f.fileno(), sbf.GetFile().fileno()) + sbf.Close() + self.assertFalse(f.closed) + + with open(self.out_filename, 'r') as f: + self.assertEqual("foobar", f.read().strip()) + + with open(self.out_filename, 'wb') as f: + sbf = lldb.SBFile.Create(f, force_io_methods=True) + self.assertTrue(f is sbf.GetFile()) + sbf.Write(b"foobar\n") + self.assertEqual(f.fileno(), sbf.GetFile().fileno()) + sbf.Close() + self.assertTrue(f.closed) + + with open(self.out_filename, 'r') as f: + self.assertEqual("foobar", f.read().strip()) diff --git a/lldb/packages/Python/lldbsuite/test/python_api/hello_world/TestHelloWorld.py b/lldb/packages/Python/lldbsuite/test/python_api/hello_world/TestHelloWorld.py index a762dc97e39a8..a94b19b35025c 100644 --- a/lldb/packages/Python/lldbsuite/test/python_api/hello_world/TestHelloWorld.py +++ b/lldb/packages/Python/lldbsuite/test/python_api/hello_world/TestHelloWorld.py @@ -78,17 +78,19 @@ def test_with_process_launch_api(self): @expectedFailureNetBSD def test_with_attach_to_process_with_id_api(self): """Create target, spawn a process, and attach to it with process id.""" - exe = '%s_%d'%(self.getBuildArtifact(self.testMethodName), os.getpid()) + exe = '%s_%d'%(self.testMethodName, os.getpid()) d = {'EXE': exe} self.build(dictionary=d) self.setTearDownCleanup(dictionary=d) - target = self.dbg.CreateTarget(exe) + target = self.dbg.CreateTarget(self.getBuildArtifact(exe)) # Spawn a new process token = exe+'.token' - if os.path.exists(token): - os.remove(token) - popen = self.spawnSubprocess(exe, [token]) + if not lldb.remote_platform: + token = self.getBuildArtifact(token) + if os.path.exists(token): + os.remove(token) + popen = self.spawnSubprocess(self.getBuildArtifact(exe), [token]) self.addTearDownHook(self.cleanupSubprocesses) lldbutil.wait_for_file_on_target(self, token) @@ -110,17 +112,19 @@ def 
test_with_attach_to_process_with_id_api(self): @expectedFailureNetBSD def test_with_attach_to_process_with_name_api(self): """Create target, spawn a process, and attach to it with process name.""" - exe = '%s_%d'%(self.getBuildArtifact(self.testMethodName), os.getpid()) + exe = '%s_%d'%(self.testMethodName, os.getpid()) d = {'EXE': exe} self.build(dictionary=d) self.setTearDownCleanup(dictionary=d) - target = self.dbg.CreateTarget(exe) + target = self.dbg.CreateTarget(self.getBuildArtifact(exe)) # Spawn a new process. token = exe+'.token' - if os.path.exists(token): - os.remove(token) - popen = self.spawnSubprocess(exe, [token]) + if not lldb.remote_platform: + token = self.getBuildArtifact(token) + if os.path.exists(token): + os.remove(token) + popen = self.spawnSubprocess(self.getBuildArtifact(exe), [token]) self.addTearDownHook(self.cleanupSubprocesses) lldbutil.wait_for_file_on_target(self, token) diff --git a/lldb/packages/Python/lldbsuite/test/python_api/interpreter/TestRunCommandInterpreterAPI.py b/lldb/packages/Python/lldbsuite/test/python_api/interpreter/TestRunCommandInterpreterAPI.py index a82fd7326d869..ede40134fe462 100644 --- a/lldb/packages/Python/lldbsuite/test/python_api/interpreter/TestRunCommandInterpreterAPI.py +++ b/lldb/packages/Python/lldbsuite/test/python_api/interpreter/TestRunCommandInterpreterAPI.py @@ -5,8 +5,46 @@ from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * +class CommandRunInterpreterLegacyAPICase(TestBase): + + NO_DEBUG_INFO_TESTCASE = True + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + TestBase.setUp(self) + + self.stdin_path = self.getBuildArtifact("stdin.txt") + + with open(self.stdin_path, 'w') as input_handle: + input_handle.write("nonexistingcommand\nquit") + + # Python will close the file descriptor if all references + # to the filehandle object lapse, so we need to keep one + # around. 
+ self.filehandle = open(self.stdin_path, 'r') + self.dbg.SetInputFileHandle(self.filehandle, False) + + # No need to track the output + self.devnull = open(os.devnull, 'w') + self.dbg.SetOutputFileHandle(self.devnull, False) + self.dbg.SetErrorFileHandle (self.devnull, False) + + @add_test_categories(['pyapi']) + def test_run_session_with_error_and_quit(self): + """Run non-existing and quit command returns appropriate values""" + + n_errors, quit_requested, has_crashed = self.dbg.RunCommandInterpreter( + True, False, lldb.SBCommandInterpreterRunOptions(), 0, False, + False) + + self.assertGreater(n_errors, 0) + self.assertTrue(quit_requested) + self.assertFalse(has_crashed) + + class CommandRunInterpreterAPICase(TestBase): + NO_DEBUG_INFO_TESTCASE = True mydir = TestBase.compute_mydir(__file__) def setUp(self): @@ -17,13 +55,12 @@ def setUp(self): with open(self.stdin_path, 'w') as input_handle: input_handle.write("nonexistingcommand\nquit") - with open(self.stdin_path, 'r') as input_handle: - self.dbg.SetInputFileHandle(input_handle, False) + self.dbg.SetInputFile(open(self.stdin_path, 'r')) # No need to track the output devnull = open(os.devnull, 'w') - self.dbg.SetOutputFileHandle(devnull, False) - self.dbg.SetErrorFileHandle(devnull, False) + self.dbg.SetOutputFile(devnull) + self.dbg.SetErrorFile(devnull) @add_test_categories(['pyapi']) def test_run_session_with_error_and_quit(self): diff --git a/lldb/scripts/Python/finishSwigPythonLLDB.py b/lldb/scripts/Python/finishSwigPythonLLDB.py index 902ad8af5dd33..d12c833a4e44b 100644 --- a/lldb/scripts/Python/finishSwigPythonLLDB.py +++ b/lldb/scripts/Python/finishSwigPythonLLDB.py @@ -365,6 +365,7 @@ def make_symlink_native(vDictArgs, strSrc, strTarget): # Throws: None. #-- + def make_symlink( vDictArgs, vstrFrameworkPythonDir, @@ -373,13 +374,27 @@ def make_symlink( dbg = utilsDebug.CDebugFnVerbose("Python script make_symlink()") strTarget = os.path.join(vstrFrameworkPythonDir, vstrTargetFile) strTarget = os.path.normcase(strTarget) - strPrefix = vDictArgs['--prefix'] + strSrc = "" os.chdir(vstrFrameworkPythonDir) + bMakeFileCalled = "-m" in vDictArgs + eOSType = utilsOsType.determine_os_type() + if not bMakeFileCalled: + strBuildDir = os.path.join("..", "..", "..") + else: + # Resolve vstrSrcFile path relatively the build directory + if eOSType == utilsOsType.EnumOsType.Windows: + # On a Windows platform the vstrFrameworkPythonDir looks like: + # llvm\\build\\Lib\\site-packages\\lldb + strBuildDir = os.path.join("..", "..", "..") + else: + # On a UNIX style platform the vstrFrameworkPythonDir looks like: + # llvm/build/lib/python2.7/site-packages/lldb + strBuildDir = os.path.join("..", "..", "..", "..") + strSrc = os.path.normcase(os.path.join(strBuildDir, vstrSrcFile)) + + return make_symlink_native(vDictArgs, strSrc, strTarget) - strSrc = os.path.normcase(os.path.join(strPrefix, vstrSrcFile)) - strRelSrc = os.path.relpath(strSrc, os.path.dirname(strTarget)) - return make_symlink_native(vDictArgs, strRelSrc, strTarget) #++--------------------------------------------------------------------------- # Details: Make the symbolic that the script bridge for Python will need in diff --git a/lldb/scripts/Python/python-typemaps.swig b/lldb/scripts/Python/python-typemaps.swig index 77ca1156ec580..9428c7c92ff57 100644 --- a/lldb/scripts/Python/python-typemaps.swig +++ b/lldb/scripts/Python/python-typemaps.swig @@ -372,6 +372,69 @@ bool SetNumberFromPyObject(double &number, PyObject *obj) { $1 = $1 || PyCallable_Check(reinterpret_cast($input)); } + 
+%typemap(in) lldb::FileSP { + using namespace lldb_private; + PythonFile py_file(PyRefType::Borrowed, $input); + if (!py_file) { + PyErr_SetString(PyExc_TypeError, "not a file"); + return nullptr; + } + auto sp = unwrapOrSetPythonException(py_file.ConvertToFile()); + if (!sp) + return nullptr; + $1 = sp; +} + +%typemap(in) lldb::FileSP FORCE_IO_METHODS { + using namespace lldb_private; + PythonFile py_file(PyRefType::Borrowed, $input); + if (!py_file) { + PyErr_SetString(PyExc_TypeError, "not a file"); + return nullptr; + } + auto sp = unwrapOrSetPythonException(py_file.ConvertToFileForcingUseOfScriptingIOMethods()); + if (!sp) + return nullptr; + $1 = sp; +} + +%typemap(in) lldb::FileSP BORROWED { + using namespace lldb_private; + PythonFile py_file(PyRefType::Borrowed, $input); + if (!py_file) { + PyErr_SetString(PyExc_TypeError, "not a file"); + return nullptr; + } + auto sp = unwrapOrSetPythonException(py_file.ConvertToFile(/*borrowed=*/true)); + if (!sp) + return nullptr; + $1 = sp; +} + +%typemap(in) lldb::FileSP BORROWED_FORCE_IO_METHODS { + using namespace lldb_private; + PythonFile py_file(PyRefType::Borrowed, $input); + if (!py_file) { + PyErr_SetString(PyExc_TypeError, "not a file"); + return nullptr; + } + auto sp = unwrapOrSetPythonException(py_file.ConvertToFileForcingUseOfScriptingIOMethods(/*borrowed=*/true)); + if (!sp) + return nullptr; + $1 = sp; +} + +%typecheck(SWIG_TYPECHECK_POINTER) lldb::FileSP { + if (lldb_private::PythonFile::Check($input)) { + $1 = 1; + } else { + PyErr_Clear(); + $1 = 0; + } +} + + // FIXME both of these paths wind up calling fdopen() with no provision for ever calling // fclose() on the result. SB interfaces that use FILE* should be deprecated for scripting // use and this typemap should eventually be removed. 
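From the scripting side, the lldb::FileSP typemaps added above are what allow a plain Python file object to be passed to SB API methods; the BORROWED variants convert the object without taking ownership of it. A minimal sketch of the two behaviours, assuming a built LLDB Python module is importable and mirroring test_sbfile_write / test_sbfile_write_borrowed from the test changes in this patch (the helper name is illustrative only):

import lldb  # assumes a built LLDB Python module on sys.path

def ownership_sketch(path):
    """Owned vs. borrowed conversion of a Python file into an SBFile."""
    # Plain lldb::FileSP conversion: the SBFile takes over the Python file,
    # so closing the SBFile also closes the underlying file object.
    f = open(path, "w")
    sbf = lldb.SBFile(f)
    error, n = sbf.Write(b"FOO\n")
    assert error.Success() and n == 4
    sbf.Close()
    assert f.closed

    # BORROWED conversion (SBFile.Create(..., borrow=True), added to
    # SBFile.i later in this patch): the Python file object stays open
    # and remains usable after the SBFile is closed.
    f = open(path, "w")
    sbf = lldb.SBFile.Create(f, borrow=True)
    sbf.Write(b"BAR\n")
    sbf.Close()
    assert not f.closed
    f.write("still writable\n")
    f.close()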
@@ -381,19 +444,19 @@ bool SetNumberFromPyObject(double &number, PyObject *obj) { $1 = nullptr; else if (!lldb_private::PythonFile::Check($input)) { int fd = PyObject_AsFileDescriptor($input); + if (fd < 0 || PyErr_Occurred()) + return nullptr; PythonObject py_input(PyRefType::Borrowed, $input); PythonString py_mode = py_input.GetAttributeValue("mode").AsType(); - - if (-1 != fd && py_mode.IsValid()) { - FILE *f; - if ((f = fdopen(fd, py_mode.GetString().str().c_str()))) - $1 = f; - else - PyErr_SetString(PyExc_TypeError, strerror(errno)); - } else { - PyErr_SetString(PyExc_TypeError,"not a file-like object"); - return nullptr; - } + if (!py_mode.IsValid() || PyErr_Occurred()) + return nullptr; + FILE *f; + if ((f = fdopen(fd, py_mode.GetString().str().c_str()))) + $1 = f; + else { + PyErr_SetString(PyExc_TypeError, strerror(errno)); + return nullptr; + } } else { diff --git a/lldb/scripts/interface/SBCommandReturnObject.i b/lldb/scripts/interface/SBCommandReturnObject.i index 1e04a8fa1bf41..73d4001aaba59 100644 --- a/lldb/scripts/interface/SBCommandReturnObject.i +++ b/lldb/scripts/interface/SBCommandReturnObject.i @@ -49,10 +49,16 @@ public: GetError (bool if_no_immediate); size_t - PutOutput (FILE *fh); + PutOutput (lldb::SBFile file); size_t - PutError (FILE *fh); + PutError (lldb::SBFile file); + + size_t + PutOutput (lldb::FileSP BORROWED); + + size_t + PutError (lldb::FileSP BORROWED); void Clear(); @@ -85,15 +91,20 @@ public: bool GetDescription (lldb::SBStream &description); + void SetImmediateOutputFile(lldb::SBFile file); + void SetImmediateErrorFile(lldb::SBFile file); + void SetImmediateOutputFile(lldb::FileSP BORROWED); + void SetImmediateErrorFile(lldb::FileSP BORROWED); - // wrapping here so that lldb takes ownership of the - // new FILE* created inside of the swig interface %extend { - void SetImmediateOutputFile(FILE *fh) { - self->SetImmediateOutputFile(fh, true); + // transfer_ownership does nothing, and is here for compatibility with + // old scripts. Ownership is tracked by reference count in the ordinary way. 
+ + void SetImmediateOutputFile(lldb::FileSP BORROWED, bool transfer_ownership) { + self->SetImmediateOutputFile(BORROWED); } - void SetImmediateErrorFile(FILE *fh) { - self->SetImmediateErrorFile(fh, true); + void SetImmediateErrorFile(lldb::FileSP BORROWED, bool transfer_ownership) { + self->SetImmediateErrorFile(BORROWED); } } diff --git a/lldb/scripts/interface/SBDebugger.i b/lldb/scripts/interface/SBDebugger.i index c4eb11ea35670..208b036e8af2e 100644 --- a/lldb/scripts/interface/SBDebugger.i +++ b/lldb/scripts/interface/SBDebugger.i @@ -165,21 +165,27 @@ public: void SkipLLDBInitFiles (bool b); + %feature("autodoc", "DEPRECATED, use SetInputFile"); void SetInputFileHandle (FILE *f, bool transfer_ownership); + %feature("autodoc", "DEPRECATED, use SetOutputFile"); void SetOutputFileHandle (FILE *f, bool transfer_ownership); + %feature("autodoc", "DEPRECATED, use SetErrorFile"); void SetErrorFileHandle (FILE *f, bool transfer_ownership); + %feature("autodoc", "DEPRECATED, use GetInputFile"); FILE * GetInputFileHandle (); + %feature("autodoc", "DEPRECATED, use GetOutputFile"); FILE * GetOutputFileHandle (); + %feature("autodoc", "DEPRECATED, use GetErrorFile"); FILE * GetErrorFileHandle (); @@ -192,6 +198,15 @@ public: SBError SetErrorFile (SBFile file); + SBError + SetInputFile (FileSP file); + + SBError + SetOutputFile (FileSP file); + + SBError + SetErrorFile (FileSP file); + SBFile GetInputFile (); diff --git a/lldb/scripts/interface/SBFile.i b/lldb/scripts/interface/SBFile.i index 6cdb192f26ed2..179446d5a539e 100644 --- a/lldb/scripts/interface/SBFile.i +++ b/lldb/scripts/interface/SBFile.i @@ -15,9 +15,53 @@ namespace lldb { class SBFile { public: + SBFile(); + + %feature("docstring", " + Initialize a SBFile from a file descriptor. mode is + 'r', 'r+', or 'w', like fdopen."); SBFile(int fd, const char *mode, bool transfer_ownership); + %feature("docstring", "initialize a SBFile from a python file object"); + SBFile(FileSP file); + + %extend { + static lldb::SBFile MakeBorrowed(lldb::FileSP BORROWED) { + return lldb::SBFile(BORROWED); + } + static lldb::SBFile MakeForcingIOMethods(lldb::FileSP FORCE_IO_METHODS) { + return lldb::SBFile(FORCE_IO_METHODS); + } + static lldb::SBFile MakeBorrowedForcingIOMethods(lldb::FileSP BORROWED_FORCE_IO_METHODS) { + return lldb::SBFile(BORROWED_FORCE_IO_METHODS); + } + } + + %pythoncode { + @classmethod + def Create(cls, file, borrow=False, force_io_methods=False): + """ + Create a SBFile from a python file object, with options. + + If borrow is set then the underlying file will + not be closed when the SBFile is closed or destroyed. + + If force_scripting_io is set then the python read/write + methods will be called even if a file descriptor is available. 
+ """ + if borrow: + if force_io_methods: + return cls.MakeBorrowedForcingIOMethods(file) + else: + return cls.MakeBorrowed(file) + else: + if force_io_methods: + return cls.MakeForcingIOMethods(file) + else: + return cls(file) + } + ~SBFile (); %feature("autodoc", "Read(buffer) -> SBError, bytes_read") Read; diff --git a/lldb/source/API/SBCommandReturnObject.cpp b/lldb/source/API/SBCommandReturnObject.cpp index 39c165cf6ebdd..02e8ea5776110 100644 --- a/lldb/source/API/SBCommandReturnObject.cpp +++ b/lldb/source/API/SBCommandReturnObject.cpp @@ -10,6 +10,7 @@ #include "SBReproducerPrivate.h" #include "Utils.h" #include "lldb/API/SBError.h" +#include "lldb/API/SBFile.h" #include "lldb/API/SBStream.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Utility/ConstString.h" @@ -116,7 +117,6 @@ size_t SBCommandReturnObject::GetErrorSize() { size_t SBCommandReturnObject::PutOutput(FILE *fh) { LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (FILE *), fh); - if (fh) { size_t num_bytes = GetOutputSize(); if (num_bytes) @@ -125,6 +125,21 @@ size_t SBCommandReturnObject::PutOutput(FILE *fh) { return 0; } +size_t SBCommandReturnObject::PutOutput(FileSP file_sp) { + LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (FileSP), + file_sp); + if (!file_sp) + return 0; + return file_sp->Printf("%s", GetOutput()); +} + +size_t SBCommandReturnObject::PutOutput(SBFile file) { + LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (SBFile), file); + if (!file.m_opaque_sp) + return 0; + return file.m_opaque_sp->Printf("%s", GetOutput()); +} + size_t SBCommandReturnObject::PutError(FILE *fh) { LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (FILE *), fh); @@ -136,6 +151,21 @@ size_t SBCommandReturnObject::PutError(FILE *fh) { return 0; } +size_t SBCommandReturnObject::PutError(FileSP file_sp) { + LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (FileSP), + file_sp); + if (!file_sp) + return 0; + return file_sp->Printf("%s", GetError()); +} + +size_t SBCommandReturnObject::PutError(SBFile file) { + LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (SBFile), file); + if (!file.m_opaque_sp) + return 0; + return file.m_opaque_sp->Printf("%s", GetError()); +} + void SBCommandReturnObject::Clear() { LLDB_RECORD_METHOD_NO_ARGS(void, SBCommandReturnObject, Clear); @@ -242,16 +272,40 @@ void SBCommandReturnObject::SetImmediateOutputFile(FILE *fh, bool transfer_ownership) { LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, (FILE *, bool), fh, transfer_ownership); - - ref().SetImmediateOutputFile(fh, transfer_ownership); + FileSP file = std::make_shared(fh, transfer_ownership); + ref().SetImmediateOutputFile(file); } void SBCommandReturnObject::SetImmediateErrorFile(FILE *fh, bool transfer_ownership) { LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, (FILE *, bool), fh, transfer_ownership); + FileSP file = std::make_shared(fh, transfer_ownership); + ref().SetImmediateErrorFile(file); +} - ref().SetImmediateErrorFile(fh, transfer_ownership); +void SBCommandReturnObject::SetImmediateOutputFile(SBFile file) { + LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, + (SBFile), file); + ref().SetImmediateOutputFile(file.m_opaque_sp); +} + +void SBCommandReturnObject::SetImmediateErrorFile(SBFile file) { + LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, + (SBFile), file); + ref().SetImmediateErrorFile(file.m_opaque_sp); +} + +void 
SBCommandReturnObject::SetImmediateOutputFile(FileSP file_sp) { + LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, + (FileSP), file_sp); + SetImmediateOutputFile(SBFile(file_sp)); +} + +void SBCommandReturnObject::SetImmediateErrorFile(FileSP file_sp) { + LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, + (FileSP), file_sp); + SetImmediateErrorFile(SBFile(file_sp)); } void SBCommandReturnObject::PutCString(const char *string, int len) { @@ -335,6 +389,10 @@ void RegisterMethods(Registry &R) { LLDB_REGISTER_METHOD(size_t, SBCommandReturnObject, GetErrorSize, ()); LLDB_REGISTER_METHOD(size_t, SBCommandReturnObject, PutOutput, (FILE *)); LLDB_REGISTER_METHOD(size_t, SBCommandReturnObject, PutError, (FILE *)); + LLDB_REGISTER_METHOD(size_t, SBCommandReturnObject, PutOutput, (SBFile)); + LLDB_REGISTER_METHOD(size_t, SBCommandReturnObject, PutError, (SBFile)); + LLDB_REGISTER_METHOD(size_t, SBCommandReturnObject, PutOutput, (FileSP)); + LLDB_REGISTER_METHOD(size_t, SBCommandReturnObject, PutError, (FileSP)); LLDB_REGISTER_METHOD(void, SBCommandReturnObject, Clear, ()); LLDB_REGISTER_METHOD(lldb::ReturnStatus, SBCommandReturnObject, GetStatus, ()); @@ -352,6 +410,14 @@ void RegisterMethods(Registry &R) { (FILE *)); LLDB_REGISTER_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, (FILE *)); + LLDB_REGISTER_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, + (SBFile)); + LLDB_REGISTER_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, + (SBFile)); + LLDB_REGISTER_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, + (FileSP)); + LLDB_REGISTER_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, + (FileSP)); LLDB_REGISTER_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, (FILE *, bool)); LLDB_REGISTER_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp index 85c8e0a89038c..1da6ed26b2475 100644 --- a/lldb/source/API/SBDebugger.cpp +++ b/lldb/source/API/SBDebugger.cpp @@ -292,6 +292,11 @@ void SBDebugger::SetInputFileHandle(FILE *fh, bool transfer_ownership) { SetInputFile((FileSP)std::make_shared(fh, transfer_ownership)); } +SBError SBDebugger::SetInputFile(FileSP file_sp) { + LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (FileSP), file_sp); + return SetInputFile(SBFile(file_sp)); +} + // Shouldn't really be settable after initialization as this could cause lots // of problems; don't want users trying to switch modes in the middle of a // debugging session. 
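Taken together, the new FileSP overloads on SBDebugger and SBCommandReturnObject above let scripts capture output in ordinary Python streams instead of FILE* handles. A minimal usage sketch, assuming Python 3 and a built LLDB module; the function name is illustrative, and it mirrors test_string_out and test_immediate_string from TestFileHandle.py in this patch. Call it with a debugger created via lldb.SBDebugger.Create().

import io
import lldb  # assumes a built LLDB Python module on sys.path (Python 3)

def filesp_overloads_sketch(debugger):
    """Capture debugger output and one command's immediate output in StringIO."""
    # SBDebugger::SetOutputFile(FileSP): route debugger output to a
    # Python text stream.
    out = io.StringIO()
    assert debugger.SetOutputFile(out).Success()
    debugger.HandleCommand("script 2+2")
    debugger.GetOutputFile().Flush()
    print(out.getvalue())          # expect "4" in the captured output

    # SBCommandReturnObject::SetImmediateOutputFile(FileSP): capture a single
    # command's output in a separate buffer; the stream is borrowed, so it
    # stays open and usable afterwards.
    immediate = io.StringIO()
    ret = lldb.SBCommandReturnObject()
    ret.SetImmediateOutputFile(immediate)
    debugger.GetCommandInterpreter().HandleCommand("help help", ret)
    ret = None                     # drop the result object to flush streams
    print(immediate.getvalue())    # contains the help text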
@@ -332,6 +337,11 @@ SBError SBDebugger::SetInputFile(SBFile file) { return error; } +SBError SBDebugger::SetOutputFile(FileSP file_sp) { + LLDB_RECORD_METHOD(SBError, SBDebugger, SetOutputFile, (FileSP), file_sp); + return SetOutputFile(SBFile(file_sp)); +} + void SBDebugger::SetOutputFileHandle(FILE *fh, bool transfer_ownership) { LLDB_RECORD_METHOD(void, SBDebugger, SetOutputFileHandle, (FILE *, bool), fh, transfer_ownership); @@ -359,6 +369,11 @@ void SBDebugger::SetErrorFileHandle(FILE *fh, bool transfer_ownership) { SetErrorFile((FileSP)std::make_shared(fh, transfer_ownership)); } +SBError SBDebugger::SetErrorFile(FileSP file_sp) { + LLDB_RECORD_METHOD(SBError, SBDebugger, SetErrorFile, (FileSP), file_sp); + return SetErrorFile(SBFile(file_sp)); +} + SBError SBDebugger::SetErrorFile(SBFile file) { LLDB_RECORD_METHOD(SBError, SBDebugger, SetErrorFile, (SBFile file), file); SBError error; @@ -467,10 +482,8 @@ void SBDebugger::HandleCommand(const char *command) { sb_interpreter.HandleCommand(command, result, false); - if (GetErrorFileHandle() != nullptr) - result.PutError(GetErrorFileHandle()); - if (GetOutputFileHandle() != nullptr) - result.PutOutput(GetOutputFileHandle()); + result.PutError(m_opaque_sp->GetErrorStream().GetFileSP()); + result.PutOutput(m_opaque_sp->GetOutputStream().GetFileSP()); if (!m_opaque_sp->GetAsyncExecution()) { SBProcess process(GetCommandInterpreter().GetProcess()); @@ -1578,6 +1591,8 @@ static void SetFileHandleRedirect(SBDebugger *, FILE *, bool) { static SBError SetFileRedirect(SBDebugger *, SBFile file) { return SBError(); } +static SBError SetFileRedirect(SBDebugger *, FileSP file) { return SBError(); } + static bool GetDefaultArchitectureRedirect(char *arch_name, size_t arch_name_len) { // The function is writing to its argument. 
Without the redirect it would @@ -1608,6 +1623,16 @@ template <> void RegisterMethods(Registry &R) { SBFile)>::method<&SBDebugger::SetErrorFile>::doit, &SetFileRedirect); + R.Register(&invoke::method<&SBDebugger::SetInputFile>::doit, + &SetFileRedirect); + R.Register(&invoke::method<&SBDebugger::SetOutputFile>::doit, + &SetFileRedirect); + R.Register(&invoke::method<&SBDebugger::SetErrorFile>::doit, + &SetFileRedirect); + LLDB_REGISTER_CONSTRUCTOR(SBDebugger, ()); LLDB_REGISTER_CONSTRUCTOR(SBDebugger, (const lldb::DebuggerSP &)); LLDB_REGISTER_CONSTRUCTOR(SBDebugger, (const lldb::SBDebugger &)); diff --git a/lldb/source/API/SBFile.cpp b/lldb/source/API/SBFile.cpp index 07122916d23ac..5c003bc387942 100644 --- a/lldb/source/API/SBFile.cpp +++ b/lldb/source/API/SBFile.cpp @@ -16,17 +16,20 @@ using namespace lldb_private; SBFile::~SBFile() {} -SBFile::SBFile(FileSP file_sp) : m_opaque_sp(file_sp) {} +SBFile::SBFile(FileSP file_sp) : m_opaque_sp(file_sp) { + LLDB_RECORD_DUMMY(void, SBfile, SBFile, (FileSP), file_sp); +} SBFile::SBFile() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFile); } SBFile::SBFile(FILE *file, bool transfer_ownership) { + LLDB_RECORD_DUMMY(void, SBFile, (FILE *, bool), file, transfer_ownership); m_opaque_sp = std::make_shared(file, transfer_ownership); } SBFile::SBFile(int fd, const char *mode, bool transfer_owndership) { - LLDB_RECORD_CONSTRUCTOR(SBFile, (int, const char *, bool), fd, mode, - transfer_owndership); + LLDB_RECORD_DUMMY(void, SBFile, (int, const char *, bool), fd, mode, + transfer_owndership); auto options = File::GetOptionsFromMode(mode); m_opaque_sp = std::make_shared(fd, options, transfer_owndership); } @@ -102,9 +105,9 @@ bool SBFile::operator!() const { namespace lldb_private { namespace repro { + template <> void RegisterMethods(Registry &R) { - LLDB_REGISTER_CONSTRUCTOR(SBFile, ()); - LLDB_REGISTER_CONSTRUCTOR(SBFile, (int, const char *, bool)); + LLDB_REGISTER_METHOD(lldb::SBError, SBFile, Flush, ()); LLDB_REGISTER_METHOD_CONST(bool, SBFile, IsValid, ()); LLDB_REGISTER_METHOD_CONST(bool, SBFile, operator bool,()); diff --git a/lldb/source/Breakpoint/BreakpointResolver.cpp b/lldb/source/Breakpoint/BreakpointResolver.cpp index 849da4b337ba0..e0a4e6ac67126 100644 --- a/lldb/source/Breakpoint/BreakpointResolver.cpp +++ b/lldb/source/Breakpoint/BreakpointResolver.cpp @@ -34,7 +34,8 @@ using namespace lldb; // BreakpointResolver: const char *BreakpointResolver::g_ty_to_name[] = {"FileAndLine", "Address", "SymbolName", "SourceRegex", - "Exception", "Unknown"}; + "Python", "Exception", + "Unknown"}; const char *BreakpointResolver::g_option_names[static_cast( BreakpointResolver::OptionNames::LastOptionName)] = { diff --git a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp index cc14a134e9062..b98568098b4b5 100644 --- a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp @@ -120,10 +120,8 @@ void BreakpointResolverAddress::ResolveBreakpointInModules( BreakpointResolver::ResolveBreakpointInModules(filter, modules); } -Searcher::CallbackReturn -BreakpointResolverAddress::SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) { +Searcher::CallbackReturn BreakpointResolverAddress::SearchCallback( + SearchFilter &filter, SymbolContext &context, Address *addr) { assert(m_breakpoint != nullptr); if (filter.AddressPasses(m_addr)) { diff --git a/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp 
b/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp index a6095be316471..2b26f65816bd4 100644 --- a/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp @@ -198,10 +198,8 @@ void BreakpointResolverFileLine::FilterContexts(SymbolContextList &sc_list, } } -Searcher::CallbackReturn -BreakpointResolverFileLine::SearchCallback(SearchFilter &filter, - SymbolContext &context, - Address *addr, bool containing) { +Searcher::CallbackReturn BreakpointResolverFileLine::SearchCallback( + SearchFilter &filter, SymbolContext &context, Address *addr) { SymbolContextList sc_list; assert(m_breakpoint != nullptr); diff --git a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp index 94135a59f9215..3cb04263c6dcb 100644 --- a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp @@ -94,10 +94,8 @@ BreakpointResolverFileRegex::SerializeToStructuredData() { return WrapOptionsDict(options_dict_sp); } -Searcher::CallbackReturn -BreakpointResolverFileRegex::SearchCallback(SearchFilter &filter, - SymbolContext &context, - Address *addr, bool containing) { +Searcher::CallbackReturn BreakpointResolverFileRegex::SearchCallback( + SearchFilter &filter, SymbolContext &context, Address *addr) { assert(m_breakpoint != nullptr); if (!context.target_sp) diff --git a/lldb/source/Breakpoint/BreakpointResolverName.cpp b/lldb/source/Breakpoint/BreakpointResolverName.cpp index c3c05b3b7768d..59b74dc296441 100644 --- a/lldb/source/Breakpoint/BreakpointResolverName.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverName.cpp @@ -250,8 +250,7 @@ void BreakpointResolverName::AddNameLookup(ConstString name, Searcher::CallbackReturn BreakpointResolverName::SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) { + SymbolContext &context, Address *addr) { SymbolContextList func_list; // SymbolContextList sym_list; diff --git a/lldb/source/Breakpoint/BreakpointResolverScripted.cpp b/lldb/source/Breakpoint/BreakpointResolverScripted.cpp index c6833ae101a4c..288fd37c1c798 100644 --- a/lldb/source/Breakpoint/BreakpointResolverScripted.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverScripted.cpp @@ -29,8 +29,7 @@ BreakpointResolverScripted::BreakpointResolverScripted( Breakpoint *bkpt, const llvm::StringRef class_name, lldb::SearchDepth depth, - StructuredDataImpl *args_data, - ScriptInterpreter &script_interp) + StructuredDataImpl *args_data) : BreakpointResolver(bkpt, BreakpointResolver::PythonResolver), m_class_name(class_name), m_depth(depth), m_args_ptr(args_data) { CreateImplementationIfNeeded(); @@ -68,45 +67,25 @@ BreakpointResolverScripted::CreateFromStructuredData( llvm::StringRef class_name; bool success; - if (!bkpt) - return nullptr; - success = options_dict.GetValueForKeyAsString( GetKey(OptionNames::PythonClassName), class_name); if (!success) { error.SetErrorString("BRFL::CFSD: Couldn't find class name entry."); return nullptr; } - lldb::SearchDepth depth; - int depth_as_int; - success = options_dict.GetValueForKeyAsInteger( - GetKey(OptionNames::SearchDepth), depth_as_int); - if (!success) { - error.SetErrorString("BRFL::CFSD: Couldn't find class name entry."); - return nullptr; - } - if (depth_as_int >= (int) OptionNames::LastOptionName) { - error.SetErrorString("BRFL::CFSD: Invalid value for search depth."); - return nullptr; - } - depth = (lldb::SearchDepth) depth_as_int; + // The Python 
function will actually provide the search depth, this is a + // placeholder. + lldb::SearchDepth depth = lldb::eSearchDepthTarget; StructuredDataImpl *args_data_impl = new StructuredDataImpl(); StructuredData::Dictionary *args_dict = nullptr; success = options_dict.GetValueForKeyAsDictionary( GetKey(OptionNames::ScriptArgs), args_dict); if (success) { - // FIXME: The resolver needs a copy of the ARGS dict that it can own, - // so I need to make a copy constructor for the Dictionary so I can pass - // that to it here. For now the args are empty. - //StructuredData::Dictionary *dict_copy = new StructuredData::Dictionary(args_dict); - + args_data_impl->SetObjectSP(args_dict->shared_from_this()); } - ScriptInterpreter *script_interp = bkpt->GetTarget() - .GetDebugger() - .GetScriptInterpreter(); - return new BreakpointResolverScripted(bkpt, class_name, depth, args_data_impl, - *script_interp); + return new BreakpointResolverScripted(bkpt, class_name, depth, + args_data_impl); } StructuredData::ObjectSP @@ -116,6 +95,10 @@ BreakpointResolverScripted::SerializeToStructuredData() { options_dict_sp->AddStringItem(GetKey(OptionNames::PythonClassName), m_class_name); + if (m_args_ptr->IsValid()) + options_dict_sp->AddItem(GetKey(OptionNames::ScriptArgs), + m_args_ptr->GetObjectSP()); + return WrapOptionsDict(options_dict_sp); } @@ -123,10 +106,8 @@ ScriptInterpreter *BreakpointResolverScripted::GetScriptInterpreter() { return m_breakpoint->GetTarget().GetDebugger().GetScriptInterpreter(); } -Searcher::CallbackReturn -BreakpointResolverScripted::SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) { +Searcher::CallbackReturn BreakpointResolverScripted::SearchCallback( + SearchFilter &filter, SymbolContext &context, Address *addr) { assert(m_breakpoint != nullptr); bool should_continue = true; if (!m_implementation_sp) @@ -173,11 +154,10 @@ void BreakpointResolverScripted::Dump(Stream *s) const {} lldb::BreakpointResolverSP BreakpointResolverScripted::CopyForBreakpoint(Breakpoint &breakpoint) { - ScriptInterpreter *script_interp = GetScriptInterpreter(); // FIXME: Have to make a copy of the arguments from the m_args_ptr and then // pass that to the new resolver. 
lldb::BreakpointResolverSP ret_sp( - new BreakpointResolverScripted(&breakpoint, m_class_name, - m_depth, nullptr, *script_interp)); + new BreakpointResolverScripted(&breakpoint, m_class_name, m_depth, + nullptr)); return ret_sp; } diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp index 00ba108f3889a..43354c7e35dd7 100644 --- a/lldb/source/Commands/CommandCompletions.cpp +++ b/lldb/source/Commands/CommandCompletions.cpp @@ -356,8 +356,7 @@ lldb::SearchDepth CommandCompletions::SourceFileCompleter::GetDepth() { Searcher::CallbackReturn CommandCompletions::SourceFileCompleter::SearchCallback(SearchFilter &filter, SymbolContext &context, - Address *addr, - bool complete) { + Address *addr) { if (context.comp_unit != nullptr) { if (m_include_support_files) { FileSpecList supporting_files = context.comp_unit->GetSupportFiles(); @@ -443,8 +442,7 @@ lldb::SearchDepth CommandCompletions::SymbolCompleter::GetDepth() { } Searcher::CallbackReturn CommandCompletions::SymbolCompleter::SearchCallback( - SearchFilter &filter, SymbolContext &context, Address *addr, - bool complete) { + SearchFilter &filter, SymbolContext &context, Address *addr) { if (context.module_sp) { SymbolContextList sc_list; const bool include_symbols = true; @@ -491,8 +489,7 @@ lldb::SearchDepth CommandCompletions::ModuleCompleter::GetDepth() { } Searcher::CallbackReturn CommandCompletions::ModuleCompleter::SearchCallback( - SearchFilter &filter, SymbolContext &context, Address *addr, - bool complete) { + SearchFilter &filter, SymbolContext &context, Address *addr) { if (context.module_sp) { const char *cur_file_name = context.module_sp->GetFileSpec().GetFilename().GetCString(); diff --git a/lldb/source/Commands/CommandObjectReproducer.cpp b/lldb/source/Commands/CommandObjectReproducer.cpp index 404702d3640e4..424595fc0bd75 100644 --- a/lldb/source/Commands/CommandObjectReproducer.cpp +++ b/lldb/source/Commands/CommandObjectReproducer.cpp @@ -88,7 +88,7 @@ class CommandObjectReproducerGenerate : public CommandObjectParsed { auto &r = Reproducer::Instance(); if (auto generator = r.GetGenerator()) { generator->Keep(); - } else if (r.GetLoader()) { + } else if (r.IsReplaying()) { // Make this operation a NOP in replay mode. 
result.SetStatus(eReturnStatusSuccessFinishNoResult); return result.Succeeded(); @@ -132,9 +132,9 @@ class CommandObjectReproducerStatus : public CommandObjectParsed { } auto &r = Reproducer::Instance(); - if (r.GetGenerator()) { + if (r.IsCapturing()) { result.GetOutputStream() << "Reproducer is in capture mode.\n"; - } else if (r.GetLoader()) { + } else if (r.IsReplaying()) { result.GetOutputStream() << "Reproducer is in replay mode.\n"; } else { result.GetOutputStream() << "Reproducer is off.\n"; diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 566de9ba071b0..8a768e513eb11 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -3445,6 +3445,25 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed { result.GetOutputStream().Printf("\n"); } + UnwindPlanSP of_unwind_sp = + func_unwinders_sp->GetObjectFileUnwindPlan(*target); + if (of_unwind_sp) { + result.GetOutputStream().Printf("object file UnwindPlan:\n"); + of_unwind_sp->Dump(result.GetOutputStream(), thread.get(), + LLDB_INVALID_ADDRESS); + result.GetOutputStream().Printf("\n"); + } + + UnwindPlanSP of_unwind_augmented_sp = + func_unwinders_sp->GetObjectFileAugmentedUnwindPlan(*target, + *thread); + if (of_unwind_augmented_sp) { + result.GetOutputStream().Printf("object file augmented UnwindPlan:\n"); + of_unwind_augmented_sp->Dump(result.GetOutputStream(), thread.get(), + LLDB_INVALID_ADDRESS); + result.GetOutputStream().Printf("\n"); + } + UnwindPlanSP ehframe_sp = func_unwinders_sp->GetEHFrameUnwindPlan(*target); if (ehframe_sp) { diff --git a/lldb/source/Core/AddressResolverFileLine.cpp b/lldb/source/Core/AddressResolverFileLine.cpp index 33a7bb186eb03..4a14260c6c72f 100644 --- a/lldb/source/Core/AddressResolverFileLine.cpp +++ b/lldb/source/Core/AddressResolverFileLine.cpp @@ -38,8 +38,7 @@ AddressResolverFileLine::~AddressResolverFileLine() {} Searcher::CallbackReturn AddressResolverFileLine::SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) { + SymbolContext &context, Address *addr) { SymbolContextList sc_list; uint32_t sc_list_size; CompileUnit *cu = context.comp_unit; diff --git a/lldb/source/Core/AddressResolverName.cpp b/lldb/source/Core/AddressResolverName.cpp index 089f0da440059..665d6aa68b4c2 100644 --- a/lldb/source/Core/AddressResolverName.cpp +++ b/lldb/source/Core/AddressResolverName.cpp @@ -67,8 +67,7 @@ AddressResolverName::~AddressResolverName() = default; Searcher::CallbackReturn AddressResolverName::SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) { + SymbolContext &context, Address *addr) { SymbolContextList func_list; SymbolContextList sym_list; diff --git a/lldb/source/Core/FileLineResolver.cpp b/lldb/source/Core/FileLineResolver.cpp index 3cba1c7e81432..01df295398a83 100644 --- a/lldb/source/Core/FileLineResolver.cpp +++ b/lldb/source/Core/FileLineResolver.cpp @@ -33,7 +33,7 @@ FileLineResolver::~FileLineResolver() {} Searcher::CallbackReturn FileLineResolver::SearchCallback(SearchFilter &filter, SymbolContext &context, - Address *addr, bool containing) { + Address *addr) { CompileUnit *cu = context.comp_unit; if (m_inlines || diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp index 350a538350777..d3152981a5675 100644 --- a/lldb/source/Core/IOHandler.cpp +++ b/lldb/source/Core/IOHandler.cpp @@ -329,10 +329,9 @@ bool 
IOHandlerEditline::GetLine(std::string &line, bool &interrupted) { prompt = GetPrompt(); if (prompt && prompt[0]) { - FILE *out = GetOutputFILE(); - if (out) { - ::fprintf(out, "%s", prompt); - ::fflush(out); + if (m_output_sp) { + m_output_sp->Printf("%s", prompt); + m_output_sp->Flush(); } } } @@ -491,10 +490,11 @@ bool IOHandlerEditline::GetLines(StringList &lines, bool &interrupted) { // Show line numbers if we are asked to std::string line; if (m_base_line_number > 0 && GetIsInteractive()) { - FILE *out = GetOutputFILE(); - if (out) - ::fprintf(out, "%u%s", m_base_line_number + (uint32_t)lines.GetSize(), - GetPrompt() == nullptr ? " " : ""); + if (m_output_sp) { + m_output_sp->Printf("%u%s", + m_base_line_number + (uint32_t)lines.GetSize(), + GetPrompt() == nullptr ? " " : ""); + } } m_curr_line_idx = lines.GetSize(); diff --git a/lldb/source/Core/SearchFilter.cpp b/lldb/source/Core/SearchFilter.cpp index 22c8997ed4602..e02b4f66b58c2 100644 --- a/lldb/source/Core/SearchFilter.cpp +++ b/lldb/source/Core/SearchFilter.cpp @@ -209,7 +209,7 @@ void SearchFilter::Search(Searcher &searcher) { empty_sc.target_sp = m_target_sp; if (searcher.GetDepth() == lldb::eSearchDepthTarget) - searcher.SearchCallback(*this, empty_sc, nullptr, false); + searcher.SearchCallback(*this, empty_sc, nullptr); else DoModuleIteration(empty_sc, searcher); } @@ -222,7 +222,7 @@ void SearchFilter::SearchInModuleList(Searcher &searcher, ModuleList &modules) { empty_sc.target_sp = m_target_sp; if (searcher.GetDepth() == lldb::eSearchDepthTarget) - searcher.SearchCallback(*this, empty_sc, nullptr, false); + searcher.SearchCallback(*this, empty_sc, nullptr); else { std::lock_guard guard(modules.GetMutex()); const size_t numModules = modules.GetSize(); @@ -252,7 +252,7 @@ SearchFilter::DoModuleIteration(const SymbolContext &context, if (context.module_sp) { if (searcher.GetDepth() == lldb::eSearchDepthModule) { SymbolContext matchingContext(context.module_sp.get()); - searcher.SearchCallback(*this, matchingContext, nullptr, false); + searcher.SearchCallback(*this, matchingContext, nullptr); } else { return DoCUIteration(context.module_sp, context, searcher); } @@ -272,7 +272,7 @@ SearchFilter::DoModuleIteration(const SymbolContext &context, SymbolContext matchingContext(m_target_sp, module_sp); Searcher::CallbackReturn shouldContinue = - searcher.SearchCallback(*this, matchingContext, nullptr, false); + searcher.SearchCallback(*this, matchingContext, nullptr); if (shouldContinue == Searcher::eCallbackReturnStop || shouldContinue == Searcher::eCallbackReturnPop) return shouldContinue; @@ -306,7 +306,7 @@ SearchFilter::DoCUIteration(const ModuleSP &module_sp, SymbolContext matchingContext(m_target_sp, module_sp, cu_sp.get()); shouldContinue = - searcher.SearchCallback(*this, matchingContext, nullptr, false); + searcher.SearchCallback(*this, matchingContext, nullptr); if (shouldContinue == Searcher::eCallbackReturnPop) return Searcher::eCallbackReturnContinue; @@ -328,9 +328,8 @@ SearchFilter::DoCUIteration(const ModuleSP &module_sp, if (searcher.GetDepth() == lldb::eSearchDepthFunction) { SymbolContext matchingContext(m_target_sp, module_sp, cu_sp.get(), func_sp.get()); - shouldContinue = searcher.SearchCallback(*this, - matchingContext, - nullptr, false); + shouldContinue = + searcher.SearchCallback(*this, matchingContext, nullptr); } else { shouldContinue = DoFunctionIteration(func_sp.get(), context, searcher); @@ -343,7 +342,7 @@ SearchFilter::DoCUIteration(const ModuleSP &module_sp, } else { if 
(CompUnitPasses(*context.comp_unit)) { SymbolContext matchingContext(m_target_sp, module_sp, context.comp_unit); - return searcher.SearchCallback(*this, matchingContext, nullptr, false); + return searcher.SearchCallback(*this, matchingContext, nullptr); } } return Searcher::eCallbackReturnContinue; @@ -431,7 +430,7 @@ void SearchFilterByModule::Search(Searcher &searcher) { if (searcher.GetDepth() == lldb::eSearchDepthTarget) { SymbolContext empty_sc; empty_sc.target_sp = m_target_sp; - searcher.SearchCallback(*this, empty_sc, nullptr, false); + searcher.SearchCallback(*this, empty_sc, nullptr); } // If the module file spec is a full path, then we can just find the one @@ -568,7 +567,7 @@ void SearchFilterByModuleList::Search(Searcher &searcher) { if (searcher.GetDepth() == lldb::eSearchDepthTarget) { SymbolContext empty_sc; empty_sc.target_sp = m_target_sp; - searcher.SearchCallback(*this, empty_sc, nullptr, false); + searcher.SearchCallback(*this, empty_sc, nullptr); } // If the module file spec is a full path, then we can just find the one @@ -777,7 +776,7 @@ void SearchFilterByModuleListAndCU::Search(Searcher &searcher) { if (searcher.GetDepth() == lldb::eSearchDepthTarget) { SymbolContext empty_sc; empty_sc.target_sp = m_target_sp; - searcher.SearchCallback(*this, empty_sc, nullptr, false); + searcher.SearchCallback(*this, empty_sc, nullptr); } // If the module file spec is a full path, then we can just find the one diff --git a/lldb/source/Expression/DiagnosticManager.cpp b/lldb/source/Expression/DiagnosticManager.cpp index 5333e3e545b30..48eba3586d30a 100644 --- a/lldb/source/Expression/DiagnosticManager.cpp +++ b/lldb/source/Expression/DiagnosticManager.cpp @@ -47,7 +47,7 @@ static const char *StringForSeverity(DiagnosticSeverity severity) { std::string DiagnosticManager::GetString(char separator) { std::string ret; - for (const Diagnostic *diagnostic : Diagnostics()) { + for (const auto &diagnostic : Diagnostics()) { ret.append(StringForSeverity(diagnostic->GetSeverity())); ret.append(diagnostic->GetMessage()); ret.push_back(separator); diff --git a/lldb/source/Expression/REPL.cpp b/lldb/source/Expression/REPL.cpp index 78e4688102344..4f81ee3e56dd7 100644 --- a/lldb/source/Expression/REPL.cpp +++ b/lldb/source/Expression/REPL.cpp @@ -423,7 +423,7 @@ void REPL::IOHandlerInputComplete(IOHandler &io_handler, std::string &code) { .SetBaseLineNumber(m_code.GetSize() + 1); } if (extra_line) { - fprintf(output_sp->GetFile().GetStream(), "\n"); + output_sp->Printf("\n"); } } } diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp index bd4cfcdb334cb..6498ec5b57ed9 100644 --- a/lldb/source/Host/common/File.cpp +++ b/lldb/source/Host/common/File.cpp @@ -70,15 +70,17 @@ static const char *GetStreamOpenModeFromOptions(uint32_t options) { uint32_t File::GetOptionsFromMode(llvm::StringRef mode) { return llvm::StringSwitch(mode) - .Case("r", File::eOpenOptionRead) - .Case("w", File::eOpenOptionWrite) - .Case("a", File::eOpenOptionWrite | File::eOpenOptionAppend | - File::eOpenOptionCanCreate) - .Case("r+", File::eOpenOptionRead | File::eOpenOptionWrite) - .Case("w+", File::eOpenOptionRead | File::eOpenOptionWrite | - File::eOpenOptionCanCreate | File::eOpenOptionTruncate) - .Case("a+", File::eOpenOptionRead | File::eOpenOptionWrite | - File::eOpenOptionAppend | File::eOpenOptionCanCreate) + .Cases("r", "rb", eOpenOptionRead) + .Cases("w", "wb", eOpenOptionWrite) + .Cases("a", "ab", + eOpenOptionWrite | eOpenOptionAppend | eOpenOptionCanCreate) + .Cases("r+", "rb+", 
"r+b", eOpenOptionRead | eOpenOptionWrite) + .Cases("w+", "wb+", "w+b", + eOpenOptionRead | eOpenOptionWrite | eOpenOptionCanCreate | + eOpenOptionTruncate) + .Cases("a+", "ab+", "a+b", + eOpenOptionRead | eOpenOptionWrite | eOpenOptionAppend | + eOpenOptionCanCreate) .Default(0); } diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm index 742c38fd0fd22..fe31830b93e07 100644 --- a/lldb/source/Host/macosx/objcxx/Host.mm +++ b/lldb/source/Host/macosx/objcxx/Host.mm @@ -677,14 +677,16 @@ static bool GetMacOSXProcessUserAndGroup(ProcessInstanceInfo &process_info) { process_info.SetEffectiveGroupID(UINT32_MAX); // Make sure our info matches before we go fetch the name and cpu type - if (match_info.Matches(process_info)) { - // Get CPU type first so we can know to look for iOS simulator is we have - // x86 or x86_64 - if (GetMacOSXProcessCPUType(process_info)) { - if (GetMacOSXProcessArgs(&match_info, process_info)) { - if (match_info.Matches(process_info)) - process_infos.Append(process_info); - } + if (!match_info.UserIDsMatch(process_info) || + !match_info.ProcessIDsMatch(process_info)) + continue; + + // Get CPU type first so we can know to look for iOS simulator is we have + // x86 or x86_64 + if (GetMacOSXProcessCPUType(process_info)) { + if (GetMacOSXProcessArgs(&match_info, process_info)) { + if (match_info.Matches(process_info)) + process_infos.Append(process_info); } } } diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index db3f13947e22c..6e2a379d9e8ba 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -208,9 +208,8 @@ class ClangDiagnosticManagerAdapter : public clang::DiagnosticConsumer { // around them. std::string stripped_output = llvm::StringRef(m_output).trim(); - ClangDiagnostic *new_diagnostic = - new ClangDiagnostic(stripped_output, severity, Info.getID()); - m_manager->AddDiagnostic(new_diagnostic); + auto new_diagnostic = std::make_unique( + stripped_output, severity, Info.getID()); // Don't store away warning fixits, since the compiler doesn't have // enough context in an expression for the warning to be useful. @@ -224,6 +223,8 @@ class ClangDiagnosticManagerAdapter : public clang::DiagnosticConsumer { new_diagnostic->AddFixitHint(fixit); } } + + m_manager->AddDiagnostic(std::move(new_diagnostic)); } } @@ -1100,8 +1101,8 @@ bool ClangExpressionParser::RewriteExpression( if (num_diags == 0) return false; - for (const Diagnostic *diag : diagnostic_manager.Diagnostics()) { - const ClangDiagnostic *diagnostic = llvm::dyn_cast(diag); + for (const auto &diag : diagnostic_manager.Diagnostics()) { + const auto *diagnostic = llvm::dyn_cast(diag.get()); if (diagnostic && diagnostic->HasFixIts()) { for (const FixItHint &fixit : diagnostic->FixIts()) { // This is cobbed from clang::Rewrite::FixItRewriter. 
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp index 31c85d2087459..da1ca785635c9 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp @@ -444,6 +444,8 @@ static CppModuleConfiguration LogConfigError(const std::string &msg) { CppModuleConfiguration GetModuleConfig(lldb::LanguageType language, ExecutionContext &exe_ctx) { + Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS)); + // Don't do anything if this is not a C++ module configuration. if (!SupportsCxxModuleImport(language)) return LogConfigError("Language doesn't support C++ modules"); @@ -483,6 +485,15 @@ CppModuleConfiguration GetModuleConfig(lldb::LanguageType language, } } }); + + LLDB_LOG(log, "[C++ module config] Found {0} support files to analyze", + files.GetSize()); + if (log && log->GetVerbose()) { + for (const FileSpec &f : files) + LLDB_LOGV(log, "[C++ module config] Analyzing support file: {0}", + f.GetPath()); + } + // Try to create a configuration from the files. If there is no valid // configuration possible with the files, this just returns an invalid // configuration. diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp index 1f9d33e9beb4d..51ae73285b533 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp @@ -63,9 +63,9 @@ bool CppModuleConfiguration::hasValidConfig() { CppModuleConfiguration::CppModuleConfiguration( const FileSpecList &support_files) { // Analyze all files we were given to build the configuration. - bool error = !std::all_of(support_files.begin(), support_files.end(), - std::bind(&CppModuleConfiguration::analyzeFile, - this, std::placeholders::_1)); + bool error = !llvm::all_of(support_files, + std::bind(&CppModuleConfiguration::analyzeFile, + this, std::placeholders::_1)); // If we have a valid configuration at this point, set the // include directories and module list that should be used. 
if (!error && hasValidConfig()) { diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp index c86ab6be64449..45294e25f0f5d 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxTuple.cpp @@ -79,7 +79,9 @@ ValueObjectSP TupleFrontEnd::GetChildAtIndex(size_t idx) { m_elements[idx] = elem_sp->Clone(ConstString(llvm::formatv("[{0}]", idx).str())).get(); - return m_elements[idx]->GetSP(); + if (m_elements[idx]) + return m_elements[idx]->GetSP(); + return ValueObjectSP(); } SyntheticChildrenFrontEnd * diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp index 15bf7f8cf1dcd..0ac7b8f8e02b0 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppTuple.cpp @@ -90,7 +90,7 @@ bool LibStdcppTupleSyntheticFrontEnd::MightHaveChildren() { return true; } lldb::ValueObjectSP LibStdcppTupleSyntheticFrontEnd::GetChildAtIndex(size_t idx) { - if (idx < m_members.size()) + if (idx < m_members.size() && m_members[idx]) return m_members[idx]->GetSP(); return lldb::ValueObjectSP(); } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp index 84f9e57015f81..cceb511cdc465 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcppUniquePointer.cpp @@ -118,11 +118,11 @@ bool LibStdcppUniquePtrSyntheticFrontEnd::MightHaveChildren() { return true; } lldb::ValueObjectSP LibStdcppUniquePtrSyntheticFrontEnd::GetChildAtIndex(size_t idx) { - if (idx == 0) + if (idx == 0 && m_ptr_obj) return m_ptr_obj->GetSP(); - if (idx == 1) + if (idx == 1 && m_del_obj) return m_del_obj->GetSP(); - if (idx == 2) + if (idx == 2 && m_obj_obj) return m_obj_obj->GetSP(); return lldb::ValueObjectSP(); } diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp index ad1b3411c67e9..5200749d759f7 100644 --- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp +++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp @@ -790,7 +790,7 @@ RenderScriptRuntime::CreateInstance(Process *process, // symbol. Searcher::CallbackReturn RSBreakpointResolver::SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *, bool) { + SymbolContext &context, Address *) { ModuleSP module = context.module_sp; if (!module || !IsRenderScriptScriptModule(module)) @@ -820,7 +820,7 @@ RSBreakpointResolver::SearchCallback(SearchFilter &filter, Searcher::CallbackReturn RSReduceBreakpointResolver::SearchCallback(lldb_private::SearchFilter &filter, lldb_private::SymbolContext &context, - Address *, bool) { + Address *) { // We need to have access to the list of reductions currently parsed, as // reduce names don't actually exist as symbols in a module. They are only // identifiable by parsing the .rs.info packet, or finding the expand symbol. 
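The recurring change running through this patch (BreakpointResolver*, AddressResolver*, FileLineResolver, CommandCompletions, and the RenderScript resolvers above) drops the unused trailing `bool containing` parameter from Searcher::SearchCallback, and every SearchFilter call site now passes just three arguments. A minimal sketch of what an override looks like after this change; `MyResolver` is a hypothetical subclass and the body is illustrative only, not lines lifted from the diff:

    Searcher::CallbackReturn
    MyResolver::SearchCallback(SearchFilter &filter, SymbolContext &context,
                               Address *addr) {
      // Inspect the module/comp-unit/function handed in via `context` and add
      // breakpoint locations (or completions) as appropriate for this resolver.
      return eCallbackReturnContinue; // keep walking the remaining contexts
    }

Since no caller ever passed anything but `false` for the old parameter, removing it is purely mechanical; header declarations shrink to SearchCallback(SearchFilter &, SymbolContext &, Address *) override, as the RenderScriptRuntime.h hunks below show.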
@@ -880,8 +880,7 @@ RSReduceBreakpointResolver::SearchCallback(lldb_private::SearchFilter &filter, } Searcher::CallbackReturn RSScriptGroupBreakpointResolver::SearchCallback( - SearchFilter &filter, SymbolContext &context, Address *addr, - bool containing) { + SearchFilter &filter, SymbolContext &context, Address *addr) { if (!m_breakpoint) return eCallbackReturnContinue; diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h index 3923221d4302c..c3740ba55a116 100644 --- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h +++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h @@ -67,8 +67,8 @@ class RSBreakpointResolver : public BreakpointResolver { void Dump(Stream *s) const override {} Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override { return lldb::eSearchDepthModule; } @@ -117,8 +117,8 @@ class RSReduceBreakpointResolver : public BreakpointResolver { void Dump(Stream *s) const override {} Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override { return lldb::eSearchDepthModule; } @@ -262,8 +262,8 @@ class RSScriptGroupBreakpointResolver : public BreakpointResolver { void Dump(Stream *s) const override {} Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override; + SymbolContext &context, + Address *addr) override; lldb::SearchDepth GetDepth() override { return lldb::eSearchDepthModule; } diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/CMakeLists.txt b/lldb/source/Plugins/ObjectFile/PECOFF/CMakeLists.txt index 6981f7ecb71a8..ad768feca30a4 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/CMakeLists.txt +++ b/lldb/source/Plugins/ObjectFile/PECOFF/CMakeLists.txt @@ -7,6 +7,7 @@ endif() add_lldb_library(lldbPluginObjectFilePECOFF PLUGIN ObjectFilePECOFF.cpp + PECallFrameInfo.cpp WindowsMiniDump.cpp LINK_LIBS diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index 16c131fa469a0..eee98491a8812 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "ObjectFilePECOFF.h" +#include "PECallFrameInfo.h" #include "WindowsMiniDump.h" #include "lldb/Core/FileSpecList.h" @@ -518,7 +519,26 @@ bool ObjectFilePECOFF::ParseCOFFOptionalHeader(lldb::offset_t *offset_ptr) { return success; } +uint32_t ObjectFilePECOFF::GetRVA(const Address &addr) const { + return addr.GetFileAddress() - m_image_base; +} + +Address ObjectFilePECOFF::GetAddress(uint32_t rva) { + SectionList *sect_list = GetSectionList(); + if (!sect_list) + return Address(GetFileAddress(rva)); + + return Address(GetFileAddress(rva), sect_list); +} + +lldb::addr_t ObjectFilePECOFF::GetFileAddress(uint32_t rva) const { + return m_image_base + rva; +} + DataExtractor ObjectFilePECOFF::ReadImageData(uint32_t offset, size_t size) { + if (!size) + return 
{}; + if (m_file) { // A bit of a hack, but we intend to write to this buffer, so we can't // mmap it. @@ -541,6 +561,15 @@ DataExtractor ObjectFilePECOFF::ReadImageData(uint32_t offset, size_t size) { return data; } +DataExtractor ObjectFilePECOFF::ReadImageDataByRVA(uint32_t rva, size_t size) { + if (m_file) { + Address addr = GetAddress(rva); + rva = addr.GetSection()->GetFileOffset() + addr.GetOffset(); + } + + return ReadImageData(rva, size); +} + // ParseSectionHeaders bool ObjectFilePECOFF::ParseSectionHeaders( uint32_t section_header_data_offset) { @@ -678,14 +707,8 @@ Symtab *ObjectFilePECOFF::GetSymtab() { uint32_t data_start = m_coff_header_opt.data_dirs[coff_data_dir_export_table].vmaddr; - uint32_t address_rva = data_start; - if (m_file) { - Address address(m_coff_header_opt.image_base + data_start, sect_list); - address_rva = - address.GetSection()->GetFileOffset() + address.GetOffset(); - } - DataExtractor symtab_data = - ReadImageData(address_rva, m_coff_header_opt.data_dirs[0].vmsize); + DataExtractor symtab_data = ReadImageDataByRVA( + data_start, m_coff_header_opt.data_dirs[0].vmsize); lldb::offset_t offset = 0; // Read export_table header @@ -740,6 +763,19 @@ Symtab *ObjectFilePECOFF::GetSymtab() { return m_symtab_up.get(); } +std::unique_ptr ObjectFilePECOFF::CreateCallFrameInfo() { + if (coff_data_dir_exception_table >= m_coff_header_opt.data_dirs.size()) + return {}; + + data_directory data_dir_exception = + m_coff_header_opt.data_dirs[coff_data_dir_exception_table]; + if (!data_dir_exception.vmaddr) + return {}; + + return std::make_unique(*this, data_dir_exception.vmaddr, + data_dir_exception.vmsize); +} + bool ObjectFilePECOFF::IsStripped() { // TODO: determine this for COFF return false; diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h index 68ea7a7270c21..78088ecc43778 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h @@ -135,7 +135,14 @@ class ObjectFilePECOFF : public lldb_private::ObjectFile { bool IsWindowsSubsystem(); + uint32_t GetRVA(const lldb_private::Address &addr) const; + lldb_private::Address GetAddress(uint32_t rva); + lldb::addr_t GetFileAddress(uint32_t rva) const; + lldb_private::DataExtractor ReadImageData(uint32_t offset, size_t size); + lldb_private::DataExtractor ReadImageDataByRVA(uint32_t rva, size_t size); + + std::unique_ptr CreateCallFrameInfo() override; protected: bool NeedsEndianSwap() const; @@ -216,6 +223,7 @@ class ObjectFilePECOFF : public lldb_private::ObjectFile { enum coff_data_dir_type { coff_data_dir_export_table = 0, coff_data_dir_import_table = 1, + coff_data_dir_exception_table = 3 }; typedef struct section_header { diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp new file mode 100644 index 0000000000000..fe67e87f09f75 --- /dev/null +++ b/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp @@ -0,0 +1,535 @@ +#include "PECallFrameInfo.h" + +#include "ObjectFilePECOFF.h" + +#include "Plugins/Process/Utility/lldb-x86-register-enums.h" +#include "lldb/Symbol/UnwindPlan.h" +#include "llvm/Support/Win64EH.h" + +using namespace lldb; +using namespace lldb_private; +using namespace llvm::Win64EH; + +template +static const T *TypedRead(const DataExtractor &data_extractor, offset_t &offset, + offset_t size = sizeof(T)) { + return static_cast(data_extractor.GetData(&offset, size)); +} + 
+struct EHInstruction { + enum class Type { + PUSH_REGISTER, + ALLOCATE, + SET_FRAME_POINTER_REGISTER, + SAVE_REGISTER + }; + + uint8_t offset; + Type type; + uint32_t reg; + uint32_t frame_offset; +}; + +using EHProgram = std::vector; + +class UnwindCodesIterator { +public: + UnwindCodesIterator(ObjectFilePECOFF &object_file, uint32_t unwind_info_rva); + + bool GetNext(); + bool IsError() const { return m_error; } + + const UnwindInfo *GetUnwindInfo() const { return m_unwind_info; } + const UnwindCode *GetUnwindCode() const { return m_unwind_code; } + bool IsChained() const { return m_chained; } + +private: + ObjectFilePECOFF &m_object_file; + + bool m_error; + + uint32_t m_unwind_info_rva; + DataExtractor m_unwind_info_data; + const UnwindInfo *m_unwind_info; + + DataExtractor m_unwind_code_data; + offset_t m_unwind_code_offset; + const UnwindCode *m_unwind_code; + + bool m_chained; +}; + +UnwindCodesIterator::UnwindCodesIterator(ObjectFilePECOFF &object_file, + uint32_t unwind_info_rva) + : m_object_file(object_file), m_error(false), + m_unwind_info_rva(unwind_info_rva), + m_unwind_info(nullptr), m_unwind_code_offset{}, m_unwind_code(nullptr), + m_chained(false) {} + +bool UnwindCodesIterator::GetNext() { + static constexpr int UNWIND_INFO_SIZE = 4; + + m_error = false; + m_unwind_code = nullptr; + while (!m_unwind_code) { + if (!m_unwind_info) { + m_unwind_info_data = + m_object_file.ReadImageDataByRVA(m_unwind_info_rva, UNWIND_INFO_SIZE); + + offset_t offset = 0; + m_unwind_info = + TypedRead(m_unwind_info_data, offset, UNWIND_INFO_SIZE); + if (!m_unwind_info) { + m_error = true; + break; + } + + m_unwind_code_data = m_object_file.ReadImageDataByRVA( + m_unwind_info_rva + UNWIND_INFO_SIZE, + m_unwind_info->NumCodes * sizeof(UnwindCode)); + m_unwind_code_offset = 0; + } + + if (m_unwind_code_offset < m_unwind_code_data.GetByteSize()) { + m_unwind_code = + TypedRead(m_unwind_code_data, m_unwind_code_offset); + m_error = !m_unwind_code; + break; + } + + if (!(m_unwind_info->getFlags() & UNW_ChainInfo)) + break; + + uint32_t runtime_function_rva = + m_unwind_info_rva + UNWIND_INFO_SIZE + + ((m_unwind_info->NumCodes + 1) & ~1) * sizeof(UnwindCode); + DataExtractor runtime_function_data = m_object_file.ReadImageDataByRVA( + runtime_function_rva, sizeof(RuntimeFunction)); + + offset_t offset = 0; + const auto *runtime_function = + TypedRead(runtime_function_data, offset); + if (!runtime_function) { + m_error = true; + break; + } + + m_unwind_info_rva = runtime_function->UnwindInfoOffset; + m_unwind_info = nullptr; + m_chained = true; + } + + return !!m_unwind_code; +} + +class EHProgramBuilder { +public: + EHProgramBuilder(ObjectFilePECOFF &object_file, uint32_t unwind_info_rva); + + bool Build(); + + const EHProgram &GetProgram() const { return m_program; } + +private: + static uint32_t ConvertMachineToLLDBRegister(uint8_t machine_reg); + static uint32_t ConvertXMMToLLDBRegister(uint8_t xmm_reg); + + bool ProcessUnwindCode(UnwindCode code); + void Finalize(); + + bool ParseBigOrScaledFrameOffset(uint32_t &result, bool big, uint32_t scale); + bool ParseBigFrameOffset(uint32_t &result); + bool ParseFrameOffset(uint32_t &result); + + UnwindCodesIterator m_iterator; + EHProgram m_program; +}; + +EHProgramBuilder::EHProgramBuilder(ObjectFilePECOFF &object_file, + uint32_t unwind_info_rva) + : m_iterator(object_file, unwind_info_rva) {} + +bool EHProgramBuilder::Build() { + while (m_iterator.GetNext()) + if (!ProcessUnwindCode(*m_iterator.GetUnwindCode())) + return false; + + if 
(m_iterator.IsError()) + return false; + + Finalize(); + + return true; +} + +uint32_t EHProgramBuilder::ConvertMachineToLLDBRegister(uint8_t machine_reg) { + static uint32_t machine_to_lldb_register[] = { + lldb_rax_x86_64, lldb_rcx_x86_64, lldb_rdx_x86_64, lldb_rbx_x86_64, + lldb_rsp_x86_64, lldb_rbp_x86_64, lldb_rsi_x86_64, lldb_rdi_x86_64, + lldb_r8_x86_64, lldb_r9_x86_64, lldb_r10_x86_64, lldb_r11_x86_64, + lldb_r12_x86_64, lldb_r13_x86_64, lldb_r14_x86_64, lldb_r15_x86_64}; + + if (machine_reg >= llvm::array_lengthof(machine_to_lldb_register)) + return LLDB_INVALID_REGNUM; + + return machine_to_lldb_register[machine_reg]; +} + +uint32_t EHProgramBuilder::ConvertXMMToLLDBRegister(uint8_t xmm_reg) { + static uint32_t xmm_to_lldb_register[] = { + lldb_xmm0_x86_64, lldb_xmm1_x86_64, lldb_xmm2_x86_64, + lldb_xmm3_x86_64, lldb_xmm4_x86_64, lldb_xmm5_x86_64, + lldb_xmm6_x86_64, lldb_xmm7_x86_64, lldb_xmm8_x86_64, + lldb_xmm9_x86_64, lldb_xmm10_x86_64, lldb_xmm11_x86_64, + lldb_xmm12_x86_64, lldb_xmm13_x86_64, lldb_xmm14_x86_64, + lldb_xmm15_x86_64}; + + if (xmm_reg >= llvm::array_lengthof(xmm_to_lldb_register)) + return LLDB_INVALID_REGNUM; + + return xmm_to_lldb_register[xmm_reg]; +} + +bool EHProgramBuilder::ProcessUnwindCode(UnwindCode code) { + uint8_t o = m_iterator.IsChained() ? 0 : code.u.CodeOffset; + uint8_t unwind_operation = code.getUnwindOp(); + uint8_t operation_info = code.getOpInfo(); + + switch (unwind_operation) { + case UOP_PushNonVol: { + uint32_t r = ConvertMachineToLLDBRegister(operation_info); + if (r == LLDB_INVALID_REGNUM) + return false; + + m_program.emplace_back( + EHInstruction{o, EHInstruction::Type::PUSH_REGISTER, r, 8}); + + return true; + } + case UOP_AllocLarge: { + uint32_t fo; + if (!ParseBigOrScaledFrameOffset(fo, operation_info, 8)) + return false; + + m_program.emplace_back(EHInstruction{o, EHInstruction::Type::ALLOCATE, + LLDB_INVALID_REGNUM, fo}); + + return true; + } + case UOP_AllocSmall: { + m_program.emplace_back( + EHInstruction{o, EHInstruction::Type::ALLOCATE, LLDB_INVALID_REGNUM, + static_cast(operation_info) * 8 + 8}); + return true; + } + case UOP_SetFPReg: { + uint32_t fpr = LLDB_INVALID_REGNUM; + if (m_iterator.GetUnwindInfo()->getFrameRegister()) + fpr = ConvertMachineToLLDBRegister( + m_iterator.GetUnwindInfo()->getFrameRegister()); + if (fpr == LLDB_INVALID_REGNUM) + return false; + + uint32_t fpro = + static_cast(m_iterator.GetUnwindInfo()->getFrameOffset()) * + 16; + + m_program.emplace_back(EHInstruction{ + o, EHInstruction::Type::SET_FRAME_POINTER_REGISTER, fpr, fpro}); + + return true; + } + case UOP_SaveNonVol: + case UOP_SaveNonVolBig: { + uint32_t r = ConvertMachineToLLDBRegister(operation_info); + if (r == LLDB_INVALID_REGNUM) + return false; + + uint32_t fo; + if (!ParseBigOrScaledFrameOffset(fo, unwind_operation == UOP_SaveNonVolBig, + 8)) + return false; + + m_program.emplace_back( + EHInstruction{o, EHInstruction::Type::SAVE_REGISTER, r, fo}); + + return true; + } + case UOP_Epilog: { + return m_iterator.GetNext(); + } + case UOP_SpareCode: { + // ReSharper disable once CppIdenticalOperandsInBinaryExpression + return m_iterator.GetNext() && m_iterator.GetNext(); + } + case UOP_SaveXMM128: + case UOP_SaveXMM128Big: { + uint32_t r = ConvertXMMToLLDBRegister(operation_info); + if (r == LLDB_INVALID_REGNUM) + return false; + + uint32_t fo; + if (!ParseBigOrScaledFrameOffset(fo, unwind_operation == UOP_SaveXMM128Big, + 16)) + return false; + + m_program.emplace_back( + EHInstruction{o, EHInstruction::Type::SAVE_REGISTER, r, 
fo}); + + return true; + } + case UOP_PushMachFrame: { + if (operation_info) + m_program.emplace_back(EHInstruction{o, EHInstruction::Type::ALLOCATE, + LLDB_INVALID_REGNUM, 8}); + m_program.emplace_back(EHInstruction{o, EHInstruction::Type::PUSH_REGISTER, + lldb_rip_x86_64, 8}); + m_program.emplace_back(EHInstruction{o, EHInstruction::Type::PUSH_REGISTER, + lldb_cs_x86_64, 8}); + m_program.emplace_back(EHInstruction{o, EHInstruction::Type::PUSH_REGISTER, + lldb_rflags_x86_64, 8}); + m_program.emplace_back(EHInstruction{o, EHInstruction::Type::PUSH_REGISTER, + lldb_rsp_x86_64, 8}); + m_program.emplace_back(EHInstruction{o, EHInstruction::Type::PUSH_REGISTER, + lldb_ss_x86_64, 8}); + + return true; + } + default: + return false; + } +} + +void EHProgramBuilder::Finalize() { + for (const EHInstruction &i : m_program) + if (i.reg == lldb_rip_x86_64) + return; + + m_program.emplace_back( + EHInstruction{0, EHInstruction::Type::PUSH_REGISTER, lldb_rip_x86_64, 8}); +} + +bool EHProgramBuilder::ParseBigOrScaledFrameOffset(uint32_t &result, bool big, + uint32_t scale) { + if (big) { + if (!ParseBigFrameOffset(result)) + return false; + } else { + if (!ParseFrameOffset(result)) + return false; + + result *= scale; + } + + return true; +} + +bool EHProgramBuilder::ParseBigFrameOffset(uint32_t &result) { + if (!m_iterator.GetNext()) + return false; + + result = m_iterator.GetUnwindCode()->FrameOffset; + + if (!m_iterator.GetNext()) + return false; + + result += static_cast(m_iterator.GetUnwindCode()->FrameOffset) + << 16; + + return true; +} + +bool EHProgramBuilder::ParseFrameOffset(uint32_t &result) { + if (!m_iterator.GetNext()) + return false; + + result = m_iterator.GetUnwindCode()->FrameOffset; + + return true; +} + +class EHProgramRange { +public: + EHProgramRange(EHProgram::const_iterator begin, + EHProgram::const_iterator end); + + std::unique_ptr BuildUnwindPlanRow() const; + +private: + int32_t GetCFAFrameOffset() const; + + EHProgram::const_iterator m_begin; + EHProgram::const_iterator m_end; +}; + +EHProgramRange::EHProgramRange(EHProgram::const_iterator begin, + EHProgram::const_iterator end) + : m_begin(begin), m_end(end) {} + +std::unique_ptr EHProgramRange::BuildUnwindPlanRow() const { + std::unique_ptr row = std::make_unique(); + + if (m_begin != m_end) + row->SetOffset(m_begin->offset); + + int32_t cfa_frame_offset = GetCFAFrameOffset(); + + bool frame_pointer_found = false; + for (EHProgram::const_iterator it = m_begin; it != m_end; ++it) { + switch (it->type) { + case EHInstruction::Type::SET_FRAME_POINTER_REGISTER: + row->GetCFAValue().SetIsRegisterPlusOffset(it->reg, cfa_frame_offset - + it->frame_offset); + frame_pointer_found = true; + break; + default: + break; + } + if (frame_pointer_found) + break; + } + if (!frame_pointer_found) + row->GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, + cfa_frame_offset); + + int32_t rsp_frame_offset = 0; + for (EHProgram::const_iterator it = m_begin; it != m_end; ++it) { + switch (it->type) { + case EHInstruction::Type::PUSH_REGISTER: + row->SetRegisterLocationToAtCFAPlusOffset( + it->reg, rsp_frame_offset - cfa_frame_offset, false); + rsp_frame_offset += it->frame_offset; + break; + case EHInstruction::Type::ALLOCATE: + rsp_frame_offset += it->frame_offset; + break; + case EHInstruction::Type::SAVE_REGISTER: + row->SetRegisterLocationToAtCFAPlusOffset( + it->reg, it->frame_offset - cfa_frame_offset, false); + break; + default: + break; + } + } + + row->SetRegisterLocationToIsCFAPlusOffset(lldb_rsp_x86_64, 0, false); + + return row; 
+} + +int32_t EHProgramRange::GetCFAFrameOffset() const { + int32_t result = 0; + + for (EHProgram::const_iterator it = m_begin; it != m_end; ++it) { + switch (it->type) { + case EHInstruction::Type::PUSH_REGISTER: + case EHInstruction::Type::ALLOCATE: + result += it->frame_offset; + default: + break; + } + } + + return result; +} + +PECallFrameInfo::PECallFrameInfo(ObjectFilePECOFF &object_file, + uint32_t exception_dir_rva, + uint32_t exception_dir_size) + : m_object_file(object_file), + m_exception_dir(object_file.ReadImageDataByRVA(exception_dir_rva, + exception_dir_size)) {} + +bool PECallFrameInfo::GetAddressRange(Address addr, AddressRange &range) { + range.Clear(); + + const RuntimeFunction *runtime_function = + FindRuntimeFunctionIntersectsWithRange(AddressRange(addr, 1)); + if (!runtime_function) + return false; + + range.GetBaseAddress() = + m_object_file.GetAddress(runtime_function->StartAddress); + range.SetByteSize(runtime_function->EndAddress - + runtime_function->StartAddress); + + return true; +} + +bool PECallFrameInfo::GetUnwindPlan(const Address &addr, + UnwindPlan &unwind_plan) { + return GetUnwindPlan(AddressRange(addr, 1), unwind_plan); +} + +bool PECallFrameInfo::GetUnwindPlan(const AddressRange &range, + UnwindPlan &unwind_plan) { + unwind_plan.Clear(); + + unwind_plan.SetSourceName("PE EH info"); + unwind_plan.SetSourcedFromCompiler(eLazyBoolYes); + unwind_plan.SetRegisterKind(eRegisterKindLLDB); + + const RuntimeFunction *runtime_function = + FindRuntimeFunctionIntersectsWithRange(range); + if (!runtime_function) + return false; + + EHProgramBuilder builder(m_object_file, runtime_function->UnwindInfoOffset); + if (!builder.Build()) + return false; + + std::vector rows; + + uint32_t last_offset = UINT32_MAX; + for (auto it = builder.GetProgram().begin(); it != builder.GetProgram().end(); + ++it) { + if (it->offset == last_offset) + continue; + + EHProgramRange program_range = + EHProgramRange(it, builder.GetProgram().end()); + rows.push_back(program_range.BuildUnwindPlanRow()); + + last_offset = it->offset; + } + + for (auto it = rows.rbegin(); it != rows.rend(); ++it) + unwind_plan.AppendRow(*it); + + unwind_plan.SetPlanValidAddressRange(AddressRange( + m_object_file.GetAddress(runtime_function->StartAddress), + runtime_function->EndAddress - runtime_function->StartAddress)); + unwind_plan.SetUnwindPlanValidAtAllInstructions(eLazyBoolNo); + + return true; +} + +const RuntimeFunction *PECallFrameInfo::FindRuntimeFunctionIntersectsWithRange( + const AddressRange &range) const { + uint32_t rva = m_object_file.GetRVA(range.GetBaseAddress()); + addr_t size = range.GetByteSize(); + + uint32_t begin = 0; + uint32_t end = m_exception_dir.GetByteSize() / sizeof(RuntimeFunction); + while (begin < end) { + uint32_t curr = (begin + end) / 2; + + offset_t offset = curr * sizeof(RuntimeFunction); + const auto *runtime_function = + TypedRead(m_exception_dir, offset); + if (!runtime_function) + break; + + if (runtime_function->StartAddress < rva + size && + runtime_function->EndAddress > rva) + return runtime_function; + + if (runtime_function->StartAddress >= rva + size) + end = curr; + + if (runtime_function->EndAddress <= rva) + begin = curr + 1; + } + + return nullptr; +} diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.h b/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.h new file mode 100644 index 0000000000000..b5932dc726a32 --- /dev/null +++ b/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.h @@ -0,0 +1,47 @@ +//===-- PECallFrameInfo.h 
---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef liblldb_PECallFrameInfo_h_ +#define liblldb_PECallFrameInfo_h_ + +#include "lldb/Symbol/CallFrameInfo.h" +#include "lldb/Utility/DataExtractor.h" + +class ObjectFilePECOFF; + +namespace llvm { +namespace Win64EH { + +struct RuntimeFunction; + +} +} // namespace llvm + +class PECallFrameInfo : public virtual lldb_private::CallFrameInfo { +public: + explicit PECallFrameInfo(ObjectFilePECOFF &object_file, + uint32_t exception_dir_rva, + uint32_t exception_dir_size); + + bool GetAddressRange(lldb_private::Address addr, + lldb_private::AddressRange &range) override; + + bool GetUnwindPlan(const lldb_private::Address &addr, + lldb_private::UnwindPlan &unwind_plan) override; + bool GetUnwindPlan(const lldb_private::AddressRange &range, + lldb_private::UnwindPlan &unwind_plan) override; + +private: + const llvm::Win64EH::RuntimeFunction *FindRuntimeFunctionIntersectsWithRange( + const lldb_private::AddressRange &range) const; + + ObjectFilePECOFF &m_object_file; + lldb_private::DataExtractor m_exception_dir; +}; + +#endif // liblldb_PECallFrameInfo_h_ diff --git a/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp b/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp index af8588c9f5917..ff3fb0a75e2d0 100644 --- a/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp @@ -703,7 +703,7 @@ ProcessMonitor::ProcessMonitor( const lldb_private::ProcessLaunchInfo & /* launch_info */, lldb_private::Status &error) : m_process(static_cast(process)), - m_operation_thread(nullptr), m_monitor_thread(nullptr), m_pid(LLDB_INVALID_PROCESS_ID), m_terminal_fd(-1), m_operation(0) { + m_operation_thread(), m_monitor_thread(), m_pid(LLDB_INVALID_PROCESS_ID), m_terminal_fd(-1), m_operation(0) { using namespace std::placeholders; std::unique_ptr args( @@ -730,20 +730,22 @@ ProcessMonitor::ProcessMonitor( } // Finally, start monitoring the child process for change in state. - m_monitor_thread = Host::StartMonitoringChildProcess( + llvm::Expected monitor_thread = + Host::StartMonitoringChildProcess( std::bind(&ProcessMonitor::MonitorCallback, this, _1, _2, _3, _4), GetPID(), true); - if (!m_monitor_thread->IsJoinable()) { + if (!monitor_thread || !monitor_thread->IsJoinable()) { error.SetErrorToGenericError(); error.SetErrorString("Process launch failed."); return; } + m_monitor_thread = *monitor_thread; } ProcessMonitor::ProcessMonitor(ProcessFreeBSD *process, lldb::pid_t pid, lldb_private::Status &error) : m_process(static_cast(process)), - m_operation_thread(nullptr), m_monitor_thread(nullptr), m_pid(pid), m_terminal_fd(-1), m_operation(0) { + m_operation_thread(), m_monitor_thread(), m_pid(pid), m_terminal_fd(-1), m_operation(0) { using namespace std::placeholders; sem_init(&m_operation_pending, 0, 0); @@ -768,14 +770,16 @@ ProcessMonitor::ProcessMonitor(ProcessFreeBSD *process, lldb::pid_t pid, } // Finally, start monitoring the child process for change in state. 
- m_monitor_thread = Host::StartMonitoringChildProcess( + llvm::Expected monitor_thread = + Host::StartMonitoringChildProcess( std::bind(&ProcessMonitor::MonitorCallback, this, _1, _2, _3, _4), GetPID(), true); - if (!m_monitor_thread->IsJoinable()) { + if (!monitor_thread || !monitor_thread->IsJoinable()) { error.SetErrorToGenericError(); error.SetErrorString("Process attach failed."); return; } + m_monitor_thread = *monitor_thread; } ProcessMonitor::~ProcessMonitor() { StopMonitor(); } @@ -784,13 +788,15 @@ ProcessMonitor::~ProcessMonitor() { StopMonitor(); } void ProcessMonitor::StartLaunchOpThread(LaunchArgs *args, Status &error) { static const char *g_thread_name = "lldb.process.freebsd.operation"; - if (m_operation_thread->IsJoinable()) + if (m_operation_thread && m_operation_thread->IsJoinable()) return; - m_operation_thread = - ThreadLauncher::LaunchThread(g_thread_name, LaunchOpThread, args); - if (!m_operation_thread) - error = m_operation_thread.takeError(); + llvm::Expected operation_thread = + ThreadLauncher::LaunchThread(g_thread_name, LaunchOpThread, args); + if (operation_thread) + m_operation_thread = *operation_thread; + else + error = operation_thread.takeError(); } void *ProcessMonitor::LaunchOpThread(void *arg) { @@ -952,14 +958,15 @@ void ProcessMonitor::StartAttachOpThread(AttachArgs *args, lldb_private::Status &error) { static const char *g_thread_name = "lldb.process.freebsd.operation"; - if (m_operation_thread->IsJoinable()) + if (m_operation_thread && m_operation_thread->IsJoinable()) return; - m_operation_thread = - ThreadLauncher::LaunchThread(g_thread_name, AttachOpThread, args); - - if (!m_operation_thread) - error = m_operation_thread.takeError(); + llvm::Expected operation_thread = + ThreadLauncher::LaunchThread(g_thread_name, AttachOpThread, args); + if (operation_thread) + m_operation_thread = *operation_thread; + else + error = operation_thread.takeError(); } void *ProcessMonitor::AttachOpThread(void *arg) { @@ -1374,7 +1381,7 @@ bool ProcessMonitor::DupDescriptor(const FileSpec &file_spec, int fd, } void ProcessMonitor::StopMonitoringChildProcess() { - if (m_monitor_thread->IsJoinable()) { + if (m_monitor_thread && m_monitor_thread->IsJoinable()) { m_monitor_thread->Cancel(); m_monitor_thread->Join(nullptr); m_monitor_thread->Reset(); @@ -1412,10 +1419,9 @@ void ProcessMonitor::StopMonitor() { bool ProcessMonitor::WaitForInitialTIDStop(lldb::tid_t tid) { return true; } void ProcessMonitor::StopOpThread() { - if (!m_operation_thread->IsJoinable()) - return; - - m_operation_thread->Cancel(); - m_operation_thread->Join(nullptr); - m_operation_thread->Reset(); + if (m_operation_thread && m_operation_thread->IsJoinable()) { + m_operation_thread->Cancel(); + m_operation_thread->Join(nullptr); + m_operation_thread->Reset(); + } } diff --git a/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.h b/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.h index 2adcc449c5c63..c5edfc0be95aa 100644 --- a/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.h +++ b/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.h @@ -183,8 +183,8 @@ class ProcessMonitor { private: ProcessFreeBSD *m_process; - llvm::Expected m_operation_thread; - llvm::Expected m_monitor_thread; + llvm::Optional m_operation_thread; + llvm::Optional m_monitor_thread; lldb::pid_t m_pid; int m_terminal_fd; diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp index 50795c09f2808..49a589f14989d 100644 --- 
a/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp @@ -12,6 +12,7 @@ #include "lldb/Core/Value.h" #include "lldb/Expression/DWARFExpression.h" #include "lldb/Symbol/ArmUnwindInfo.h" +#include "lldb/Symbol/CallFrameInfo.h" #include "lldb/Symbol/DWARFCallFrameInfo.h" #include "lldb/Symbol/FuncUnwinders.h" #include "lldb/Symbol/Function.h" @@ -784,6 +785,16 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() { unwind_plan_sp.reset(); } + CallFrameInfo *object_file_unwind = + pc_module_sp->GetUnwindTable().GetObjectFileUnwindInfo(); + if (object_file_unwind) { + unwind_plan_sp = std::make_shared(lldb::eRegisterKindGeneric); + if (object_file_unwind->GetUnwindPlan(m_current_pc, *unwind_plan_sp)) + return unwind_plan_sp; + else + unwind_plan_sp.reset(); + } + return arch_default_unwind_plan_sp; } @@ -796,6 +807,9 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() { m_fast_unwind_plan_sp.reset(); unwind_plan_sp = func_unwinders_sp->GetEHFrameUnwindPlan(process->GetTarget()); + if (!unwind_plan_sp) + unwind_plan_sp = + func_unwinders_sp->GetObjectFileUnwindPlan(process->GetTarget()); if (unwind_plan_sp && unwind_plan_sp->PlanValidAtAddress(m_current_pc) && unwind_plan_sp->GetSourcedFromCompiler() == eLazyBoolYes) { return unwind_plan_sp; @@ -818,6 +832,9 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() { // intend) or compact unwind (this won't work) unwind_plan_sp = func_unwinders_sp->GetEHFrameUnwindPlan(process->GetTarget()); + if (!unwind_plan_sp) + unwind_plan_sp = + func_unwinders_sp->GetObjectFileUnwindPlan(process->GetTarget()); if (unwind_plan_sp && unwind_plan_sp->PlanValidAtAddress(m_current_pc)) { UnwindLogMsgVerbose("frame uses %s for full UnwindPlan because the " "DynamicLoader suggested we prefer it", diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp index 8a06fb7ed3793..c4b7a6d1a9091 100644 --- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp @@ -81,13 +81,24 @@ ProcessSP ProcessWindows::CreateInstance(lldb::TargetSP target_sp, return ProcessSP(new ProcessWindows(target_sp, listener_sp)); } -void ProcessWindows::Initialize() { - static llvm::once_flag g_once_flag; +static bool ShouldUseLLDBServer() { + llvm::StringRef use_lldb_server = ::getenv("LLDB_USE_LLDB_SERVER"); + return use_lldb_server.equals_lower("on") || + use_lldb_server.equals_lower("yes") || + use_lldb_server.equals_lower("1") || + use_lldb_server.equals_lower("true"); +} - llvm::call_once(g_once_flag, []() { - PluginManager::RegisterPlugin(GetPluginNameStatic(), - GetPluginDescriptionStatic(), CreateInstance); - }); +void ProcessWindows::Initialize() { + if (!ShouldUseLLDBServer()) { + static llvm::once_flag g_once_flag; + + llvm::call_once(g_once_flag, []() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), + CreateInstance); + }); + } } void ProcessWindows::Terminate() {} diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp index e1e0f39bca140..d4da56e03f36e 100644 --- a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp +++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp @@ -427,23 +427,35 @@ CreateRegionsCacheFromLinuxMaps(MinidumpParser &parser, static bool 
CreateRegionsCacheFromMemoryInfoList(MinidumpParser &parser, std::vector<MemoryRegionInfo> &regions) { - auto data = parser.GetStream(StreamType::MemoryInfoList); - if (data.empty()) - return false; - auto mem_info_list = MinidumpMemoryInfo::ParseMemoryInfoList(data); - if (mem_info_list.empty()) + Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES); + auto ExpectedInfo = parser.GetMinidumpFile().getMemoryInfoList(); + if (!ExpectedInfo) { + LLDB_LOG_ERROR(log, ExpectedInfo.takeError(), + "Failed to read memory info list: {0}"); return false; + } constexpr auto yes = MemoryRegionInfo::eYes; constexpr auto no = MemoryRegionInfo::eNo; - regions.reserve(mem_info_list.size()); - for (const auto &entry : mem_info_list) { + for (const MemoryInfo &entry : *ExpectedInfo) { MemoryRegionInfo region; - region.GetRange().SetRangeBase(entry->base_address); - region.GetRange().SetByteSize(entry->region_size); - region.SetReadable(entry->isReadable() ? yes : no); - region.SetWritable(entry->isWritable() ? yes : no); - region.SetExecutable(entry->isExecutable() ? yes : no); - region.SetMapped(entry->isMapped() ? yes : no); + region.GetRange().SetRangeBase(entry.BaseAddress); + region.GetRange().SetByteSize(entry.RegionSize); + + MemoryProtection prot = entry.Protect; + region.SetReadable(bool(prot & MemoryProtection::NoAccess) ? no : yes); + region.SetWritable( + bool(prot & (MemoryProtection::ReadWrite | MemoryProtection::WriteCopy | + MemoryProtection::ExecuteReadWrite | + MemoryProtection::ExeciteWriteCopy)) + ? yes + : no); + region.SetExecutable( + bool(prot & (MemoryProtection::Execute | MemoryProtection::ExecuteRead | + MemoryProtection::ExecuteReadWrite | + MemoryProtection::ExeciteWriteCopy)) + ? yes + : no); + region.SetMapped(entry.State != MemoryState::Free ? yes : no); regions.push_back(region); } return !regions.empty(); } diff --git a/lldb/source/Plugins/Process/minidump/MinidumpTypes.cpp b/lldb/source/Plugins/Process/minidump/MinidumpTypes.cpp index d7fc6e43d090c..2845430487dfc 100644 --- a/lldb/source/Plugins/Process/minidump/MinidumpTypes.cpp +++ b/lldb/source/Plugins/Process/minidump/MinidumpTypes.cpp @@ -87,29 +87,3 @@ MinidumpMemoryDescriptor64::ParseMemory64List(llvm::ArrayRef<uint8_t> &data) { *mem_ranges_count), *base_rva); } - -std::vector<const MinidumpMemoryInfo *> -MinidumpMemoryInfo::ParseMemoryInfoList(llvm::ArrayRef<uint8_t> &data) { - const MinidumpMemoryInfoListHeader *header; - Status error = consumeObject(data, header); - if (error.Fail() || - header->size_of_header < sizeof(MinidumpMemoryInfoListHeader) || - header->size_of_entry < sizeof(MinidumpMemoryInfo)) - return {}; - - data = data.drop_front(header->size_of_header - - sizeof(MinidumpMemoryInfoListHeader)); - - if (header->size_of_entry * header->num_of_entries > data.size()) - return {}; - - std::vector<const MinidumpMemoryInfo *> result; - result.reserve(header->num_of_entries); - - for (uint64_t i = 0; i < header->num_of_entries; ++i) { - result.push_back(reinterpret_cast<const MinidumpMemoryInfo *>( - data.data() + i * header->size_of_entry)); - } - - return result; -} diff --git a/lldb/source/Plugins/Process/minidump/MinidumpTypes.h b/lldb/source/Plugins/Process/minidump/MinidumpTypes.h index b4878e82de5de..d7390a36eaf5e 100644 --- a/lldb/source/Plugins/Process/minidump/MinidumpTypes.h +++ b/lldb/source/Plugins/Process/minidump/MinidumpTypes.h @@ -85,90 +85,6 @@ struct MinidumpMemoryDescriptor64 { static_assert(sizeof(MinidumpMemoryDescriptor64) == 16, "sizeof MinidumpMemoryDescriptor64 is not correct!"); -// Reference: -// https://msdn.microsoft.com/en-us/library/windows/desktop/ms680385(v=vs.85).aspx -struct
MinidumpMemoryInfoListHeader { - llvm::support::ulittle32_t size_of_header; - llvm::support::ulittle32_t size_of_entry; - llvm::support::ulittle64_t num_of_entries; -}; -static_assert(sizeof(MinidumpMemoryInfoListHeader) == 16, - "sizeof MinidumpMemoryInfoListHeader is not correct!"); - -enum class MinidumpMemoryInfoState : uint32_t { - MemCommit = 0x1000, - MemFree = 0x10000, - MemReserve = 0x2000, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ MemFree) -}; - -enum class MinidumpMemoryInfoType : uint32_t { - MemImage = 0x1000000, - MemMapped = 0x40000, - MemPrivate = 0x20000, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ MemImage) -}; - -// Reference: -// https://msdn.microsoft.com/en-us/library/windows/desktop/aa366786(v=vs.85).aspx -enum class MinidumpMemoryProtectionContants : uint32_t { - PageExecute = 0x10, - PageExecuteRead = 0x20, - PageExecuteReadWrite = 0x40, - PageExecuteWriteCopy = 0x80, - PageNoAccess = 0x01, - PageReadOnly = 0x02, - PageReadWrite = 0x04, - PageWriteCopy = 0x08, - PageTargetsInvalid = 0x40000000, - PageTargetsNoUpdate = 0x40000000, - - PageWritable = PageExecuteReadWrite | PageExecuteWriteCopy | PageReadWrite | - PageWriteCopy, - PageExecutable = PageExecute | PageExecuteRead | PageExecuteReadWrite | - PageExecuteWriteCopy, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ PageTargetsInvalid) -}; - -// Reference: -// https://msdn.microsoft.com/en-us/library/windows/desktop/ms680386(v=vs.85).aspx -struct MinidumpMemoryInfo { - llvm::support::ulittle64_t base_address; - llvm::support::ulittle64_t allocation_base; - llvm::support::ulittle32_t allocation_protect; - llvm::support::ulittle32_t alignment1; - llvm::support::ulittle64_t region_size; - llvm::support::ulittle32_t state; - llvm::support::ulittle32_t protect; - llvm::support::ulittle32_t type; - llvm::support::ulittle32_t alignment2; - - static std::vector - ParseMemoryInfoList(llvm::ArrayRef &data); - - bool isReadable() const { - const auto mask = MinidumpMemoryProtectionContants::PageNoAccess; - return (static_cast(mask) & protect) == 0; - } - - bool isWritable() const { - const auto mask = MinidumpMemoryProtectionContants::PageWritable; - return (static_cast(mask) & protect) != 0; - } - - bool isExecutable() const { - const auto mask = MinidumpMemoryProtectionContants::PageExecutable; - return (static_cast(mask) & protect) != 0; - } - - bool isMapped() const { - return state != static_cast(MinidumpMemoryInfoState::MemFree); - } -}; - -static_assert(sizeof(MinidumpMemoryInfo) == 48, - "sizeof MinidumpMemoryInfo is not correct!"); - // TODO misc2, misc3 ? 
// Reference: // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680389(v=vs.85).aspx diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp index 41dc08b9e6c27..52d93491eca53 100644 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp @@ -49,8 +49,8 @@ namespace { class PlaceholderObjectFile : public ObjectFile { public: PlaceholderObjectFile(const lldb::ModuleSP &module_sp, - const ModuleSpec &module_spec, lldb::offset_t base, - lldb::offset_t size) + const ModuleSpec &module_spec, lldb::addr_t base, + lldb::addr_t size) : ObjectFile(module_sp, &module_spec.GetFileSpec(), /*file_offset*/ 0, /*length*/ 0, /*data_sp*/ nullptr, /*data_offset*/ 0), m_arch(module_spec.GetArchitecture()), m_uuid(module_spec.GetUUID()), @@ -58,7 +58,10 @@ class PlaceholderObjectFile : public ObjectFile { m_symtab_up = std::make_unique(this); } - ConstString GetPluginName() override { return ConstString("placeholder"); } + static ConstString GetStaticPluginName() { + return ConstString("placeholder"); + } + ConstString GetPluginName() override { return GetStaticPluginName(); } uint32_t GetPluginVersion() override { return 1; } bool ParseHeader() override { return true; } Type CalculateType() override { return eTypeUnknown; } @@ -109,11 +112,12 @@ class PlaceholderObjectFile : public ObjectFile { GetFileSpec(), m_base, m_base + m_size); } + lldb::addr_t GetBaseImageAddress() const { return m_base; } private: ArchSpec m_arch; UUID m_uuid; - lldb::offset_t m_base; - lldb::offset_t m_size; + lldb::addr_t m_base; + lldb::addr_t m_size; }; } // namespace @@ -351,14 +355,15 @@ void ProcessMinidump::ReadModuleList() { std::vector filtered_modules = m_minidump_parser->GetFilteredModuleList(); - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_MODULES)); + Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_DYNAMIC_LOADER)); for (auto module : filtered_modules) { std::string name = cantFail(m_minidump_parser->GetMinidumpFile().getString( module->ModuleNameRVA)); + const uint64_t load_addr = module->BaseOfImage; + const uint64_t load_size = module->SizeOfImage; LLDB_LOG(log, "found module: name: {0} {1:x10}-{2:x10} size: {3}", name, - module->BaseOfImage, module->BaseOfImage + module->SizeOfImage, - module->SizeOfImage); + load_addr, load_addr + load_size, load_size); // check if the process is wow64 - a 32 bit windows process running on a // 64 bit windows @@ -373,7 +378,7 @@ void ProcessMinidump::ReadModuleList() { Status error; // Try and find a module with a full UUID that matches. This function will // add the module to the target if it finds one. - lldb::ModuleSP module_sp = GetTarget().GetOrCreateModule(module_spec, + lldb::ModuleSP module_sp = GetTarget().GetOrCreateModule(module_spec, true /* notify */, &error); if (!module_sp) { // Try and find a module without specifying the UUID and only looking for @@ -386,8 +391,8 @@ void ProcessMinidump::ReadModuleList() { ModuleSpec basename_module_spec(module_spec); basename_module_spec.GetUUID().Clear(); basename_module_spec.GetFileSpec().GetDirectory().Clear(); - module_sp = GetTarget().GetOrCreateModule(basename_module_spec, - true /* notify */, &error); + module_sp = GetTarget().GetOrCreateModule(basename_module_spec, + true /* notify */, &error); if (module_sp) { // We consider the module to be a match if the minidump UUID is a // prefix of the actual UUID, or if either of the UUIDs are empty. 
@@ -401,6 +406,19 @@ void ProcessMinidump::ReadModuleList() { } } } + if (module_sp) { + // Watch out for place holder modules that have different paths, but the + // same UUID. If the base address is different, create a new module. If + // we don't then we will end up setting the load address of a different + // PlaceholderObjectFile and an assertion will fire. + auto *objfile = module_sp->GetObjectFile(); + if (objfile && objfile->GetPluginName() == + PlaceholderObjectFile::GetStaticPluginName()) { + if (((PlaceholderObjectFile *)objfile)->GetBaseImageAddress() != + load_addr) + module_sp.reset(); + } + } if (!module_sp) { // We failed to locate a matching local object file. Fortunately, the // minidump format encodes enough information about each module's memory @@ -415,12 +433,12 @@ void ProcessMinidump::ReadModuleList() { name); module_sp = Module::CreateModuleFromObjectFile( - module_spec, module->BaseOfImage, module->SizeOfImage); + module_spec, load_addr, load_size); GetTarget().GetImages().Append(module_sp, true /* notify */); } bool load_addr_changed = false; - module_sp->SetLoadAddress(GetTarget(), module->BaseOfImage, false, + module_sp->SetLoadAddress(GetTarget(), load_addr, false, load_addr_changed); } } diff --git a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp index f2e456097dfcc..72dead07dcb48 100644 --- a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp +++ b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp @@ -9,6 +9,7 @@ #include "RegisterContextMinidump_ARM.h" #include "Utility/ARM_DWARF_Registers.h" +#include "Utility/ARM_ehframe_Registers.h" #include "lldb/Utility/RegisterValue.h" #include "lldb/Utility/DataExtractor.h" #include "lldb/Utility/LLDBAssert.h" @@ -29,14 +30,14 @@ using namespace minidump; #define DEF_R(i) \ { \ "r" #i, nullptr, 4, OFFSET(r) + i * 4, eEncodingUint, eFormatHex, \ - {dwarf_r##i, dwarf_r##i, INV, INV, reg_r##i}, \ + {ehframe_r##i, dwarf_r##i, INV, INV, reg_r##i}, \ nullptr, nullptr, nullptr, 0 \ } #define DEF_R_ARG(i, n) \ { \ "r" #i, "arg" #n, 4, OFFSET(r) + i * 4, eEncodingUint, eFormatHex, \ - {dwarf_r##i, dwarf_r##i, LLDB_REGNUM_GENERIC_ARG1 + i, INV, reg_r##i}, \ + {ehframe_r##i, dwarf_r##i, LLDB_REGNUM_GENERIC_ARG1 + i, INV, reg_r##i}, \ nullptr, nullptr, nullptr, 0 \ } @@ -173,7 +174,7 @@ static RegisterInfo g_reg_info_apple_fp = { OFFSET(r) + 7 * 4, eEncodingUint, eFormatHex, - {INV, dwarf_r7, LLDB_REGNUM_GENERIC_FP, INV, reg_r7}, + {ehframe_r7, dwarf_r7, LLDB_REGNUM_GENERIC_FP, INV, reg_r7}, nullptr, nullptr, nullptr, @@ -186,7 +187,7 @@ static RegisterInfo g_reg_info_fp = { OFFSET(r) + 11 * 4, eEncodingUint, eFormatHex, - {INV, dwarf_r11, LLDB_REGNUM_GENERIC_FP, INV, reg_r11}, + {ehframe_r11, dwarf_r11, LLDB_REGNUM_GENERIC_FP, INV, reg_r11}, nullptr, nullptr, nullptr, @@ -213,7 +214,7 @@ static RegisterInfo g_reg_infos[] = { OFFSET(r) + 13 * 4, eEncodingUint, eFormatHex, - {INV, dwarf_sp, LLDB_REGNUM_GENERIC_SP, INV, reg_sp}, + {ehframe_sp, dwarf_sp, LLDB_REGNUM_GENERIC_SP, INV, reg_sp}, nullptr, nullptr, nullptr, @@ -224,7 +225,7 @@ static RegisterInfo g_reg_infos[] = { OFFSET(r) + 14 * 4, eEncodingUint, eFormatHex, - {INV, dwarf_lr, LLDB_REGNUM_GENERIC_RA, INV, reg_lr}, + {ehframe_lr, dwarf_lr, LLDB_REGNUM_GENERIC_RA, INV, reg_lr}, nullptr, nullptr, nullptr, @@ -235,7 +236,7 @@ static RegisterInfo g_reg_infos[] = { OFFSET(r) + 15 * 4, eEncodingUint, eFormatHex, - {INV, dwarf_pc, 
LLDB_REGNUM_GENERIC_PC, INV, reg_pc}, + {ehframe_pc, dwarf_pc, LLDB_REGNUM_GENERIC_PC, INV, reg_pc}, nullptr, nullptr, nullptr, @@ -246,7 +247,7 @@ static RegisterInfo g_reg_infos[] = { OFFSET(cpsr), eEncodingUint, eFormatHex, - {INV, dwarf_cpsr, LLDB_REGNUM_GENERIC_FLAGS, INV, reg_cpsr}, + {ehframe_cpsr, dwarf_cpsr, LLDB_REGNUM_GENERIC_FLAGS, INV, reg_cpsr}, nullptr, nullptr, nullptr, @@ -476,12 +477,22 @@ RegisterContextMinidump_ARM::RegisterContextMinidump_ARM( lldbassert(k_num_regs == k_num_reg_infos); } -size_t RegisterContextMinidump_ARM::GetRegisterCount() { return k_num_regs; } +size_t RegisterContextMinidump_ARM::GetRegisterCountStatic() { + return k_num_regs; +} + +// Used for unit testing so we can verify register info is filled in for +// all register flavors (DWARF, EH Frame, generic, etc). +size_t RegisterContextMinidump_ARM::GetRegisterCount() { + return GetRegisterCountStatic(); +} +// Used for unit testing so we can verify register info is filled in. const RegisterInfo * -RegisterContextMinidump_ARM::GetRegisterInfoAtIndex(size_t reg) { +RegisterContextMinidump_ARM::GetRegisterInfoAtIndexStatic(size_t reg, + bool apple) { if (reg < k_num_reg_infos) { - if (m_apple) { + if (apple) { if (reg == reg_r7) return &g_reg_info_apple_fp; } else { @@ -493,6 +504,11 @@ RegisterContextMinidump_ARM::GetRegisterInfoAtIndex(size_t reg) { return nullptr; } +const RegisterInfo * +RegisterContextMinidump_ARM::GetRegisterInfoAtIndex(size_t reg) { + return GetRegisterInfoAtIndexStatic(reg, m_apple); +} + size_t RegisterContextMinidump_ARM::GetRegisterSetCount() { return k_num_reg_sets; } diff --git a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.h b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.h index eff8cdfef00a0..7af3b98a6fe7b 100644 --- a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.h +++ b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.h @@ -38,6 +38,12 @@ class RegisterContextMinidump_ARM : public lldb_private::RegisterContext { // Do nothing... registers are always valid... } + // Used for unit testing. + static size_t GetRegisterCountStatic(); + // Used for unit testing. + static const lldb_private::RegisterInfo * + GetRegisterInfoAtIndexStatic(size_t reg, bool apple); + size_t GetRegisterCount() override; const lldb_private::RegisterInfo *GetRegisterInfoAtIndex(size_t reg) override; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp index 13fb01ff21bbf..5346e3f5914d5 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp @@ -1028,22 +1028,23 @@ bool PythonFile::Check(PyObject *py_obj) { // first-class object type anymore. `PyFile_FromFd` is just a thin wrapper // over `io.open()`, which returns some object derived from `io.IOBase`. As a // result, the only way to detect a file in Python 3 is to check whether it - // inherits from `io.IOBase`. Since it is possible for non-files to also - // inherit from `io.IOBase`, we additionally verify that it has the `fileno` - // attribute, which should guarantee that it is backed by the file system. 
- PythonObject io_module(PyRefType::Owned, PyImport_ImportModule("io")); - PythonDictionary io_dict(PyRefType::Borrowed, - PyModule_GetDict(io_module.get())); - PythonObject io_base_class = io_dict.GetItemForKey(PythonString("IOBase")); - - PythonObject object_type(PyRefType::Owned, PyObject_Type(py_obj)); - - if (1 != PyObject_IsSubclass(object_type.get(), io_base_class.get())) + // inherits from `io.IOBase`. + auto io_module = PythonModule::Import("io"); + if (!io_module) { + llvm::consumeError(io_module.takeError()); return false; - if (!object_type.HasAttribute("fileno")) + } + auto iobase = io_module.get().Get("IOBase"); + if (!iobase) { + llvm::consumeError(iobase.takeError()); return false; - - return true; + } + int r = PyObject_IsInstance(py_obj, iobase.get().get()); + if (r < 0) { + llvm::consumeError(exception()); // clear the exception and log it. + return false; + } + return !!r; #endif } @@ -1096,6 +1097,20 @@ FileUP PythonFile::GetUnderlyingFile() const { return file; } +namespace { +class GIL { +public: + GIL() { + m_state = PyGILState_Ensure(); + assert(!PyErr_Occurred()); + } + ~GIL() { PyGILState_Release(m_state); } + +protected: + PyGILState_STATE m_state; +}; +} // namespace + const char *PythonException::toCString() const { if (!m_repr_bytes) return "unknown exception"; @@ -1150,4 +1165,376 @@ std::error_code PythonException::convertToErrorCode() const { char PythonException::ID = 0; +llvm::Expected GetOptionsForPyObject(const PythonObject &obj) { + uint32_t options = 0; +#if PY_MAJOR_VERSION >= 3 + auto readable = As(obj.CallMethod("readable")); + if (!readable) + return readable.takeError(); + auto writable = As(obj.CallMethod("writable")); + if (!writable) + return writable.takeError(); + if (readable.get()) + options |= File::eOpenOptionRead; + if (writable.get()) + options |= File::eOpenOptionWrite; +#else + PythonString py_mode = obj.GetAttributeValue("mode").AsType(); + options = File::GetOptionsFromMode(py_mode.GetString()); +#endif + return options; +} + +// Base class template for python files. All it knows how to do +// is hold a reference to the python object and close or flush it +// when the File is closed. +namespace { +template class OwnedPythonFile : public Base { +public: + template + OwnedPythonFile(const PythonFile &file, bool borrowed, Args... 
args) + : Base(args...), m_py_obj(file), m_borrowed(borrowed) { + assert(m_py_obj); + } + + ~OwnedPythonFile() override { + assert(m_py_obj); + GIL takeGIL; + Close(); + m_py_obj.Reset(); + } + + bool IsPythonSideValid() const { + GIL takeGIL; + auto closed = As(m_py_obj.GetAttribute("closed")); + if (!closed) { + llvm::consumeError(closed.takeError()); + return false; + } + return !closed.get(); + } + + bool IsValid() const override { + return IsPythonSideValid() && Base::IsValid(); + } + + Status Close() override { + assert(m_py_obj); + Status py_error, base_error; + GIL takeGIL; + if (!m_borrowed) { + auto r = m_py_obj.CallMethod("close"); + if (!r) + py_error = Status(r.takeError()); + } + base_error = Base::Close(); + if (py_error.Fail()) + return py_error; + return base_error; + }; + +protected: + PythonFile m_py_obj; + bool m_borrowed; +}; +} // namespace + +// A SimplePythonFile is a OwnedPythonFile that just does all I/O as +// a NativeFile +namespace { +class SimplePythonFile : public OwnedPythonFile { +public: + SimplePythonFile(const PythonFile &file, bool borrowed, int fd, + uint32_t options) + : OwnedPythonFile(file, borrowed, fd, options, false) {} +}; +} // namespace + +#if PY_MAJOR_VERSION >= 3 + +namespace { +class PythonBuffer { +public: + PythonBuffer &operator=(const PythonBuffer &) = delete; + PythonBuffer(const PythonBuffer &) = delete; + + static Expected Create(PythonObject &obj, + int flags = PyBUF_SIMPLE) { + Py_buffer py_buffer = {}; + PyObject_GetBuffer(obj.get(), &py_buffer, flags); + if (!py_buffer.obj) + return llvm::make_error(); + return PythonBuffer(py_buffer); + } + + PythonBuffer(PythonBuffer &&other) { + m_buffer = other.m_buffer; + other.m_buffer.obj = nullptr; + } + + ~PythonBuffer() { + if (m_buffer.obj) + PyBuffer_Release(&m_buffer); + } + + Py_buffer &get() { return m_buffer; } + +private: + // takes ownership of the buffer. + PythonBuffer(const Py_buffer &py_buffer) : m_buffer(py_buffer) {} + Py_buffer m_buffer; +}; +} // namespace + +// Shared methods between TextPythonFile and BinaryPythonFile +namespace { +class PythonIOFile : public OwnedPythonFile { +public: + PythonIOFile(const PythonFile &file, bool borrowed) + : OwnedPythonFile(file, borrowed) {} + + ~PythonIOFile() override { Close(); } + + bool IsValid() const override { return IsPythonSideValid(); } + + Status Close() override { + assert(m_py_obj); + GIL takeGIL; + if (m_borrowed) + return Flush(); + auto r = m_py_obj.CallMethod("close"); + if (!r) + return Status(r.takeError()); + return Status(); + } + + Status Flush() override { + GIL takeGIL; + auto r = m_py_obj.CallMethod("flush"); + if (!r) + return Status(r.takeError()); + return Status(); + } + +}; +} // namespace + +namespace { +class BinaryPythonFile : public PythonIOFile { +protected: + int m_descriptor; + +public: + BinaryPythonFile(int fd, const PythonFile &file, bool borrowed) + : PythonIOFile(file, borrowed), + m_descriptor(File::DescriptorIsValid(fd) ? 
fd + : File::kInvalidDescriptor) {} + + int GetDescriptor() const override { return m_descriptor; } + + Status Write(const void *buf, size_t &num_bytes) override { + GIL takeGIL; + PyObject *pybuffer_p = PyMemoryView_FromMemory( + const_cast((const char *)buf), num_bytes, PyBUF_READ); + if (!pybuffer_p) + return Status(llvm::make_error()); + auto pybuffer = Take(pybuffer_p); + num_bytes = 0; + auto bytes_written = As(m_py_obj.CallMethod("write", pybuffer)); + if (!bytes_written) + return Status(bytes_written.takeError()); + if (bytes_written.get() < 0) + return Status(".write() method returned a negative number!"); + static_assert(sizeof(long long) >= sizeof(size_t), "overflow"); + num_bytes = bytes_written.get(); + return Status(); + } + + Status Read(void *buf, size_t &num_bytes) override { + GIL takeGIL; + static_assert(sizeof(long long) >= sizeof(size_t), "overflow"); + auto pybuffer_obj = + m_py_obj.CallMethod("read", (unsigned long long)num_bytes); + if (!pybuffer_obj) + return Status(pybuffer_obj.takeError()); + num_bytes = 0; + if (pybuffer_obj.get().IsNone()) { + // EOF + num_bytes = 0; + return Status(); + } + auto pybuffer = PythonBuffer::Create(pybuffer_obj.get()); + if (!pybuffer) + return Status(pybuffer.takeError()); + memcpy(buf, pybuffer.get().get().buf, pybuffer.get().get().len); + num_bytes = pybuffer.get().get().len; + return Status(); + } +}; +} // namespace + +namespace { +class TextPythonFile : public PythonIOFile { +protected: + int m_descriptor; + +public: + TextPythonFile(int fd, const PythonFile &file, bool borrowed) + : PythonIOFile(file, borrowed), + m_descriptor(File::DescriptorIsValid(fd) ? fd + : File::kInvalidDescriptor) {} + + int GetDescriptor() const override { return m_descriptor; } + + Status Write(const void *buf, size_t &num_bytes) override { + GIL takeGIL; + auto pystring = + PythonString::FromUTF8(llvm::StringRef((const char *)buf, num_bytes)); + if (!pystring) + return Status(pystring.takeError()); + num_bytes = 0; + auto bytes_written = + As(m_py_obj.CallMethod("write", pystring.get())); + if (!bytes_written) + return Status(bytes_written.takeError()); + if (bytes_written.get() < 0) + return Status(".write() method returned a negative number!"); + static_assert(sizeof(long long) >= sizeof(size_t), "overflow"); + num_bytes = bytes_written.get(); + return Status(); + } + + Status Read(void *buf, size_t &num_bytes) override { + GIL takeGIL; + size_t num_chars = num_bytes / 6; + size_t orig_num_bytes = num_bytes; + num_bytes = 0; + if (orig_num_bytes < 6) { + return Status("can't read less than 6 bytes from a utf8 text stream"); + } + auto pystring = As( + m_py_obj.CallMethod("read", (unsigned long long)num_chars)); + if (!pystring) + return Status(pystring.takeError()); + if (pystring.get().IsNone()) { + // EOF + return Status(); + } + auto stringref = pystring.get().AsUTF8(); + if (!stringref) + return Status(stringref.takeError()); + num_bytes = stringref.get().size(); + memcpy(buf, stringref.get().begin(), num_bytes); + return Status(); + } +}; +} // namespace + +#endif + +llvm::Expected PythonFile::ConvertToFile(bool borrowed) { + if (!IsValid()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "invalid PythonFile"); + + int fd = PyObject_AsFileDescriptor(m_py_obj); + if (fd < 0) { + PyErr_Clear(); + return ConvertToFileForcingUseOfScriptingIOMethods(borrowed); + } + auto options = GetOptionsForPyObject(*this); + if (!options) + return options.takeError(); + + // LLDB and python will not share I/O buffers. 
We should probably + // flush the python buffers now. + auto r = CallMethod("flush"); + if (!r) + return r.takeError(); + + FileSP file_sp; + if (borrowed) { + // In this case we we don't need to retain the python + // object at all. + file_sp = std::make_shared(fd, options.get(), false); + } else { + file_sp = std::static_pointer_cast( + std::make_shared(*this, borrowed, fd, options.get())); + } + if (!file_sp->IsValid()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "invalid File"); + + return file_sp; +} + +llvm::Expected +PythonFile::ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed) { + + assert(!PyErr_Occurred()); + + if (!IsValid()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "invalid PythonFile"); + +#if PY_MAJOR_VERSION < 3 + + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "not supported on python 2"); + +#else + + int fd = PyObject_AsFileDescriptor(m_py_obj); + if (fd < 0) { + PyErr_Clear(); + fd = File::kInvalidDescriptor; + } + + auto io_module = PythonModule::Import("io"); + if (!io_module) + return io_module.takeError(); + auto textIOBase = io_module.get().Get("TextIOBase"); + if (!textIOBase) + return textIOBase.takeError(); + auto rawIOBase = io_module.get().Get("RawIOBase"); + if (!rawIOBase) + return rawIOBase.takeError(); + auto bufferedIOBase = io_module.get().Get("BufferedIOBase"); + if (!bufferedIOBase) + return bufferedIOBase.takeError(); + + FileSP file_sp; + + auto isTextIO = IsInstance(textIOBase.get()); + if (!isTextIO) + return isTextIO.takeError(); + if (isTextIO.get()) + file_sp = std::static_pointer_cast( + std::make_shared(fd, *this, borrowed)); + + auto isRawIO = IsInstance(rawIOBase.get()); + if (!isRawIO) + return isRawIO.takeError(); + auto isBufferedIO = IsInstance(bufferedIOBase.get()); + if (!isBufferedIO) + return isBufferedIO.takeError(); + + if (isRawIO.get() || isBufferedIO.get()) { + file_sp = std::static_pointer_cast( + std::make_shared(fd, *this, borrowed)); + } + + if (!file_sp) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "python file is neither text nor binary"); + + if (!file_sp->IsValid()) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "invalid File"); + + return file_sp; + +#endif +} + #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index fefa12c70684d..2f81c44f79555 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -879,8 +879,9 @@ bool ScriptInterpreterPythonImpl::ExecuteOneLine( ::setbuf(outfile_handle, nullptr); result->SetImmediateOutputFile( - debugger.GetOutputFile().GetStream()); - result->SetImmediateErrorFile(debugger.GetErrorFile().GetStream()); + debugger.GetOutputStream().GetFileSP()); + result->SetImmediateErrorFile( + debugger.GetErrorStream().GetFileSP()); } } } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp index 97dbd6c4b90aa..0a5073b8cd9eb 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp @@ -110,6 +110,7 @@ void AppleDWARFIndex::GetTypes(const DWARFDeclContext &context, const bool has_qualified_name_hash = m_apple_types_up->GetHeader().header_data.ContainsAtom( DWARFMappedHash::eAtomTypeQualNameHash); + const 
ConstString type_name(context[0].name); const dw_tag_t tag = context[0].tag; if (has_tag && has_qualified_name_hash) { @@ -119,12 +120,32 @@ void AppleDWARFIndex::GetTypes(const DWARFDeclContext &context, m_module.LogMessage(log, "FindByNameAndTagAndQualifiedNameHash()"); m_apple_types_up->FindByNameAndTagAndQualifiedNameHash( type_name.GetStringRef(), tag, qualified_name_hash, offsets); - } else if (has_tag) { + return; + } + + if (has_tag) { + // When searching for a scoped type (for example, + // "std::vector::const_iterator") searching for the innermost + // name alone ("const_iterator") could yield many false + // positives. By searching for the parent type ("vector") + // first we can avoid extracting type DIEs from object files that + // would fail the filter anyway. + if (!has_qualified_name_hash && (context.GetSize() > 1) && + (context[1].tag == DW_TAG_class_type || + context[1].tag == DW_TAG_structure_type)) { + DIEArray class_matches; + m_apple_types_up->FindByName(context[1].name, class_matches); + if (class_matches.empty()) + return; + } + if (log) m_module.LogMessage(log, "FindByNameAndTag()"); m_apple_types_up->FindByNameAndTag(type_name.GetStringRef(), tag, offsets); - } else - m_apple_types_up->FindByName(type_name.GetStringRef(), offsets); + return; + } + + m_apple_types_up->FindByName(type_name.GetStringRef(), offsets); } void AppleDWARFIndex::GetNamespaces(ConstString name, DIEArray &offsets) { diff --git a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp index 946d99cc0ce5f..bf6f60a2d26c5 100644 --- a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp +++ b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp @@ -1371,7 +1371,6 @@ bool x86AssemblyInspectionEngine::AugmentUnwindPlanFromCallSite( int row_id = 1; bool unwind_plan_updated = false; UnwindPlan::RowSP row(new UnwindPlan::Row(*first_row)); - m_cur_insn = data + offset; // After a mid-function epilogue we will need to re-insert the original // unwind rules so unwinds work for the remainder of the function. These @@ -1381,19 +1380,17 @@ bool x86AssemblyInspectionEngine::AugmentUnwindPlanFromCallSite( while (offset < size) { m_cur_insn = data + offset; int insn_len; - if (!instruction_length(m_cur_insn, insn_len, size - offset) - || insn_len == 0 - || insn_len > kMaxInstructionByteSize) { + if (!instruction_length(m_cur_insn, insn_len, size - offset) || + insn_len == 0 || insn_len > kMaxInstructionByteSize) { // An unrecognized/junk instruction. break; } // Advance offsets. offset += insn_len; - m_cur_insn = data + offset; // offset is pointing beyond the bounds of the function; stop looping. 
- if (offset >= size) + if (offset >= size) continue; if (reinstate_unwind_state) { @@ -1547,16 +1544,18 @@ bool x86AssemblyInspectionEngine::AugmentUnwindPlanFromCallSite( // [0x5d] pop %rbp/%ebp // => [0xc3] ret if (pop_rbp_pattern_p() || leave_pattern_p()) { - offset += 1; - row->SetOffset(offset); - row->GetCFAValue().SetIsRegisterPlusOffset( - first_row->GetCFAValue().GetRegisterNumber(), m_wordsize); - - UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); - unwind_plan.InsertRow(new_row); - unwind_plan_updated = true; - reinstate_unwind_state = true; - continue; + m_cur_insn++; + if (ret_pattern_p()) { + row->SetOffset(offset); + row->GetCFAValue().SetIsRegisterPlusOffset( + first_row->GetCFAValue().GetRegisterNumber(), m_wordsize); + + UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row)); + unwind_plan.InsertRow(new_row); + unwind_plan_updated = true; + reinstate_unwind_state = true; + continue; + } } } else { // CFA register is not sp or fp. diff --git a/lldb/source/Symbol/CxxModuleHandler.cpp b/lldb/source/Symbol/CxxModuleHandler.cpp index 68a2aab80bd6e..19e80e5036bcd 100644 --- a/lldb/source/Symbol/CxxModuleHandler.cpp +++ b/lldb/source/Symbol/CxxModuleHandler.cpp @@ -175,6 +175,8 @@ T *createDecl(ASTImporter &importer, Decl *from_d, Args &&... args) { } llvm::Optional CxxModuleHandler::tryInstantiateStdTemplate(Decl *d) { + Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS); + // If we don't have a template to instiantiate, then there is nothing to do. auto td = dyn_cast(d); if (!td) @@ -196,9 +198,15 @@ llvm::Optional CxxModuleHandler::tryInstantiateStdTemplate(Decl *d) { // Find the local DeclContext that corresponds to the DeclContext of our // decl we want to import. - auto to_context = getEqualLocalDeclContext(*m_sema, td->getDeclContext()); - if (!to_context) + llvm::Expected to_context = + getEqualLocalDeclContext(*m_sema, td->getDeclContext()); + if (!to_context) { + LLDB_LOG_ERROR(log, to_context.takeError(), + "Got error while searching equal local DeclContext for decl " + "'{1}':\n{0}", + td->getName()); return {}; + } // Look up the template in our local context. std::unique_ptr lookup = @@ -215,8 +223,6 @@ llvm::Optional CxxModuleHandler::tryInstantiateStdTemplate(Decl *d) { // Import the foreign template arguments. llvm::SmallVector imported_args; - Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS); - // If this logic is changed, also update templateArgsAreSupported. 
for (const TemplateArgument &arg : foreign_args.asArray()) { switch (arg.getKind()) { diff --git a/lldb/source/Symbol/FuncUnwinders.cpp b/lldb/source/Symbol/FuncUnwinders.cpp index 09cb9b00aaf3b..f609bf7821e18 100644 --- a/lldb/source/Symbol/FuncUnwinders.cpp +++ b/lldb/source/Symbol/FuncUnwinders.cpp @@ -10,6 +10,7 @@ #include "lldb/Core/Address.h" #include "lldb/Core/AddressRange.h" #include "lldb/Symbol/ArmUnwindInfo.h" +#include "lldb/Symbol/CallFrameInfo.h" #include "lldb/Symbol/CompactUnwindInfo.h" #include "lldb/Symbol/DWARFCallFrameInfo.h" #include "lldb/Symbol/ObjectFile.h" @@ -58,6 +59,8 @@ UnwindPlanSP FuncUnwinders::GetUnwindPlanAtCallSite(Target &target, Thread &thread) { std::lock_guard<std::recursive_mutex> guard(m_mutex); + if (UnwindPlanSP plan_sp = GetObjectFileUnwindPlan(target)) + return plan_sp; if (UnwindPlanSP plan_sp = GetSymbolFileUnwindPlan(thread)) return plan_sp; if (UnwindPlanSP plan_sp = GetDebugFrameUnwindPlan(target)) @@ -97,6 +100,26 @@ UnwindPlanSP FuncUnwinders::GetCompactUnwindUnwindPlan(Target &target) { return UnwindPlanSP(); } +lldb::UnwindPlanSP FuncUnwinders::GetObjectFileUnwindPlan(Target &target) { + std::lock_guard<std::recursive_mutex> guard(m_mutex); + if (m_unwind_plan_object_file_sp.get() || + m_tried_unwind_plan_object_file) + return m_unwind_plan_object_file_sp; + + m_tried_unwind_plan_object_file = true; + if (m_range.GetBaseAddress().IsValid()) { + CallFrameInfo *object_file_frame = m_unwind_table.GetObjectFileUnwindInfo(); + if (object_file_frame) { + m_unwind_plan_object_file_sp = + std::make_shared<UnwindPlan>(lldb::eRegisterKindGeneric); + if (!object_file_frame->GetUnwindPlan(m_range, + *m_unwind_plan_object_file_sp)) + m_unwind_plan_object_file_sp.reset(); + } + } + return m_unwind_plan_object_file_sp; +} + UnwindPlanSP FuncUnwinders::GetEHFrameUnwindPlan(Target &target) { std::lock_guard<std::recursive_mutex> guard(m_mutex); if (m_unwind_plan_eh_frame_sp.get() || m_tried_unwind_plan_eh_frame) @@ -185,6 +208,38 @@ UnwindPlanSP FuncUnwinders::GetSymbolFileUnwindPlan(Thread &thread) { return m_unwind_plan_symbol_file_sp; } +UnwindPlanSP +FuncUnwinders::GetObjectFileAugmentedUnwindPlan(Target &target, + Thread &thread) { + std::lock_guard<std::recursive_mutex> guard(m_mutex); + if (m_unwind_plan_object_file_augmented_sp.get() || + m_tried_unwind_plan_object_file_augmented) + return m_unwind_plan_object_file_augmented_sp; + + m_tried_unwind_plan_object_file_augmented = true; + + UnwindPlanSP object_file_unwind_plan = GetObjectFileUnwindPlan(target); + if (!object_file_unwind_plan) + return m_unwind_plan_object_file_augmented_sp; + + m_unwind_plan_object_file_augmented_sp = + std::make_shared<UnwindPlan>(*object_file_unwind_plan); + + // Augment the instructions with epilogue descriptions if necessary + // so the UnwindPlan can be used at any instruction in the function.
+ + UnwindAssemblySP assembly_profiler_sp(GetUnwindAssemblyProfiler(target)); + if (assembly_profiler_sp) { + if (!assembly_profiler_sp->AugmentUnwindPlanFromCallSite( + m_range, thread, *m_unwind_plan_object_file_augmented_sp)) { + m_unwind_plan_object_file_augmented_sp.reset(); + } + } else { + m_unwind_plan_object_file_augmented_sp.reset(); + } + return m_unwind_plan_object_file_augmented_sp; +} + UnwindPlanSP FuncUnwinders::GetEHFrameAugmentedUnwindPlan(Target &target, Thread &thread) { std::lock_guard guard(m_mutex); @@ -328,6 +383,8 @@ UnwindPlanSP FuncUnwinders::GetUnwindPlanAtNonCallSite(Target &target, UnwindPlanSP eh_frame_sp = GetEHFrameUnwindPlan(target); if (!eh_frame_sp) eh_frame_sp = GetDebugFrameUnwindPlan(target); + if (!eh_frame_sp) + eh_frame_sp = GetObjectFileUnwindPlan(target); UnwindPlanSP arch_default_at_entry_sp = GetUnwindPlanArchitectureDefaultAtFunctionEntry(thread); UnwindPlanSP arch_default_sp = GetUnwindPlanArchitectureDefault(thread); @@ -366,6 +423,8 @@ UnwindPlanSP FuncUnwinders::GetUnwindPlanAtNonCallSite(Target &target, return plan_sp; if (UnwindPlanSP plan_sp = GetEHFrameAugmentedUnwindPlan(target, thread)) return plan_sp; + if (UnwindPlanSP plan_sp = GetObjectFileAugmentedUnwindPlan(target, thread)) + return plan_sp; return assembly_sp; } @@ -473,6 +532,9 @@ Address FuncUnwinders::GetLSDAAddress(Target &target) { if (unwind_plan_sp.get() == nullptr) { unwind_plan_sp = GetCompactUnwindUnwindPlan(target); } + if (unwind_plan_sp.get() == nullptr) { + unwind_plan_sp = GetObjectFileUnwindPlan(target); + } if (unwind_plan_sp.get() && unwind_plan_sp->GetLSDAAddress().IsValid()) { lsda_addr = unwind_plan_sp->GetLSDAAddress(); } @@ -486,6 +548,9 @@ Address FuncUnwinders::GetPersonalityRoutinePtrAddress(Target &target) { if (unwind_plan_sp.get() == nullptr) { unwind_plan_sp = GetCompactUnwindUnwindPlan(target); } + if (unwind_plan_sp.get() == nullptr) { + unwind_plan_sp = GetObjectFileUnwindPlan(target); + } if (unwind_plan_sp.get() && unwind_plan_sp->GetPersonalityFunctionPtr().IsValid()) { personality_addr = unwind_plan_sp->GetPersonalityFunctionPtr(); diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 8bfce5ce7a4f4..38bc7722d0d02 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -11,6 +11,7 @@ #include "lldb/Core/ModuleSpec.h" #include "lldb/Core/PluginManager.h" #include "lldb/Core/Section.h" +#include "lldb/Symbol/CallFrameInfo.h" #include "lldb/Symbol/ObjectContainer.h" #include "lldb/Symbol/SymbolFile.h" #include "lldb/Target/Process.h" @@ -670,6 +671,10 @@ ObjectFile::GetLoadableData(Target &target) { return loadables; } +std::unique_ptr ObjectFile::CreateCallFrameInfo() { + return {}; +} + void ObjectFile::RelocateSection(lldb_private::Section *section) { } diff --git a/lldb/source/Symbol/UnwindTable.cpp b/lldb/source/Symbol/UnwindTable.cpp index 7566d8027f056..045957a67b3be 100644 --- a/lldb/source/Symbol/UnwindTable.cpp +++ b/lldb/source/Symbol/UnwindTable.cpp @@ -13,6 +13,7 @@ #include "lldb/Core/Module.h" #include "lldb/Core/Section.h" #include "lldb/Symbol/ArmUnwindInfo.h" +#include "lldb/Symbol/CallFrameInfo.h" #include "lldb/Symbol/CompactUnwindInfo.h" #include "lldb/Symbol/DWARFCallFrameInfo.h" #include "lldb/Symbol/FuncUnwinders.h" @@ -29,7 +30,8 @@ using namespace lldb_private; UnwindTable::UnwindTable(Module &module) : m_module(module), m_unwinds(), m_initialized(false), m_mutex(), - m_eh_frame_up(), m_compact_unwind_up(), m_arm_unwind_up() {} + 
m_object_file_unwind_up(), m_eh_frame_up(), m_compact_unwind_up(), + m_arm_unwind_up() {} // We can't do some of this initialization when the ObjectFile is running its // ctor; delay doing it until needed for something. @@ -47,6 +49,8 @@ void UnwindTable::Initialize() { if (!object_file) return; + m_object_file_unwind_up = object_file->CreateCallFrameInfo(); + SectionList *sl = m_module.GetSectionList(); if (!sl) return; @@ -83,7 +87,12 @@ llvm::Optional UnwindTable::GetAddressRange(const Address &addr, SymbolContext &sc) { AddressRange range; - // First check the symbol context + // First check the unwind info from the object file plugin + if (m_object_file_unwind_up && + m_object_file_unwind_up->GetAddressRange(addr, range)) + return range; + + // Check the symbol context if (sc.GetAddressRange(eSymbolContextFunction | eSymbolContextSymbol, 0, false, range) && range.GetBaseAddress().IsValid()) @@ -162,6 +171,11 @@ void UnwindTable::Dump(Stream &s) { s.EOL(); } +lldb_private::CallFrameInfo *UnwindTable::GetObjectFileUnwindInfo() { + Initialize(); + return m_object_file_unwind_up.get(); +} + DWARFCallFrameInfo *UnwindTable::GetEHFrameInfo() { Initialize(); return m_eh_frame_up.get(); diff --git a/lldb/source/Target/LanguageRuntime.cpp b/lldb/source/Target/LanguageRuntime.cpp index dd44158106132..999ac99e93c35 100644 --- a/lldb/source/Target/LanguageRuntime.cpp +++ b/lldb/source/Target/LanguageRuntime.cpp @@ -111,12 +111,11 @@ class ExceptionBreakpointResolver : public BreakpointResolver { ~ExceptionBreakpointResolver() override = default; Searcher::CallbackReturn SearchCallback(SearchFilter &filter, - SymbolContext &context, Address *addr, - bool containing) override { + SymbolContext &context, + Address *addr) override { if (SetActualResolver()) - return m_actual_resolver_sp->SearchCallback(filter, context, addr, - containing); + return m_actual_resolver_sp->SearchCallback(filter, context, addr); else return eCallbackReturnStop; } diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 38406214ba12f..85b0bf926f316 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -609,8 +609,7 @@ lldb::BreakpointSP Target::CreateScriptedBreakpoint( extra_args_impl->SetObjectSP(extra_args_sp); BreakpointResolverSP resolver_sp(new BreakpointResolverScripted( - nullptr, class_name, depth, extra_args_impl, - *GetDebugger().GetScriptInterpreter())); + nullptr, class_name, depth, extra_args_impl)); return CreateBreakpoint(filter_sp, resolver_sp, internal, false, true); } diff --git a/lldb/source/Utility/DataExtractor.cpp b/lldb/source/Utility/DataExtractor.cpp index 79a1f75d737c1..f642a8fc76392 100644 --- a/lldb/source/Utility/DataExtractor.cpp +++ b/lldb/source/Utility/DataExtractor.cpp @@ -816,26 +816,25 @@ DataExtractor::CopyByteOrderedData(offset_t src_offset, offset_t src_len, // non-zero and there aren't enough available bytes, nullptr will be returned // and "offset_ptr" will not be updated. const char *DataExtractor::GetCStr(offset_t *offset_ptr) const { - const char *cstr = reinterpret_cast(PeekData(*offset_ptr, 1)); - if (cstr) { - const char *cstr_end = cstr; - const char *end = reinterpret_cast(m_end); - while (cstr_end < end && *cstr_end) - ++cstr_end; - - // Now we are either at the end of the data or we point to the - // NULL C string terminator with cstr_end... 
- if (*cstr_end == '\0') { - // Advance the offset with one extra byte for the NULL terminator - *offset_ptr += (cstr_end - cstr + 1); - return cstr; - } + const char *start = reinterpret_cast(PeekData(*offset_ptr, 1)); + // Already at the end of the data. + if (!start) + return nullptr; - // We reached the end of the data without finding a NULL C string - // terminator. Fall through and return nullptr otherwise anyone that would - // have used the result as a C string can wander into unknown memory... - } - return nullptr; + const char *end = reinterpret_cast(m_end); + + // Check all bytes for a null terminator that terminates a C string. + const char *terminator_or_end = std::find(start, end, '\0'); + + // We didn't find a null terminator, so return nullptr to indicate that there + // is no valid C string at that offset. + if (terminator_or_end == end) + return nullptr; + + // Update offset_ptr for the caller to point to the data behind the + // terminator (which is 1 byte long). + *offset_ptr += (terminator_or_end - start + 1UL); + return start; } // Extracts a NULL terminated C string from the fixed length field of length diff --git a/lldb/source/Utility/ProcessInfo.cpp b/lldb/source/Utility/ProcessInfo.cpp index 832e5efae29c8..fa418f333bef4 100644 --- a/lldb/source/Utility/ProcessInfo.cpp +++ b/lldb/source/Utility/ProcessInfo.cpp @@ -243,8 +243,14 @@ void ProcessInstanceInfo::DumpAsTableRow(Stream &s, UserIDResolver &resolver, } } +bool ProcessInstanceInfoMatch::ArchitectureMatches( + const ArchSpec &arch_spec) const { + return !m_match_info.GetArchitecture().IsValid() || + m_match_info.GetArchitecture().IsCompatibleMatch(arch_spec); +} + bool ProcessInstanceInfoMatch::NameMatches(const char *process_name) const { - if (m_name_match_type == NameMatch::Ignore || process_name == nullptr) + if (m_name_match_type == NameMatch::Ignore) return true; const char *match_name = m_match_info.GetName(); if (!match_name) @@ -253,11 +259,8 @@ bool ProcessInstanceInfoMatch::NameMatches(const char *process_name) const { return lldb_private::NameMatches(process_name, m_name_match_type, match_name); } -bool ProcessInstanceInfoMatch::Matches( +bool ProcessInstanceInfoMatch::ProcessIDsMatch( const ProcessInstanceInfo &proc_info) const { - if (!NameMatches(proc_info.GetName())) - return false; - if (m_match_info.ProcessIDIsValid() && m_match_info.GetProcessID() != proc_info.GetProcessID()) return false; @@ -265,7 +268,11 @@ bool ProcessInstanceInfoMatch::Matches( if (m_match_info.ParentProcessIDIsValid() && m_match_info.GetParentProcessID() != proc_info.GetParentProcessID()) return false; + return true; +} +bool ProcessInstanceInfoMatch::UserIDsMatch( + const ProcessInstanceInfo &proc_info) const { if (m_match_info.UserIDIsValid() && m_match_info.GetUserID() != proc_info.GetUserID()) return false; @@ -281,13 +288,14 @@ bool ProcessInstanceInfoMatch::Matches( if (m_match_info.EffectiveGroupIDIsValid() && m_match_info.GetEffectiveGroupID() != proc_info.GetEffectiveGroupID()) return false; - - if (m_match_info.GetArchitecture().IsValid() && - !m_match_info.GetArchitecture().IsCompatibleMatch( - proc_info.GetArchitecture())) - return false; return true; } +bool ProcessInstanceInfoMatch::Matches( + const ProcessInstanceInfo &proc_info) const { + return ArchitectureMatches(proc_info.GetArchitecture()) && + ProcessIDsMatch(proc_info) && UserIDsMatch(proc_info) && + NameMatches(proc_info.GetName()); +} bool ProcessInstanceInfoMatch::MatchAllProcesses() const { if (m_name_match_type != NameMatch::Ignore) diff 
--git a/lldb/test/API/lit.cfg b/lldb/test/API/lit.cfg.py similarity index 74% rename from lldb/test/API/lit.cfg rename to lldb/test/API/lit.cfg.py index bb9e3aaaaa44d..f29d9047e24e2 100644 --- a/lldb/test/API/lit.cfg +++ b/lldb/test/API/lit.cfg.py @@ -5,6 +5,7 @@ import os import platform import shlex +import shutil import lit.formats @@ -52,6 +53,23 @@ def find_shlibpath_var(): lit_config.warning("unable to inject shared library path on '{}'".format( platform.system())) +# Clean the module caches in the test build directory. This is necessary in an +# incremental build whenever clang changes underneath, so doing it once per +# lit.py invocation is close enough. +for cachedir in [config.clang_module_cache, config.lldb_module_cache]: + if os.path.isdir(cachedir): + print("Deleting module cache at %s."%cachedir) + shutil.rmtree(cachedir) + +# Set a default per-test timeout of 10 minutes. Setting a timeout per test +# requires that killProcessAndChildren() is supported on the platform and +# lit complains if the value is set but it is not supported. +supported, errormsg = lit_config.maxIndividualTestTimeIsSupported +if supported: + lit_config.maxIndividualTestTime = 600 +else: + lit_config.warning("Could not set a default per-test timeout. " + errormsg) + # Build dotest command. dotest_cmd = [config.dotest_path] dotest_cmd.extend(config.dotest_args_str.split(';')) @@ -70,7 +88,10 @@ def find_shlibpath_var(): dotest_cmd += ['--build-dir', config.lldb_build_directory] if config.lldb_module_cache: - dotest_cmd += ['--module-cache-dir', config.lldb_module_cache] + dotest_cmd += ['--lldb-module-cache-dir', config.lldb_module_cache] + +if config.clang_module_cache: + dotest_cmd += ['--clang-module-cache-dir', config.clang_module_cache] # Load LLDB test format. sys.path.append(os.path.join(config.lldb_src_root, "test", "API")) diff --git a/lldb/test/API/lit.site.cfg.in b/lldb/test/API/lit.site.cfg.py.in similarity index 89% rename from lldb/test/API/lit.site.cfg.in rename to lldb/test/API/lit.site.cfg.py.in index 883bc4033005b..b5e8ed9df8d2d 100644 --- a/lldb/test/API/lit.site.cfg.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -17,13 +17,14 @@ config.shared_libs = @LLVM_ENABLE_SHARED_LIBS@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.target_triple = "@TARGET_TRIPLE@" config.lldb_build_directory = "@LLDB_TEST_BUILD_DIRECTORY@" -config.lldb_module_cache = "@LLDB_TEST_MODULE_CACHE_LLDB@" -config.clang_module_cache = "@LLDB_TEST_MODULE_CACHE_CLANG@" config.python_executable = "@PYTHON_EXECUTABLE@" config.dotest_path = "@LLDB_SOURCE_DIR@/test/API/dotest.py" config.dotest_args_str = "@LLDB_DOTEST_ARGS@" config.lldb_disable_python = @LLDB_DISABLE_PYTHON@ config.dotest_lit_args_str = None +# The API tests use their own module caches. +config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api") +config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api") # Additional dotest arguments can be passed to lit by providing a # semicolon-separates list: --param dotest-args="arg;arg". @@ -43,4 +44,4 @@ except KeyError as e: lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) # Let the main config do the real work. 
-lit_config.load_config(config, "@LLDB_SOURCE_DIR@/test/API/lit.cfg") +lit_config.load_config(config, "@LLDB_SOURCE_DIR@/test/API/lit.cfg.py") diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index d13b13950462b..4bee6ca2d6764 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -3,6 +3,12 @@ add_subdirectory(API) +# Configure and create module cache directories. +set(LLDB_TEST_MODULE_CACHE_LLDB "${LLDB_TEST_BUILD_DIRECTORY}/module-cache-lldb" CACHE PATH "The Clang module cache used by the Clang embedded in LLDB while running tests.") +set(LLDB_TEST_MODULE_CACHE_CLANG "${LLDB_TEST_BUILD_DIRECTORY}/module-cache-clang" CACHE PATH "The Clang module cache used by the Clang while building tests.") +file(MAKE_DIRECTORY ${LLDB_TEST_MODULE_CACHE_LLDB}) +file(MAKE_DIRECTORY ${LLDB_TEST_MODULE_CACHE_CLANG}) + # LLVM_BUILD_MODE is used in lit.site.cfg if (CMAKE_CFG_INTDIR STREQUAL ".") set(LLVM_BUILD_MODE ".") @@ -17,8 +23,6 @@ endif() get_property(LLDB_DOTEST_ARGS GLOBAL PROPERTY LLDB_DOTEST_ARGS_PROPERTY) set(dotest_args_replacement ${LLVM_BUILD_MODE}) -set(LLDB_TEST_MODULE_CACHE_LLDB "${LLDB_TEST_BUILD_DIRECTORY}/module-cache-lldb" CACHE PATH "The Clang module cache used by the Clang embedded in LLDB while running tests.") -set(LLDB_TEST_MODULE_CACHE_CLANG "${LLDB_TEST_BUILD_DIRECTORY}/module-cache-clang" CACHE PATH "The Clang module cache used by the Clang while building tests.") if(LLDB_BUILT_STANDALONE) # In paths to our build-tree, replace CMAKE_CFG_INTDIR with our configuration name placeholder. @@ -58,9 +62,14 @@ add_lldb_test_dependency( llvm-mc llvm-objcopy llvm-readobj - llvm-strip ) +# Since llvm-strip is a symlink created by add_custom_target, it +# doesn't expose an export target when building standalone. +if(NOT LLDB_BUILT_STANDALONE) + add_lldb_test_dependency(llvm-strip) +endif() + if(TARGET lld) add_lldb_test_dependency(lld) else() @@ -78,32 +87,40 @@ if(NOT LLDB_BUILT_STANDALONE) ) endif() -# the value is not canonicalized within LLVM +# These values are not canonicalized within LLVM. llvm_canonicalize_cmake_booleans( LLDB_DISABLE_PYTHON LLVM_ENABLE_ZLIB LLVM_ENABLE_SHARED_LIBS LLDB_IS_64_BITS) +# Configure the top level test suite. +configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py) + +# Configure the Shell test suite. configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/Shell/lit.site.cfg.py.in ${CMAKE_CURRENT_BINARY_DIR}/Shell/lit.site.cfg.py MAIN_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/Shell/lit.cfg.py) + +# Configure the Unit test suite. configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.py.in ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg.py MAIN_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.cfg.py) + +# Configure the API test suite. 
configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/API/lit.site.cfg.in - ${CMAKE_CURRENT_BINARY_DIR}/API/lit.site.cfg) -configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + ${CMAKE_CURRENT_SOURCE_DIR}/API/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/API/lit.site.cfg.py MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/Shell/lit.cfg.py) - + ${CMAKE_CURRENT_SOURCE_DIR}/API/lit.cfg.py) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Shell/lit-lldb-init.in ${CMAKE_CURRENT_BINARY_DIR}/Shell/lit-lldb-init) diff --git a/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test b/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test index c568a19642a07..93c524f055277 100644 --- a/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test +++ b/lldb/test/Shell/ObjectFile/ELF/minidebuginfo-set-and-hit-breakpoint.test @@ -42,7 +42,7 @@ # .rela.dyn and .dynsym sections can be removed once llvm-objcopy # --only-keep-debug starts to work. # RUN: llvm-objcopy --remove-section=.rela.plt --remove-section=.rela.dyn \ -# RUN: --remove-section=.gnu.version --remove-section=.gnu.hash --remove-section=.dynsym %t.mini_debuginfo +# RUN: --remove-section=.gnu.version --remove-section=.gnu.hash --remove-section=.hash --remove-section=.dynsym %t.mini_debuginfo # Drop the full debug info from the original binary. diff --git a/lldb/test/Shell/ObjectFile/MachO/symtab.yaml b/lldb/test/Shell/ObjectFile/MachO/symtab.yaml index e50ba6b6c224a..9f5e1a02bbc82 100644 --- a/lldb/test/Shell/ObjectFile/MachO/symtab.yaml +++ b/lldb/test/Shell/ObjectFile/MachO/symtab.yaml @@ -51,7 +51,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 554889E54883EC30488D054112000031C948C7052C1200005704000048C70529120000AE080000488B151A120000897DE44889C7488D45E8488975D84889C6B821000000488955D04889C2E830010000488B7DD0488D35F5110000E840000000488D7DE8E81D01000031C04883C4305DC3662E0F1F8400000000000F1F440000554889E548C745F000000000488975F848897DF05DC3662E0F1F840000000000554889E54883EC3048C745F80000000048C745F00000000048C745E80000000048897DF8488975F0488B06488945E848897DD8488945E0488D3D62110000E81D000000488D7DD84889C6E891FFFFFF4883C4305DC3662E0F1F840000000000904883EC284889F8488B0F4883F9000F9CC2F6C2014889CE48897C2420488944241848894C24104889742408750A488B4424084883C428C3488B44241048C1F82048F7D8488B4C241089CA4863F2488B7C24204801F731D289D6488934244889C6488B1424488B0C24E81F0000004889C1488B54241848890248894C2408EBAE - sectname: __stubs segname: __TEXT addr: 0x0000000100000F60 @@ -64,7 +63,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000006 reserved3: 0x00000000 - content: FF259A100000FF259C100000FF259E100000 - sectname: __stub_helper segname: __TEXT addr: 0x0000000100000F74 @@ -77,7 +75,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 4C8D1D9D1000004153FF257D000000906800000000E9E6FFFFFF6819000000E9DCFFFFFF6830000000E9D2FFFFFF - sectname: __swift5_typeref segname: __TEXT addr: 0x0000000100000FA2 @@ -90,7 +87,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 53695F53697400 - sectname: __const segname: __TEXT addr: 0x0000000100000FAA @@ -103,7 +99,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: '0300' - sectname: __unwind_info segname: __TEXT addr: 0x0000000100000FAC @@ -116,7 +111,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 
0x00000000 - content: 010000001C000000000000001C000000000000001C00000002000000E00D00003400000034000000600F00000000000034000000030000000C0002001400020000000001000100000000060200000001 - cmd: LC_SEGMENT_64 cmdsize: 232 segname: __DATA_CONST @@ -141,7 +135,6 @@ LoadCommands: reserved1: 0x00000003 reserved2: 0x00000000 reserved3: 0x00000000 - content: '0000000000000000' - sectname: __objc_imageinfo segname: __DATA_CONST addr: 0x0000000100001008 @@ -154,7 +147,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: '0000000040070105' - cmd: LC_SEGMENT_64 cmdsize: 312 segname: __DATA @@ -179,7 +171,6 @@ LoadCommands: reserved1: 0x00000004 reserved2: 0x00000000 reserved3: 0x00000000 - content: 840F0000010000008E0F000001000000980F000001000000 - sectname: __data segname: __DATA addr: 0x0000000100002018 @@ -192,7 +183,6 @@ LoadCommands: reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 - content: 000000000000000082EFFFFFFAFFFFFF - sectname: __common segname: __DATA addr: 0x0000000100002028 diff --git a/lldb/test/Shell/SymbolFile/Breakpad/Inputs/unwind-via-stack-win.yaml b/lldb/test/Shell/SymbolFile/Breakpad/Inputs/unwind-via-stack-win.yaml index 399cf2bbaa351..e0140061d6ead 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/Inputs/unwind-via-stack-win.yaml +++ b/lldb/test/Shell/SymbolFile/Breakpad/Inputs/unwind-via-stack-win.yaml @@ -24,7 +24,21 @@ Streams: - Start of Memory Range: 0x0000000000CFFE78 Content: 0000000079100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0000100B0085100B0094842777 - Type: MemoryInfoList - Content: 1000000030000000020000000000000000100B00000000000000000000000000000000000000000000400000000000000010000010000000000000010000000000002677000000000000000000000000000000000000000000000E000000000000100000100000000000000100000000 + Memory Ranges: + - Base Address: 0x00000000000B1000 + Allocation Base: 0x0000000000000000 + Allocation Protect: [ ] + Region Size: 0x0000000000004000 + State: [ MEM_COMMIT ] + Protect: [ PAGE_EXECUTE ] + Type: [ MEM_IMAGE ] + - Base Address: 0x0000000077260000 + Allocation Base: 0x0000000000000000 + Allocation Protect: [ ] + Region Size: 0x00000000000E0000 + State: [ MEM_COMMIT ] + Protect: [ PAGE_EXECUTE ] + Type: [ MEM_IMAGE ] - Type: SystemInfo Processor Arch: X86 Platform ID: Win32NT diff --git a/lldb/test/Shell/lit.cfg.py b/lldb/test/Shell/lit.cfg.py index 3503e95692295..cc54234faf788 100644 --- a/lldb/test/Shell/lit.cfg.py +++ b/lldb/test/Shell/lit.cfg.py @@ -39,9 +39,7 @@ llvm_config.use_default_substitutions() - toolchain.use_lldb_substitutions(config) - toolchain.use_support_substitutions(config) @@ -69,9 +67,9 @@ def calculate_arch_features(arch_string): # incremental build whenever clang changes underneath, so doing it once per # lit.py invocation is close enough. for cachedir in [config.clang_module_cache, config.lldb_module_cache]: - if os.path.isdir(cachedir): - print("Deleting module cache at %s."%cachedir) - shutil.rmtree(cachedir) + if os.path.isdir(cachedir): + print("Deleting module cache at %s."%cachedir) + shutil.rmtree(cachedir) # Set a default per-test timeout of 10 minutes. 
Setting a timeout per test # requires that killProcessAndChildren() is supported on the platform and diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index 459d560454121..39990a408b059 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ -20,8 +20,9 @@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.lldb_bitness = 64 if @LLDB_IS_64_BITS@ else 32 config.lldb_disable_python = @LLDB_DISABLE_PYTHON@ config.lldb_build_directory = "@LLDB_TEST_BUILD_DIRECTORY@" -config.lldb_module_cache = "@LLDB_TEST_MODULE_CACHE_LLDB@" -config.clang_module_cache = "@LLDB_TEST_MODULE_CACHE_CLANG@" +# The shell tests use their own module caches. +config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell") +config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell") # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. @@ -32,7 +33,6 @@ try: config.lldb_libs_dir = config.lldb_libs_dir % lit_config.params config.lldb_tools_dir = config.lldb_tools_dir % lit_config.params config.lldb_lit_tools_dir = config.lldb_lit_tools_dir % lit_config.params - except KeyError as e: key, = e.args lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) diff --git a/lldb/test/lit.cfg.py b/lldb/test/lit.cfg.py index 797750bac2126..f8ab797c4082c 100644 --- a/lldb/test/lit.cfg.py +++ b/lldb/test/lit.cfg.py @@ -1,41 +1,15 @@ # -*- Python -*- import os -import platform -import re -import shutil -import site -import sys import lit.formats from lit.llvm import llvm_config -from lit.llvm.subst import FindTool -from lit.llvm.subst import ToolSubst -from distutils.spawn import find_executable # This is the top level configuration. Most of these configuration options will # be overriden by individual lit configuration files in the test -# subdirectories. +# subdirectories. Anything configured here will *not* be loaded when pointing +# lit at one of the subdirectories. -# name: The name of this test suite. config.name = 'lldb' - -# testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) - -# suffixes: A list of file extensions to treat as test files. This is overriden -# by individual lit.local.cfg files in the test subdirectories. -config.suffixes = ['.test', '.cpp', '.s'] - -# excludes: A list of directories to exclude from the testsuite. The 'Inputs' -# subdirectories contain auxiliary inputs for various tests in their parent -# directories. -config.excludes = ['Inputs', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt'] - -# test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) - -# test_exec_root: The root path where tests should be run. config.test_exec_root = os.path.join(config.lldb_obj_root, 'test') - -llvm_config.use_default_substitutions() diff --git a/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py b/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py index e0e2b54fd033e..6b9ad3f63f7cd 100644 --- a/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py +++ b/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py @@ -640,7 +640,7 @@ def terminate(self, force=False): # this to happen. I think isalive() reports True, but the # process is dead to the kernel. # Make one last attempt to see if the kernel is up to date.
- time.sleep(self.delayafterterminate) + time.sleep(self.delayafterterminate * 10) if not self.isalive(): return True else: diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp index 8856e921997dc..1bf14d97056ce 100644 --- a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp +++ b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp @@ -49,8 +49,6 @@ static const uint8_t g_arm64_breakpoint_opcode[] = { 0x00, 0x00, 0x20, 0xD4}; // "brk #0", 0xd4200000 in BE byte order -static const uint8_t g_arm_breakpoint_opcode[] = { - 0xFE, 0xDE, 0xFF, 0xE7}; // this armv7 insn also works in arm64 // If we need to set one logical watchpoint by using // two hardware watchpoint registers, the watchpoint @@ -87,7 +85,7 @@ DNBArchProtocol *DNBArchMachARM64::Create(MachThread *thread) { const uint8_t * DNBArchMachARM64::SoftwareBreakpointOpcode(nub_size_t byte_size) { - return g_arm_breakpoint_opcode; + return g_arm64_breakpoint_opcode; } uint32_t DNBArchMachARM64::GetCPUType() { return CPU_TYPE_ARM64; } diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index ec70784457d0a..4a403a7ffb460 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -11,6 +11,7 @@ #include "lldb/API/SBCommandInterpreter.h" #include "lldb/API/SBCommandReturnObject.h" #include "lldb/API/SBDebugger.h" +#include "lldb/API/SBFile.h" #include "lldb/API/SBHostOS.h" #include "lldb/API/SBLanguageRuntime.h" #include "lldb/API/SBReproducer.h" @@ -18,8 +19,8 @@ #include "lldb/API/SBStringList.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Format.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Process.h" @@ -499,16 +500,16 @@ int Driver::MainLoop() { SBCommandReturnObject result; sb_interpreter.SourceInitFileInHomeDirectory(result); if (m_option_data.m_debug_mode) { - result.PutError(m_debugger.GetErrorFileHandle()); - result.PutOutput(m_debugger.GetOutputFileHandle()); + result.PutError(m_debugger.GetErrorFile()); + result.PutOutput(m_debugger.GetOutputFile()); } // Source the local .lldbinit file if it exists and we're allowed to source. // Here we want to always print the return object because it contains the // warning and instructions to load local lldbinit files. sb_interpreter.SourceInitFileInCurrentWorkingDirectory(result); - result.PutError(m_debugger.GetErrorFileHandle()); - result.PutOutput(m_debugger.GetOutputFileHandle()); + result.PutError(m_debugger.GetErrorFile()); + result.PutOutput(m_debugger.GetOutputFile()); // We allow the user to specify an exit code when calling quit which we will // return when exiting. 
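The Driver.cpp hunks above add ``llvm/Support/InitLLVM.h`` (alongside the new ``SBFile``-based ``GetErrorFile``/``GetOutputFile`` calls); that header is what allows the hand-written ``wmain``/``convertWideToUTF8`` wrapper to be deleted further down in this diff. As a rough, hypothetical sketch of what a single ``InitLLVM`` object buys a tool entry point (the tool body here is invented, not part of the patch):

.. code-block:: c++

    #include "llvm/Support/InitLLVM.h"
    #include "llvm/Support/raw_ostream.h"

    int main(int argc, char const *argv[]) {
      // InitLLVM installs the pretty-stack-trace signal handlers and, on
      // Windows, replaces argv with a UTF-8 copy of the wide command line --
      // the work the removed wmain() shim used to do by hand.
      llvm::InitLLVM IL(argc, argv);

      llvm::outs() << "got " << argc << " argument(s)\n";
      // ... real driver logic would run here ...
      return 0;
    }

lldb-server picks up the same one-line call later in this diff.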
@@ -574,8 +575,8 @@ int Driver::MainLoop() { } if (m_option_data.m_debug_mode) { - result.PutError(m_debugger.GetErrorFileHandle()); - result.PutOutput(m_debugger.GetOutputFileHandle()); + result.PutError(m_debugger.GetErrorFile()); + result.PutOutput(m_debugger.GetOutputFile()); } const bool handle_events = true; @@ -806,23 +807,9 @@ llvm::Optional InitializeReproducer(opt::InputArgList &input_args) { return llvm::None; } -int -#ifdef _MSC_VER -wmain(int argc, wchar_t const *wargv[]) -#else -main(int argc, char const *argv[]) -#endif +int main(int argc, char const *argv[]) { -#ifdef _MSC_VER - // Convert wide arguments to UTF-8 - std::vector argvStrings(argc); - std::vector argvPointers(argc); - for (int i = 0; i != argc; ++i) { - llvm::convertWideToUTF8(wargv[i], argvStrings[i]); - argvPointers[i] = argvStrings[i].c_str(); - } - const char **argv = argvPointers.data(); -#endif + llvm::InitLLVM IL(argc, argv); // Print stack trace on crash. llvm::StringRef ToolName = llvm::sys::path::filename(argv[0]); diff --git a/lldb/tools/lldb-server/lldb-server.cpp b/lldb/tools/lldb-server/lldb-server.cpp index 518c34b090388..ab32eefb518ed 100644 --- a/lldb/tools/lldb-server/lldb-server.cpp +++ b/lldb/tools/lldb-server/lldb-server.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" @@ -48,6 +49,7 @@ static void terminate_debugger() { g_debugger_lifetime->Terminate(); } // main int main(int argc, char *argv[]) { + llvm::InitLLVM IL(argc, argv); llvm::StringRef ToolName = argv[0]; llvm::sys::PrintStackTraceOnErrorSignal(ToolName); llvm::PrettyStackTraceProgram X(argc, argv); diff --git a/lldb/tools/lldb-test/lldb-test.cpp b/lldb/tools/lldb-test/lldb-test.cpp index 112c2fa876404..426b12acd4074 100644 --- a/lldb/tools/lldb-test/lldb-test.cpp +++ b/lldb/tools/lldb-test/lldb-test.cpp @@ -19,7 +19,6 @@ #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Symbol/ClangASTContext.h" -#include "lldb/Symbol/ClangASTImporter.h" #include "lldb/Symbol/CompileUnit.h" #include "lldb/Symbol/LineTable.h" #include "lldb/Symbol/SymbolFile.h" diff --git a/lldb/unittests/Expression/DiagnosticManagerTest.cpp b/lldb/unittests/Expression/DiagnosticManagerTest.cpp index 843d55503ef3f..f012632e63bf9 100644 --- a/lldb/unittests/Expression/DiagnosticManagerTest.cpp +++ b/lldb/unittests/Expression/DiagnosticManagerTest.cpp @@ -39,17 +39,19 @@ TEST(DiagnosticManagerTest, AddDiagnostic) { DiagnosticManager mgr; EXPECT_EQ(0U, mgr.Diagnostics().size()); - Diagnostic *diag = new Diagnostic( - "foo bar has happened", DiagnosticSeverity::eDiagnosticSeverityError, - DiagnosticOrigin::eDiagnosticOriginLLDB, custom_diag_id); - mgr.AddDiagnostic(diag); + std::string msg = "foo bar has happened"; + DiagnosticSeverity severity = DiagnosticSeverity::eDiagnosticSeverityError; + DiagnosticOrigin origin = DiagnosticOrigin::eDiagnosticOriginLLDB; + auto diag = + std::make_unique(msg, severity, origin, custom_diag_id); + mgr.AddDiagnostic(std::move(diag)); EXPECT_EQ(1U, mgr.Diagnostics().size()); - Diagnostic *got = mgr.Diagnostics().front(); - EXPECT_EQ(diag->getKind(), got->getKind()); - EXPECT_EQ(diag->GetMessage(), got->GetMessage()); - EXPECT_EQ(diag->GetSeverity(), got->GetSeverity()); - EXPECT_EQ(diag->GetCompilerID(), got->GetCompilerID()); - EXPECT_EQ(diag->HasFixIts(), got->HasFixIts()); + const 
Diagnostic *got = mgr.Diagnostics().front().get(); + EXPECT_EQ(DiagnosticOrigin::eDiagnosticOriginLLDB, got->getKind()); + EXPECT_EQ(msg, got->GetMessage()); + EXPECT_EQ(severity, got->GetSeverity()); + EXPECT_EQ(custom_diag_id, got->GetCompilerID()); + EXPECT_EQ(false, got->HasFixIts()); } TEST(DiagnosticManagerTest, HasFixits) { @@ -57,16 +59,16 @@ TEST(DiagnosticManagerTest, HasFixits) { // By default we shouldn't have any fixits. EXPECT_FALSE(mgr.HasFixIts()); // Adding a diag without fixits shouldn't make HasFixIts return true. - mgr.AddDiagnostic(new FixItDiag("no fixit", false)); + mgr.AddDiagnostic(std::make_unique("no fixit", false)); EXPECT_FALSE(mgr.HasFixIts()); // Adding a diag with fixits will mark the manager as containing fixits. - mgr.AddDiagnostic(new FixItDiag("fixit", true)); + mgr.AddDiagnostic(std::make_unique("fixit", true)); EXPECT_TRUE(mgr.HasFixIts()); // Adding another diag without fixit shouldn't make it return false. - mgr.AddDiagnostic(new FixItDiag("no fixit", false)); + mgr.AddDiagnostic(std::make_unique("no fixit", false)); EXPECT_TRUE(mgr.HasFixIts()); // Adding a diag with fixits. The manager should still return true. - mgr.AddDiagnostic(new FixItDiag("fixit", true)); + mgr.AddDiagnostic(std::make_unique("fixit", true)); EXPECT_TRUE(mgr.HasFixIts()); } @@ -77,7 +79,8 @@ TEST(DiagnosticManagerTest, GetStringNoDiags) { TEST(DiagnosticManagerTest, GetStringBasic) { DiagnosticManager mgr; - mgr.AddDiagnostic(new TextDiag("abc", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("abc", eDiagnosticSeverityError)); EXPECT_EQ("error: abc\n", mgr.GetString()); } @@ -85,15 +88,18 @@ TEST(DiagnosticManagerTest, GetStringMultiline) { DiagnosticManager mgr; // Multiline diagnostics should only get one severity label. - mgr.AddDiagnostic(new TextDiag("b\nc", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("b\nc", eDiagnosticSeverityError)); EXPECT_EQ("error: b\nc\n", mgr.GetString()); } TEST(DiagnosticManagerTest, GetStringMultipleDiags) { DiagnosticManager mgr; - mgr.AddDiagnostic(new TextDiag("abc", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("abc", eDiagnosticSeverityError)); EXPECT_EQ("error: abc\n", mgr.GetString()); - mgr.AddDiagnostic(new TextDiag("def", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("def", eDiagnosticSeverityError)); EXPECT_EQ("error: abc\nerror: def\n", mgr.GetString()); } @@ -101,10 +107,13 @@ TEST(DiagnosticManagerTest, GetStringSeverityLabels) { DiagnosticManager mgr; // Different severities should cause different labels. - mgr.AddDiagnostic(new TextDiag("foo", eDiagnosticSeverityError)); - mgr.AddDiagnostic(new TextDiag("bar", eDiagnosticSeverityWarning)); + mgr.AddDiagnostic( + std::make_unique("foo", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("bar", eDiagnosticSeverityWarning)); // Remarks have no labels. - mgr.AddDiagnostic(new TextDiag("baz", eDiagnosticSeverityRemark)); + mgr.AddDiagnostic( + std::make_unique("baz", eDiagnosticSeverityRemark)); EXPECT_EQ("error: foo\nwarning: bar\nbaz\n", mgr.GetString()); } @@ -112,9 +121,12 @@ TEST(DiagnosticManagerTest, GetStringPreserveOrder) { DiagnosticManager mgr; // Make sure we preserve the diagnostic order and do not sort them in any way. 
- mgr.AddDiagnostic(new TextDiag("baz", eDiagnosticSeverityRemark)); - mgr.AddDiagnostic(new TextDiag("bar", eDiagnosticSeverityWarning)); - mgr.AddDiagnostic(new TextDiag("foo", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("baz", eDiagnosticSeverityRemark)); + mgr.AddDiagnostic( + std::make_unique("bar", eDiagnosticSeverityWarning)); + mgr.AddDiagnostic( + std::make_unique("foo", eDiagnosticSeverityError)); EXPECT_EQ("baz\nwarning: bar\nerror: foo\n", mgr.GetString()); } @@ -129,8 +141,10 @@ TEST(DiagnosticManagerTest, AppendMessageNoDiag) { TEST(DiagnosticManagerTest, AppendMessageAttachToLastDiag) { DiagnosticManager mgr; - mgr.AddDiagnostic(new TextDiag("foo", eDiagnosticSeverityError)); - mgr.AddDiagnostic(new TextDiag("bar", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("foo", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("bar", eDiagnosticSeverityError)); // This should append to 'bar' and not to 'foo'. mgr.AppendMessageToDiagnostic("message text"); @@ -140,10 +154,12 @@ TEST(DiagnosticManagerTest, AppendMessageAttachToLastDiag) { TEST(DiagnosticManagerTest, AppendMessageSubsequentDiags) { DiagnosticManager mgr; - mgr.AddDiagnostic(new TextDiag("bar", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("bar", eDiagnosticSeverityError)); mgr.AppendMessageToDiagnostic("message text"); // Pushing another diag after the message should work fine. - mgr.AddDiagnostic(new TextDiag("foo", eDiagnosticSeverityError)); + mgr.AddDiagnostic( + std::make_unique("foo", eDiagnosticSeverityError)); EXPECT_EQ("error: bar\nmessage text\nerror: foo\n", mgr.GetString()); } diff --git a/lldb/unittests/ObjectFile/CMakeLists.txt b/lldb/unittests/ObjectFile/CMakeLists.txt index 62c003e3f2d8c..a9b42ea3199d7 100644 --- a/lldb/unittests/ObjectFile/CMakeLists.txt +++ b/lldb/unittests/ObjectFile/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(Breakpad) add_subdirectory(ELF) +add_subdirectory(PECOFF) diff --git a/lldb/unittests/ObjectFile/PECOFF/CMakeLists.txt b/lldb/unittests/ObjectFile/PECOFF/CMakeLists.txt new file mode 100644 index 0000000000000..3ce5a7b9739cc --- /dev/null +++ b/lldb/unittests/ObjectFile/PECOFF/CMakeLists.txt @@ -0,0 +1,8 @@ +add_lldb_unittest(ObjectFilePECOFFTests + TestPECallFrameInfo.cpp + + LINK_LIBS + lldbUtilityHelpers + lldbPluginObjectFilePECOFF + LLVMTestingSupport + ) diff --git a/lldb/unittests/ObjectFile/PECOFF/TestPECallFrameInfo.cpp b/lldb/unittests/ObjectFile/PECOFF/TestPECallFrameInfo.cpp new file mode 100644 index 0000000000000..cb0a2afea6757 --- /dev/null +++ b/lldb/unittests/ObjectFile/PECOFF/TestPECallFrameInfo.cpp @@ -0,0 +1,336 @@ +//===-- TestPECallFrameInfo.cpp ------------------------------*- C++ -*-===// +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +#include "Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h" +#include "Plugins/Process/Utility/lldb-x86-register-enums.h" +#include "TestingSupport/TestUtilities.h" + +#include "lldb/Core/Module.h" +#include "lldb/Symbol/CallFrameInfo.h" +#include "lldb/Symbol/UnwindPlan.h" +#include "llvm/Testing/Support/Error.h" + +using namespace lldb_private; +using namespace lldb; + +class PECallFrameInfoTest : public testing::Test { +public: + void SetUp() override { + FileSystem::Initialize(); + ObjectFilePECOFF::Initialize(); + } + + void TearDown() override { + ObjectFilePECOFF::Terminate(); + FileSystem::Terminate(); + } + +protected: + void GetUnwindPlan(addr_t file_addr, UnwindPlan &plan) const; +}; + +void PECallFrameInfoTest::GetUnwindPlan(addr_t file_addr, UnwindPlan &plan) const { + llvm::Expected ExpectedFile = TestFile::fromYaml( + R"( +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 0 + ImageBase: 16777216 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA, IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + ExportTable: + RelativeVirtualAddress: 0 + Size: 0 + ImportTable: + RelativeVirtualAddress: 0 + Size: 0 + ResourceTable: + RelativeVirtualAddress: 0 + Size: 0 + ExceptionTable: + RelativeVirtualAddress: 12288 + Size: 60 + CertificateTable: + RelativeVirtualAddress: 0 + Size: 0 + BaseRelocationTable: + RelativeVirtualAddress: 0 + Size: 0 + Debug: + RelativeVirtualAddress: 0 + Size: 0 + Architecture: + RelativeVirtualAddress: 0 + Size: 0 + GlobalPtr: + RelativeVirtualAddress: 0 + Size: 0 + TlsTable: + RelativeVirtualAddress: 0 + Size: 0 + LoadConfigTable: + RelativeVirtualAddress: 0 + Size: 0 + BoundImport: + RelativeVirtualAddress: 0 + Size: 0 + IAT: + RelativeVirtualAddress: 0 + Size: 0 + DelayImportDescriptor: + RelativeVirtualAddress: 0 + Size: 0 + ClrRuntimeHeader: + RelativeVirtualAddress: 0 + Size: 0 +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LARGE_ADDRESS_AWARE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 4096 + - Name: .rdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + VirtualAddress: 8192 + VirtualSize: 68 + SectionData: 010C06000C3208F006E00470036002302105020005540D0000100000001100000020000019400E352F74670028646600213465001A3315015E000EF00CE00AD008C00650 + + +# Unwind info at 0x2000: +# 01 0C 06 00 No chained info, prolog size = 0xC, unwind codes size is 6 words, no frame register +# 0C 32 UOP_AllocSmall(2) 3 * 8 + 8 bytes, offset in prolog is 0xC +# 08 F0 UOP_PushNonVol(0) R15(0xF), offset in prolog is 8 +# 06 E0 UOP_PushNonVol(0) R14(0xE), offset in prolog is 6 +# 04 70 UOP_PushNonVol(0) RDI(7), offset in prolog is 4 +# 03 60 UOP_PushNonVol(0) RSI(6), offset in prolog is 3 +# 02 30 UOP_PushNonVol(0) RBX(3), offset in prolog is 2 +# Corresponding prolog: +# 00 push rbx +# 02 push rsi +# 03 push rdi +# 04 
push r14 +# 06 push r15 +# 08 sub rsp, 20h + +# Unwind info at 0x2010: +# 21 05 02 00 Has chained info, prolog size = 5, unwind codes size is 2 words, no frame register +# 05 54 0D 00 UOP_SaveNonVol(4) RBP(5) to RSP + 0xD * 8, offset in prolog is 5 +# Chained runtime function: +# 00 10 00 00 Start address is 0x1000 +# 00 11 00 00 End address is 0x1100 +# 00 20 00 00 Unwind info RVA is 0x2000 +# Corresponding prolog: +# 00 mov [rsp+68h], rbp + +# Unwind info at 0x2024: +# 19 40 0E 35 No chained info, prolog size = 0x40, unwind codes size is 0xE words, frame register is RBP, frame register offset is RSP + 3 * 16 +# 2F 74 67 00 UOP_SaveNonVol(4) RDI(7) to RSP + 0x67 * 8, offset in prolog is 0x2F +# 28 64 66 00 UOP_SaveNonVol(4) RSI(6) to RSP + 0x66 * 8, offset in prolog is 0x28 +# 21 34 65 00 UOP_SaveNonVol(4) RBX(3) to RSP + 0x65 * 8, offset in prolog is 0x21 +# 1A 33 UOP_SetFPReg(3), offset in prolog is 0x1A +# 15 01 5E 00 UOP_AllocLarge(1) 0x5E * 8 bytes, offset in prolog is 0x15 +# 0E F0 UOP_PushNonVol(0) R15(0xF), offset in prolog is 0xE +# 0C E0 UOP_PushNonVol(0) R14(0xE), offset in prolog is 0xC +# 0A D0 UOP_PushNonVol(0) R13(0xD), offset in prolog is 0xA +# 08 C0 UOP_PushNonVol(0) R12(0xC), offset in prolog is 8 +# 06 50 UOP_PushNonVol(0) RBP(5), offset in prolog is 6 +# Corresponding prolog: +# 00 mov [rsp+8], rcx +# 05 push rbp +# 06 push r12 +# 08 push r13 +# 0A push r14 +# 0C push r15 +# 0E sub rsp, 2F0h +# 15 lea rbp, [rsp+30h] +# 1A mov [rbp+2F8h], rbx +# 21 mov [rbp+300h], rsi +# 28 mov [rbp+308h], rdi + + - Name: .pdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + VirtualAddress: 12288 + VirtualSize: 60 + SectionData: 000000000000000000000000000000000000000000000000001000000011000000200000001100000012000010200000001200000013000024200000 + +# 00 00 00 00 +# 00 00 00 00 Test correct processing of empty runtime functions at begin +# 00 00 00 00 + +# 00 00 00 00 +# 00 00 00 00 Test correct processing of empty runtime functions at begin +# 00 00 00 00 + +# 00 10 00 00 Start address is 0x1000 +# 00 11 00 00 End address is 0x1100 +# 00 20 00 00 Unwind info RVA is 0x2000 + +# 00 11 00 00 Start address is 0x1100 +# 00 12 00 00 End address is 0x1200 +# 10 20 00 00 Unwind info RVA is 0x2010 + +# 00 12 00 00 Start address is 0x1200 +# 00 13 00 00 End address is 0x1300 +# 24 20 00 00 Unwind info RVA is 0x2024 + +symbols: [] +... 
+)"); + ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); + + ModuleSP module_sp = std::make_shared(ModuleSpec(FileSpec(ExpectedFile->name()))); + ObjectFile *object_file = module_sp->GetObjectFile(); + ASSERT_NE(object_file, nullptr); + + std::unique_ptr cfi = object_file->CreateCallFrameInfo(); + ASSERT_NE(cfi.get(), nullptr); + + SectionList *sect_list = object_file->GetSectionList(); + ASSERT_NE(sect_list, nullptr); + + EXPECT_TRUE(cfi->GetUnwindPlan(Address(file_addr, sect_list), plan)); +} + +TEST_F(PECallFrameInfoTest, Basic_eh) { + UnwindPlan plan(eRegisterKindLLDB); + GetUnwindPlan(0x1001080, plan); + EXPECT_EQ(plan.GetRowCount(), 7); + + UnwindPlan::Row row; + row.SetOffset(0); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 8); + row.SetRegisterLocationToIsCFAPlusOffset(lldb_rsp_x86_64, 0, true); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rip_x86_64, -8, true); + EXPECT_EQ(*plan.GetRowAtIndex(0), row); + + row.SetOffset(2); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x10); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rbx_x86_64, -0x10, true); + EXPECT_EQ(*plan.GetRowAtIndex(1), row); + + row.SetOffset(3); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x18); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rsi_x86_64, -0x18, true); + EXPECT_EQ(*plan.GetRowAtIndex(2), row); + + row.SetOffset(4); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x20); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rdi_x86_64, -0x20, true); + EXPECT_EQ(*plan.GetRowAtIndex(3), row); + + row.SetOffset(6); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x28); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_r14_x86_64, -0x28, true); + EXPECT_EQ(*plan.GetRowAtIndex(4), row); + + row.SetOffset(8); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x30); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_r15_x86_64, -0x30, true); + EXPECT_EQ(*plan.GetRowAtIndex(5), row); + + row.SetOffset(0xC); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x50); + EXPECT_EQ(*plan.GetRowAtIndex(6), row); +} + +TEST_F(PECallFrameInfoTest, Chained_eh) { + UnwindPlan plan(eRegisterKindLLDB); + GetUnwindPlan(0x1001180, plan); + EXPECT_EQ(plan.GetRowCount(), 2); + + UnwindPlan::Row row; + row.SetOffset(0); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x50); + row.SetRegisterLocationToIsCFAPlusOffset(lldb_rsp_x86_64, 0, true); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rip_x86_64, -8, true); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rbx_x86_64, -0x10, true); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rsi_x86_64, -0x18, true); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rdi_x86_64, -0x20, true); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_r14_x86_64, -0x28, true); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_r15_x86_64, -0x30, true); + EXPECT_EQ(*plan.GetRowAtIndex(0), row); + + row.SetOffset(5); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rbp_x86_64, 0x18, true); + EXPECT_EQ(*plan.GetRowAtIndex(1), row); +} + +TEST_F(PECallFrameInfoTest, Frame_reg_eh) { + UnwindPlan plan(eRegisterKindLLDB); + GetUnwindPlan(0x1001280, plan); + EXPECT_EQ(plan.GetRowCount(), 11); + + UnwindPlan::Row row; + row.SetOffset(0); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 8); + row.SetRegisterLocationToIsCFAPlusOffset(lldb_rsp_x86_64, 0, true); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rip_x86_64, -8, true); + EXPECT_EQ(*plan.GetRowAtIndex(0), row); + + row.SetOffset(6); + 
row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x10); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rbp_x86_64, -0x10, true); + EXPECT_EQ(*plan.GetRowAtIndex(1), row); + + row.SetOffset(8); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x18); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_r12_x86_64, -0x18, true); + EXPECT_EQ(*plan.GetRowAtIndex(2), row); + + row.SetOffset(0xA); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x20); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_r13_x86_64, -0x20, true); + EXPECT_EQ(*plan.GetRowAtIndex(3), row); + + row.SetOffset(0xC); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x28); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_r14_x86_64, -0x28, true); + EXPECT_EQ(*plan.GetRowAtIndex(4), row); + + row.SetOffset(0xE); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x30); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_r15_x86_64, -0x30, true); + EXPECT_EQ(*plan.GetRowAtIndex(5), row); + + row.SetOffset(0x15); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rsp_x86_64, 0x320); + EXPECT_EQ(*plan.GetRowAtIndex(6), row); + + row.SetOffset(0x1A); + row.GetCFAValue().SetIsRegisterPlusOffset(lldb_rbp_x86_64, 0x2F0); + EXPECT_EQ(*plan.GetRowAtIndex(7), row); + + row.SetOffset(0x21); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rbx_x86_64, 8, true); + EXPECT_EQ(*plan.GetRowAtIndex(8), row); + + row.SetOffset(0x28); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rsi_x86_64, 0x10, true); + EXPECT_EQ(*plan.GetRowAtIndex(9), row); + + row.SetOffset(0x2F); + row.SetRegisterLocationToAtCFAPlusOffset(lldb_rdi_x86_64, 0x18, true); + EXPECT_EQ(*plan.GetRowAtIndex(10), row); +} diff --git a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp index 7c0791ed4dac4..eb4b6b8edc268 100644 --- a/lldb/unittests/Process/minidump/MinidumpParserTest.cpp +++ b/lldb/unittests/Process/minidump/MinidumpParserTest.cpp @@ -338,6 +338,7 @@ void check_region(MinidumpParser &parser, lldb::addr_t addr, lldb::addr_t start, MemoryRegionInfo::OptionalBool exec, MemoryRegionInfo::OptionalBool mapped, ConstString name = ConstString()) { + SCOPED_TRACE(addr); auto range_info = parser.GetMemoryRegionInfo(addr); EXPECT_EQ(start, range_info.GetRange().GetRangeBase()); EXPECT_EQ(end, range_info.GetRange().GetRangeEnd()); diff --git a/lldb/unittests/Process/minidump/RegisterContextMinidumpTest.cpp b/lldb/unittests/Process/minidump/RegisterContextMinidumpTest.cpp index 3d0f628f80f19..265da63d33a8c 100644 --- a/lldb/unittests/Process/minidump/RegisterContextMinidumpTest.cpp +++ b/lldb/unittests/Process/minidump/RegisterContextMinidumpTest.cpp @@ -10,7 +10,9 @@ #include "Plugins/Process/Utility/RegisterContextLinux_x86_64.h" #include "Plugins/Process/minidump/RegisterContextMinidump_x86_32.h" #include "Plugins/Process/minidump/RegisterContextMinidump_x86_64.h" +#include "Plugins/Process/minidump/RegisterContextMinidump_ARM.h" #include "lldb/Utility/DataBuffer.h" +#include "llvm/ADT/StringRef.h" #include "gtest/gtest.h" using namespace lldb_private; @@ -143,3 +145,57 @@ TEST(RegisterContextMinidump, ConvertMinidumpContext_x86_64) { EXPECT_EQ(Context.ds, reg64(*Buf, Info[lldb_ds_x86_64])); EXPECT_EQ(Context.es, reg64(*Buf, Info[lldb_es_x86_64])); } + +static void TestARMRegInfo(const lldb_private::RegisterInfo *info) { + // Make sure we have valid register numbers for eRegisterKindEHFrame and + // eRegisterKindDWARF for GPR registers r0-r15 so that we can unwind + // 
correctly when using this information. + llvm::StringRef name(info->name); + llvm::StringRef alt_name(info->alt_name); + if (name.startswith("r") || alt_name.startswith("r")) { + EXPECT_NE(info->kinds[lldb::eRegisterKindEHFrame], LLDB_INVALID_REGNUM); + EXPECT_NE(info->kinds[lldb::eRegisterKindDWARF], LLDB_INVALID_REGNUM); + } + // Verify generic register are set correctly + if (name == "r0") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_ARG1); + } else if (name == "r1") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_ARG2); + } else if (name == "r2") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_ARG3); + } else if (name == "r3") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_ARG4); + } else if (name == "sp") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_SP); + } else if (name == "fp") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_FP); + } else if (name == "lr") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_RA); + } else if (name == "pc") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_PC); + } else if (name == "cpsr") { + EXPECT_EQ(info->kinds[lldb::eRegisterKindGeneric], + (uint32_t)LLDB_REGNUM_GENERIC_FLAGS); + } +} + +TEST(RegisterContextMinidump, CheckRegisterContextMinidump_ARM) { + size_t num_regs = RegisterContextMinidump_ARM::GetRegisterCountStatic(); + const lldb_private::RegisterInfo *reg_info; + for (size_t reg=0; reg engine64 = Getx86_64Inspector(); + + uint8_t data[] = { + 0x55, // pushq %rbp + 0x48, 0x89, 0xe5, // movq %rsp, %rbp + + // x86AssemblyInspectionEngine::AugmentUnwindPlanFromCallSite + // has a bug where it can't augment a function that is just + // prologue+epilogue - it needs at least one other instruction + // in between. + + 0x90, // nop + 0x48, 0x81, 0xec, 0x88, 0, 0, 0, // subq $0x88, %rsp + 0x90, // nop + 0x48, 0x81, 0xc4, 0x88, 0, 0, 0, // addq $0x88, %rsp + + 0x5d, // popq %rbp + 0xc3 // retq + }; + + sample_range = AddressRange(0x1000, sizeof(data)); + + unwind_plan.SetSourceName("unit testing hand-created unwind plan"); + unwind_plan.SetPlanValidAddressRange(sample_range); + unwind_plan.SetRegisterKind(eRegisterKindLLDB); + + row_sp = std::make_shared(); + + // Describe offset 0 + row_sp->SetOffset(0); + row_sp->GetCFAValue().SetIsRegisterPlusOffset(k_rsp, 8); + + regloc.SetAtCFAPlusOffset(-8); + row_sp->SetRegisterInfo(k_rip, regloc); + + unwind_plan.AppendRow(row_sp); + + // Allocate a new Row, populate it with the existing Row contents. + UnwindPlan::Row *new_row = new UnwindPlan::Row; + *new_row = *row_sp.get(); + row_sp.reset(new_row); + + // Describe offset 1 + row_sp->SetOffset(1); + row_sp->GetCFAValue().SetIsRegisterPlusOffset(k_rsp, 16); + regloc.SetAtCFAPlusOffset(-16); + row_sp->SetRegisterInfo(k_rbp, regloc); + unwind_plan.AppendRow(row_sp); + + // Allocate a new Row, populate it with the existing Row contents. 
+ new_row = new UnwindPlan::Row; + *new_row = *row_sp.get(); + row_sp.reset(new_row); + + // Describe offset 4 + row_sp->SetOffset(4); + row_sp->GetCFAValue().SetIsRegisterPlusOffset(k_rsp, 16); + unwind_plan.AppendRow(row_sp); + + RegisterContextSP reg_ctx_sp; + EXPECT_TRUE(engine64->AugmentUnwindPlanFromCallSite( + data, sizeof(data), sample_range, unwind_plan, reg_ctx_sp)); + + // Before we touch the stack pointer, we should still refer to the + // row from after the prologue. + row_sp = unwind_plan.GetRowForFunctionOffset(5); + EXPECT_EQ(4ull, row_sp->GetOffset()); + + // Check the first stack pointer update. + row_sp = unwind_plan.GetRowForFunctionOffset(12); + EXPECT_EQ(12ull, row_sp->GetOffset()); + EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_rsp); + EXPECT_EQ(152, row_sp->GetCFAValue().GetOffset()); + + // After the nop, we should still refer to the same row. + row_sp = unwind_plan.GetRowForFunctionOffset(13); + EXPECT_EQ(12ull, row_sp->GetOffset()); + + // Check that the second stack pointer update is reflected in the + // unwind plan. + row_sp = unwind_plan.GetRowForFunctionOffset(20); + EXPECT_EQ(20ull, row_sp->GetOffset()); + EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_rsp); + EXPECT_EQ(16, row_sp->GetCFAValue().GetOffset()); +} + TEST_F(Testx86AssemblyInspectionEngine, TestSimplex86_64Augmented) { UnwindPlan::Row::RegisterLocation regloc; UnwindPlan::RowSP row_sp; diff --git a/lldb/unittests/Utility/DataExtractorTest.cpp b/lldb/unittests/Utility/DataExtractorTest.cpp index 1a974f26f3293..fbf13399a1c99 100644 --- a/lldb/unittests/Utility/DataExtractorTest.cpp +++ b/lldb/unittests/Utility/DataExtractorTest.cpp @@ -49,6 +49,51 @@ TEST(DataExtractorTest, PeekData) { EXPECT_EQ(nullptr, E.PeekData(4, 1)); } +TEST(DataExtractorTest, GetCStr) { + uint8_t buffer[] = {'X', 'f', 'o', 'o', '\0'}; + DataExtractor E(buffer, sizeof buffer, lldb::eByteOrderLittle, 4); + + lldb::offset_t offset = 1; + EXPECT_STREQ("foo", E.GetCStr(&offset)); + EXPECT_EQ(5U, offset); +} + +TEST(DataExtractorTest, GetCStrEmpty) { + uint8_t buffer[] = {'X', '\0'}; + DataExtractor E(buffer, sizeof buffer, lldb::eByteOrderLittle, 4); + + lldb::offset_t offset = 1; + EXPECT_STREQ("", E.GetCStr(&offset)); + EXPECT_EQ(2U, offset); +} + +TEST(DataExtractorTest, GetCStrUnterminated) { + uint8_t buffer[] = {'X', 'f', 'o', 'o'}; + DataExtractor E(buffer, sizeof buffer, lldb::eByteOrderLittle, 4); + + lldb::offset_t offset = 1; + EXPECT_EQ(nullptr, E.GetCStr(&offset)); + EXPECT_EQ(1U, offset); +} + +TEST(DataExtractorTest, GetCStrAtEnd) { + uint8_t buffer[] = {'X'}; + DataExtractor E(buffer, sizeof buffer, lldb::eByteOrderLittle, 4); + + lldb::offset_t offset = 1; + EXPECT_EQ(nullptr, E.GetCStr(&offset)); + EXPECT_EQ(1U, offset); +} + +TEST(DataExtractorTest, GetCStrAtNullOffset) { + uint8_t buffer[] = {'f', 'o', 'o', '\0'}; + DataExtractor E(buffer, sizeof buffer, lldb::eByteOrderLittle, 4); + + lldb::offset_t offset = 0; + EXPECT_STREQ("foo", E.GetCStr(&offset)); + EXPECT_EQ(4U, offset); +} + TEST(DataExtractorTest, GetMaxU64) { uint8_t buffer[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}; DataExtractor LE(buffer, sizeof(buffer), lldb::eByteOrderLittle, diff --git a/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp b/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp index 73978836c5be5..1d363ac80a365 100644 --- a/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp +++ b/lldb/unittests/Utility/ProcessInstanceInfoTest.cpp @@ -91,3 +91,20 @@ TEST(ProcessInstanceInfo, 
DumpTable_invalidUID) { )", s.GetData()); } + +TEST(ProcessInstanceInfoMatch, Name) { + ProcessInstanceInfo info_bar, info_empty; + info_bar.GetExecutableFile().SetFile("/foo/bar", FileSpec::Style::posix); + + ProcessInstanceInfoMatch match; + match.SetNameMatchType(NameMatch::Equals); + match.GetProcessInfo().GetExecutableFile().SetFile("bar", + FileSpec::Style::posix); + + EXPECT_TRUE(match.Matches(info_bar)); + EXPECT_FALSE(match.Matches(info_empty)); + + match.GetProcessInfo().GetExecutableFile() = FileSpec(); + EXPECT_TRUE(match.Matches(info_bar)); + EXPECT_TRUE(match.Matches(info_empty)); +} diff --git a/llvm/docs/CommandGuide/FileCheck.rst b/llvm/docs/CommandGuide/FileCheck.rst index e8b324d080dfa..0072c9c034721 100644 --- a/llvm/docs/CommandGuide/FileCheck.rst +++ b/llvm/docs/CommandGuide/FileCheck.rst @@ -71,6 +71,11 @@ and from the command line. The :option:`--strict-whitespace` argument disables this behavior. End-of-line sequences are canonicalized to UNIX-style ``\n`` in all modes. +.. option:: --ignore-case + + By default, FileCheck uses case-sensitive matching. This option causes + FileCheck to use case-insensitive matching. + .. option:: --implicit-check-not check-pattern Adds implicit negative checks for the specified patterns between positive diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst index 7bb499b971d09..ff70a21b8dfcc 100644 --- a/llvm/docs/CommandGuide/lit.rst +++ b/llvm/docs/CommandGuide/lit.rst @@ -53,7 +53,7 @@ GENERAL OPTIONS Show the :program:`lit` help message. -.. option:: -j N, --threads=N +.. option:: -j N, --workers=N Run ``N`` tests in parallel. By default, this is automatically chosen to match the number of detected available CPUs. diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index 4f8704ad9a96d..efe29c4da4b56 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -523,6 +523,7 @@ Below is the timeline view for a subset of the dot-product example located in 0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2 1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3 2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 + 3 3.3 0.5 1.4 The timeline view is interesting because it shows instruction state changes during execution. It also gives an idea of how the tool processes instructions @@ -574,7 +575,8 @@ and therefore consuming physical registers). Table *Average Wait times* helps diagnose performance issues that are caused by the presence of long latency instructions and potentially long data dependencies -which may limit the ILP. Note that :program:`llvm-mca`, by default, assumes at +which may limit the ILP. Last row, ````, shows a global average over all +instructions measured. Note that :program:`llvm-mca`, by default, assumes at least 1cy between the dispatch event and the issue event. 
When the performance is limited by data dependencies and/or long latency diff --git a/llvm/docs/CompilerWriterInfo.rst b/llvm/docs/CompilerWriterInfo.rst index 7058574925ae3..5e37fde17c4f3 100644 --- a/llvm/docs/CompilerWriterInfo.rst +++ b/llvm/docs/CompilerWriterInfo.rst @@ -58,21 +58,27 @@ PowerPC IBM - Official manuals and docs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* `Power Instruction Set Architecture, Versions 2.03 through 2.06 (authentication required, free sign-up) `_ +* `Power Instruction Set Architecture, Version 3.0B `_ -* `PowerPC Compiler Writer's Guide `_ +* `POWER9 Processor User's Manual `_ -* `Intro to PowerPC Architecture `_ +* `Power Instruction Set Architecture, Version 2.07B `_ -* `PowerPC Processor Manuals (embedded) `_ +* `POWER8 Processor User's Manual `_ -* `Various IBM specifications and white papers `_ +* `Power Instruction Set Architecture, Versions 2.03 through 2.06 (Internet Archive) `_ + +* `IBM AIX 7.2 POWER Assembly Reference `_ * `IBM AIX/5L for POWER Assembly Reference `_ Other documents, collections, notes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* `PowerPC Compiler Writer's Guide `_ +* `Intro to PowerPC Architecture `_ +* `PowerPC Processor Manuals (embedded) `_ +* `Various IBM specifications and white papers `_ * `PowerPC ABI documents `_ * `PowerPC64 alignment of long doubles (from GCC) `_ * `Long branch stubs for powerpc64-linux (from binutils) `_ @@ -133,6 +139,9 @@ Linux ----- * `Linux extensions to gabi `_ +* `64-Bit ELF V2 ABI Specification: Power Architecture `_ + +* `OpenPOWER ELFv2 Errata: ELFv2 ABI Version 1.4 `_ * `PowerPC 64-bit ELF ABI Supplement `_ * `Procedure Call Standard for the AArch64 Architecture `_ * `Procedure Call Standard for the ARM Architecture `_ diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst index 27abc66f6d00f..91934c047ba18 100644 --- a/llvm/docs/DeveloperPolicy.rst +++ b/llvm/docs/DeveloperPolicy.rst @@ -396,6 +396,26 @@ to do so. .. _discuss the change/gather consensus: +Obtaining Commit Access to the GitHub Repository +------------------------------------------------ +We are currently in the process of migrating the project's source code from SVN +to a git repository on GitHub. We are maintaining a file in SVN to map +SVN usernames to GitHub usernames, so we can automatically grant access to +existing committers when we complete the migration to GitHub. In order to +request commit access, check out the github-usernames.txt file in meta/trunk and +add a line in the form of $SVN_USERNAME:$GITHUB_USERNAME and commit it. For +example: + +.. code:: console + + mkdir tmp-llvm-svn + cd tmp-llvm-svn + svn co https://$SVN_USERNAME@llvm.org/svn/llvm-project/meta/trunk + echo "$SVN_USERNAME:$GITHUB_USERNAME" >> trunk/github-usernames.txt + cd trunk + svn commit -m "Request commit access for $SVN_USERNAME" + + Making a Major Change --------------------- diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index e797b1f9a15d8..2caef042ff28a 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -6264,6 +6264,13 @@ enum is the smallest type which can represent all of its values:: !0 = !{i32 1, !"short_wchar", i32 1} !1 = !{i32 1, !"short_enum", i32 0} +LTO Post-Link Module Flags Metadata +----------------------------------- + +Some optimisations are only when the entire LTO unit is present in the current +module. This is represented by the ``LTOPostLink`` module flags metadata, which +will be created with a value of ``1`` when LTO linking occurs. 
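The new LangRef section describes where the ``LTOPostLink`` flag comes from but not how it is read back. A small, hypothetical consumer-side sketch, using only the public ``Module::getModuleFlag`` API and the flag name given above (the helper name is invented for illustration):

.. code-block:: c++

    #include "llvm/IR/Module.h"

    // Returns true when the module was produced by an LTO link, i.e. the
    // whole LTO unit is present and LTO-only optimisations may be applied.
    static bool isLTOPostLink(const llvm::Module &M) {
      // Module flags are ordinary metadata; a missing flag comes back as null.
      return M.getModuleFlag("LTOPostLink") != nullptr;
    }

A pass that must not run before the LTO link can gate its logic on a check like this.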
+ Automatic Linker Flags Named Metadata ===================================== @@ -16809,6 +16816,8 @@ Overview: The ``llvm.type.test`` intrinsic tests whether the given pointer is associated with the given type identifier. +.. _type.checked.load: + '``llvm.type.checked.load``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst index c18b6d01375dd..882d0dc9ab120 100644 --- a/llvm/docs/Reference.rst +++ b/llvm/docs/Reference.rst @@ -32,7 +32,6 @@ LLVM and API reference documentation. LangRef LibFuzzer MarkedUpDisassembly - MemorySSA MIRLangRef OptBisect ORCv2 @@ -41,7 +40,6 @@ LLVM and API reference documentation. SegmentedStacks StackMaps SpeculativeLoadHardening - SupportLibrary Statepoints SystemLibrary TestingGuide @@ -58,6 +56,9 @@ API Reference `Doxygen generated documentation `_ (`classes `_) +:doc:`HowToUseAttributes` + Answers some questions about the new Attributes infrastructure. + `Documentation for Go bindings `_ :doc:`ORCv2` @@ -67,76 +68,6 @@ API Reference LLVM Reference -------------- -:doc:`FaultMaps` - LLVM support for folding control flow into faulting machine instructions. - -:doc:`Atomics` - Information about LLVM's concurrency model. - -:doc:`ExceptionHandling` - This document describes the design and implementation of exception handling - in LLVM. - -:doc:`Extensions` - LLVM-specific extensions to tools and formats LLVM seeks compatibility with. - -:doc:`HowToSetUpLLVMStyleRTTI` - How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your - class hierarchy. - -:doc:`BlockFrequencyTerminology` - Provides information about terminology used in the ``BlockFrequencyInfo`` - analysis pass. - -:doc:`BranchWeightMetadata` - Provides information about Branch Prediction Information. - -:doc:`MemorySSA` - Information about the MemorySSA utility in LLVM, as well as how to use it. - -:doc:`Support Library ` - This document describes the LLVM Support Library (``lib/Support``) and - how to keep LLVM source code portable - -:doc:`GetElementPtr` - Answers to some very frequent questions about LLVM's most frequently - misunderstood instruction. - -:doc:`ScudoHardenedAllocator` - A library that implements a security-hardened `malloc()`. - -:doc:`GwpAsan` - A sampled heap memory error detection toolkit designed for production use. - -:doc:`Dependence Graphs ` - A description of the design of the various dependence graphs such as - the DDG (Data Dependence Graph). - -:doc:`CFIVerify` - A description of the verification tool for Control Flow Integrity. - -:doc:`SpeculativeLoadHardening` - A description of the Speculative Load Hardening mitigation for Spectre v1. - -:doc:`SegmentedStacks` - This document describes segmented stacks and how they are used in LLVM. - -:doc:`MarkedUpDisassembly` - This document describes the optional rich disassembly output syntax. - -:doc:`HowToUseAttributes` - Answers some questions about the new Attributes infrastructure. - -:doc:`StackMaps` - LLVM support for mapping instruction addresses to the location of - values and allowing code to be patched. - -:doc:`Coroutines` - LLVM support for coroutines. - -:doc:`YamlIO` - A reference guide for using LLVM's YAML I/O library. - ====================== Command Line Utilities ====================== @@ -216,4 +147,68 @@ XRay High-level documentation of how to use XRay in LLVM. :doc:`XRayExample` - An example of how to debug an application with XRay. \ No newline at end of file + An example of how to debug an application with XRay. 
+ +================= +Additional Topics +================= + +:doc:`FaultMaps` + LLVM support for folding control flow into faulting machine instructions. + +:doc:`Atomics` + Information about LLVM's concurrency model. + +:doc:`ExceptionHandling` + This document describes the design and implementation of exception handling + in LLVM. + +:doc:`Extensions` + LLVM-specific extensions to tools and formats LLVM seeks compatibility with. + +:doc:`HowToSetUpLLVMStyleRTTI` + How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your + class hierarchy. + +:doc:`BlockFrequencyTerminology` + Provides information about terminology used in the ``BlockFrequencyInfo`` + analysis pass. + +:doc:`BranchWeightMetadata` + Provides information about Branch Prediction Information. + +:doc:`GetElementPtr` + Answers to some very frequent questions about LLVM's most frequently + misunderstood instruction. + +:doc:`ScudoHardenedAllocator` + A library that implements a security-hardened `malloc()`. + +:doc:`GwpAsan` + A sampled heap memory error detection toolkit designed for production use. + +:doc:`Dependence Graphs ` + A description of the design of the various dependence graphs such as + the DDG (Data Dependence Graph). + +:doc:`CFIVerify` + A description of the verification tool for Control Flow Integrity. + +:doc:`SpeculativeLoadHardening` + A description of the Speculative Load Hardening mitigation for Spectre v1. + +:doc:`SegmentedStacks` + This document describes segmented stacks and how they are used in LLVM. + +:doc:`MarkedUpDisassembly` + This document describes the optional rich disassembly output syntax. + +:doc:`StackMaps` + LLVM support for mapping instruction addresses to the location of + values and allowing code to be patched. + +:doc:`Coroutines` + LLVM support for coroutines. + +:doc:`YamlIO` + A reference guide for using LLVM's YAML I/O library. \ No newline at end of file diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 1d501b260635e..08303052c1aad 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -53,6 +53,20 @@ Non-comprehensive list of changes in this release Makes programs 10x faster by doing Special New Thing. +* As per :ref:`LLVM Language Reference Manual `, + ``getelementptr inbounds`` can not change the null status of a pointer, + meaning it can not produce non-null pointer given null base pointer, and + likewise given non-null base pointer it can not produce null pointer; if it + does, the result is a :ref:`poison value `. + Since `r369789 `_ + (`D66608 `_ ``[InstCombine] icmp eq/ne (gep + inbounds P, Idx..), null -> icmp eq/ne P, null``) LLVM uses that for + transformations. If the original source violates these requirements this + may result in code being miscompiled. If you are using Clang front-end, + Undefined Behaviour Sanitizer ``-fsanitize=pointer-overflow`` check + will now catch such cases. + + Changes to the LLVM IR ---------------------- diff --git a/llvm/docs/TypeMetadata.rst b/llvm/docs/TypeMetadata.rst index 84cf05ba70fc6..7d0745b927963 100644 --- a/llvm/docs/TypeMetadata.rst +++ b/llvm/docs/TypeMetadata.rst @@ -224,3 +224,67 @@ efficiently to minimize the sizes of the underlying bitsets. } .. 
_GlobalLayoutBuilder: https://github.com/llvm/llvm-project/blob/master/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h + +``!vcall_visibility`` Metadata +============================== + +In order to allow removing unused function pointers from vtables, we need to +know whether every virtual call which could use it is known to the compiler, or +whether another translation unit could introduce more calls through the vtable. +This is not the same as the linkage of the vtable, because call sites could be +using a pointer of a more widely-visible base class. For example, consider this +code: + +.. code-block:: c++ + + __attribute__((visibility("default"))) + struct A { + virtual void f(); + }; + + __attribute__((visibility("hidden"))) + struct B : A { + virtual void f(); + }; + +With LTO, we know that all code which can see the declaration of ``B`` is +visible to us. However, a pointer to a ``B`` could be cast to ``A*`` and passed +to another linkage unit, which could then call ``f`` on it. This call would +load from the vtable for ``B`` (using the object pointer), and then call +``B::f``. This means we can't remove the function pointer from ``B``'s vtable, +or the implementation of ``B::f``. However, if we can see all code which knows +about any dynamic base class (which would be the case if ``B`` only inherited +from classes with hidden visibility), then this optimisation would be valid. + +This concept is represented in IR by the ``!vcall_visibility`` metadata +attached to vtable objects, with the following values: + +.. list-table:: + :header-rows: 1 + :widths: 10 90 + + * - Value + - Behavior + + * - 0 (or omitted) + - **Public** + Virtual function calls using this vtable could be made from external + code. + + * - 1 + - **Linkage Unit** + All virtual function calls which might use this vtable are in the + current LTO unit, meaning they will be in the current module once + LTO linking has been performed. + + * - 2 + - **Translation Unit** + All virtual function calls which might use this vtable are in the + current module. + +In addition, all function pointer loads from a vtable marked with the +``!vcall_visibility`` metadata (with a non-zero value) must be done using the +:ref:`llvm.type.checked.load ` intrinsic, so that virtual +calls sites can be correlated with the vtables which they might load from. +Other parts of the vtable (RTTI, offset-to-top, ...) can still be accessed with +normal loads. diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index 5c035d1717d6a..abaa98cc9e7e7 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -37,6 +37,7 @@ intermediate LLVM representation. LinkTimeOptimization LoopTerminology MarkdownQuickstartTemplate + MemorySSA MergeFunctions MCJITDesignAndImplementation NVPTXUsage @@ -44,8 +45,9 @@ intermediate LLVM representation. Passes ReportingGuide Remarks - StackSafetyAnalysis SourceLevelDebugging + StackSafetyAnalysis + SupportLibrary TableGen/index TableGenFundamentals Vectorizers @@ -87,6 +89,10 @@ LLVM Builds and Distributions :doc:`Docker` A reference for using Dockerfiles provided with LLVM. +:doc:`Support Library ` + This document describes the LLVM Support Library (``lib/Support``) and + how to keep LLVM source code portable + Optimizations ------------- @@ -107,6 +113,9 @@ Optimizations Information on how to write a new alias analysis implementation or how to use existing analyses. +:doc:`MemorySSA` + Information about the MemorySSA utility in LLVM, as well as how to use it. 
+ :doc:`LoopTerminology` A document describing Loops and associated terms as used in LLVM. diff --git a/llvm/include/llvm/ADT/Statistic.h b/llvm/include/llvm/ADT/Statistic.h index 2ac59da596efe..b7387ddcf1c79 100644 --- a/llvm/include/llvm/ADT/Statistic.h +++ b/llvm/include/llvm/ADT/Statistic.h @@ -44,38 +44,39 @@ class raw_ostream; class raw_fd_ostream; class StringRef; -class Statistic { +class StatisticBase { public: const char *DebugType; const char *Name; const char *Desc; - std::atomic Value; - std::atomic Initialized; - unsigned getValue() const { return Value.load(std::memory_order_relaxed); } + StatisticBase(const char *DebugType, const char *Name, const char *Desc) + : DebugType(DebugType), Name(Name), Desc(Desc) {} + const char *getDebugType() const { return DebugType; } const char *getName() const { return Name; } const char *getDesc() const { return Desc; } +}; - /// construct - This should only be called for non-global statistics. - void construct(const char *debugtype, const char *name, const char *desc) { - DebugType = debugtype; - Name = name; - Desc = desc; - Value = 0; - Initialized = false; - } +class TrackingStatistic : public StatisticBase { +public: + std::atomic Value; + std::atomic Initialized; + + TrackingStatistic(const char *DebugType, const char *Name, const char *Desc) + : StatisticBase(DebugType, Name, Desc), Value(0), Initialized(false) {} + + unsigned getValue() const { return Value.load(std::memory_order_relaxed); } // Allow use of this class as the value itself. operator unsigned() const { return getValue(); } -#if LLVM_ENABLE_STATS - const Statistic &operator=(unsigned Val) { + const TrackingStatistic &operator=(unsigned Val) { Value.store(Val, std::memory_order_relaxed); return init(); } - const Statistic &operator++() { + const TrackingStatistic &operator++() { Value.fetch_add(1, std::memory_order_relaxed); return init(); } @@ -85,7 +86,7 @@ class Statistic { return Value.fetch_add(1, std::memory_order_relaxed); } - const Statistic &operator--() { + const TrackingStatistic &operator--() { Value.fetch_sub(1, std::memory_order_relaxed); return init(); } @@ -95,14 +96,14 @@ class Statistic { return Value.fetch_sub(1, std::memory_order_relaxed); } - const Statistic &operator+=(unsigned V) { + const TrackingStatistic &operator+=(unsigned V) { if (V == 0) return *this; Value.fetch_add(V, std::memory_order_relaxed); return init(); } - const Statistic &operator-=(unsigned V) { + const TrackingStatistic &operator-=(unsigned V) { if (V == 0) return *this; Value.fetch_sub(V, std::memory_order_relaxed); @@ -119,54 +120,57 @@ class Statistic { init(); } -#else // Statistics are disabled in release builds. - - const Statistic &operator=(unsigned Val) { +protected: + TrackingStatistic &init() { + if (!Initialized.load(std::memory_order_acquire)) + RegisterStatistic(); return *this; } - const Statistic &operator++() { - return *this; - } + void RegisterStatistic(); +}; - unsigned operator++(int) { - return 0; - } +class NoopStatistic : public StatisticBase { +public: + using StatisticBase::StatisticBase; - const Statistic &operator--() { - return *this; - } + unsigned getValue() const { return 0; } - unsigned operator--(int) { - return 0; - } + // Allow use of this class as the value itself. 
+ operator unsigned() const { return 0; } - const Statistic &operator+=(const unsigned &V) { - return *this; - } + const NoopStatistic &operator=(unsigned Val) { return *this; } - const Statistic &operator-=(const unsigned &V) { - return *this; - } + const NoopStatistic &operator++() { return *this; } - void updateMax(unsigned V) {} + unsigned operator++(int) { return 0; } -#endif // LLVM_ENABLE_STATS + const NoopStatistic &operator--() { return *this; } -protected: - Statistic &init() { - if (!Initialized.load(std::memory_order_acquire)) - RegisterStatistic(); - return *this; - } + unsigned operator--(int) { return 0; } - void RegisterStatistic(); + const NoopStatistic &operator+=(const unsigned &V) { return *this; } + + const NoopStatistic &operator-=(const unsigned &V) { return *this; } + + void updateMax(unsigned V) {} }; +#if LLVM_ENABLE_STATS +using Statistic = TrackingStatistic; +#else +using Statistic = NoopStatistic; +#endif + // STATISTIC - A macro to make definition of statistics really simple. This // automatically passes the DEBUG_TYPE of the file into the statistic. #define STATISTIC(VARNAME, DESC) \ - static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC, {0}, {false}} + static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC} + +// ALWAYS_ENABLED_STATISTIC - A macro to define a statistic like STATISTIC but +// it is enabled even if LLVM_ENABLE_STATS is off. +#define ALWAYS_ENABLED_STATISTIC(VARNAME, DESC) \ + static llvm::TrackingStatistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC} /// Enable the collection and printing of statistics. void EnableStatistics(bool PrintOnExit = true); diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h index 51fa4844f307d..108185bd07b90 100644 --- a/llvm/include/llvm/ADT/StringMap.h +++ b/llvm/include/llvm/ADT/StringMap.h @@ -118,36 +118,59 @@ class StringMapImpl { } }; -/// StringMapEntry - This is used to represent one value that is inserted into -/// a StringMap. It contains the Value itself and the key: the string length -/// and data. +/// StringMapEntryStorage - Holds the value in a StringMapEntry. +/// +/// Factored out into a separate base class to make it easier to specialize. +/// This is primarily intended to support StringSet, which doesn't need a value +/// stored at all. template -class StringMapEntry : public StringMapEntryBase { +class StringMapEntryStorage : public StringMapEntryBase { public: ValueTy second; - explicit StringMapEntry(size_t strLen) + explicit StringMapEntryStorage(size_t strLen) : StringMapEntryBase(strLen), second() {} template - StringMapEntry(size_t strLen, InitTy &&... InitVals) + StringMapEntryStorage(size_t strLen, InitTy &&... InitVals) : StringMapEntryBase(strLen), second(std::forward(InitVals)...) {} - StringMapEntry(StringMapEntry &E) = delete; - - StringRef getKey() const { - return StringRef(getKeyData(), getKeyLength()); - } + StringMapEntryStorage(StringMapEntryStorage &E) = delete; const ValueTy &getValue() const { return second; } ValueTy &getValue() { return second; } void setValue(const ValueTy &V) { second = V; } +}; + +template<> +class StringMapEntryStorage : public StringMapEntryBase { +public: + explicit StringMapEntryStorage(size_t strLen, NoneType none = None) + : StringMapEntryBase(strLen) {} + StringMapEntryStorage(StringMapEntryStorage &E) = delete; + + NoneType getValue() const { return None; } +}; + +/// StringMapEntry - This is used to represent one value that is inserted into +/// a StringMap. 
It contains the Value itself and the key: the string length +/// and data. +template +class StringMapEntry final : public StringMapEntryStorage { +public: + using StringMapEntryStorage::StringMapEntryStorage; + + StringRef getKey() const { + return StringRef(getKeyData(), this->getKeyLength()); + } /// getKeyData - Return the start of the string data that is the key for this /// value. The string data is always stored immediately after the /// StringMapEntry object. const char *getKeyData() const {return reinterpret_cast(this+1);} - StringRef first() const { return StringRef(getKeyData(), getKeyLength()); } + StringRef first() const { + return StringRef(getKeyData(), this->getKeyLength()); + } /// Create a StringMapEntry for the specified key construct the value using /// \p InitiVals. @@ -199,7 +222,7 @@ class StringMapEntry : public StringMapEntryBase { template void Destroy(AllocatorTy &Allocator) { // Free memory referenced by the item. - size_t AllocSize = sizeof(StringMapEntry) + getKeyLength() + 1; + size_t AllocSize = sizeof(StringMapEntry) + this->getKeyLength() + 1; this->~StringMapEntry(); Allocator.Deallocate(static_cast(this), AllocSize); } diff --git a/llvm/include/llvm/ADT/StringSet.h b/llvm/include/llvm/ADT/StringSet.h index af3a44a7b32c4..60be09d3c3264 100644 --- a/llvm/include/llvm/ADT/StringSet.h +++ b/llvm/include/llvm/ADT/StringSet.h @@ -24,8 +24,8 @@ namespace llvm { /// StringSet - A wrapper for StringMap that provides set-like functionality. template - class StringSet : public StringMap { - using base = StringMap; + class StringSet : public StringMap { + using base = StringMap; public: StringSet() = default; @@ -37,13 +37,13 @@ namespace llvm { std::pair insert(StringRef Key) { assert(!Key.empty()); - return base::insert(std::make_pair(Key, '\0')); + return base::insert(std::make_pair(Key, None)); } template void insert(const InputIt &Begin, const InputIt &End) { for (auto It = Begin; It != End; ++It) - base::insert(std::make_pair(*It, '\0')); + base::insert(std::make_pair(*It, None)); } template diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index cdb0ee32de19f..fa8451e9d3e50 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -371,6 +371,34 @@ class TargetTransformInfoImplBase { return false; } + unsigned getCacheLineSize() const { return 0; } + + llvm::Optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { + switch (Level) { + case TargetTransformInfo::CacheLevel::L1D: + LLVM_FALLTHROUGH; + case TargetTransformInfo::CacheLevel::L2D: + return llvm::Optional(); + } + llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); + } + + llvm::Optional getCacheAssociativity( + TargetTransformInfo::CacheLevel Level) const { + switch (Level) { + case TargetTransformInfo::CacheLevel::L1D: + LLVM_FALLTHROUGH; + case TargetTransformInfo::CacheLevel::L2D: + return llvm::Optional(); + } + + llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); + } + + unsigned getPrefetchDistance() const { return 0; } + unsigned getMinPrefetchStride() const { return 1; } + unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h index 82cf8efeea54b..43ce26147c2e2 100644 --- 
a/llvm/include/llvm/Analysis/TypeMetadataUtils.h +++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h @@ -50,6 +50,8 @@ void findDevirtualizableCallsForTypeCheckedLoad( SmallVectorImpl &LoadedPtrs, SmallVectorImpl &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT); + +Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M); } #endif diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 9811b41256953..4a61c2bc35c72 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -381,13 +381,12 @@ APInt possiblyDemandedEltsInMask(Value *Mask); /// the interleaved store group doesn't allow gaps. template class InterleaveGroup { public: - InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align) - : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {} - - InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align) - : Align(Align), InsertPos(Instr) { - assert(Align && "The alignment should be non-zero"); + InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment) + : Factor(Factor), Reverse(Reverse), Alignment(Alignment), + InsertPos(nullptr) {} + InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment) + : Alignment(Alignment), InsertPos(Instr) { Factor = std::abs(Stride); assert(Factor > 1 && "Invalid interleave factor"); @@ -397,7 +396,7 @@ template class InterleaveGroup { bool isReverse() const { return Reverse; } uint32_t getFactor() const { return Factor; } - uint32_t getAlignment() const { return Align; } + uint32_t getAlignment() const { return Alignment.value(); } uint32_t getNumMembers() const { return Members.size(); } /// Try to insert a new member \p Instr with index \p Index and @@ -405,9 +404,7 @@ template class InterleaveGroup { /// negative if it is the new leader. /// /// \returns false if the instruction doesn't belong to the group. - bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) { - assert(NewAlign && "The new member's alignment should be non-zero"); - + bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) { // Make sure the key fits in an int32_t. Optional MaybeKey = checkedAdd(Index, SmallestKey); if (!MaybeKey) @@ -439,7 +436,7 @@ template class InterleaveGroup { } // It's always safe to select the minimum alignment. - Align = std::min(Align, NewAlign); + Alignment = std::min(Alignment, NewAlign); Members[Key] = Instr; return true; } @@ -498,7 +495,7 @@ template class InterleaveGroup { private: uint32_t Factor; // Interleave Factor. bool Reverse; - uint32_t Align; + Align Alignment; DenseMap Members; int32_t SmallestKey = 0; int32_t LargestKey = 0; @@ -615,8 +612,8 @@ class InterleavedAccessInfo { struct StrideDescriptor { StrideDescriptor() = default; StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, - unsigned Align) - : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} + Align Alignment) + : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {} // The access's stride. It is negative for a reverse access. int64_t Stride = 0; @@ -628,7 +625,7 @@ class InterleavedAccessInfo { uint64_t Size = 0; // The alignment of this access. - unsigned Align = 0; + Align Alignment; }; /// A type for holding instructions and their stride descriptors. @@ -639,11 +636,11 @@ class InterleavedAccessInfo { /// /// \returns the newly created interleave group. 
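+  /// A quick illustrative call (the operands are hypothetical), showing the
+  /// llvm::Align parameter this patch switches to:
+  /// \code
+  ///   InterleaveGroup<Instruction> *IG =
+  ///       createInterleaveGroup(Store, /*Stride=*/2, Align(16));
+  /// \endcode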
InterleaveGroup * - createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { + createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) { assert(!InterleaveGroupMap.count(Instr) && "Already in an interleaved access group"); InterleaveGroupMap[Instr] = - new InterleaveGroup(Instr, Stride, Align); + new InterleaveGroup(Instr, Stride, Alignment); InterleaveGroups.insert(InterleaveGroupMap[Instr]); return InterleaveGroupMap[Instr]; } diff --git a/llvm/include/llvm/BinaryFormat/Minidump.h b/llvm/include/llvm/BinaryFormat/Minidump.h index 93df467cd823f..4ab5332e7341e 100644 --- a/llvm/include/llvm/BinaryFormat/Minidump.h +++ b/llvm/include/llvm/BinaryFormat/Minidump.h @@ -25,6 +25,8 @@ namespace llvm { namespace minidump { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + /// The minidump header is the first part of a minidump file. It identifies the /// file as a minidump file, and gives the location of the stream directory. struct Header { @@ -72,6 +74,12 @@ struct MemoryInfoListHeader { support::ulittle32_t SizeOfHeader; support::ulittle32_t SizeOfEntry; support::ulittle64_t NumberOfEntries; + + MemoryInfoListHeader() = default; + MemoryInfoListHeader(uint32_t SizeOfHeader, uint32_t SizeOfEntry, + uint64_t NumberOfEntries) + : SizeOfHeader(SizeOfHeader), SizeOfEntry(SizeOfEntry), + NumberOfEntries(NumberOfEntries) {} }; static_assert(sizeof(MemoryInfoListHeader) == 16, ""); @@ -84,11 +92,13 @@ enum class MemoryProtection : uint32_t { enum class MemoryState : uint32_t { #define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) NAME = CODE, #include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), }; enum class MemoryType : uint32_t { #define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) NAME = CODE, #include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), }; struct MemoryInfo { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7617583fc292d..8a0cce0f6a03d 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -190,6 +190,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} + virtual ~BasicTTIImplBase() = default; using TargetTransformInfoImplBase::DL; @@ -522,8 +523,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { virtual Optional getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { - return Optional( - getST()->getCacheAssociativity(static_cast(Level))); + Optional TargetResult = + getST()->getCacheAssociativity(static_cast(Level)); + + if (TargetResult) + return TargetResult; + + return BaseT::getCacheAssociativity(Level); } virtual unsigned getCacheLineSize() const { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h index e2a088f921764..dfe5a7f3177df 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h @@ -63,12 +63,13 @@ class GISelKnownBits : public GISelChangeObserver { void computeKnownBitsForFrameIndex(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth = 0); - static unsigned inferAlignmentForFrameIdx(int FrameIdx, int Offset, - const MachineFunction &MF); - static void computeKnownBitsForAlignment(KnownBits &Known, unsigned Align); + static Align 
inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF); + static void computeKnownBitsForAlignment(KnownBits &Known, + MaybeAlign Alignment); // Try to infer alignment for MI. - static unsigned inferPtrAlignment(const MachineInstr &MI); + static MaybeAlign inferPtrAlignment(const MachineInstr &MI); // Observer API. No-op for non-caching implementation. void erasingInstr(MachineInstr &MI) override{}; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index bf60319996a79..f65bf269c88f2 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -203,6 +203,10 @@ class LegalizerHelper { LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); + LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index db2a0be7931db..8af2853473c2a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -119,14 +119,16 @@ struct ValueAndVReg { unsigned VReg; }; /// If \p VReg is defined by a statically evaluable chain of -/// instructions rooted on a G_CONSTANT (\p LookThroughInstrs == true) -/// and that constant fits in int64_t, returns its value as well as -/// the virtual register defined by this G_CONSTANT. -/// When \p LookThroughInstrs == false, this function behaves like +/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true) +/// and that constant fits in int64_t, returns its value as well as the +/// virtual register defined by this G_F/CONSTANT. +/// When \p LookThroughInstrs == false this function behaves like /// getConstantVRegVal. +/// When \p HandleFConstants == false the function bails on G_FCONSTANTs. Optional getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI, - bool LookThroughInstrs = true); + bool LookThroughInstrs = true, + bool HandleFConstants = true); const ConstantFP* getConstantFPVRegVal(unsigned VReg, const MachineRegisterInfo &MRI); diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 9afd85237621f..9c3757828563e 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -778,6 +778,19 @@ class TargetInstrInfo : public MCInstrInfo { return false; } + /// Return the increase in code size needed to predicate a contiguous run of + /// NumInsts instructions. + virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const { + return 0; + } + + /// Return an estimate for the code size reduction (in bytes) which will be + /// caused by removing the given branch instruction during if-conversion. + virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const { + return getInstSizeInBytes(MI); + } + /// Return true if it's profitable to unpredicate /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually /// exclusive predicates. 
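To make the two new ``TargetInstrInfo`` if-conversion hooks above concrete, a
target could override them roughly as follows. This is only a sketch:
``MyTargetInstrInfo`` and the per-run prefix cost are invented for the example
and are not taken from any in-tree target.

.. code-block:: c++

  // Hypothetical target: model predication as one 2-byte prefix per run of up
  // to four predicated instructions (purely illustrative numbers).
  unsigned MyTargetInstrInfo::extraSizeToPredicateInstructions(
      const MachineFunction &MF, unsigned NumInsts) const {
    return 2 * ((NumInsts + 3) / 4);
  }

  // Estimate the size of a branch removed by if-conversion; falling back to
  // the generic per-instruction size estimate is a reasonable default.
  unsigned MyTargetInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
    return getInstSizeInBytes(MI);
  }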
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h index f65a20af0cb1c..d06818eca9d3f 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -99,7 +99,7 @@ class DWARFDebugLoclists { bool IsLittleEndian; public: - void parse(DataExtractor data, unsigned Version); + void parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version); void dump(raw_ostream &OS, uint64_t BaseAddr, const MCRegisterInfo *RegInfo, Optional Offset) const; diff --git a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h index e102de5de6181..cd568765a4f2e 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h @@ -109,6 +109,10 @@ class FileWriter { /// file position. uint64_t tell(); + llvm::raw_pwrite_stream &get_stream() { + return OS; + } + private: FileWriter(const FileWriter &rhs) = delete; void operator=(const FileWriter &rhs) = delete; diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h new file mode 100644 index 0000000000000..12c8187132bab --- /dev/null +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -0,0 +1,229 @@ +//===- GsymCreator.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H +#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H + +#include +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +namespace llvm { + +namespace gsym { +class FileWriter; + +/// GsymCreator is used to emit GSYM data to a stand alone file or section +/// within a file. +/// +/// The GsymCreator is designed to be used in 3 stages: +/// - Create FunctionInfo objects and add them +/// - Finalize the GsymCreator object +/// - Save to file or section +/// +/// The first stage involves creating FunctionInfo objects from another source +/// of information like compiler debug info metadata, DWARF or Breakpad files. +/// Any strings in the FunctionInfo or contained information, like InlineInfo +/// or LineTable objects, should get the string table offsets by calling +/// GsymCreator::insertString(...). Any file indexes that are needed should be +/// obtained by calling GsymCreator::insertFile(...). All of the function calls +/// in GsymCreator are thread safe. This allows multiple threads to create and +/// add FunctionInfo objects while parsing debug information. +/// +/// Once all of the FunctionInfo objects have been added, the +/// GsymCreator::finalize(...) must be called prior to saving. This function +/// will sort the FunctionInfo objects, finalize the string table, and do any +/// other passes on the information needed to prepare the information to be +/// saved. +/// +/// Once the object has been finalized, it can be saved to a file or section. 
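+///
+/// As a rough usage sketch of these three stages (``FI`` and the output path
+/// are placeholders, not taken from this patch):
+///
+/// \code
+///   GsymCreator Gsym;
+///   // Build each FunctionInfo using Gsym.insertString() / Gsym.insertFile()
+///   // for any strings and paths it references, then hand it over.
+///   Gsym.addFunctionInfo(std::move(FI));
+///   if (llvm::Error Err = Gsym.finalize(llvm::errs()))
+///     return Err;
+///   return Gsym.save("out.gsym", llvm::support::little);
+/// \endcode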
+///
+/// ENCODING
+///
+/// GSYM files are designed to be memory mapped into a process as shared, read
+/// only data, and used as is.
+///
+/// The GSYM file format when in a stand alone file consists of:
+/// - Header
+/// - Address Table
+/// - Function Info Offsets
+/// - File Table
+/// - String Table
+/// - Function Info Data
+///
+/// HEADER
+///
+/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h".
+///
+/// ADDRESS TABLE
+///
+/// The address table immediately follows the header in the file and consists
+/// of Header.NumAddresses address offsets. These offsets are sorted and can be
+/// binary searched for efficient lookups. Addresses in the address table are
+/// stored as offsets from a 64 bit base address found in Header.BaseAddress.
+/// This allows the address table to contain 8, 16, or 32 bit offsets. This
+/// allows the address table to not require full 64 bit addresses for each
+/// address. The resulting GSYM size is smaller and causes fewer pages to be
+/// touched during address lookups when the address table is smaller. The size
+/// of the address offsets in the address table is specified in the header in
+/// Header.AddrOffSize. The first offset in the address table is aligned to
+/// Header.AddrOffSize alignment to ensure efficient access when loaded into
+/// memory.
+///
+/// FUNCTION INFO OFFSETS TABLE
+///
+/// The function info offsets table immediately follows the address table and
+/// consists of Header.NumAddresses 32 bit file offsets: one for each address
+/// in the address table. This data is aligned to a 4 byte boundary. The
+/// offsets in this table are the relative offsets from the start offset of the
+/// GSYM header and point to the function info data for each address in the
+/// address table. Keeping this data separate from the address table helps to
+/// reduce the number of pages that are touched when address lookups occur on a
+/// GSYM file.
+///
+/// FILE TABLE
+///
+/// The file table immediately follows the function info offsets table. The
+/// encoding of the FileTable is:
+///
+/// struct FileTable {
+/// uint32_t Count;
+/// FileEntry Files[];
+/// };
+///
+/// The file table starts with a 32 bit count of the number of files that are
+/// used in all of the function info, followed by that number of FileEntry
+/// structures. The file table is aligned to a 4 byte boundary. Each file in
+/// the file table is represented with a FileEntry structure.
+/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details.
+///
+/// STRING TABLE
+///
+/// The string table follows the file table in stand alone GSYM files and
+/// contains all strings for everything contained in the GSYM file. Any string
+/// data should be added to the string table and any references to strings
+/// inside GSYM information must be stored as 32 bit string table offsets into
+/// this string table. The string table always starts with an empty string at
+/// offset zero and is followed by any strings needed by the GSYM information.
+/// The start of the string table is not aligned to any boundary.
+///
+/// FUNCTION INFO DATA
+///
+/// The function info data is the payload that contains information about the
+/// address that is being looked up. It contains all of the encoded
+/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
+/// entry in the Function Info Offsets Table. For details on the exact encoding
+/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
+class GsymCreator { + // Private member variables require Mutex protections + mutable std::recursive_mutex Mutex; + std::vector Funcs; + StringTableBuilder StrTab; + DenseMap FileEntryToIndex; + std::vector Files; + std::vector UUID; + bool Finalized = false; + +public: + + GsymCreator(); + + /// Save a GSYM file to a stand alone file. + /// + /// \param Path The file path to save the GSYM file to. + /// \param ByteOrder The endianness to use when saving the file. + /// \returns An error object that indicates success or failure of the save. + llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const; + + /// Encode a GSYM into the file writer stream at the current position. + /// + /// \param O The stream to save the binary data to + /// \returns An error object that indicates success or failure of the save. + llvm::Error encode(FileWriter &O) const; + + /// Insert a string into the GSYM string table. + /// + /// All strings used by GSYM files must be uniqued by adding them to this + /// string pool and using the returned offset for any string values. + /// + /// \param S The string to insert into the string table. + /// \returns The unique 32 bit offset into the string table. + uint32_t insertString(StringRef S); + + /// Insert a file into this GSYM creator. + /// + /// Inserts a file by adding a FileEntry into the "Files" member variable if + /// the file has not already been added. The file path is split into + /// directory and filename which are both added to the string table. This + /// allows paths to be stored efficiently by reusing the directories that are + /// common between multiple files. + /// + /// \param Path The path to the file to insert. + /// \param Style The path style for the "Path" parameter. + /// \returns The unique file index for the inserted file. + uint32_t insertFile(StringRef Path, + sys::path::Style Style = sys::path::Style::native); + + /// Add a function info to this GSYM creator. + /// + /// All information in the FunctionInfo object must use the + /// GsymCreator::insertString(...) function when creating string table + /// offsets for names and other strings. + /// + /// \param FI The function info object to emplace into our functions list. + void addFunctionInfo(FunctionInfo &&FI); + + /// Finalize the data in the GSYM creator prior to saving the data out. + /// + /// Finalize must be called after all FunctionInfo objects have been added + /// and before GsymCreator::save() is called. + /// + /// \param OS Output stream to report duplicate function infos, overlapping + /// function infos, and function infos that were merged or removed. + /// \returns An error object that indicates success or failure of the + /// finalize. + llvm::Error finalize(llvm::raw_ostream &OS); + + /// Set the UUID value. + /// + /// \param UUIDBytes The new UUID bytes. + void setUUID(llvm::ArrayRef UUIDBytes) { + UUID.assign(UUIDBytes.begin(), UUIDBytes.end()); + } + + /// Thread safe iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. + void forEachFunctionInfo( + std::function const &Callback); + + /// Thread safe const iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. 
+ void forEachFunctionInfo( + std::function const &Callback) const; + +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h new file mode 100644 index 0000000000000..113bcee9c9a34 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -0,0 +1,228 @@ +//===- GsymReader.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H +#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H + + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorOr.h" + +#include +#include +#include +#include +#include + +namespace llvm { +class MemoryBuffer; +class raw_ostream; + +namespace gsym { + +/// GsymReader is used to read GSYM data from a file or buffer. +/// +/// This class is optimized for very quick lookups when the endianness matches +/// the host system. The Header, address table, address info offsets, and file +/// table is designed to be mmap'ed as read only into memory and used without +/// any parsing needed. If the endianness doesn't match, we swap these objects +/// and tables into GsymReader::SwappedData and then point our header and +/// ArrayRefs to this swapped internal data. +/// +/// GsymReader objects must use one of the static functions to create an +/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...). + +class GsymReader { + GsymReader(std::unique_ptr Buffer); + llvm::Error parse(); + + std::unique_ptr MemBuffer; + StringRef GsymBytes; + llvm::support::endianness Endian; + const Header *Hdr = nullptr; + ArrayRef AddrOffsets; + ArrayRef AddrInfoOffsets; + ArrayRef Files; + StringTable StrTab; + /// When the GSYM file's endianness doesn't match the host system then + /// we must decode all data structures that need to be swapped into + /// local storage and set point the ArrayRef objects above to these swapped + /// copies. + struct SwappedData { + Header Hdr; + std::vector AddrOffsets; + std::vector AddrInfoOffsets; + std::vector Files; + }; + std::unique_ptr Swap; + +public: + GsymReader(GsymReader &&RHS); + ~GsymReader(); + + /// Construct a GsymReader from a file on disk. + /// + /// \param Path The file path the GSYM file to read. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected openFile(StringRef Path); + + /// Construct a GsymReader from a buffer. + /// + /// \param Bytes A set of bytes that will be copied and owned by the + /// returned object on success. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected copyBuffer(StringRef Bytes); + + /// Access the GSYM header. + /// \returns A native endian version of the GSYM header. 
+ const Header &getHeader() const; + + /// Get the full function info for an address. + /// + /// \param Addr A virtual address from the orignal object file to lookup. + /// \returns An expected FunctionInfo that contains the function info object + /// or an error object that indicates reason for failing to lookup the + /// address, + llvm::Expected getFunctionInfo(uint64_t Addr) const; + + /// Get a string from the string table. + /// + /// \param Offset The string table offset for the string to retrieve. + /// \returns The string from the strin table. + StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } + +protected: + /// Gets an address from the address table. + /// + /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. + /// + /// \param Index A index into the address table. + /// \returns A resolved virtual address for adddress in the address table + /// or llvm::None if Index is out of bounds. + Optional getAddress(size_t Index) const; + + /// Get the a file entry for the suppplied file index. + /// + /// Used to convert any file indexes in the FunctionInfo data back into + /// files. This function can be used for iteration, but is more commonly used + /// for random access when doing lookups. + /// + /// \param Index An index into the file table. + /// \returns An optional FileInfo that will be valid if the file index is + /// valid, or llvm::None if the file index is out of bounds, + Optional getFile(uint32_t Index) const { + if (Index < Files.size()) + return Files[Index]; + return llvm::None; + } + + /// Get an appropriate address info offsets array. + /// + /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 + /// byte offsets from the The gsym::Header::BaseAddress. The table is stored + /// internally as a array of bytes that are in the correct endianness. When + /// we access this table we must get an array that matches those sizes. This + /// templatized helper function is used when accessing address offsets in the + /// AddrOffsets member variable. + /// + /// \returns An ArrayRef of an appropriate address offset size. + template ArrayRef + getAddrOffsets() const { + return ArrayRef(reinterpret_cast(AddrOffsets.data()), + AddrOffsets.size()/sizeof(T)); + } + + /// Get an appropriate address from the address table. + /// + /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 + /// byte address offsets from the The gsym::Header::BaseAddress. The table is + /// stored internally as a array of bytes that are in the correct endianness. + /// In order to extract an address from the address table we must access the + /// address offset using the correct size and then add it to the BaseAddress + /// in the header. + /// + /// \param Index An index into the AddrOffsets array. + /// \returns An virtual address that matches the original object file for the + /// address as the specified index, or llvm::None if Index is out of bounds. + template Optional + addressForIndex(size_t Index) const { + ArrayRef AIO = getAddrOffsets(); + if (Index < AIO.size()) + return AIO[Index] + Hdr->BaseAddress; + return llvm::None; + } + /// Lookup an address offset in the AddrOffsets table. + /// + /// Given an address offset, look it up using a binary search of the + /// AddrOffsets table. + /// + /// \param AddrOffset An address offset, that has already been computed by + /// subtracting the gsym::Header::BaseAddress. + /// \returns The matching address offset index. 
This index will be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. + template + uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const { + ArrayRef AIO = getAddrOffsets(); + const auto Begin = AIO.begin(); + const auto End = AIO.end(); + auto Iter = std::lower_bound(Begin, End, AddrOffset); + if (Iter == End || AddrOffset < *Iter) + --Iter; + return std::distance(Begin, Iter); + } + + /// Create a GSYM from a memory buffer. + /// + /// Called by both openFile() and copyBuffer(), this function does all of the + /// work of parsing the GSYM file and returning an error. + /// + /// \param MemBuffer A memory buffer that will transfer ownership into the + /// GsymReader. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected + create(std::unique_ptr &MemBuffer); + + + /// Given an address, find the address index. + /// + /// Binary search the address table and find the matching address index. + /// + /// \param Addr A virtual address that matches the original object file + /// to lookup. + /// \returns An index into the address table. This index can be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. + /// Returns an error if the address isn't in the GSYM with details of why. + Expected getAddressIndex(const uint64_t Addr) const; + + /// Given an address index, get the offset for the FunctionInfo. + /// + /// Looking up an address is done by finding the corresponding address + /// index for the address. This index is then used to get the offset of the + /// FunctionInfo data that we will decode using this function. + /// + /// \param Index An index into the address table. + /// \returns An optional GSYM data offset for the offset of the FunctionInfo + /// that needs to be decoded. + Optional getAddressInfoOffset(size_t Index) const; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H diff --git a/llvm/include/llvm/DebugInfo/GSYM/Header.h b/llvm/include/llvm/DebugInfo/GSYM/Header.h index b81f68c10782e..6652c59c97a67 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/Header.h +++ b/llvm/include/llvm/DebugInfo/GSYM/Header.h @@ -22,6 +22,7 @@ namespace gsym { class FileWriter; constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' +constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG' constexpr uint32_t GSYM_VERSION = 1; constexpr size_t GSYM_MAX_UUID_SIZE = 20; @@ -84,16 +85,20 @@ struct Header { /// be set to zero. uint8_t UUID[GSYM_MAX_UUID_SIZE]; - /// Check if a header is valid. + /// Check if a header is valid and return an error if anything is wrong. /// - /// \returns True if the header is valid and if the version is supported. - bool isValid() const { - if (Magic != GSYM_MAGIC) - return false; - if (Version != GSYM_VERSION) - return false; - return true; - } + /// This function can be used prior to encoding a header to ensure it is + /// valid, or after decoding a header to ensure it is valid and supported. + /// + /// Check a correctly byte swapped header for errors: + /// - check magic value + /// - check that version number is supported + /// - check that the address offset size is supported + /// - check that the UUID size is valid + /// + /// \returns An error if anything is wrong in the header, or Error::success() + /// if there are no errors. + llvm::Error checkForError() const; /// Decode an object from a binary data stream. 
/// diff --git a/llvm/include/llvm/DebugInfo/PDB/GenericError.h b/llvm/include/llvm/DebugInfo/PDB/GenericError.h index ec85d92d2a927..af93be931b8e0 100644 --- a/llvm/include/llvm/DebugInfo/PDB/GenericError.h +++ b/llvm/include/llvm/DebugInfo/PDB/GenericError.h @@ -20,7 +20,7 @@ enum class pdb_error_code { dia_sdk_not_present, dia_failed_loading, signature_out_of_date, - external_cmdline_ref, + no_matching_pch, unspecified, }; } // namespace pdb diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h new file mode 100644 index 0000000000000..d70b545fff861 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h @@ -0,0 +1,60 @@ +//===---- MachO_arm64.h - JIT link functions for MachO/arm64 ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for MachO/arm64. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H +#define LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +namespace MachO_arm64_Edges { + +enum MachOARM64RelocationKind : Edge::Kind { + Branch26 = Edge::FirstRelocation, + Pointer32, + Pointer64, + Pointer64Anon, + Page21, + PageOffset12, + GOTPage21, + GOTPageOffset12, + PointerToGOT, + PairedAddend, + LDRLiteral19, + Delta32, + Delta64, + NegDelta32, + NegDelta64, +}; + +} // namespace MachO_arm64_Edges + +/// jit-link the given object buffer, which must be a MachO arm64 object file. +/// +/// If PrePrunePasses is empty then a default mark-live pass will be inserted +/// that will mark all exported atoms live. If PrePrunePasses is not empty, the +/// caller is responsible for including a pass to mark atoms as live. +/// +/// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will +/// be inserted. If PostPrunePasses is not empty then the caller is responsible +/// for including a pass to insert GOT and stub edges. +void jitLink_MachO_arm64(std::unique_ptr Ctx); + +/// Return the string name of the given MachO arm64 edge kind. 
+StringRef getMachOARM64RelocationKindName(Edge::Kind R); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 1ad17093a7166..0e1ffef58672d 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -40,3 +40,4 @@ LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25) LLVM_FIXED_MD_KIND(MD_callback, "callback", 26) LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28) +LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 29) diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index a47faac5d4158..6ea2c5d41e71c 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -28,6 +28,20 @@ class MDNode; class Metadata; class GlobalObject : public GlobalValue { +public: + // VCallVisibility - values for visibility metadata attached to vtables. This + // describes the scope in which a virtual call could end up being dispatched + // through this vtable. + enum VCallVisibility { + // Type is potentially visible to external code. + VCallVisibilityPublic = 0, + // Type is only visible to code which will be in the current Module after + // LTO internalization. + VCallVisibilityLinkageUnit = 1, + // Type is only visible to code in the current Module. + VCallVisibilityTranslationUnit = 2, + }; + protected: GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, LinkageTypes Linkage, const Twine &Name, @@ -163,6 +177,8 @@ class GlobalObject : public GlobalValue { void copyMetadata(const GlobalObject *Src, unsigned Offset); void addTypeMetadata(unsigned Offset, Metadata *TypeID); + void addVCallVisibilityMetadata(VCallVisibility Visibility); + VCallVisibility getVCallVisibility() const; protected: void copyAttributesFrom(const GlobalObject *Src); diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 581d7d78a97f6..f62b1e246cca0 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -601,7 +601,7 @@ dyn_extract_or_null(Y &&MD) { /// These are used to efficiently contain a byte sequence for metadata. /// MDString is always unnamed. class MDString : public Metadata { - friend class StringMapEntry; + friend class StringMapEntryStorage; StringMapEntry *Entry = nullptr; diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 73b182c87b800..2851b24c05ae9 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -643,11 +643,11 @@ struct bind_const_intval_ty { }; /// Match a specified integer value or vector of all elements of that -// value. +/// value. struct specific_intval { - uint64_t Val; + APInt Val; - specific_intval(uint64_t V) : Val(V) {} + specific_intval(APInt V) : Val(std::move(V)) {} template bool match(ITy *V) { const auto *CI = dyn_cast(V); @@ -655,13 +655,19 @@ struct specific_intval { if (const auto *C = dyn_cast(V)) CI = dyn_cast_or_null(C->getSplatValue()); - return CI && CI->getValue() == Val; + return CI && APInt::isSameValue(CI->getValue(), Val); } }; /// Match a specific integer value or vector with all elements equal to /// the value. 
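+/// As an illustrative note (the values are hypothetical), the APInt overload
+/// added below also lets constants wider than 64 bits be matched exactly, e.g.
+/// \code
+///   match(V, m_SpecificInt(APInt(128, 42)))
+/// \endcode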
-inline specific_intval m_SpecificInt(uint64_t V) { return specific_intval(V); } +inline specific_intval m_SpecificInt(APInt V) { + return specific_intval(std::move(V)); +} + +inline specific_intval m_SpecificInt(uint64_t V) { + return m_SpecificInt(APInt(64, V)); +} /// Match a ConstantInt and bind to its value. This does not match /// ConstantInts wider than 64-bits. diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h index d3cb4c8b79a00..8718df4b88e68 100644 --- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -113,7 +113,7 @@ struct LTOCodeGenerator { ShouldRestoreGlobalsLinkage = Value; } - void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols[Sym] = 1; } + void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols.insert(Sym); } /// Pass options to the driver and optimization passes. /// diff --git a/llvm/include/llvm/ObjectYAML/MinidumpYAML.h b/llvm/include/llvm/ObjectYAML/MinidumpYAML.h index b4163e483e856..54be8886c6495 100644 --- a/llvm/include/llvm/ObjectYAML/MinidumpYAML.h +++ b/llvm/include/llvm/ObjectYAML/MinidumpYAML.h @@ -26,6 +26,7 @@ namespace MinidumpYAML { /// from Types to Kinds is fixed and given by the static getKind function. struct Stream { enum class StreamKind { + MemoryInfoList, MemoryList, ModuleList, RawContent, @@ -102,6 +103,26 @@ using ModuleListStream = detail::ListStream; using ThreadListStream = detail::ListStream; using MemoryListStream = detail::ListStream; +/// A structure containing the list of MemoryInfo entries comprising a +/// MemoryInfoList stream. +struct MemoryInfoListStream : public Stream { + std::vector Infos; + + MemoryInfoListStream() + : Stream(StreamKind::MemoryInfoList, + minidump::StreamType::MemoryInfoList) {} + + explicit MemoryInfoListStream( + iterator_range Range) + : Stream(StreamKind::MemoryInfoList, + minidump::StreamType::MemoryInfoList), + Infos(Range.begin(), Range.end()) {} + + static bool classof(const Stream *S) { + return S->Kind == StreamKind::MemoryInfoList; + } +}; + /// A minidump stream represented as a sequence of hex bytes. This is used as a /// fallback when no other stream kind is suitable. 
struct RawContentStream : public Stream { @@ -122,16 +143,16 @@ struct SystemInfoStream : public Stream { minidump::SystemInfo Info; std::string CSDVersion; - explicit SystemInfoStream(const minidump::SystemInfo &Info, - std::string CSDVersion) - : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo), - Info(Info), CSDVersion(std::move(CSDVersion)) {} - SystemInfoStream() : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) { memset(&Info, 0, sizeof(Info)); } + explicit SystemInfoStream(const minidump::SystemInfo &Info, + std::string CSDVersion) + : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo), + Info(Info), CSDVersion(std::move(CSDVersion)) {} + static bool classof(const Stream *S) { return S->Kind == StreamKind::SystemInfo; } @@ -207,6 +228,10 @@ template <> struct MappingContextTraits { } // namespace llvm +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryProtection) +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryState) +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryType) + LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::ProcessorArchitecture) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::OSPlatform) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType) @@ -214,6 +239,7 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::ArmInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::OtherInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::X86Info) +LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::MemoryInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::VSFixedFileInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS( @@ -227,6 +253,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::MemoryListStream::entry_type) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::entry_type) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ThreadListStream::entry_type) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::minidump::MemoryInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 936ebcecfe96d..55418d9d0f9cb 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -120,6 +120,7 @@ enum SecType { SecProfSummary = 1, SecNameTable = 2, SecProfileSymbolList = 3, + SecFuncOffsetTable = 4, // marker for the first type of profile. SecFuncProfileFirst = 32, SecLBRProfile = SecFuncProfileFirst @@ -135,6 +136,8 @@ static inline std::string getSecName(SecType Type) { return "NameTableSection"; case SecProfileSymbolList: return "ProfileSymbolListSection"; + case SecFuncOffsetTable: + return "FuncOffsetTableSection"; case SecLBRProfile: return "LBRProfileSection"; } diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 424818bbb26df..6f6482747fbc1 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -279,7 +279,7 @@ class SampleProfileReader { /// Print the profile for \p FName on stream \p OS. void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs()); - virtual void collectFuncsToUse(const Module &M) {} + virtual void collectFuncsFrom(const Module &M) {} /// Print all the profiles on stream \p OS. 
void dump(raw_ostream &OS = dbgs()); @@ -424,7 +424,7 @@ class SampleProfileReaderBinary : public SampleProfileReader { bool at_eof() const { return Data >= End; } /// Read the next function profile instance. - std::error_code readFuncProfile(); + std::error_code readFuncProfile(const uint8_t *Start); /// Read the contents of the given profile instance. std::error_code readProfile(FunctionSamples &FProfile); @@ -526,7 +526,17 @@ class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { virtual std::error_code verifySPMagic(uint64_t Magic) override; virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, SecType Type) override; - std::error_code readProfileSymbolList(uint64_t Size); + std::error_code readProfileSymbolList(); + std::error_code readFuncOffsetTable(); + std::error_code readFuncProfiles(); + + /// The table mapping from function name to the offset of its FunctionSample + /// towards file start. + DenseMap FuncOffsetTable; + /// The set containing the functions to use when compiling a module. + DenseSet FuncsToUse; + /// Use all functions from the input profile. + bool UseAllFuncs = true; public: SampleProfileReaderExtBinary(std::unique_ptr B, LLVMContext &C, @@ -539,6 +549,9 @@ class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { virtual std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); }; + + /// Collect functions with definitions in Module \p M. + void collectFuncsFrom(const Module &M) override; }; class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { @@ -571,7 +584,7 @@ class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { std::error_code read() override; /// Collect functions to be used when compiling Module \p M. - void collectFuncsToUse(const Module &M) override; + void collectFuncsFrom(const Module &M) override; }; using InlineCallStack = SmallVector; diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index ce60baf66c65c..cc951594c9e24 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -153,14 +153,15 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { protected: uint64_t markSectionStart(SecType Type); std::error_code addNewSection(SecType Sec, uint64_t SectionStart); - virtual void initSectionLayout() = 0; + virtual void initSectionHdrLayout() = 0; virtual std::error_code writeSections(const StringMap &ProfileMap) = 0; - // Specifiy the section layout in the profile. Note that the order in - // SecHdrTable (order to collect sections) may be different from the - // order in SectionLayout (order to write out sections into profile). - SmallVector SectionLayout; + // Specifiy the order of sections in section header table. Note + // the order of sections in the profile may be different that the + // order in SectionHdrLayout. sample Reader will follow the order + // in SectionHdrLayout to read each section. 
+ SmallVector SectionHdrLayout; private: void allocSecHdrTable(); @@ -193,23 +194,44 @@ class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { public: SampleProfileWriterExtBinary(std::unique_ptr &OS) : SampleProfileWriterExtBinaryBase(OS) { - initSectionLayout(); + initSectionHdrLayout(); } + virtual std::error_code writeSample(const FunctionSamples &S) override; virtual void setProfileSymbolList(ProfileSymbolList *PSL) override { ProfSymList = PSL; }; private: - virtual void initSectionLayout() override { - SectionLayout = {{SecProfSummary, 0, 0, 0}, - {SecNameTable, 0, 0, 0}, - {SecLBRProfile, 0, 0, 0}, - {SecProfileSymbolList, 0, 0, 0}}; + virtual void initSectionHdrLayout() override { + // Note that SecFuncOffsetTable section is written after SecLBRProfile + // in the profile, but is put before SecLBRProfile in SectionHdrLayout. + // + // This is because sample reader follows the order of SectionHdrLayout to + // read each section, to read function profiles on demand sample reader + // need to get the offset of each function profile first. + // + // SecFuncOffsetTable section is written after SecLBRProfile in the + // profile because FuncOffsetTable needs to be populated while section + // SecLBRProfile is written. + SectionHdrLayout = {{SecProfSummary, 0, 0, 0}, + {SecNameTable, 0, 0, 0}, + {SecFuncOffsetTable, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0}, + {SecProfileSymbolList, 0, 0, 0}}; }; virtual std::error_code writeSections(const StringMap &ProfileMap) override; ProfileSymbolList *ProfSymList = nullptr; + + // Save the start of SecLBRProfile so we can compute the offset to the + // start of SecLBRProfile for each Function's Profile and will keep it + // in FuncOffsetTable. + uint64_t SecLBRProfileStart; + // FuncOffsetTable maps function name to its profile offset in SecLBRProfile + // section. It is used to load function profile on demand. + MapVector FuncOffsetTable; + std::error_code writeFuncOffsetTable(); }; // CompactBinary is a compact format of binary profile which both reduces diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/Support/FileCheck.h index 5c6585ed76f75..2547449246a81 100644 --- a/llvm/include/llvm/Support/FileCheck.h +++ b/llvm/include/llvm/Support/FileCheck.h @@ -30,6 +30,7 @@ struct FileCheckRequest { std::vector GlobalDefines; bool AllowEmptyInput = false; bool MatchFullLines = false; + bool IgnoreCase = false; bool EnableVarScope = false; bool AllowDeprecatedDagOverlap = false; bool Verbose = false; diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h index eab9b492c4a5c..3c0ed2c111275 100644 --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -18,14 +18,6 @@ #include #include -#if defined(_MSC_VER) && LLVM_ENABLE_THREADS -#pragma warning(push) -#pragma warning(disable : 4530) -#include -#include -#pragma warning(pop) -#endif - namespace llvm { namespace parallel { @@ -84,23 +76,6 @@ class TaskGroup { void sync() const { L.sync(); } }; -#if defined(_MSC_VER) -template -void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, - const Comparator &Comp) { - concurrency::parallel_sort(Start, End, Comp); -} -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - concurrency::parallel_for_each(Begin, End, Fn); -} - -template -void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { - concurrency::parallel_for(Begin, End, Fn); -} - -#else const ptrdiff_t MinParallelSize = 1024; /// Inclusive median. 
@@ -188,8 +163,6 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { #endif -#endif - template using DefComparator = std::less::value_type>; diff --git a/llvm/include/llvm/Support/Win64EH.h b/llvm/include/llvm/Support/Win64EH.h index bdd23b41594ee..8220131e5be92 100644 --- a/llvm/include/llvm/Support/Win64EH.h +++ b/llvm/include/llvm/Support/Win64EH.h @@ -30,7 +30,9 @@ enum UnwindOpcodes { UOP_SetFPReg, UOP_SaveNonVol, UOP_SaveNonVolBig, - UOP_SaveXMM128 = 8, + UOP_Epilog, + UOP_SpareCode, + UOP_SaveXMM128, UOP_SaveXMM128Big, UOP_PushMachFrame, // The following set of unwind opcodes is for ARM64. They are documented at diff --git a/llvm/include/llvm/TextAPI/MachO/InterfaceFile.h b/llvm/include/llvm/TextAPI/MachO/InterfaceFile.h index 8bb6b5ff06665..bd434e04b693a 100644 --- a/llvm/include/llvm/TextAPI/MachO/InterfaceFile.h +++ b/llvm/include/llvm/TextAPI/MachO/InterfaceFile.h @@ -67,6 +67,9 @@ enum FileType : unsigned { /// Text-based stub file (.tbd) version 3.0 TBD_V3 = 1U << 2, + /// Text-based stub file (.tbd) version 4.0 + TBD_V4 = 1U << 3, + All = ~0U, LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All), diff --git a/llvm/include/llvm/TextAPI/MachO/Symbol.h b/llvm/include/llvm/TextAPI/MachO/Symbol.h index b6444fbd78ff7..1b1632c599c4a 100644 --- a/llvm/include/llvm/TextAPI/MachO/Symbol.h +++ b/llvm/include/llvm/TextAPI/MachO/Symbol.h @@ -38,7 +38,10 @@ enum class SymbolFlags : uint8_t { /// Undefined Undefined = 1U << 3, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Undefined), + /// Rexported + Rexported = 1U << 4, + + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Rexported), }; // clang-format on @@ -50,7 +53,7 @@ enum class SymbolKind : uint8_t { ObjectiveCInstanceVariable, }; -using TargetList = SmallVector; +using TargetList = SmallVector; class Symbol { public: Symbol(SymbolKind Kind, StringRef Name, TargetList Targets, SymbolFlags Flags) @@ -81,6 +84,10 @@ class Symbol { return (Flags & SymbolFlags::Undefined) == SymbolFlags::Undefined; } + bool isReexported() const { + return (Flags & SymbolFlags::Rexported) == SymbolFlags::Rexported; + } + using const_target_iterator = TargetList::const_iterator; using const_target_range = llvm::iterator_range; const_target_range targets() const { return {Targets}; } diff --git a/llvm/include/llvm/TextAPI/MachO/Target.h b/llvm/include/llvm/TextAPI/MachO/Target.h index 74e900d812f52..5fe44cb7d366f 100644 --- a/llvm/include/llvm/TextAPI/MachO/Target.h +++ b/llvm/include/llvm/TextAPI/MachO/Target.h @@ -29,6 +29,8 @@ class Target { explicit Target(const llvm::Triple &Triple) : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformKind(Triple)) {} + static llvm::Expected create(StringRef Target); + operator std::string() const; Architecture Arch; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 04ab8f74755e9..ba9ec25dec17b 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -899,13 +899,12 @@ struct Attributor { const DataLayout &getDataLayout() const { return InfoCache.DL; } private: - /// The private version of getAAFor that allows to omit a querying abstract /// attribute. See also the public getAAFor method. 
template const AAType &getOrCreateAAFor(const IRPosition &IRP, - const AbstractAttribute *QueryingAA = nullptr, - bool TrackDependence = false) { + const AbstractAttribute *QueryingAA = nullptr, + bool TrackDependence = false) { if (const AAType *AAPtr = lookupAAFor(IRP, QueryingAA, TrackDependence)) return *AAPtr; @@ -1417,7 +1416,8 @@ struct AAReturnedValues const function_ref &)> &Pred) const = 0; - using iterator = MapVector>::iterator; + using iterator = + MapVector>::iterator; using const_iterator = MapVector>::const_iterator; virtual llvm::iterator_range returned_values() = 0; diff --git a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h index c434484d1ae35..0a6851849e7e8 100644 --- a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h +++ b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h @@ -43,11 +43,25 @@ class GlobalDCEPass : public PassInfoMixin { /// Comdat -> Globals in that Comdat section. std::unordered_multimap ComdatMembers; + /// !type metadata -> set of (vtable, offset) pairs + DenseMap, 4>> + TypeIdMap; + + // Global variables which are vtables, and which we have enough information + // about to safely do dead virtual function elimination. + SmallPtrSet VFESafeVTables; + void UpdateGVDependencies(GlobalValue &GV); void MarkLive(GlobalValue &GV, SmallVectorImpl *Updates = nullptr); bool RemoveUnusedGlobalValue(GlobalValue &GV); + // Dead virtual function elimination. + void AddVirtualFunctionDependencies(Module &M); + void ScanVTables(Module &M); + void ScanTypeCheckedLoadIntrinsics(Module &M); + void ScanVTableLoad(Function *Caller, Metadata *TypeId, uint64_t CallOffset); + void ComputeDependencies(Value *V, SmallPtrSetImpl &U); }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index 0739d9e58a61f..01a86ee3f1fdb 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ -19,12 +19,11 @@ namespace llvm { struct MemorySanitizerOptions { - MemorySanitizerOptions() = default; - MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel) - : TrackOrigins(TrackOrigins), Recover(Recover), Kernel(Kernel) {} - int TrackOrigins = 0; - bool Recover = false; - bool Kernel = false; + MemorySanitizerOptions() : MemorySanitizerOptions(0, false, false){}; + MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel); + bool Kernel; + int TrackOrigins; + bool Recover; }; // Insert MemorySanitizer instrumentation (detection of uninitialized reads) @@ -41,6 +40,7 @@ struct MemorySanitizerPass : public PassInfoMixin { MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: MemorySanitizerOptions Options; diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index b4e7d9924ff61..ce0e46745abb1 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -27,6 +27,8 @@ FunctionPass *createThreadSanitizerLegacyPassPass(); /// yet, the pass inserts the declarations. 
Otherwise the existing globals are struct ThreadSanitizerPass : public PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; + } // namespace llvm #endif /* LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H */ diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 78e197aad5545..f2d56b05d968e 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -44,11 +44,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // First, do a cache lookup. Without this cache, certain CFG structures // (like a series of if statements) take exponential time to visit. auto Cached = CachedPreviousDef.find(BB); - if (Cached != CachedPreviousDef.end()) { + if (Cached != CachedPreviousDef.end()) return Cached->second; - } - if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // If this method is called from an unreachable block, return LoE. + if (!MSSA->DT->isReachableFromEntry(BB)) + return MSSA->getLiveOnEntryDef(); + + if (BasicBlock *Pred = BB->getUniquePredecessor()) { + VisitedBlocks.insert(BB); // Single predecessor case, just recurse, we can only have one definition. MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); CachedPreviousDef.insert({BB, Result}); @@ -92,9 +96,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // See if we can avoid the phi by simplifying it. auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); // If we couldn't simplify, we may have to create a phi - if (Result == Phi && UniqueIncomingAccess && SingleAccess) + if (Result == Phi && UniqueIncomingAccess && SingleAccess) { + // A concrete Phi only exists if we created an empty one to break a cycle. + if (Phi) { + assert(Phi->operands().empty() && "Expected empty Phi"); + Phi->replaceAllUsesWith(SingleAccess); + removeMemoryAccess(Phi); + } Result = SingleAccess; - else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) { + } else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) { if (!Phi) Phi = MSSA->createMemoryPhi(BB); @@ -233,6 +243,7 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) { InsertedPHIs.clear(); MU->setDefiningAccess(getPreviousDef(MU)); + // In cases without unreachable blocks, because uses do not create new // may-defs, there are only two cases: // 1. 
There was a def already below us, and therefore, we should not have diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c29fc5dbccfb8..8d4c7c5a55f16 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -158,6 +158,9 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, static cl::opt VerifySCEV( "verify-scev", cl::Hidden, cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); +static cl::opt VerifySCEVStrict( + "verify-scev-strict", cl::Hidden, + cl::desc("Enable stricter verification with -verify-scev is passed")); static cl::opt VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " @@ -11922,14 +11925,14 @@ void ScalarEvolution::verify() const { SE.getTypeSizeInBits(NewBECount->getType())) CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); - auto *ConstantDelta = - dyn_cast(SE2.getMinusSCEV(CurBECount, NewBECount)); + const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount); - if (ConstantDelta && ConstantDelta->getAPInt() != 0) { - dbgs() << "Trip Count Changed!\n"; + // Unless VerifySCEVStrict is set, we only compare constant deltas. + if ((VerifySCEVStrict || isa(Delta)) && !Delta->isZero()) { + dbgs() << "Trip Count for " << *L << " Changed!\n"; dbgs() << "Old: " << *CurBECount << "\n"; dbgs() << "New: " << *NewBECount << "\n"; - dbgs() << "Delta: " << *ConstantDelta << "\n"; + dbgs() << "Delta: " << *Delta << "\n"; std::abort(); } } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index b5467813094e8..f3d20ce984dbd 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -40,34 +40,6 @@ namespace { struct NoTTIImpl : TargetTransformInfoImplCRTPBase { explicit NoTTIImpl(const DataLayout &DL) : TargetTransformInfoImplCRTPBase(DL) {} - - unsigned getCacheLineSize() const { return 0; } - - llvm::Optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { - switch (Level) { - case TargetTransformInfo::CacheLevel::L1D: - LLVM_FALLTHROUGH; - case TargetTransformInfo::CacheLevel::L2D: - return llvm::Optional(); - } - llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); - } - - llvm::Optional getCacheAssociativity( - TargetTransformInfo::CacheLevel Level) const { - switch (Level) { - case TargetTransformInfo::CacheLevel::L1D: - LLVM_FALLTHROUGH; - case TargetTransformInfo::CacheLevel::L2D: - return llvm::Optional(); - } - - llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); - } - - unsigned getPrefetchDistance() const { return 0; } - unsigned getMinPrefetchStride() const { return 1; } - unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } }; } diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp index 9311dfbc6eba0..072d291f3f932 100644 --- a/llvm/lib/Analysis/TypeMetadataUtils.cpp +++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -127,3 +127,35 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad( findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr, Offset->getZExtValue(), CI, DT); } + +Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) { + if (I->getType()->isPointerTy()) { + if (Offset == 0) + return I; + return nullptr; + } + + const DataLayout &DL = M.getDataLayout(); + + if (auto *C = dyn_cast(I)) { + const StructLayout *SL = DL.getStructLayout(C->getType()); + if 
(Offset >= SL->getSizeInBytes()) + return nullptr; + + unsigned Op = SL->getElementContainingOffset(Offset); + return getPointerAtOffset(cast(I->getOperand(Op)), + Offset - SL->getElementOffset(Op), M); + } + if (auto *C = dyn_cast(I)) { + ArrayType *VTableTy = C->getType(); + uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); + + unsigned Op = Offset / ElemSize; + if (Op >= C->getNumOperands()) + return nullptr; + + return getPointerAtOffset(cast(I->getOperand(Op)), + Offset % ElemSize, M); + } + return nullptr; +} diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index f0b87810ef9a1..aaebba0052752 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5755,17 +5755,47 @@ Optional llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2, const GEPOperator *GEP1 = dyn_cast(Ptr1); const GEPOperator *GEP2 = dyn_cast(Ptr2); - // If one pointer is a GEP and the other isn't, then see if the GEP is a - // constant offset from the base, as in "P" and "gep P, 1". - if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { - auto Offset = getOffsetFromIndex(GEP1, 1, DL); - if (!Offset) + // If one pointer is a GEP see if the GEP is a constant offset from the base, + // as in "P" and "gep P, 1". + // Also do this iteratively to handle the the following case: + // Ptr_t1 = GEP Ptr1, c1 + // Ptr_t2 = GEP Ptr_t1, c2 + // Ptr2 = GEP Ptr_t2, c3 + // where we will return c1+c2+c3. + // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base + // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases + // are the same, and return the difference between offsets. + auto getOffsetFromBase = [&DL](const GEPOperator *GEP, + const Value *Ptr) -> Optional { + const GEPOperator *GEP_T = GEP; + int64_t OffsetVal = 0; + bool HasSameBase = false; + while (GEP_T) { + auto Offset = getOffsetFromIndex(GEP_T, 1, DL); + if (!Offset) + return None; + OffsetVal += *Offset; + auto Op0 = GEP_T->getOperand(0)->stripPointerCasts(); + if (Op0 == Ptr) { + HasSameBase = true; + break; + } + GEP_T = dyn_cast(Op0); + } + if (!HasSameBase) return None; - return -*Offset; - } + return OffsetVal; + }; - if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { - return getOffsetFromIndex(GEP2, 1, DL); + if (GEP1) { + auto Offset = getOffsetFromBase(GEP1, Ptr2); + if (Offset) + return -*Offset; + } + if (GEP2) { + auto Offset = getOffsetFromBase(GEP2, Ptr1); + if (Offset) + return Offset; } // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index f54794c59e3fe..600f57ab9d716 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -836,11 +836,11 @@ void InterleavedAccessInfo::collectConstStrideAccesses( uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); // An alignment of 0 means target ABI alignment. 
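Editor's note on the ValueTracking::isPointerOffset hunk above: it walks a chain of GEPs (Ptr_t1 = GEP Ptr1, c1; Ptr_t2 = GEP Ptr_t1, c2; ...) and accumulates the constant offsets until it reaches the expected base pointer. The sketch below restates that walk over a deliberately simplified node type; it is not the real GEPOperator interface, and a constant per-node offset is an assumption standing in for getOffsetFromIndex.

#include <cstdint>
#include <optional>

// Node is a stand-in for a constant-offset GEP: Base is the pointer operand
// (nullptr for a plain, non-GEP pointer) and Offset is its constant byte offset.
struct Node {
  const Node *Base = nullptr;
  int64_t Offset = 0;
};

// Walk GEP -> GEP -> ... -> ExpectedBase and return c1 + c2 + ... if the chain
// really ends at ExpectedBase, otherwise report failure.
std::optional<int64_t> offsetFromBase(const Node *GEP, const Node *ExpectedBase) {
  int64_t Total = 0;
  for (const Node *N = GEP; N && N->Base; N = N->Base) {
    Total += N->Offset;
    if (N->Base == ExpectedBase)
      return Total;
  }
  return std::nullopt;
}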
- unsigned Align = getLoadStoreAlignment(&I); - if (!Align) - Align = DL.getABITypeAlignment(PtrTy->getElementType()); + MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I)); + if (!Alignment) + Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType())); - AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align); + AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment); } } @@ -927,7 +927,7 @@ void InterleavedAccessInfo::analyzeInterleaving( if (!Group) { LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); - Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); + Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment); } if (B->mayWriteToMemory()) StoreGroups.insert(Group); @@ -1034,7 +1034,7 @@ void InterleavedAccessInfo::analyzeInterleaving( Group->getIndex(B) + DistanceToB / static_cast(DesB.Size); // Try to insert A into B's group. - if (Group->insertMember(A, IndexA, DesA.Align)) { + if (Group->insertMember(A, IndexA, DesA.Alignment)) { LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' << " into the interleave group with" << *B << '\n'); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 616b1368a7a75..012da940f8995 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -2858,6 +2858,14 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { CurFn = nullptr; } +// Usable locations are valid with non-zero line numbers. A line number of zero +// corresponds to optimized code that doesn't have a distinct source location. +// In this case, we try to use the previous or next source location depending on +// the context. +static bool isUsableDebugLoc(DebugLoc DL) { + return DL && DL.getLine() != 0; +} + void CodeViewDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); @@ -2869,19 +2877,21 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { // If the first instruction of a new MBB has no location, find the first // instruction with a location and use that. DebugLoc DL = MI->getDebugLoc(); - if (!DL && MI->getParent() != PrevInstBB) { + if (!isUsableDebugLoc(DL) && MI->getParent() != PrevInstBB) { for (const auto &NextMI : *MI->getParent()) { if (NextMI.isDebugInstr()) continue; DL = NextMI.getDebugLoc(); - if (DL) + if (isUsableDebugLoc(DL)) break; } + // FIXME: Handle the case where the BB has no valid locations. This would + // probably require doing a real dataflow analysis. } PrevInstBB = MI->getParent(); // If we still don't have a debug location, don't record a location. 
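Editor's note on the CodeViewDebug hunk above: it treats a location with line number 0 as unusable, since line 0 marks optimized code with no distinct source position, and scans forward in the block for the first usable location. Below is a small, self-contained model of that scan; Loc and Inst are simplified stand-ins for DebugLoc and MachineInstr rather than the actual classes.

#include <optional>
#include <vector>

struct Loc { unsigned Line = 0; };
struct Inst { std::optional<Loc> DL; bool IsDebugInstr = false; };

// A location is usable only if it exists and carries a non-zero line number.
static bool isUsable(const std::optional<Loc> &DL) { return DL && DL->Line != 0; }

// Borrow the first usable location in the block, skipping debug
// pseudo-instructions, or report that the block has none.
std::optional<Loc> firstUsableLoc(const std::vector<Inst> &Block) {
  for (const Inst &I : Block) {
    if (I.IsDebugInstr)
      continue;
    if (isUsable(I.DL))
      return I.DL;
  }
  return std::nullopt;
}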
- if (!DL) + if (!isUsableDebugLoc(DL)) return; maybeRecordLocation(DL, Asm->MF); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 8f9b7ddeabf20..be8efa8795f32 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -33,19 +33,19 @@ GISelKnownBits::GISelKnownBits(MachineFunction &MF) : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), DL(MF.getFunction().getParent()->getDataLayout()) {} -unsigned GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, - const MachineFunction &MF) { +Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF) { const MachineFrameInfo &MFI = MF.getFrameInfo(); - return MinAlign(Offset, MFI.getObjectAlignment(FrameIdx)); + return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset); // TODO: How to handle cases with Base + Offset? } -unsigned GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) { +MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) { if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) { int FrameIdx = MI.getOperand(1).getIndex(); return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF()); } - return 0; + return None; } void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known, @@ -56,10 +56,10 @@ void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known, } void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known, - unsigned Align) { - if (Align) + MaybeAlign Alignment) { + if (Alignment) // The low bits are known zero if the pointer is aligned. - Known.Zero.setLowBits(Log2_32(Align)); + Known.Zero.setLowBits(Log2(Alignment)); } KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 684b99d8bae3f..c5830ff865231 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2766,6 +2766,65 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + assert(TypeIdx == 0 && "not a vector type index"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = DstTy.getElementType(); + + int DstNumElts = DstTy.getNumElements(); + int NarrowNumElts = NarrowTy.getNumElements(); + int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts; + LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy); + + SmallVector ConcatOps; + SmallVector SubBuildVector; + + Register UndefReg; + if (WidenedDstTy != DstTy) + UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0); + + // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as + // necessary. 
+ // + // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 + // -> <2 x s16> + // + // %4:_(s16) = G_IMPLICIT_DEF + // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 + // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 + // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6 + // %3:_(<3 x s16>) = G_EXTRACT %7, 0 + for (int I = 0; I != NumConcat; ++I) { + for (int J = 0; J != NarrowNumElts; ++J) { + int SrcIdx = NarrowNumElts * I + J; + + if (SrcIdx < DstNumElts) { + Register SrcReg = MI.getOperand(SrcIdx + 1).getReg(); + SubBuildVector.push_back(SrcReg); + } else + SubBuildVector.push_back(UndefReg); + } + + auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector); + ConcatOps.push_back(BuildVec.getReg(0)); + SubBuildVector.clear(); + } + + if (DstTy == WidenedDstTy) + MIRBuilder.buildConcatVectors(DstReg, ConcatOps); + else { + auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps); + MIRBuilder.buildExtract(DstReg, Concat, 0); + } + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -2941,6 +3000,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); + case G_BUILD_VECTOR: + return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index a93e5153089d0..45618d7992ad2 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -216,11 +216,33 @@ Optional llvm::getConstantVRegVal(unsigned VReg, } Optional llvm::getConstantVRegValWithLookThrough( - unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { + unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, + bool HandleFConstant) { SmallVector, 4> SeenOpcodes; MachineInstr *MI; - while ((MI = MRI.getVRegDef(VReg)) && - MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) { + auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { + return Opcode == TargetOpcode::G_CONSTANT || + (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT); + }; + auto GetImmediateValue = [HandleFConstant, + &MRI](const MachineInstr &MI) -> Optional { + const MachineOperand &CstVal = MI.getOperand(1); + if (!CstVal.isImm() && !CstVal.isCImm() && + (!HandleFConstant || !CstVal.isFPImm())) + return None; + if (!CstVal.isFPImm()) { + unsigned BitWidth = + MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + APInt Val = CstVal.isImm() ? 
APInt(BitWidth, CstVal.getImm()) + : CstVal.getCImm()->getValue(); + assert(Val.getBitWidth() == BitWidth && + "Value bitwidth doesn't match definition type"); + return Val; + } + return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + }; + while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && + LookThroughInstrs) { switch (MI->getOpcode()) { case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: @@ -242,16 +264,13 @@ Optional llvm::getConstantVRegValWithLookThrough( return None; } } - if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT || - (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm())) + if (!MI || !IsConstantOpcode(MI->getOpcode())) return None; - const MachineOperand &CstVal = MI->getOperand(1); - unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits(); - APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm()) - : CstVal.getCImm()->getValue(); - assert(Val.getBitWidth() == BitWidth && - "Value bitwidth doesn't match definition type"); + Optional MaybeVal = GetImmediateValue(*MI); + if (!MaybeVal) + return None; + APInt &Val = *MaybeVal; while (!SeenOpcodes.empty()) { std::pair OpcodeAndSize = SeenOpcodes.pop_back_val(); switch (OpcodeAndSize.first) { diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index e503c568f9667..d9caa5660695d 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -285,14 +285,113 @@ namespace { Prediction); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, - unsigned TCycle, unsigned TExtra, - MachineBasicBlock &FBB, - unsigned FCycle, unsigned FExtra, - BranchProbability Prediction) const { - return TCycle > 0 && FCycle > 0 && - TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, - Prediction); + bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo, + MachineBasicBlock &CommBB, unsigned Dups, + BranchProbability Prediction, bool Forked) const { + const MachineFunction &MF = *TBBInfo.BB->getParent(); + if (MF.getFunction().hasMinSize()) { + MachineBasicBlock::iterator TIB = TBBInfo.BB->begin(); + MachineBasicBlock::iterator FIB = FBBInfo.BB->begin(); + MachineBasicBlock::iterator TIE = TBBInfo.BB->end(); + MachineBasicBlock::iterator FIE = FBBInfo.BB->end(); + + unsigned Dups1, Dups2; + if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TBBInfo.BB, *FBBInfo.BB, + /*SkipUnconditionalBranches*/ true)) + llvm_unreachable("should already have been checked by ValidDiamond"); + + unsigned BranchBytes = 0; + unsigned CommonBytes = 0; + + // Count common instructions at the start of the true and false blocks. + for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + + // Count instructions at the end of the true and false blocks, after + // the ones we plan to predicate. Analyzable branches will be removed + // (unless this is a forked diamond), and all other instructions are + // common between the two blocks. 
+ for (auto &I : make_range(TIE, TBBInfo.BB->end())) { + if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : make_range(FIE, FBBInfo.BB->end())) { + if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : CommBB.terminators()) { + if (I.isBranch()) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } + } + + // The common instructions in one branch will be eliminated, halving + // their code size. + CommonBytes /= 2; + + // Count the instructions which we need to predicate. + unsigned NumPredicatedInstructions = 0; + for (auto &I : make_range(TIB, TIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + for (auto &I : make_range(FIB, FIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + + // Even though we're optimising for size at the expense of performance, + // avoid creating really long predicated blocks. + if (NumPredicatedInstructions > 15) + return false; + + // Some targets (e.g. Thumb2) need to insert extra instructions to + // start predicated blocks. + unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions( + MF, NumPredicatedInstructions); + + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes + << ", CommonBytes=" << CommonBytes + << ", NumPredicatedInstructions=" + << NumPredicatedInstructions + << ", ExtraPredicateBytes=" << ExtraPredicateBytes + << ")\n"); + return (BranchBytes + CommonBytes) > ExtraPredicateBytes; + } else { + unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups; + unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups; + bool Res = TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt( + *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB, + FCycle, FBBInfo.ExtraCost2, Prediction); + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle + << ", FCycle=" << FCycle + << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra=" + << FBBInfo.ExtraCost2 << ") = " << Res << "\n"); + return Res; + } } /// Returns true if Block ends without a terminator. 
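Editor's note on the IfConversion change above: it adds a byte-based profitability check for functions built with minsize. Removing branches and one copy of the duplicated common instructions must save more bytes than the target needs to set up predication, and very long predicated regions are rejected outright. The helper below restates that decision with the byte counts as plain inputs; in the pass they are computed from TargetInstrInfo.

// Returns true when if-conversion is expected to shrink the code, following
// the same arithmetic as the hunk above.
bool profitableToIfConvertForSize(unsigned BranchBytes, unsigned CommonBytes,
                                  unsigned NumPredicatedInstructions,
                                  unsigned ExtraPredicateBytes) {
  // Avoid creating really long predicated blocks even when optimizing for size.
  if (NumPredicatedInstructions > 15)
    return false;
  // The common instructions in one branch are eliminated, halving their size.
  CommonBytes /= 2;
  return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
}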
@@ -842,6 +941,8 @@ bool IfConverter::ValidForkedDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; @@ -899,6 +1000,8 @@ bool IfConverter::ValidDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; // The size is used to decide whether to if-convert, and the shared portions @@ -1186,13 +1289,9 @@ void IfConverter::AnalyzeBlock( if (CanRevCond) { BBInfo TrueBBICalc, FalseBBICalc; - auto feasibleDiamond = [&]() { - bool MeetsSize = MeetIfcvtSizeLimit( - *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + - TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, - *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + - FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, - Prediction); + auto feasibleDiamond = [&](bool Forked) { + bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB, + Dups + Dups2, Prediction, Forked); bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, /* IsTriangle */ false, /* RevCond */ false, /* hasCommonTail */ true); @@ -1204,7 +1303,7 @@ void IfConverter::AnalyzeBlock( if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(false)) { // Diamond: // EBB // / \_ @@ -1220,7 +1319,7 @@ void IfConverter::AnalyzeBlock( } } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(true)) { // ForkedDiamond: // if TBB and FBB have a common tail that includes their conditional // branch instructions, then we can If Convert this pattern. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7ea908437ff91..41303921d87a6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5522,6 +5522,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { return true; } +// Match 2 elements of a packed halfword bswap. +static bool isBSwapHWordPair(SDValue N, MutableArrayRef Parts) { + if (N.getOpcode() == ISD::OR) + return isBSwapHWordElement(N.getOperand(0), Parts) && + isBSwapHWordElement(N.getOperand(1), Parts); + + if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { + ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); + if (!C || C->getAPIntValue() != 16) + return false; + Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); + return true; + } + + return false; +} + /// Match a 32-bit packed halfword bswap. 
That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5539,43 +5556,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); // Look for either - // (or (or (and), (and)), (or (and), (and))) - // (or (or (or (and), (and)), (and)), (and)) - if (N0.getOpcode() != ISD::OR) - return SDValue(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); + // (or (bswaphpair), (bswaphpair)) + // (or (or (bswaphpair), (and)), (and)) + // (or (or (and), (bswaphpair)), (and)) SDNode *Parts[4] = {}; - if (N1.getOpcode() == ISD::OR && - N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { + if (isBSwapHWordPair(N0, Parts)) { // (or (or (and), (and)), (or (and), (and))) - if (!isBSwapHWordElement(N00, Parts)) - return SDValue(); - - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - SDValue N10 = N1.getOperand(0); - if (!isBSwapHWordElement(N10, Parts)) - return SDValue(); - SDValue N11 = N1.getOperand(1); - if (!isBSwapHWordElement(N11, Parts)) + if (!isBSwapHWordPair(N1, Parts)) return SDValue(); - } else { + } else if (N0.getOpcode() == ISD::OR) { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - if (N00.getOpcode() != ISD::OR) - return SDValue(); - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) - return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && + !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); - } + } else + return SDValue(); // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) @@ -8221,22 +8221,33 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { return Cond; } - // For any constants that differ by 1, we can transform the select into an - // extend and add. Use a target hook because some targets may prefer to - // transform in the other direction. + // Use a target hook because some targets may prefer to transform in the + // other direction. if (TLI.convertSelectOfConstantsToMath(VT)) { - if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) { + // For any constants that differ by 1, we can transform the select into an + // extend and add. 
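Editor's note on the foldSelectOfConstants comment above: a select of two constants that differ by one folds into an extend plus add, and this hunk also adds the power-of-two-or-zero case lowered to a shift of the zero-extended condition. The functions below check those identities on plain 32-bit scalars; they illustrate the algebra only and are not SelectionDAG code.

#include <cassert>
#include <cstdint>

// zext of an i1 is 0 or 1, sext of an i1 is 0 or -1, which is what makes the
// add/shift forms below equivalent to the select.
uint32_t selAdjacentHi(bool Cond, uint32_t C1) {       // Cond ? C1 : C1 - 1
  return uint32_t(Cond) + (C1 - 1);                    // add (zext Cond), C1-1
}
uint32_t selAdjacentLo(bool Cond, uint32_t C1) {       // Cond ? C1 : C1 + 1
  return uint32_t(-int32_t(Cond)) + (C1 + 1);          // add (sext Cond), C1+1
}
uint32_t selPow2OrZero(bool Cond, unsigned Log2Pow2) { // Cond ? (1 << L) : 0
  return uint32_t(Cond) << Log2Pow2;                   // shl (zext Cond), L
}

int main() {
  assert(selAdjacentHi(true, 10) == 10 && selAdjacentHi(false, 10) == 9);
  assert(selAdjacentLo(true, 10) == 10 && selAdjacentLo(false, 10) == 11);
  assert(selPow2OrZero(true, 3) == 8 && selPow2OrZero(false, 3) == 0);
  return 0;
}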
+ const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + if (C1Val - 1 == C2Val) { // select Cond, C1, C1-1 --> add (zext Cond), C1-1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } - if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) { + if (C1Val + 1 == C2Val) { // select Cond, C1, C1+1 --> add (sext Cond), C1+1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } } return SDValue(); @@ -10152,7 +10163,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast(N0); - if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) + // Reducing the width of a volatile load is illegal. For atomics, we may be + // able to reduce the width provided we never widen again. (see D66309) + if (!LN0->isSimple() || + !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); auto AdjustBigEndianShift = [&](unsigned ShAmt) { @@ -16276,6 +16290,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // Can't change the number of memory accesses for a volatile store or break + // atomicity for an atomic one. + if (!ST->isSimple()) + return SDValue(); + SDValue Val = ST->getValue(); SDLoc DL(ST); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e094981a19152..3c8f63e46bcec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -642,48 +642,78 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { - // For promoting iN -> iM, this can be expanded by - // 1. ANY_EXTEND iN to iM - // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT - // 4. L/ASHR by M-N + // If the promoted type is legal, we can convert this to: + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // Else it is more efficient to convert this to a min and a max + // operation in the higher precision arithmetic. 
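Editor's note on the PromoteIntRes_ADDSUBSAT change above: when the saturating operation is not legal at the promoted type, it is expanded into ordinary arithmetic in the wider type followed by min/max clamping, instead of shifting the operands into the high bits. The scalar versions below show the same expansion for an i8 value promoted to i32; they model the idea only and do not use SelectionDAG.

#include <algorithm>
#include <cassert>
#include <cstdint>

uint8_t uaddsat8(uint8_t A, uint8_t B) {
  uint32_t Add = uint32_t(A) + uint32_t(B);       // ADD in the promoted type
  return uint8_t(std::min<uint32_t>(Add, 255));   // UMIN with the i8 maximum
}
int8_t saddsat8(int8_t A, int8_t B) {
  int32_t Add = int32_t(A) + int32_t(B);          // ADD in the promoted type
  Add = std::min<int32_t>(Add, 127);              // SMIN with the i8 signed max
  Add = std::max<int32_t>(Add, -128);             // SMAX with the i8 signed min
  return int8_t(Add);
}

int main() {
  assert(uaddsat8(200, 100) == 255 && uaddsat8(20, 30) == 50);
  assert(saddsat8(100, 100) == 127 && saddsat8(-100, -100) == -128);
  return 0;
}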
SDLoc dl(N); SDValue Op1 = N->getOperand(0); SDValue Op2 = N->getOperand(1); unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); - unsigned ShiftOp; - switch (Opcode) { - case ISD::SADDSAT: - case ISD::SSUBSAT: - ShiftOp = ISD::SRA; - break; - case ISD::UADDSAT: - case ISD::USUBSAT: - ShiftOp = ISD::SRL; - break; - default: - llvm_unreachable("Expected opcode to be signed or unsigned saturation " - "addition or subtraction"); - } SDValue Op1Promoted = GetPromotedInteger(Op1); SDValue Op2Promoted = GetPromotedInteger(Op2); - EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); - unsigned SHLAmount = NewBits - OldBits; - EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); - Op1Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); - Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); - - SDValue Result = - DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + + if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + unsigned ShiftOp; + switch (Opcode) { + case ISD::SADDSAT: + case ISD::SSUBSAT: + ShiftOp = ISD::SRA; + break; + case ISD::UADDSAT: + case ISD::USUBSAT: + ShiftOp = ISD::SRL; + break; + default: + llvm_unreachable("Expected opcode to be signed or unsigned saturation " + "addition or subtraction"); + } + + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + } else { + if (Opcode == ISD::USUBSAT) { + SDValue Max = + DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted); + } + + if (Opcode == ISD::UADDSAT) { + APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Add = + DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + } + + unsigned AddOp = Opcode == ISD::SADDSAT ? 
ISD::ADD : ISD::SUB; + APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); + APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); + SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Result = + DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + return Result; + } } SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 770f129753eca..ed6c2b93ce6ba 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -290,20 +290,24 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI, Optional DumpOffset) { uint64_t Offset = 0; - DWARFDebugLoclists Loclists; - DWARFListTableHeader Header(".debug_loclists", "locations"); - if (Error E = Header.extract(Data, &Offset)) { - WithColor::error() << toString(std::move(E)) << '\n'; - return; - } + while (Data.isValidOffset(Offset)) { + DWARFListTableHeader Header(".debug_loclists", "locations"); + if (Error E = Header.extract(Data, &Offset)) { + WithColor::error() << toString(std::move(E)) << '\n'; + return; + } - Header.dump(OS, DumpOpts); - DataExtractor LocData(Data.getData().drop_front(Offset), - Data.isLittleEndian(), Header.getAddrSize()); + Header.dump(OS, DumpOpts); + DataExtractor LocData(Data.getData(), + Data.isLittleEndian(), Header.getAddrSize()); - Loclists.parse(LocData, Header.getVersion()); - Loclists.dump(OS, 0, MRI, DumpOffset); + DWARFDebugLoclists Loclists; + uint64_t EndOffset = Header.length() + Header.getHeaderOffset(); + Loclists.parse(LocData, Offset, EndOffset, Header.getVersion()); + Loclists.dump(OS, 0, MRI, DumpOffset); + Offset = EndOffset; + } } void DWARFContext::dump( @@ -733,7 +737,7 @@ const DWARFDebugLoclists *DWARFContext::getDebugLocDWO() { // Use version 4. DWO does not support the DWARF v5 .debug_loclists yet and // that means we are parsing the new style .debug_loc (pre-standatized version // of the .debug_loclists). 
- LocDWO->parse(LocData, 4 /* Version */); + LocDWO->parse(LocData, 0, LocData.getData().size(), 4 /* Version */); return LocDWO.get(); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index a243ed3a80bea..bdafafc7a37d7 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -187,12 +187,11 @@ DWARFDebugLoclists::parseOneLocationList(const DataExtractor &Data, return LL; } -void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) { +void DWARFDebugLoclists::parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version) { IsLittleEndian = data.isLittleEndian(); AddressSize = data.getAddressSize(); - uint64_t Offset = 0; - while (Offset < data.getData().size()) { + while (Offset < EndOffset) { if (auto LL = parseOneLocationList(data, &Offset, Version)) Locations.push_back(std::move(*LL)); else { diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt index e05b2c112ce78..2369f05197a2c 100644 --- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt +++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt @@ -2,6 +2,8 @@ add_llvm_library(LLVMDebugInfoGSYM Header.cpp FileWriter.cpp FunctionInfo.cpp + GsymCreator.cpp + GsymReader.cpp InlineInfo.cpp LineTable.cpp Range.cpp @@ -9,4 +11,7 @@ add_llvm_library(LLVMDebugInfoGSYM ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/GSYM ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo + + DEPENDS + LLVMMC ) diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index dff867d66eb97..ad022fec9e325 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -1,4 +1,4 @@ -//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// +//===- FunctionInfo.cpp ---------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp new file mode 100644 index 0000000000000..f371426f20104 --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,275 @@ +//===- GsymCreator.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include + +using namespace llvm; +using namespace gsym; + + +GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { + insertFile(StringRef()); +} + +uint32_t GsymCreator::insertFile(StringRef Path, + llvm::sys::path::Style Style) { + llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); + llvm::StringRef filename = llvm::sys::path::filename(Path, Style); + FileEntry FE(insertString(directory), insertString(filename)); + + std::lock_guard Guard(Mutex); + const auto NextIndex = Files.size(); + // Find FE in hash map and insert if not present. 
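Editor's note on GsymCreator::insertFile above: it deduplicates files by inserting (entry, next index) into a hash map and returning the index the map already held when the entry was a duplicate. The same interning pattern is shown below on its own, with std::string as a simplified key instead of FileEntry.

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct Interner {
  std::vector<std::string> Entries;                // index -> entry
  std::unordered_map<std::string, uint32_t> Index; // entry -> index

  // Insert if not present; duplicates get the previously assigned index.
  uint32_t insert(const std::string &Key) {
    auto R = Index.insert({Key, uint32_t(Entries.size())});
    if (R.second)
      Entries.push_back(Key);
    return R.first->second;
  }
};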
+ auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); + if (R.second) + Files.emplace_back(FE); + return R.first->second; +} + +llvm::Error GsymCreator::save(StringRef Path, + llvm::support::endianness ByteOrder) const { + std::error_code EC; + raw_fd_ostream OutStrm(Path, EC); + if (EC) + return llvm::errorCodeToError(EC); + FileWriter O(OutStrm, ByteOrder); + return encode(O); +} + +llvm::Error GsymCreator::encode(FileWriter &O) const { + std::lock_guard Guard(Mutex); + if (Funcs.empty()) + return createStringError(std::errc::invalid_argument, + "no functions to encode"); + if (!Finalized) + return createStringError(std::errc::invalid_argument, + "GsymCreator wasn't finalized prior to encoding"); + + if (Funcs.size() > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many FunctionInfos"); + const uint64_t MinAddr = Funcs.front().startAddress(); + const uint64_t MaxAddr = Funcs.back().startAddress(); + const uint64_t AddrDelta = MaxAddr - MinAddr; + Header Hdr; + Hdr.Magic = GSYM_MAGIC; + Hdr.Version = GSYM_VERSION; + Hdr.AddrOffSize = 0; + Hdr.UUIDSize = static_cast(UUID.size()); + Hdr.BaseAddress = MinAddr; + Hdr.NumAddresses = static_cast(Funcs.size()); + Hdr.StrtabOffset = 0; // We will fix this up later. + Hdr.StrtabOffset = 0; // We will fix this up later. + memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); + if (UUID.size() > sizeof(Hdr.UUID)) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", (uint32_t)UUID.size()); + // Set the address offset size correctly in the GSYM header. + if (AddrDelta <= UINT8_MAX) + Hdr.AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + Hdr.AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + Hdr.AddrOffSize = 4; + else + Hdr.AddrOffSize = 8; + // Copy the UUID value if we have one. + if (UUID.size() > 0) + memcpy(Hdr.UUID, UUID.data(), UUID.size()); + // Write out the header. + llvm::Error Err = Hdr.encode(O); + if (Err) + return Err; + + // Write out the address offsets. + O.alignTo(Hdr.AddrOffSize); + for (const auto &FuncInfo : Funcs) { + uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + switch(Hdr.AddrOffSize) { + case 1: O.writeU8(static_cast(AddrOffset)); break; + case 2: O.writeU16(static_cast(AddrOffset)); break; + case 4: O.writeU32(static_cast(AddrOffset)); break; + case 8: O.writeU64(AddrOffset); break; + } + } + + // Write out all zeros for the AddrInfoOffsets. + O.alignTo(4); + const off_t AddrInfoOffsetsOffset = O.tell(); + for (size_t i = 0, n = Funcs.size(); i < n; ++i) + O.writeU32(0); + + // Write out the file table + O.alignTo(4); + assert(!Files.empty()); + assert(Files[0].Dir == 0); + assert(Files[0].Base == 0); + size_t NumFiles = Files.size(); + if (NumFiles > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many files"); + O.writeU32(static_cast(NumFiles)); + for (auto File: Files) { + O.writeU32(File.Dir); + O.writeU32(File.Base); + } + + // Write out the sting table. + const off_t StrtabOffset = O.tell(); + StrTab.write(O.get_stream()); + const off_t StrtabSize = O.tell() - StrtabOffset; + std::vector AddrInfoOffsets; + + // Write out the address infos for each function info. 
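Editor's note on GsymCreator::encode above: it chooses the smallest address-offset width that can still represent the distance between the lowest and highest function address, so small binaries pay only one or two bytes per entry. That selection is restated below as a free-standing helper.

#include <cstdint>

// Pick 1, 2, 4 or 8 bytes per address offset depending on the address range
// covered by the functions being encoded.
uint8_t addrOffSizeFor(uint64_t MinAddr, uint64_t MaxAddr) {
  const uint64_t AddrDelta = MaxAddr - MinAddr;
  if (AddrDelta <= UINT8_MAX)
    return 1;
  if (AddrDelta <= UINT16_MAX)
    return 2;
  if (AddrDelta <= UINT32_MAX)
    return 4;
  return 8;
}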
+ for (const auto &FuncInfo : Funcs) { + if (Expected OffsetOrErr = FuncInfo.encode(O)) + AddrInfoOffsets.push_back(OffsetOrErr.get()); + else + return OffsetOrErr.takeError(); + } + // Fixup the string table offset and size in the header + O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); + O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); + + // Fixup all address info offsets + uint64_t Offset = 0; + for (auto AddrInfoOffset: AddrInfoOffsets) { + O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); + Offset += 4; + } + return ErrorSuccess(); +} + +llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { + std::lock_guard Guard(Mutex); + if (Finalized) + return createStringError(std::errc::invalid_argument, + "already finalized"); + Finalized = true; + + // Sort function infos so we can emit sorted functions. + llvm::sort(Funcs.begin(), Funcs.end()); + + // Don't let the string table indexes change by finalizing in order. + StrTab.finalizeInOrder(); + + // Remove duplicates function infos that have both entries from debug info + // (DWARF or Breakpad) and entries from the SymbolTable. + // + // Also handle overlapping function. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. + // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + auto NumBefore = Funcs.size(); + auto Curr = Funcs.begin(); + auto Prev = Funcs.end(); + while (Curr != Funcs.end()) { + // Can't check for overlaps or same address ranges if we don't have a + // previous entry + if (Prev != Funcs.end()) { + if (Prev->Range.intersects(Curr->Range)) { + // Overlapping address ranges. + if (Prev->Range == Curr->Range) { + // Same address range. Check if one is from debug info and the other + // is from a symbol table. If so, then keep the one with debug info. + // Our sorting guarantees that entries with matching address ranges + // that have debug info are last in the sort. + if (*Prev == *Curr) { + // FunctionInfo entries match exactly (range, lines, inlines) + OS << "warning: duplicate function info entries, removing " + "duplicate:\n" + << *Curr << '\n'; + Curr = Funcs.erase(Prev); + } else { + if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { + // Same address range, one with no debug info (symbol) and the + // next with debug info. Keep the latter. + Curr = Funcs.erase(Prev); + } else { + OS << "warning: same address range contains different debug " + << "info. 
Removing:\n" + << *Prev << "\nIn favor of this one:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + } else { + // print warnings about overlaps + OS << "warning: function ranges overlap:\n" + << *Prev << "\n" + << *Curr << "\n"; + } + } else if (Prev->Range.size() == 0 && + Curr->Range.contains(Prev->Range.Start)) { + OS << "warning: removing symbol:\n" + << *Prev << "\nKeeping:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + if (Curr == Funcs.end()) + break; + Prev = Curr++; + } + + OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " + << Funcs.size() << " total\n"; + return Error::success(); +} + +uint32_t GsymCreator::insertString(StringRef S) { + std::lock_guard Guard(Mutex); + if (S.empty()) + return 0; + return StrTab.add(S); +} + +void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { + std::lock_guard Guard(Mutex); + Funcs.emplace_back(FI); +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) { + std::lock_guard Guard(Mutex); + for (auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) const { + std::lock_guard Guard(Mutex); + for (const auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp new file mode 100644 index 0000000000000..1b448cf80b70d --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,265 @@ +//===- GsymReader.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +#include +#include +#include +#include + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace gsym; + +GsymReader::GsymReader(std::unique_ptr Buffer) : + MemBuffer(std::move(Buffer)), + Endian(support::endian::system_endianness()) {} + + GsymReader::GsymReader(GsymReader &&RHS) = default; + +GsymReader::~GsymReader() = default; + +llvm::Expected GsymReader::openFile(StringRef Filename) { + // Open the input file and return an appropriate error if needed. + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + auto Err = BuffOrErr.getError(); + if (Err) + return llvm::errorCodeToError(Err); + return create(BuffOrErr.get()); +} + +llvm::Expected GsymReader::copyBuffer(StringRef Bytes) { + auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes"); + return create(MemBuffer); +} + +llvm::Expected +GsymReader::create(std::unique_ptr &MemBuffer) { + if (!MemBuffer.get()) + return createStringError(std::errc::invalid_argument, + "invalid memory buffer"); + GsymReader GR(std::move(MemBuffer)); + llvm::Error Err = GR.parse(); + if (Err) + return std::move(Err); + return std::move(GR); +} + +llvm::Error +GsymReader::parse() { + BinaryStreamReader FileData(MemBuffer->getBuffer(), + support::endian::system_endianness()); + // Check for the magic bytes. This file format is designed to be mmap'ed + // into a process and accessed as read only. 
This is done for performance + // and efficiency for symbolicating and parsing GSYM data. + if (FileData.readObject(Hdr)) + return createStringError(std::errc::invalid_argument, + "not enough data for a GSYM header"); + + const auto HostByteOrder = support::endian::system_endianness(); + switch (Hdr->Magic) { + case GSYM_MAGIC: + Endian = HostByteOrder; + break; + case GSYM_CIGAM: + // This is a GSYM file, but not native endianness. + Endian = sys::IsBigEndianHost ? support::little : support::big; + Swap.reset(new SwappedData); + break; + default: + return createStringError(std::errc::invalid_argument, + "not a GSYM file"); + } + + bool DataIsLittleEndian = HostByteOrder != support::little; + // Read a correctly byte swapped header if we need to. + if (Swap) { + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + if (auto ExpectedHdr = Header::decode(Data)) + Swap->Hdr = ExpectedHdr.get(); + else + return ExpectedHdr.takeError(); + Hdr = &Swap->Hdr; + } + + // Detect errors in the header and report any that are found. If we make it + // past this without errors, we know we have a good magic value, a supported + // version number, verified address offset size and a valid UUID size. + if (Error Err = Hdr->checkForError()) + return Err; + + if (!Swap) { + // This is the native endianness case that is most common and optimized for + // efficient lookups. Here we just grab pointers to the native data and + // use ArrayRef objects to allow efficient read only access. + + // Read the address offsets. + if (FileData.padToAlignment(Hdr->AddrOffSize) || + FileData.readArray(AddrOffsets, + Hdr->NumAddresses * Hdr->AddrOffSize)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + + // Read the address info offsets. + if (FileData.padToAlignment(4) || + FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address info offsets table"); + + // Read the file table. + uint32_t NumFiles = 0; + if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles)) + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + + // Get the string table. + FileData.setOffset(Hdr->StrtabOffset); + if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize)) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); +} else { + // This is the non native endianness case that is not common and not + // optimized for lookups. Here we decode the important tables into local + // storage and then set the ArrayRef objects to point to these swapped + // copies of the read only data so lookups can be as efficient as possible. + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + + // Read the address offsets. 
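Editor's note on GsymReader::parse above: it decides the byte order by checking the on-disk magic. A native match means the buffer can be used directly, while a byte-swapped match (GSYM_CIGAM) forces every multi-byte field to be swapped into local copies. The probe is sketched below; the numeric constants are illustrative stand-ins, not the actual GSYM magic values.

#include <cstdint>

enum class ByteOrder { Native, Swapped, NotGsym };

constexpr uint32_t kMagic = 0x4753594D; // "GSYM" as written by the producer (illustrative)
constexpr uint32_t kCigam = 0x4D595347; // the same bytes read in the opposite order

ByteOrder detectByteOrder(uint32_t OnDiskMagic) {
  if (OnDiskMagic == kMagic)
    return ByteOrder::Native;  // fast path: point straight into the buffer
  if (OnDiskMagic == kCigam)
    return ByteOrder::Swapped; // slow path: decode into swapped local copies
  return ByteOrder::NotGsym;   // not a GSYM file at all
}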
+ uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize); + Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize); + switch (Hdr->AddrOffSize) { + case 1: + if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 2: + if (!Data.getU16(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 4: + if (!Data.getU32(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 8: + if (!Data.getU64(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + } + AddrOffsets = ArrayRef(Swap->AddrOffsets); + + // Read the address info offsets. + Offset = alignTo(Offset, 4); + Swap->AddrInfoOffsets.resize(Hdr->NumAddresses); + if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses)) + AddrInfoOffsets = ArrayRef(Swap->AddrInfoOffsets); + else + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + // Read the file table. + const uint32_t NumFiles = Data.getU32(&Offset); + if (NumFiles > 0) { + Swap->Files.resize(NumFiles); + if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2)) + Files = ArrayRef(Swap->Files); + else + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + } + // Get the string table. + StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset, + Hdr->StrtabSize); + if (StrTab.Data.empty()) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); + } + return Error::success(); + +} + +const Header &GsymReader::getHeader() const { + // The only way to get a GsymReader is from GsymReader::openFile(...) or + // GsymReader::copyBuffer() and the header must be valid and initialized to + // a valid pointer value, so the assert below should not trigger. 
+  assert(Hdr);
+  return *Hdr;
+}
+
+Optional<uint64_t> GsymReader::getAddress(size_t Index) const {
+  switch (Hdr->AddrOffSize) {
+  case 1: return addressForIndex<uint8_t>(Index);
+  case 2: return addressForIndex<uint16_t>(Index);
+  case 4: return addressForIndex<uint32_t>(Index);
+  case 8: return addressForIndex<uint64_t>(Index);
+  }
+  return llvm::None;
+}
+
+Optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
+  const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
+  if (Index < NumAddrInfoOffsets)
+    return AddrInfoOffsets[Index];
+  return llvm::None;
+}
+
+Expected<uint64_t>
+GsymReader::getAddressIndex(const uint64_t Addr) const {
+  if (Addr < Hdr->BaseAddress)
+    return createStringError(std::errc::invalid_argument,
+                             "address 0x%" PRIx64 " not in GSYM", Addr);
+  const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
+  switch (Hdr->AddrOffSize) {
+  case 1: return getAddressOffsetIndex<uint8_t>(AddrOffset);
+  case 2: return getAddressOffsetIndex<uint16_t>(AddrOffset);
+  case 4: return getAddressOffsetIndex<uint32_t>(AddrOffset);
+  case 8: return getAddressOffsetIndex<uint64_t>(AddrOffset);
+  default: break;
+  }
+  return createStringError(std::errc::invalid_argument,
+                           "unsupported address offset size %u",
+                           Hdr->AddrOffSize);
+}
+
+llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
+  Expected<uint64_t> AddressIndex = getAddressIndex(Addr);
+  if (!AddressIndex)
+    return AddressIndex.takeError();
+  // Address info offsets size should have been checked in parse().
+  assert(*AddressIndex < AddrInfoOffsets.size());
+  auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex];
+  DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4);
+  if (Optional<uint64_t> OptAddr = getAddress(*AddressIndex)) {
+    auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr);
+    if (ExpectedFI) {
+      if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0)
+        return ExpectedFI;
+      return createStringError(std::errc::invalid_argument,
+                               "address 0x%" PRIx64 " not in GSYM", Addr);
+    }
+  }
+  return createStringError(std::errc::invalid_argument,
+                           "failed to extract address[%" PRIu64 "]",
+                           *AddressIndex);
+}
diff --git a/llvm/lib/DebugInfo/GSYM/Header.cpp b/llvm/lib/DebugInfo/GSYM/Header.cpp
index fbf991682e7f0..0b3fb9c498949 100644
--- a/llvm/lib/DebugInfo/GSYM/Header.cpp
+++ b/llvm/lib/DebugInfo/GSYM/Header.cpp
@@ -38,14 +38,14 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const Header &H) {
 }
 
 /// Check the header and detect any errors.
-static llvm::Error getHeaderError(const Header &H) {
-  if (H.Magic != GSYM_MAGIC)
+llvm::Error Header::checkForError() const {
+  if (Magic != GSYM_MAGIC)
     return createStringError(std::errc::invalid_argument,
-                             "invalid GSYM magic 0x%8.8x", H.Magic);
-  if (H.Version != GSYM_VERSION)
+                             "invalid GSYM magic 0x%8.8x", Magic);
+  if (Version != GSYM_VERSION)
     return createStringError(std::errc::invalid_argument,
-                             "unsupported GSYM version %u", H.Version);
-  switch (H.AddrOffSize) {
+                             "unsupported GSYM version %u", Version);
+  switch (AddrOffSize) {
   case 1: break;
   case 2: break;
   case 4: break;
@@ -53,11 +53,11 @@ static llvm::Error getHeaderError(const Header &H) {
   default:
       return createStringError(std::errc::invalid_argument,
                                "invalid address offset size %u",
-                               H.AddrOffSize);
+                               AddrOffSize);
   }
-  if (H.UUIDSize > GSYM_MAX_UUID_SIZE)
+  if (UUIDSize > GSYM_MAX_UUID_SIZE)
     return createStringError(std::errc::invalid_argument,
-                             "invalid UUID size %u", H.UUIDSize);
+                             "invalid UUID size %u", UUIDSize);
   return Error::success();
 }
 
@@ -77,16 +77,14 @@ llvm::Expected<Header>
Header::decode(DataExtractor &Data) { H.StrtabOffset = Data.getU32(&Offset); H.StrtabSize = Data.getU32(&Offset); Data.getU8(&Offset, H.UUID, GSYM_MAX_UUID_SIZE); - llvm::Error Err = getHeaderError(H); - if (Err) + if (llvm::Error Err = H.checkForError()) return std::move(Err); return H; } llvm::Error Header::encode(FileWriter &O) const { // Users must verify the Header is valid prior to calling this funtion. - llvm::Error Err = getHeaderError(*this); - if (Err) + if (llvm::Error Err = checkForError()) return Err; O.writeU32(Magic); O.writeU16(Version); diff --git a/llvm/lib/DebugInfo/GSYM/LLVMBuild.txt b/llvm/lib/DebugInfo/GSYM/LLVMBuild.txt index ff324253994d9..d3cf7653abf41 100644 --- a/llvm/lib/DebugInfo/GSYM/LLVMBuild.txt +++ b/llvm/lib/DebugInfo/GSYM/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = DebugInfoGSYM parent = DebugInfo -required_libraries = Support +required_libraries = MC Support diff --git a/llvm/lib/DebugInfo/PDB/GenericError.cpp b/llvm/lib/DebugInfo/PDB/GenericError.cpp index 70dc094c42ecf..0e4cba3174b26 100644 --- a/llvm/lib/DebugInfo/PDB/GenericError.cpp +++ b/llvm/lib/DebugInfo/PDB/GenericError.cpp @@ -34,8 +34,8 @@ class PDBErrorCategory : public std::error_category { return "The PDB file path is an invalid UTF8 sequence."; case pdb_error_code::signature_out_of_date: return "The signature does not match; the file(s) might be out of date."; - case pdb_error_code::external_cmdline_ref: - return "The path to this file must be provided on the command-line."; + case pdb_error_code::no_matching_pch: + return "No matching precompiled header could be located."; } llvm_unreachable("Unrecognized generic_error_code"); } diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt index ad3427fdfe316..af4efadd6ba23 100644 --- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMJITLink JITLinkMemoryManager.cpp EHFrameSupport.cpp MachO.cpp + MachO_arm64.cpp MachO_x86_64.cpp MachOLinkGraphBuilder.cpp diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp index 15995b8ce98fc..58bc0f56e1555 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp @@ -14,6 +14,7 @@ #include "llvm/ExecutionEngine/JITLink/MachO.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h" #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" @@ -64,6 +65,8 @@ void jitLink_MachO(std::unique_ptr Ctx) { }); switch (Header.cputype) { + case MachO::CPU_TYPE_ARM64: + return jitLink_MachO_arm64(std::move(Ctx)); case MachO::CPU_TYPE_X86_64: return jitLink_MachO_x86_64(std::move(Ctx)); } diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp new file mode 100644 index 0000000000000..945343bff89d2 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -0,0 +1,736 @@ +//===---- MachO_arm64.cpp - JIT linker implementation for MachO/arm64 -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MachO/arm64 jit-link implementation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h" + +#include "BasicGOTAndStubsBuilder.h" +#include "MachOLinkGraphBuilder.h" + +#define DEBUG_TYPE "jitlink" + +using namespace llvm; +using namespace llvm::jitlink; +using namespace llvm::jitlink::MachO_arm64_Edges; + +namespace { + +class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { +public: + MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj) + : MachOLinkGraphBuilder(Obj), + NumSymbols(Obj.getSymtabLoadCommand().nsyms) { + addCustomSectionParser( + "__eh_frame", [this](NormalizedSection &EHFrameSection) { + if (!EHFrameSection.Data) + return make_error( + "__eh_frame section is marked zero-fill"); + return MachOEHFrameBinaryParser( + *this, EHFrameSection.Address, + StringRef(EHFrameSection.Data, EHFrameSection.Size), + *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64) + .addToGraph(); + }); + } + +private: + static Expected + getRelocationKind(const MachO::relocation_info &RI) { + switch (RI.r_type) { + case MachO::ARM64_RELOC_UNSIGNED: + if (!RI.r_pcrel) { + if (RI.r_length == 3) + return RI.r_extern ? Pointer64 : Pointer64Anon; + else if (RI.r_length == 2) + return Pointer32; + } + break; + case MachO::ARM64_RELOC_SUBTRACTOR: + // SUBTRACTOR must be non-pc-rel, extern, with length 2 or 3. + // Initially represent SUBTRACTOR relocations with 'Delta'. + // They may be turned into NegDelta by parsePairRelocation. + if (!RI.r_pcrel && RI.r_extern) { + if (RI.r_length == 2) + return Delta32; + else if (RI.r_length == 3) + return Delta64; + } + break; + case MachO::ARM64_RELOC_BRANCH26: + if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return Branch26; + break; + case MachO::ARM64_RELOC_PAGE21: + if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return Page21; + break; + case MachO::ARM64_RELOC_PAGEOFF12: + if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return PageOffset12; + break; + case MachO::ARM64_RELOC_GOT_LOAD_PAGE21: + if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return GOTPage21; + break; + case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12: + if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return GOTPageOffset12; + break; + case MachO::ARM64_RELOC_POINTER_TO_GOT: + if (RI.r_pcrel && RI.r_extern && RI.r_length == 2) + return PointerToGOT; + break; + case MachO::ARM64_RELOC_ADDEND: + if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2) + return PairedAddend; + break; + } + + return make_error( + "Unsupported arm64 relocation: address=" + + formatv("{0:x8}", RI.r_address) + + ", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) + + ", kind=" + formatv("{0:x1}", RI.r_type) + + ", pc_rel=" + (RI.r_pcrel ? "true" : "false") + + ", extern=" + (RI.r_extern ? "true" : "false") + + ", length=" + formatv("{0:d}", RI.r_length)); + } + + MachO::relocation_info + getRelocationInfo(const object::relocation_iterator RelItr) { + MachO::any_relocation_info ARI = + getObject().getRelocation(RelItr->getRawDataRefImpl()); + MachO::relocation_info RI; + memcpy(&RI, &ARI, sizeof(MachO::relocation_info)); + return RI; + } + + using PairRelocInfo = + std::tuple; + + // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success, + // returns the edge kind and addend to be used. 
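  // Illustrative example (not from this patch; the symbol names are
  // hypothetical): an assembler-level delta such as
  //   .quad _to - _from
  // is emitted as an ARM64_RELOC_SUBTRACTOR referencing `_from`, immediately
  // followed by an ARM64_RELOC_UNSIGNED referencing `_to`. Depending on
  // whether the block being fixed up belongs to `_from` or `_to`, the pair is
  // lowered to a Delta32/Delta64 or a NegDelta32/NegDelta64 edge.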
+ Expected + parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind, + const MachO::relocation_info &SubRI, + JITTargetAddress FixupAddress, const char *FixupContent, + object::relocation_iterator &UnsignedRelItr, + object::relocation_iterator &RelEnd) { + using namespace support; + + assert(((SubtractorKind == Delta32 && SubRI.r_length == 2) || + (SubtractorKind == Delta64 && SubRI.r_length == 3)) && + "Subtractor kind should match length"); + assert(SubRI.r_extern && "SUBTRACTOR reloc symbol should be extern"); + assert(!SubRI.r_pcrel && "SUBTRACTOR reloc should not be PCRel"); + + if (UnsignedRelItr == RelEnd) + return make_error("arm64 SUBTRACTOR without paired " + "UNSIGNED relocation"); + + auto UnsignedRI = getRelocationInfo(UnsignedRelItr); + + if (SubRI.r_address != UnsignedRI.r_address) + return make_error("arm64 SUBTRACTOR and paired UNSIGNED " + "point to different addresses"); + + if (SubRI.r_length != UnsignedRI.r_length) + return make_error("length of arm64 SUBTRACTOR and paired " + "UNSIGNED reloc must match"); + + Symbol *FromSymbol; + if (auto FromSymbolOrErr = findSymbolByIndex(SubRI.r_symbolnum)) + FromSymbol = FromSymbolOrErr->GraphSymbol; + else + return FromSymbolOrErr.takeError(); + + // Read the current fixup value. + uint64_t FixupValue = 0; + if (SubRI.r_length == 3) + FixupValue = *(const little64_t *)FixupContent; + else + FixupValue = *(const little32_t *)FixupContent; + + // Find 'ToSymbol' using symbol number or address, depending on whether the + // paired UNSIGNED relocation is extern. + Symbol *ToSymbol = nullptr; + if (UnsignedRI.r_extern) { + // Find target symbol by symbol index. + if (auto ToSymbolOrErr = findSymbolByIndex(UnsignedRI.r_symbolnum)) + ToSymbol = ToSymbolOrErr->GraphSymbol; + else + return ToSymbolOrErr.takeError(); + } else { + if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue)) + ToSymbol = &*ToSymbolOrErr; + else + return ToSymbolOrErr.takeError(); + FixupValue -= ToSymbol->getAddress(); + } + + MachOARM64RelocationKind DeltaKind; + Symbol *TargetSymbol; + uint64_t Addend; + if (&BlockToFix == &FromSymbol->getAddressable()) { + TargetSymbol = ToSymbol; + DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32; + Addend = FixupValue + (FixupAddress - FromSymbol->getAddress()); + // FIXME: handle extern 'from'. + } else if (&BlockToFix == &ToSymbol->getAddressable()) { + TargetSymbol = &*FromSymbol; + DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32; + Addend = FixupValue - (FixupAddress - ToSymbol->getAddress()); + } else { + // BlockToFix was neither FromSymbol nor ToSymbol. + return make_error("SUBTRACTOR relocation must fix up " + "either 'A' or 'B' (or a symbol in one " + "of their alt-entry groups)"); + } + + return PairRelocInfo(DeltaKind, TargetSymbol, Addend); + } + + Error addRelocations() override { + using namespace support; + auto &Obj = getObject(); + + for (auto &S : Obj.sections()) { + + JITTargetAddress SectionAddress = S.getAddress(); + + for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end(); + RelItr != RelEnd; ++RelItr) { + + MachO::relocation_info RI = getRelocationInfo(RelItr); + + // Sanity check the relocation kind. + auto Kind = getRelocationKind(RI); + if (!Kind) + return Kind.takeError(); + + // Find the address of the value to fix up. 
+ JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address; + + LLVM_DEBUG({ + dbgs() << "Processing " << getMachOARM64RelocationKindName(*Kind) + << " relocation at " << format("0x%016" PRIx64, FixupAddress) + << "\n"; + }); + + // Find the block that the fixup points to. + Block *BlockToFix = nullptr; + { + auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress); + if (!SymbolToFixOrErr) + return SymbolToFixOrErr.takeError(); + BlockToFix = &SymbolToFixOrErr->getBlock(); + } + + if (FixupAddress + static_cast(1ULL << RI.r_length) > + BlockToFix->getAddress() + BlockToFix->getContent().size()) + return make_error( + "Relocation content extends past end of fixup block"); + + // Get a pointer to the fixup content. + const char *FixupContent = BlockToFix->getContent().data() + + (FixupAddress - BlockToFix->getAddress()); + + // The target symbol and addend will be populated by the switch below. + Symbol *TargetSymbol = nullptr; + uint64_t Addend = 0; + + if (*Kind == PairedAddend) { + // If this is an Addend relocation then process it and move to the + // paired reloc. + + Addend = RI.r_symbolnum; + + if (RelItr == RelEnd) + return make_error("Unpaired Addend reloc at " + + formatv("{0:x16}", FixupAddress)); + ++RelItr; + RI = getRelocationInfo(RelItr); + + Kind = getRelocationKind(RI); + if (!Kind) + return Kind.takeError(); + + if (*Kind != Branch26 && *Kind != Page21 && *Kind != PageOffset12) + return make_error( + "Invalid relocation pair: Addend + " + + getMachOARM64RelocationKindName(*Kind)); + else + LLVM_DEBUG({ + dbgs() << " pair is " << getMachOARM64RelocationKindName(*Kind) + << "`\n"; + }); + + // Find the address of the value to fix up. + JITTargetAddress PairedFixupAddress = + SectionAddress + (uint32_t)RI.r_address; + if (PairedFixupAddress != FixupAddress) + return make_error("Paired relocation points at " + "different target"); + } + + switch (*Kind) { + case Branch26: { + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if ((Instr & 0x7fffffff) != 0x14000000) + return make_error("BRANCH26 target is not a B or BL " + "instruction with a zero addend"); + break; + } + case Pointer32: + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + Addend = *(const ulittle32_t *)FixupContent; + break; + case Pointer64: + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + Addend = *(const ulittle64_t *)FixupContent; + break; + case Pointer64Anon: { + JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; + if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) + TargetSymbol = &*TargetSymbolOrErr; + else + return TargetSymbolOrErr.takeError(); + Addend = TargetAddress - TargetSymbol->getAddress(); + break; + } + case Page21: + case GOTPage21: { + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if ((Instr & 0xffffffe0) != 0x90000000) + return make_error("PAGE21/GOTPAGE21 target is not an " + "ADRP instruction with a zero " + "addend"); + break; + } + case PageOffset12: { + if (auto TargetSymbolOrErr = 
findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + break; + } + case GOTPageOffset12: { + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + uint32_t Instr = *(const ulittle32_t *)FixupContent; + if ((Instr & 0xfffffc00) != 0xf9400000) + return make_error("GOTPAGEOFF12 target is not an LDR " + "immediate instruction with a zero " + "addend"); + break; + } + case PointerToGOT: + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); + break; + case Delta32: + case Delta64: { + // We use Delta32/Delta64 to represent SUBTRACTOR relocations. + // parsePairRelocation handles the paired reloc, and returns the + // edge kind to be used (either Delta32/Delta64, or + // NegDelta32/NegDelta64, depending on the direction of the + // subtraction) along with the addend. + auto PairInfo = + parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress, + FixupContent, ++RelItr, RelEnd); + if (!PairInfo) + return PairInfo.takeError(); + std::tie(*Kind, TargetSymbol, Addend) = *PairInfo; + assert(TargetSymbol && "No target symbol from parsePairRelocation?"); + break; + } + default: + llvm_unreachable("Special relocation kind should not appear in " + "mach-o file"); + } + + LLVM_DEBUG({ + Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol, + Addend); + printEdge(dbgs(), *BlockToFix, GE, + getMachOARM64RelocationKindName(*Kind)); + dbgs() << "\n"; + }); + BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(), + *TargetSymbol, Addend); + } + } + return Error::success(); + } + + unsigned NumSymbols = 0; +}; + +class MachO_arm64_GOTAndStubsBuilder + : public BasicGOTAndStubsBuilder { +public: + MachO_arm64_GOTAndStubsBuilder(LinkGraph &G) + : BasicGOTAndStubsBuilder(G) {} + + bool isGOTEdge(Edge &E) const { + return E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 || + E.getKind() == PointerToGOT; + } + + Symbol &createGOTEntry(Symbol &Target) { + auto &GOTEntryBlock = G.createContentBlock( + getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0); + GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); + return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); + } + + void fixGOTEdge(Edge &E, Symbol &GOTEntry) { + if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12) { + // Update the target, but leave the edge addend as-is. + E.setTarget(GOTEntry); + } else if (E.getKind() == PointerToGOT) { + E.setTarget(GOTEntry); + E.setKind(Delta32); + } else + llvm_unreachable("Not a GOT edge?"); + } + + bool isExternalBranchEdge(Edge &E) { + return E.getKind() == Branch26 && !E.getTarget().isDefined(); + } + + Symbol &createStub(Symbol &Target) { + auto &StubContentBlock = + G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0); + // Re-use GOT entries for stub targets. 
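    // Note (illustrative, based on the StubContent bytes defined below): each
    // stub is the two-instruction sequence
    //   ldr x16, <got entry>   ; 0x58000010, patched via the LDRLiteral19 edge
    //   br  x16                ; 0xd61f0200
    // so an external Branch26 target is reached indirectly through its GOT
    // entry.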
+ auto &GOTEntrySymbol = getGOTEntrySymbol(Target); + StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0); + return G.addAnonymousSymbol(StubContentBlock, 0, 8, true, false); + } + + void fixExternalBranchEdge(Edge &E, Symbol &Stub) { + assert(E.getKind() == Branch26 && "Not a Branch32 edge?"); + assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?"); + E.setTarget(Stub); + } + +private: + Section &getGOTSection() { + if (!GOTSection) + GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ); + return *GOTSection; + } + + Section &getStubsSection() { + if (!StubsSection) { + auto StubsProt = static_cast( + sys::Memory::MF_READ | sys::Memory::MF_EXEC); + StubsSection = &G.createSection("$__STUBS", StubsProt); + } + return *StubsSection; + } + + StringRef getGOTEntryBlockContent() { + return StringRef(reinterpret_cast(NullGOTEntryContent), + sizeof(NullGOTEntryContent)); + } + + StringRef getStubBlockContent() { + return StringRef(reinterpret_cast(StubContent), + sizeof(StubContent)); + } + + static const uint8_t NullGOTEntryContent[8]; + static const uint8_t StubContent[8]; + Section *GOTSection = nullptr; + Section *StubsSection = nullptr; +}; + +const uint8_t MachO_arm64_GOTAndStubsBuilder::NullGOTEntryContent[8] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +const uint8_t MachO_arm64_GOTAndStubsBuilder::StubContent[8] = { + 0x10, 0x00, 0x00, 0x58, // LDR x16, + 0x00, 0x02, 0x1f, 0xd6 // BR x16 +}; + +} // namespace + +namespace llvm { +namespace jitlink { + +class MachOJITLinker_arm64 : public JITLinker { + friend class JITLinker; + +public: + MachOJITLinker_arm64(std::unique_ptr Ctx, + PassConfiguration PassConfig) + : JITLinker(std::move(Ctx), std::move(PassConfig)) {} + +private: + StringRef getEdgeKindName(Edge::Kind R) const override { + return getMachOARM64RelocationKindName(R); + } + + Expected> + buildGraph(MemoryBufferRef ObjBuffer) override { + auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer); + if (!MachOObj) + return MachOObj.takeError(); + return MachOLinkGraphBuilder_arm64(**MachOObj).buildGraph(); + } + + static Error targetOutOfRangeError(const Block &B, const Edge &E) { + std::string ErrMsg; + { + raw_string_ostream ErrStream(ErrMsg); + ErrStream << "Relocation target out of range: "; + printEdge(ErrStream, B, E, getMachOARM64RelocationKindName(E.getKind())); + ErrStream << "\n"; + } + return make_error(std::move(ErrMsg)); + } + + static unsigned getPageOffset12Shift(uint32_t Instr) { + constexpr uint32_t LDRLiteralMask = 0x3ffffc00; + + // Check for a GPR LDR immediate with a zero embedded literal. + // If found, the top two bits contain the shift. + if ((Instr & LDRLiteralMask) == 0x39400000) + return Instr >> 30; + + // Check for a Neon LDR immediate of size 64-bit or less with a zero + // embedded literal. If found, the top two bits contain the shift. + if ((Instr & LDRLiteralMask) == 0x3d400000) + return Instr >> 30; + + // Check for a Neon LDR immediate of size 128-bit with a zero embedded + // literal. 
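    // Illustrative example (not from this patch): a plain 64-bit load such as
    //   ldr x0, [x1, #imm]
    // has size bits (31:30) == 0b11, so the GPR check above returns a shift
    // of 3 (the immediate is scaled by 8); the 128-bit vector case handled
    // below scales by 16, hence the constant shift of 4.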
+ constexpr uint32_t SizeBitsMask = 0xc0000000; + if ((Instr & (LDRLiteralMask | SizeBitsMask)) == 0x3dc00000) + return 4; + + return 0; + } + + Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const { + using namespace support; + + char *FixupPtr = BlockWorkingMem + E.getOffset(); + JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); + + switch (E.getKind()) { + case Branch26: { + assert((FixupAddress & 0x3) == 0 && "Branch-inst is not 32-bit aligned"); + + int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); + + if (static_cast(Value) & 0x3) + return make_error("Branch26 target is not 32-bit " + "aligned"); + + if (Value < -(1 << 27) || Value > ((1 << 27) - 1)) + return targetOutOfRangeError(B, E); + + uint32_t RawInstr = *(little32_t *)FixupPtr; + assert((RawInstr & 0x7fffffff) == 0x14000000 && + "RawInstr isn't a B or BR immediate instruction"); + uint32_t Imm = (static_cast(Value) & ((1 << 28) - 1)) >> 2; + uint32_t FixedInstr = RawInstr | Imm; + *(little32_t *)FixupPtr = FixedInstr; + break; + } + case Pointer32: { + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + if (Value > std::numeric_limits::max()) + return targetOutOfRangeError(B, E); + *(ulittle32_t *)FixupPtr = Value; + break; + } + case Pointer64: { + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + *(ulittle64_t *)FixupPtr = Value; + break; + } + case Page21: + case GOTPage21: { + assert(E.getAddend() == 0 && "PAGE21/GOTPAGE21 with non-zero addend"); + uint64_t TargetPage = + E.getTarget().getAddress() & ~static_cast(4096 - 1); + uint64_t PCPage = B.getAddress() & ~static_cast(4096 - 1); + + int64_t PageDelta = TargetPage - PCPage; + if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1)) + return targetOutOfRangeError(B, E); + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert((RawInstr & 0xffffffe0) == 0x90000000 && + "RawInstr isn't an ADRP instruction"); + uint32_t ImmLo = (static_cast(PageDelta) >> 12) & 0x3; + uint32_t ImmHi = (static_cast(PageDelta) >> 14) & 0x7ffff; + uint32_t FixedInstr = RawInstr | (ImmLo << 29) | (ImmHi << 5); + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case PageOffset12: { + assert(E.getAddend() == 0 && "PAGEOFF12 with non-zero addend"); + uint64_t TargetOffset = E.getTarget().getAddress() & 0xfff; + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + unsigned ImmShift = getPageOffset12Shift(RawInstr); + + if (TargetOffset & ((1 << ImmShift) - 1)) + return make_error("PAGEOFF12 target is not aligned"); + + uint32_t EncodedImm = (TargetOffset >> ImmShift) << 10; + uint32_t FixedInstr = RawInstr | EncodedImm; + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case GOTPageOffset12: { + assert(E.getAddend() == 0 && "GOTPAGEOF12 with non-zero addend"); + + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert((RawInstr & 0xfffffc00) == 0xf9400000 && + "RawInstr isn't a 64-bit LDR immediate"); + + uint32_t TargetOffset = E.getTarget().getAddress() & 0xfff; + assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned"); + uint32_t EncodedImm = (TargetOffset >> 3) << 10; + uint32_t FixedInstr = RawInstr | EncodedImm; + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case LDRLiteral19: { + assert((FixupAddress & 0x3) == 0 && "LDR is not 32-bit aligned"); + assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend"); + uint32_t RawInstr = *(ulittle32_t *)FixupPtr; + assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal"); + int64_t Delta = E.getTarget().getAddress() 
- FixupAddress; + if (Delta & 0x3) + return make_error("LDR literal target is not 32-bit " + "aligned"); + if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1)) + return targetOutOfRangeError(B, E); + + uint32_t EncodedImm = (static_cast(Delta) >> 2) << 5; + uint32_t FixedInstr = RawInstr | EncodedImm; + *(ulittle32_t *)FixupPtr = FixedInstr; + break; + } + case Delta32: + case Delta64: + case NegDelta32: + case NegDelta64: { + int64_t Value; + if (E.getKind() == Delta32 || E.getKind() == Delta64) + Value = E.getTarget().getAddress() - FixupAddress + E.getAddend(); + else + Value = FixupAddress - E.getTarget().getAddress() + E.getAddend(); + + if (E.getKind() == Delta32 || E.getKind() == NegDelta32) { + if (Value < std::numeric_limits::min() || + Value > std::numeric_limits::max()) + return targetOutOfRangeError(B, E); + *(little32_t *)FixupPtr = Value; + } else + *(little64_t *)FixupPtr = Value; + break; + } + default: + llvm_unreachable("Unrecognized edge kind"); + } + + return Error::success(); + } + + uint64_t NullValue = 0; +}; + +void jitLink_MachO_arm64(std::unique_ptr Ctx) { + PassConfiguration Config; + Triple TT("arm64-apple-ios"); + + if (Ctx->shouldAddDefaultTargetPasses(TT)) { + // Add a mark-live pass. + if (auto MarkLive = Ctx->getMarkLivePass(TT)) + Config.PrePrunePasses.push_back(std::move(MarkLive)); + else + Config.PrePrunePasses.push_back(markAllSymbolsLive); + + // Add an in-place GOT/Stubs pass. + Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error { + MachO_arm64_GOTAndStubsBuilder(G).run(); + return Error::success(); + }); + } + + if (auto Err = Ctx->modifyPassConfig(TT, Config)) + return Ctx->notifyFailed(std::move(Err)); + + // Construct a JITLinker and run the link function. + MachOJITLinker_arm64::link(std::move(Ctx), std::move(Config)); +} + +StringRef getMachOARM64RelocationKindName(Edge::Kind R) { + switch (R) { + case Branch26: + return "Branch26"; + case Pointer64: + return "Pointer64"; + case Pointer64Anon: + return "Pointer64Anon"; + case Page21: + return "Page21"; + case PageOffset12: + return "PageOffset12"; + case GOTPage21: + return "GOTPage21"; + case GOTPageOffset12: + return "GOTPageOffset12"; + case PointerToGOT: + return "PointerToGOT"; + case PairedAddend: + return "PairedAddend"; + case LDRLiteral19: + return "LDRLiteral19"; + case Delta32: + return "Delta32"; + case Delta64: + return "Delta64"; + case NegDelta32: + return "NegDelta32"; + case NegDelta64: + return "NegDelta64"; + default: + return getGenericEdgeKindName(static_cast(R)); + } +} + +} // end namespace jitlink +} // end namespace llvm diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 748a2238e6420..d426d8e2fa9ca 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1497,6 +1497,24 @@ void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) { TypeID})); } +void GlobalObject::addVCallVisibilityMetadata(VCallVisibility Visibility) { + addMetadata(LLVMContext::MD_vcall_visibility, + *MDNode::get(getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Type::getInt64Ty(getContext()), Visibility))})); +} + +GlobalObject::VCallVisibility GlobalObject::getVCallVisibility() const { + if (MDNode *MD = getMetadata(LLVMContext::MD_vcall_visibility)) { + uint64_t Val = cast( + cast(MD->getOperand(0))->getValue()) + ->getZExtValue(); + assert((Val >= 0 && Val <= 2) && "unknown vcall visibility!"); + return (VCallVisibility)Val; + } + return VCallVisibility::VCallVisibilityPublic; +} + void Function::setSubprogram(DISubprogram *SP) { 
setMetadata(LLVMContext::MD_dbg, SP); } diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 6c5858b942a3f..c04fc7bacaded 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1003,6 +1003,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { GV->setLinkage(GlobalValue::InternalLinkage); } + RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + if (Conf.PostInternalizeModuleHook && !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index 199c6994992e2..8821928928672 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -151,7 +151,7 @@ void LTOCodeGenerator::initializeLTOPasses() { void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) { const std::vector &undefs = Mod->getAsmUndefinedRefs(); for (int i = 0, e = undefs.size(); i != e; ++i) - AsmUndefinedRefs[undefs[i]] = 1; + AsmUndefinedRefs.insert(undefs[i]); } bool LTOCodeGenerator::addModule(LTOModule *Mod) { @@ -463,6 +463,8 @@ void LTOCodeGenerator::applyScopeRestrictions() { internalizeModule(*MergedModule, mustPreserveGV); + MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + ScopeRestrictionsDone = true; } diff --git a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp index 31a839e524c30..9029be80ad715 100644 --- a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp +++ b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp @@ -158,6 +158,14 @@ static Directory layout(BlobAllocator &File, Stream &S) { Result.Location.RVA = File.tell(); Optional DataEnd; switch (S.Kind) { + case Stream::StreamKind::MemoryInfoList: { + MemoryInfoListStream &InfoList = cast(S); + File.allocateNewObject( + sizeof(minidump::MemoryInfoListHeader), sizeof(minidump::MemoryInfo), + InfoList.Infos.size()); + File.allocateArray(makeArrayRef(InfoList.Infos)); + break; + } case Stream::StreamKind::MemoryList: DataEnd = layout(File, cast(S)); break; diff --git a/llvm/lib/ObjectYAML/MinidumpYAML.cpp b/llvm/lib/ObjectYAML/MinidumpYAML.cpp index acc36a95a293c..b9d1ded181661 100644 --- a/llvm/lib/ObjectYAML/MinidumpYAML.cpp +++ b/llvm/lib/ObjectYAML/MinidumpYAML.cpp @@ -69,6 +69,8 @@ Stream::~Stream() = default; Stream::StreamKind Stream::getKind(StreamType Type) { switch (Type) { + case StreamType::MemoryInfoList: + return StreamKind::MemoryInfoList; case StreamType::MemoryList: return StreamKind::MemoryList; case StreamType::ModuleList: @@ -93,6 +95,8 @@ Stream::StreamKind Stream::getKind(StreamType Type) { std::unique_ptr Stream::create(StreamType Type) { StreamKind Kind = getKind(Type); switch (Kind) { + case StreamKind::MemoryInfoList: + return std::make_unique(); case StreamKind::MemoryList: return std::make_unique(); case StreamKind::ModuleList: @@ -109,6 +113,25 @@ std::unique_ptr Stream::create(StreamType Type) { llvm_unreachable("Unhandled stream kind!"); } +void yaml::ScalarBitSetTraits::bitset( + IO &IO, MemoryProtection &Protect) { +#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(Protect, #NATIVENAME, MemoryProtection::NAME); +#include "llvm/BinaryFormat/MinidumpConstants.def" +} + +void yaml::ScalarBitSetTraits::bitset(IO &IO, MemoryState &State) { +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(State, #NATIVENAME, MemoryState::NAME); +#include "llvm/BinaryFormat/MinidumpConstants.def" +} + +void yaml::ScalarBitSetTraits::bitset(IO &IO, MemoryType &Type) { +#define 
HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(Type, #NATIVENAME, MemoryType::NAME); +#include "llvm/BinaryFormat/MinidumpConstants.def" +} + void yaml::ScalarEnumerationTraits::enumeration( IO &IO, ProcessorArchitecture &Arch) { #define HANDLE_MDMP_ARCH(CODE, NAME) \ @@ -215,6 +238,20 @@ void yaml::MappingTraits::mapping(IO &IO, mapOptionalHex(IO, "AMD Extended Features", Info.AMDExtendedFeatures, 0); } +void yaml::MappingTraits::mapping(IO &IO, MemoryInfo &Info) { + mapRequiredHex(IO, "Base Address", Info.BaseAddress); + mapOptionalHex(IO, "Allocation Base", Info.AllocationBase, Info.BaseAddress); + mapRequiredAs(IO, "Allocation Protect", + Info.AllocationProtect); + mapOptionalHex(IO, "Reserved0", Info.Reserved0, 0); + mapRequiredHex(IO, "Region Size", Info.RegionSize); + mapRequiredAs(IO, "State", Info.State); + mapOptionalAs(IO, "Protect", Info.Protect, + Info.AllocationProtect); + mapRequiredAs(IO, "Type", Info.Type); + mapOptionalHex(IO, "Reserved1", Info.Reserved1, 0); +} + void yaml::MappingTraits::mapping(IO &IO, VSFixedFileInfo &Info) { mapOptionalHex(IO, "Signature", Info.Signature, 0); @@ -264,6 +301,10 @@ void yaml::MappingTraits::mapping( IO, Range.Entry, Range.Content); } +static void streamMapping(yaml::IO &IO, MemoryInfoListStream &Stream) { + IO.mapRequired("Memory Ranges", Stream.Infos); +} + static void streamMapping(yaml::IO &IO, MemoryListStream &Stream) { IO.mapRequired("Memory Ranges", Stream.Entries); } @@ -336,6 +377,9 @@ void yaml::MappingTraits>::mapping( if (!IO.outputting()) S = MinidumpYAML::Stream::create(Type); switch (S->Kind) { + case MinidumpYAML::Stream::StreamKind::MemoryInfoList: + streamMapping(IO, llvm::cast(*S)); + break; case MinidumpYAML::Stream::StreamKind::MemoryList: streamMapping(IO, llvm::cast(*S)); break; @@ -362,6 +406,7 @@ StringRef yaml::MappingTraits>::validate( switch (S->Kind) { case MinidumpYAML::Stream::StreamKind::RawContent: return streamValidate(cast(*S)); + case MinidumpYAML::Stream::StreamKind::MemoryInfoList: case MinidumpYAML::Stream::StreamKind::MemoryList: case MinidumpYAML::Stream::StreamKind::ModuleList: case MinidumpYAML::Stream::StreamKind::SystemInfo: @@ -384,6 +429,12 @@ Expected> Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { StreamKind Kind = getKind(StreamDesc.Type); switch (Kind) { + case StreamKind::MemoryInfoList: { + if (auto ExpectedList = File.getMemoryInfoList()) + return std::make_unique(*ExpectedList); + else + return ExpectedList.takeError(); + } case StreamKind::MemoryList: { auto ExpectedList = File.getMemoryList(); if (!ExpectedList) diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 4cd05ee11f64d..eb350cb665f5d 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -86,6 +86,8 @@ MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false)) +MODULE_PASS("msan-module", MemorySanitizerPass({})) +MODULE_PASS("tsan-module", ThreadSanitizerPass()) MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false)) MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 
6d00404b0bb22..cf3e56728e235 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -439,7 +439,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { return sampleprof_error::success; } -std::error_code SampleProfileReaderBinary::readFuncProfile() { +std::error_code +SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { + Data = Start; auto NumHeadSamples = readNumber(); if (std::error_code EC = NumHeadSamples.getError()) return EC; @@ -461,7 +463,7 @@ std::error_code SampleProfileReaderBinary::readFuncProfile() { std::error_code SampleProfileReaderBinary::read() { while (!at_eof()) { - if (std::error_code EC = readFuncProfile()) + if (std::error_code EC = readFuncProfile(Data)) return EC; } @@ -483,13 +485,15 @@ SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start, return EC; break; case SecLBRProfile: - while (Data < Start + Size) { - if (std::error_code EC = readFuncProfile()) - return EC; - } + if (std::error_code EC = readFuncProfiles()) + return EC; break; case SecProfileSymbolList: - if (std::error_code EC = readProfileSymbolList(Size)) + if (std::error_code EC = readProfileSymbolList()) + return EC; + break; + case SecFuncOffsetTable: + if (std::error_code EC = readFuncOffsetTable()) return EC; break; default: @@ -498,15 +502,65 @@ SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start, return sampleprof_error::success; } -std::error_code -SampleProfileReaderExtBinary::readProfileSymbolList(uint64_t Size) { +void SampleProfileReaderExtBinary::collectFuncsFrom(const Module &M) { + UseAllFuncs = false; + FuncsToUse.clear(); + for (auto &F : M) + FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); +} + +std::error_code SampleProfileReaderExtBinary::readFuncOffsetTable() { + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + + FuncOffsetTable.reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + auto Offset = readNumber(); + if (std::error_code EC = Offset.getError()) + return EC; + + FuncOffsetTable[*FName] = *Offset; + } + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinary::readFuncProfiles() { + const uint8_t *Start = Data; + if (UseAllFuncs) { + while (Data < End) { + if (std::error_code EC = readFuncProfile(Data)) + return EC; + } + assert(Data == End && "More data is read than expected"); + return sampleprof_error::success; + } + + for (auto Name : FuncsToUse) { + auto iter = FuncOffsetTable.find(Name); + if (iter == FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + Data = End; + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() { if (!ProfSymList) ProfSymList = std::make_unique(); - if (std::error_code EC = ProfSymList->read(Data, Size)) + if (std::error_code EC = ProfSymList->read(Data, End - Data)) return EC; - Data = Data + Size; + Data = End; return sampleprof_error::success; } @@ -600,9 +654,9 @@ std::error_code SampleProfileReaderCompactBinary::read() { for (auto Offset : OffsetsToUse) { const uint8_t *SavedData = Data; - Data = reinterpret_cast(Buffer->getBufferStart()) + - Offset; - if (std::error_code EC = readFuncProfile()) + if (std::error_code EC 
= readFuncProfile( + reinterpret_cast(Buffer->getBufferStart()) + + Offset)) return EC; Data = SavedData; } @@ -719,8 +773,16 @@ uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { } uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { - auto &LastEntry = SecHdrTable.back(); - return LastEntry.Offset + LastEntry.Size; + // Sections in SecHdrTable is not necessarily in the same order as + // sections in the profile because section like FuncOffsetTable needs + // to be written after section LBRProfile but needs to be read before + // section LBRProfile, so we cannot simply use the last entry in + // SecHdrTable to calculate the file size. + uint64_t FileSize = 0; + for (auto &Entry : SecHdrTable) { + FileSize = std::max(Entry.Offset + Entry.Size, FileSize); + } + return FileSize; } bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { @@ -812,13 +874,11 @@ std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() { return sampleprof_error::success; } -void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) { +void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) { UseAllFuncs = false; FuncsToUse.clear(); - for (auto &F : M) { - StringRef CanonName = FunctionSamples::getCanonicalFnName(F); - FuncsToUse.insert(CanonName); - } + for (auto &F : M) + FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); } std::error_code SampleProfileReaderBinary::readSummaryEntry( diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 03446367665bd..8d09af31f94bf 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -76,7 +76,7 @@ SampleProfileWriter::write(const StringMap &ProfileMap) { SecHdrTableEntry & SampleProfileWriterExtBinaryBase::getEntryInLayout(SecType Type) { auto SecIt = std::find_if( - SectionLayout.begin(), SectionLayout.end(), + SectionHdrLayout.begin(), SectionHdrLayout.end(), [=](const auto &Entry) -> bool { return Entry.Type == Type; }); return *SecIt; } @@ -143,6 +143,29 @@ std::error_code SampleProfileWriterExtBinaryBase::write( return sampleprof_error::success; } +std::error_code +SampleProfileWriterExtBinary::writeSample(const FunctionSamples &S) { + uint64_t Offset = OutputStream->tell(); + StringRef Name = S.getName(); + FuncOffsetTable[Name] = Offset - SecLBRProfileStart; + encodeULEB128(S.getHeadSamples(), *OutputStream); + return writeBody(S); +} + +std::error_code SampleProfileWriterExtBinary::writeFuncOffsetTable() { + auto &OS = *OutputStream; + + // Write out the table size. + encodeULEB128(FuncOffsetTable.size(), OS); + + // Write out FuncOffsetTable. 
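  // Each entry is a ULEB128 name-table index followed by the ULEB128 offset
  // of that function's profile relative to the start of the SecLBRProfile
  // section (recorded in writeSample above), so a reader can seek directly
  // to an individual function's samples.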
+ for (auto entry : FuncOffsetTable) { + writeNameIdx(entry.first); + encodeULEB128(entry.second, OS); + } + return sampleprof_error::success; +} + std::error_code SampleProfileWriterExtBinary::writeSections( const StringMap &ProfileMap) { uint64_t SectionStart = markSectionStart(SecProfSummary); @@ -163,6 +186,7 @@ std::error_code SampleProfileWriterExtBinary::writeSections( return EC; SectionStart = markSectionStart(SecLBRProfile); + SecLBRProfileStart = OutputStream->tell(); if (std::error_code EC = writeFuncProfiles(ProfileMap)) return EC; if (std::error_code EC = addNewSection(SecLBRProfile, SectionStart)) @@ -178,6 +202,12 @@ std::error_code SampleProfileWriterExtBinary::writeSections( if (std::error_code EC = addNewSection(SecProfileSymbolList, SectionStart)) return EC; + SectionStart = markSectionStart(SecFuncOffsetTable); + if (std::error_code EC = writeFuncOffsetTable()) + return EC; + if (std::error_code EC = addNewSection(SecFuncOffsetTable, SectionStart)) + return EC; + return sampleprof_error::success; } @@ -359,7 +389,7 @@ std::error_code SampleProfileWriterBinary::writeHeader( } void SampleProfileWriterExtBinaryBase::setToCompressAllSections() { - for (auto &Entry : SectionLayout) + for (auto &Entry : SectionHdrLayout) addSecFlags(Entry, SecFlagCompress); } @@ -369,7 +399,7 @@ void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) { void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type, SecFlags Flags) { - for (auto &Entry : SectionLayout) { + for (auto &Entry : SectionHdrLayout) { if (Entry.Type == Type) addSecFlags(Entry, Flags); } @@ -378,9 +408,9 @@ void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type, void SampleProfileWriterExtBinaryBase::allocSecHdrTable() { support::endian::Writer Writer(*OutputStream, support::little); - Writer.write(static_cast(SectionLayout.size())); + Writer.write(static_cast(SectionHdrLayout.size())); SecHdrTableOffset = OutputStream->tell(); - for (uint32_t i = 0; i < SectionLayout.size(); i++) { + for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) { Writer.write(static_cast(-1)); Writer.write(static_cast(-1)); Writer.write(static_cast(-1)); @@ -402,14 +432,15 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { IndexMap.insert({static_cast(SecHdrTable[i].Type), i}); } - // Write the sections in the order specified in SectionLayout. - // That is the sections order Reader will see. Note that the - // sections order in which Reader expects to read may be different - // from the order in which Writer is able to write, so we need - // to adjust the order in SecHdrTable to be consistent with - // SectionLayout when we write SecHdrTable to the memory. - for (uint32_t i = 0; i < SectionLayout.size(); i++) { - uint32_t idx = IndexMap[static_cast(SectionLayout[i].Type)]; + // Write the section header table in the order specified in + // SectionHdrLayout. That is the sections order Reader will see. + // Note that the sections order in which Reader expects to read + // may be different from the order in which Writer is able to + // write, so we need to adjust the order in SecHdrTable to be + // consistent with SectionHdrLayout when we write SecHdrTable + // to the memory. 
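  // For example, SecFuncOffsetTable can only be written after SecLBRProfile
  // (its offsets are unknown until the profiles have been emitted), but a
  // reader wants to consume it first so it can seek to individual profiles;
  // SectionHdrLayout reflects that read order, while SecHdrTable records the
  // order in which the sections were actually written.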
+ for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) { + uint32_t idx = IndexMap[static_cast(SectionHdrLayout[i].Type)]; Writer.write(static_cast(SecHdrTable[idx].Type)); Writer.write(static_cast(SecHdrTable[idx].Flags)); Writer.write(static_cast(SecHdrTable[idx].Offset)); diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp index c3f537b35243e..841e406a7b694 100644 --- a/llvm/lib/Support/FileCheck.cpp +++ b/llvm/lib/Support/FileCheck.cpp @@ -320,6 +320,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, const FileCheckRequest &Req) { bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot; + IgnoreCase = Req.IgnoreCase; PatternLoc = SMLoc::getFromPointer(PatternStr.data()); @@ -619,7 +620,8 @@ Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, // If this is a fixed string pattern, just match it now. if (!FixedStr.empty()) { MatchLen = FixedStr.size(); - size_t Pos = Buffer.find(FixedStr); + size_t Pos = IgnoreCase ? Buffer.find_lower(FixedStr) + : Buffer.find(FixedStr); if (Pos == StringRef::npos) return make_error(); return Pos; @@ -657,7 +659,10 @@ Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, } SmallVector MatchInfo; - if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) + unsigned int Flags = Regex::Newline; + if (IgnoreCase) + Flags |= Regex::IgnoreCase; + if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) return make_error(); // Successful regex match. diff --git a/llvm/lib/Support/FileCheckImpl.h b/llvm/lib/Support/FileCheckImpl.h index 001b3589d5fdc..06ce8301cec4b 100644 --- a/llvm/lib/Support/FileCheckImpl.h +++ b/llvm/lib/Support/FileCheckImpl.h @@ -428,6 +428,9 @@ class FileCheckPattern { /// line to the one with this CHECK. Optional LineNumber; + /// Ignore case while matching if set to true. + bool IgnoreCase = false; + public: FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, Optional Line = None) diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 621bccbf2a4c9..355c64b7d0793 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -32,34 +32,6 @@ class Executor { static Executor *getDefaultExecutor(); }; -#if defined(_MSC_VER) -/// An Executor that runs tasks via ConcRT. -class ConcRTExecutor : public Executor { - struct Taskish { - Taskish(std::function Task) : Task(Task) {} - - std::function Task; - - static void run(void *P) { - Taskish *Self = static_cast(P); - Self->Task(); - concurrency::Free(Self); - } - }; - -public: - virtual void add(std::function F) { - Concurrency::CurrentScheduler::ScheduleTask( - Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F)); - } -}; - -Executor *Executor::getDefaultExecutor() { - static ConcRTExecutor exec; - return &exec; -} - -#else /// An implementation of an Executor that runs closures on a thread pool /// in filo order. class ThreadPoolExecutor : public Executor { @@ -117,8 +89,7 @@ Executor *Executor::getDefaultExecutor() { static ThreadPoolExecutor exec; return &exec; } -#endif -} +} // namespace static std::atomic TaskGroupInstances; diff --git a/llvm/lib/Support/Statistic.cpp b/llvm/lib/Support/Statistic.cpp index e4f0535d21aa6..8b4177c7fba67 100644 --- a/llvm/lib/Support/Statistic.cpp +++ b/llvm/lib/Support/Statistic.cpp @@ -57,7 +57,7 @@ namespace { /// This class is also used to look up statistic values from applications that /// use LLVM. 
 class StatisticInfo {
-  std::vector<Statistic *> Stats;
+  std::vector<TrackingStatistic *> Stats;
 
   friend void llvm::PrintStatistics();
   friend void llvm::PrintStatistics(raw_ostream &OS);
@@ -66,14 +66,12 @@ class StatisticInfo {
   /// Sort statistics by debugtype,name,description.
   void sort();
 public:
-  using const_iterator = std::vector<Statistic *>::const_iterator;
+  using const_iterator = std::vector<TrackingStatistic *>::const_iterator;
 
   StatisticInfo();
   ~StatisticInfo();
 
-  void addStatistic(Statistic *S) {
-    Stats.push_back(S);
-  }
+  void addStatistic(TrackingStatistic *S) { Stats.push_back(S); }
 
   const_iterator begin() const { return Stats.begin(); }
   const_iterator end() const { return Stats.end(); }
@@ -90,7 +88,7 @@ static ManagedStatic<sys::SmartMutex<true> > StatLock;
 
 /// RegisterStatistic - The first time a statistic is bumped, this method is
 /// called.
-void Statistic::RegisterStatistic() {
+void TrackingStatistic::RegisterStatistic() {
   // If stats are enabled, inform StatInfo that this statistic should be
   // printed.
   // llvm_shutdown calls destructors while holding the ManagedStatic mutex.
@@ -135,15 +133,16 @@ bool llvm::AreStatisticsEnabled() {
 }
 
 void StatisticInfo::sort() {
-  llvm::stable_sort(Stats, [](const Statistic *LHS, const Statistic *RHS) {
-    if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType()))
-      return Cmp < 0;
+  llvm::stable_sort(
+      Stats, [](const TrackingStatistic *LHS, const TrackingStatistic *RHS) {
+        if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType()))
+          return Cmp < 0;
 
-    if (int Cmp = std::strcmp(LHS->getName(), RHS->getName()))
-      return Cmp < 0;
+        if (int Cmp = std::strcmp(LHS->getName(), RHS->getName()))
+          return Cmp < 0;
 
-    return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
-  });
+        return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
+      });
 }
 
 void StatisticInfo::reset() {
@@ -207,7 +206,7 @@ void llvm::PrintStatisticsJSON(raw_ostream &OS) {
   // Print all of the statistics.
OS << "{\n"; const char *delim = ""; - for (const Statistic *Stat : Stats.Stats) { + for (const TrackingStatistic *Stat : Stats.Stats) { OS << delim; assert(yaml::needsQuotes(Stat->getDebugType()) == yaml::QuotingType::None && "Statistic group/type name is simple."); diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp index bf28b2be56578..af8dd463e125d 100644 --- a/llvm/lib/Support/StringExtras.cpp +++ b/llvm/lib/Support/StringExtras.cpp @@ -60,7 +60,9 @@ void llvm::SplitString(StringRef Source, void llvm::printEscapedString(StringRef Name, raw_ostream &Out) { for (unsigned i = 0, e = Name.size(); i != e; ++i) { unsigned char C = Name[i]; - if (isPrint(C) && C != '\\' && C != '"') + if (C == '\\') + Out << '\\' << C; + else if (isPrint(C) && C != '"') Out << C; else Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index c74a361b2c712..631a2366062fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -209,15 +209,14 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset) const; + template + bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; - template - bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, - SDValue &Offset, SDValue &SLC) const; - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; SDValue Expand32BitAddress(SDValue Addr) const; @@ -643,7 +642,7 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { case 3: return AMDGPU::SGPR_96RegClassID; case 4: - return AMDGPU::SReg_128RegClassID; + return AMDGPU::SGPR_128RegClassID; case 5: return AMDGPU::SGPR_160RegClassID; case 8: @@ -783,7 +782,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SDValue RC, SubReg0, SubReg1; SDLoc DL(N); if (N->getValueType(0) == MVT::i128) { - RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); + RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { @@ -1606,14 +1605,48 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ); } +// Find a load or store from corresponding pattern root. +// Roots may be build_vector, bitconvert or their combinations. 
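// Illustrative example (not from this patch): for a pattern root shaped like
//   (bitconvert (build_vector (load ...), ...))
// the address-space check below still needs the underlying load, so this
// helper strips bitcasts and, for build_vector roots, searches the operands
// for the MemSDNode.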
+static MemSDNode* findMemSDNode(SDNode *N) { + N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); + if (MemSDNode *MN = dyn_cast(N)) + return MN; + assert(isa(N)); + for (SDValue V : N->op_values()) + if (MemSDNode *MN = + dyn_cast(AMDGPUTargetLowering::stripBitcast(V))) + return MN; + llvm_unreachable("cannot find MemSDNode in the pattern!"); +} + template bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const { - return static_cast(getTargetLowering())-> - SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC); + int64_t OffsetVal = 0; + + if (Subtarget->hasFlatInstOffsets() && + (!Subtarget->hasFlatSegmentOffsetBug() || + findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && + CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + int64_t COffsetVal = cast(N1)->getSExtValue(); + + const SIInstrInfo *TII = Subtarget->getInstrInfo(); + if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(), + IsSigned)) { + Addr = N0; + OffsetVal = COffsetVal; + } + } + + VAddr = Addr; + Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); + SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); + return true; } bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N, @@ -1625,10 +1658,10 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N, } bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N, - SDValue Addr, - SDValue &VAddr, - SDValue &Offset, - SDValue &SLC) const { + SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { return SelectFlatOffset(N, Addr, VAddr, Offset, SLC); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3027b21fb0537..c299e38b36371 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2828,54 +2828,6 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { return true; } -// Find a load or store from corresponding pattern root. -// Roots may be build_vector, bitconvert or their combinations. -static MemSDNode* findMemSDNode(SDNode *N) { - N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); - if (MemSDNode *MN = dyn_cast(N)) - return MN; - assert(isa(N)); - for (SDValue V : N->op_values()) - if (MemSDNode *MN = - dyn_cast(AMDGPUTargetLowering::stripBitcast(V))) - return MN; - llvm_unreachable("cannot find MemSDNode in the pattern!"); -} - -bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned, - SelectionDAG &DAG, - SDNode *N, - SDValue Addr, - SDValue &VAddr, - SDValue &Offset, - SDValue &SLC) const { - const GCNSubtarget &ST = - DAG.getMachineFunction().getSubtarget(); - int64_t OffsetVal = 0; - - if (ST.hasFlatInstOffsets() && - (!ST.hasFlatSegmentOffsetBug() || - findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && - DAG.isBaseWithConstantOffset(Addr)) { - SDValue N0 = Addr.getOperand(0); - SDValue N1 = Addr.getOperand(1); - int64_t COffsetVal = cast(N1)->getSExtValue(); - - const SIInstrInfo *TII = ST.getInstrInfo(); - if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(), - IsSigned)) { - Addr = N0; - OffsetVal = COffsetVal; - } - } - - VAddr = Addr; - Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16); - SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1); - - return true; -} - // Replace load of an illegal type with a store of a bitcast to a friendlier // type. 
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 2626acad23a3f..f9379f2e74a6d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -326,10 +326,6 @@ class AMDGPUTargetLowering : public TargetLowering { } AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; - - bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N, - SDValue Addr, SDValue &VAddr, SDValue &Offset, - SDValue &SLC) const; }; namespace AMDGPUISD { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index c1c111a762d47..7fe0298f1c33c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -64,6 +64,14 @@ static LegalityPredicate isSmallOddVector(unsigned TypeIdx) { }; } +static LegalityPredicate isWideVec16(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + const LLT EltTy = Ty.getScalarType(); + return EltTy.getSizeInBits() == 16 && Ty.getNumElements() > 2; + }; +} + static LegalizeMutation oneMoreElement(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; @@ -945,7 +953,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalForCartesianProduct(AllS32Vectors, {S32}) .legalForCartesianProduct(AllS64Vectors, {S64}) .clampNumElements(0, V16S32, V32S32) - .clampNumElements(0, V2S64, V16S64); + .clampNumElements(0, V2S64, V16S64) + .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16)); if (ST.hasScalarPackInsts()) BuildVector.legalFor({V2S16, S32}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9446814c8f818..aded210bd84bc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -323,6 +323,8 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( } } +// FIXME: Returns uniform if there's no source value information. This is +// probably wrong. static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) { if (!MI.hasOneMemOperand()) return false; @@ -1047,8 +1049,13 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, SmallVector SrcRegs(OpdMapper.getVRegs(1)); // If the pointer is an SGPR, we have nothing to do. 
- if (SrcRegs.empty()) - return false; + if (SrcRegs.empty()) { + Register PtrReg = MI.getOperand(1).getReg(); + const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); + if (PtrBank == &AMDGPU::SGPRRegBank) + return false; + SrcRegs.push_back(PtrReg); + } assert(LoadSize % MaxNonSmrdLoadSize == 0); @@ -2025,7 +2032,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - SmallVector OpdsMapping(MI.getNumOperands()); + SmallVector OpdsMapping(2); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); Register PtrReg = MI.getOperand(1).getReg(); @@ -2036,7 +2043,10 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const ValueMapping *ValMapping; const ValueMapping *PtrMapping; - if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); + + if (PtrBank == &AMDGPU::SGPRRegBank && + (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && AS != AMDGPUAS::PRIVATE_ADDRESS) && isInstrUniformNonExtLoadAlign4(MI)) { // We have a uniform instruction so we want to use an SMRD load diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 03815208ae47e..e8cf77161a14d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -885,9 +885,6 @@ bool GCNPassConfig::addInstSelector() { addPass(createSILowerI1CopiesPass()); addPass(createSIFixupVectorISelPass()); addPass(createSIAddIMGInitPass()); - // FIXME: Remove this once the phi on CF_END is cleaned up by either removing - // LCSSA or other ways. 
- addPass(&UnreachableMachineBlockElimID); return false; } @@ -1052,7 +1049,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( return true; if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG && - !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) { + !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) { return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg); } @@ -1101,7 +1098,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( if (YamlMFI.ArgInfo && (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer, - AMDGPU::SReg_128RegClass, + AMDGPU::SGPR_128RegClass, MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr, AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 56a46bcdaa44e..c7cd2f2c7bfd0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -46,10 +46,18 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase { Triple TargetTriple; + const TargetSubtargetInfo *ST; + const TargetLoweringBase *TLI; + + const TargetSubtargetInfo *getST() const { return ST; } + const TargetLoweringBase *getTLI() const { return TLI; } + public: explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) - : BaseT(TM, F.getParent()->getDataLayout()), - TargetTriple(TM->getTargetTriple()) {} + : BaseT(TM, F.getParent()->getDataLayout()), + TargetTriple(TM->getTargetTriple()), + ST(static_cast(TM->getSubtargetImpl(F))), + TLI(ST->getTargetLowering()) {} void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index e1845e2e8e879..f788ea6826ea3 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -178,7 +178,9 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, if (OldIdx != -1) { assert(OldIdx == NumOperands); assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)); - DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg); + auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI); + DPPInst.addReg(CombOldVGPR.Reg, Def ? 
0 : RegState::Undef, + CombOldVGPR.SubReg); ++NumOperands; } else { // TODO: this discards MAC/FMA instructions for now, let's add it later @@ -195,6 +197,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); DPPInst.addImm(Mod0->getImm()); ++NumOperands; + } else if (AMDGPU::getNamedOperandIdx(DPPOp, + AMDGPU::OpName::src0_modifiers) != -1) { + DPPInst.addImm(0); + ++NumOperands; } auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); assert(Src0); @@ -214,6 +220,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); DPPInst.addImm(Mod1->getImm()); ++NumOperands; + } else if (AMDGPU::getNamedOperandIdx(DPPOp, + AMDGPU::OpName::src1_modifiers) != -1) { + DPPInst.addImm(0); + ++NumOperands; } if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) { diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index dad9fedff25fb..93df7f3e49b96 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -606,14 +606,14 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, auto MBB = MI->getParent(); MachineInstr &BoundaryMI = *getFirstNonPrologue(MBB, TII); MachineBasicBlock::reverse_iterator B(BoundaryMI); - // Check if B should actually be a bondary. If not set the previous + // Check if B should actually be a boundary. If not set the previous // instruction as the boundary instead. if (!TII->isBasicBlockPrologue(*B)) B++; auto R = std::next(MI->getReverseIterator()); const unsigned Threshold = 50; - // Search until B or Threashold for a place to insert the initialization. + // Search until B or Threshold for a place to insert the initialization. for (unsigned I = 0; R != B && I < Threshold; ++R, ++I) if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) || TII->isSchedulingBoundary(*R, MBB, *MBB->getParent())) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 1f8b744155d68..698bcbdb9832c 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -581,13 +581,17 @@ void SIFoldOperands::foldOperand( if (FoldingImmLike && UseMI->isCopy()) { Register DestReg = UseMI->getOperand(0).getReg(); - const TargetRegisterClass *DestRC = Register::isVirtualRegister(DestReg) - ? MRI->getRegClass(DestReg) - : TRI->getPhysRegClass(DestReg); + + // Don't fold into a copy to a physical register. Doing so would interfere + // with the register coalescer's logic which would avoid redundant + // initializations. + if (DestReg.isPhysical()) + return; + + const TargetRegisterClass *DestRC = MRI->getRegClass(DestReg); Register SrcReg = UseMI->getOperand(1).getReg(); - if (Register::isVirtualRegister(DestReg) && - Register::isVirtualRegister(SrcReg)) { + if (SrcReg.isVirtual()) { // XXX - This can be an assert?
const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg); if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) { MachineRegisterInfo::use_iterator NextUse; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1afbdb96d32dd..ba637d4e5a634 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -125,10 +125,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass); addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass); - addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass); - addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass); + addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass); @@ -10494,7 +10494,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, // Combine the constants and the pointer. const SDValue Ops1[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), + DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32), Ptr, DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32), SubRegHi, @@ -10524,7 +10524,7 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32); const SDValue Ops[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), + DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32), PtrLo, DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32), PtrHi, @@ -10567,7 +10567,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, RC = &AMDGPU::SReg_96RegClass; break; case 128: - RC = &AMDGPU::SReg_128RegClass; + RC = &AMDGPU::SGPR_128RegClass; break; case 160: RC = &AMDGPU::SReg_160RegClass; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 1d486feb32c3b..db33c3ecb2e7a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4267,7 +4267,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, Register SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); Register SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); Register SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - Register SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + Register SRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); // Beginning of the loop, read the next Rsrc variant. 
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0) @@ -4406,7 +4406,7 @@ extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) { Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat(); // Zero64 = 0 @@ -6178,7 +6178,7 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { return false; const auto RCID = MI.getDesc().OpInfo[Idx].RegClass; - return RCID == AMDGPU::SReg_128RegClassID; + return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); } bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index d0a6d03144d6c..b21ee5c727b62 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -819,7 +819,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) { unsigned BaseSubReg = AddrReg->getSubReg(); unsigned BaseRegFlags = 0; if (CI.BaseOff) { - Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) .addImm(CI.BaseOff); @@ -912,7 +912,7 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) { unsigned BaseSubReg = AddrReg->getSubReg(); unsigned BaseRegFlags = 0; if (CI.BaseOff) { - Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) .addImm(CI.BaseOff); @@ -1120,7 +1120,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) { case 2: return &AMDGPU::SReg_64_XEXECRegClass; case 4: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 8: return &AMDGPU::SReg_256RegClass; case 16: diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 58f4590daf9c4..73115af31227e 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -189,7 +189,7 @@ unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( const SIRegisterInfo &TRI) { ArgInfo.PrivateSegmentBuffer = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass)); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass)); NumUserSGPRs += 4; return ArgInfo.PrivateSegmentBuffer.getRegister(); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index f4dd995316dde..39f1ff511a16e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -123,7 +123,7 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const GCNSubtarget &ST = MF.getSubtarget(); unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); - return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); + return 
getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass); } static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { @@ -1546,7 +1546,7 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( case 96: return &AMDGPU::SReg_96RegClass; case 128: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 160: return &AMDGPU::SReg_160RegClass; case 256: @@ -1576,7 +1576,7 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( case 3: return &AMDGPU::SReg_96RegClass; case 4: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 5: return &AMDGPU::SReg_160RegClass; case 8: @@ -1966,7 +1966,7 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, &AMDGPU::SReg_96RegClass; case 128: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass : - &AMDGPU::SReg_128RegClass; + &AMDGPU::SGPR_128RegClass; case 160: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass : &AMDGPU::SReg_160RegClass; @@ -1990,9 +1990,12 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, const TargetRegisterClass * SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const { - if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg())) + const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg()); + if (const RegisterBank *RB = RCOrRB.dyn_cast()) return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI); - return nullptr; + + const TargetRegisterClass *RC = RCOrRB.get(); + return getAllocatableClass(RC); } unsigned SIRegisterInfo::getVCC() const { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index ef5128f427333..82219cbdf3b2a 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -529,6 +529,7 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add SGPR_128, TTMP_128)> { let AllocationPriority = 15; + let isAllocatable = 0; } } // End CopyCost = 2 diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index de53cb3b59d97..684cd1def977c 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2079,6 +2079,38 @@ isProfitableToIfCvt(MachineBasicBlock &TBB, return PredCost <= UnpredCost; } +unsigned +ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const { + // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. + // ARM has a condition code field in every predicable instruction, using it + // doesn't change code size. + return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0; +} + +unsigned +ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { + // If this branch is likely to be folded into the comparison to form a + // CB(N)Z, then removing it won't reduce code size at all, because that will + // just replace the CB(N)Z with a CMP. + if (MI.getOpcode() == ARM::t2Bcc && + findCMPToFoldIntoCBZ(&MI, &getRegisterInfo())) + return 0; + + unsigned Size = getInstSizeInBytes(MI); + + // For Thumb2, all branches are 32-bit instructions during the if conversion + // pass, but may be replaced with 16-bit instructions during size reduction. 
+ // Since the branches considered by if conversion tend to be forward branches + // over small basic blocks, they are very likely to be in range for the + // narrow instructions, so we assume the final code size will be half what it + // currently is. + if (Subtarget.isThumb2()) + Size /= 2; + + return Size; +} + bool ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const { diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index e70695a4d97b5..c232b6f0b45dd 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -276,6 +276,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { return NumCycles == 1; } + unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const override; + unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override; + bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index c1365f5893038..d8b2c55eefeb0 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -265,6 +265,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::CTTZ, VT, Custom); setOperationAction(ISD::BITREVERSE, VT, Legal); setOperationAction(ISD::BSWAP, VT, Legal); + setOperationAction(ISD::SADDSAT, VT, Legal); + setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::SSUBSAT, VT, Legal); + setOperationAction(ISD::USUBSAT, VT, Legal); // No native support for these. setOperationAction(ISD::UDIV, VT, Expand); diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 3ba23d9812cf5..80b45ce891434 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1524,8 +1524,8 @@ let Predicates = [HasMVEInt] in { } class MVE_VQADDSUB size, list pattern=[]> - : MVE_int { + bits<2> size, ValueType vt> + : MVE_int { let Inst{28} = U; let Inst{25-23} = 0b110; @@ -1535,26 +1535,48 @@ class MVE_VQADDSUB size, ValueType VT> + : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>; +class MVE_VQSUB size, ValueType VT> + : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>; + +def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00, v16i8>; +def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>; +def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>; +def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00, v16i8>; +def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>; +def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>; + +def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00, v16i8>; +def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>; +def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>; +def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00, v16i8>; +def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, v8i16>; +def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>; + +let Predicates = [HasMVEInt] in { + foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in + foreach VT = [instr.VT] in + def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in + foreach VT = [instr.VT] in + def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in + foreach VT = [instr.VT] in + def : Pat<(VT 
(usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in + foreach VT = [instr.VT] in + def : Pat<(VT (ssubsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; } -class MVE_VQADD size, list pattern=[]> - : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>; -class MVE_VQSUB size, list pattern=[]> - : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>; - -def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00>; -def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>; -def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>; -def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00>; -def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>; -def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>; - -def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00>; -def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>; -def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>; -def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00>; -def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>; -def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>; class MVE_VABD_int size, list pattern=[]> : MVE_int<"vabd", suffix, size, pattern> { diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 1b442815c7ed9..400701c4e5c22 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -93,8 +93,6 @@ namespace llvm { const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama"; -const std::string BPFCoreSharedInfo::PatchableExtSecName = - ".BPF.patchable_externs"; } // namespace llvm using namespace llvm; diff --git a/llvm/lib/Target/BPF/BPFCORE.h b/llvm/lib/Target/BPF/BPFCORE.h index a6cb3cf533760..ed4778353e529 100644 --- a/llvm/lib/Target/BPF/BPFCORE.h +++ b/llvm/lib/Target/BPF/BPFCORE.h @@ -23,10 +23,8 @@ class BPFCoreSharedInfo { MAX_FIELD_RELOC_KIND, }; - /// The attribute attached to globals representing a member offset + /// The attribute attached to globals representing a field access static const std::string AmaAttr; - /// The section name to identify a patchable external global - static const std::string PatchableExtSecName; }; } // namespace llvm diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index b363179e25bb0..8cebc5b537fa3 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -11,19 +11,15 @@ // ldd r2, r1, 0 // add r3, struct_base_reg, r2 // -// Here @global should either present a AMA (abstruct member access) or -// a patchable extern variable. And these two kinds of accesses -// are subject to bpf load time patching. After this pass, the +// Here @global should represent an AMA (abstract member access). +// Such an access is subject to bpf load time patching.
After this pass, the // code becomes // ld_imm64 r1, @global // add r3, struct_base_reg, r1 // // Eventually, at BTF output stage, a relocation record will be generated // for ld_imm64 which should be replaced later by bpf loader: -// r1 = or -// add r3, struct_base_reg, r1 -// or -// ld_imm64 r1, +// r1 = // add r3, struct_base_reg, r1 // //===----------------------------------------------------------------------===// @@ -102,7 +98,6 @@ bool BPFMISimplifyPatchable::removeLD() { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - int64_t ImmVal = MI.getOperand(2).getImm(); MachineInstr *DefInst = MRI->getUniqueVRegDef(SrcReg); if (!DefInst) @@ -118,17 +113,8 @@ bool BPFMISimplifyPatchable::removeLD() { // Global variables representing structure offset or // patchable extern globals. if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) { - assert(ImmVal == 0); + assert(MI.getOperand(2).getImm() == 0); IsCandidate = true; - } else if (!GVar->hasInitializer() && GVar->hasExternalLinkage() && - GVar->getSection() == - BPFCoreSharedInfo::PatchableExtSecName) { - if (ImmVal == 0) - IsCandidate = true; - else - errs() << "WARNING: unhandled patchable extern " - << GVar->getName() << " with load offset " << ImmVal - << "\n"; } } } diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h index ef408dafd52b2..a13c862bf840a 100644 --- a/llvm/lib/Target/BPF/BTF.h +++ b/llvm/lib/Target/BPF/BTF.h @@ -39,13 +39,6 @@ /// struct SecFieldReloc for ELF section #2 /// A number of struct BPFFieldReloc for ELF section #2 /// ... -/// The ExternReloc subsection is defined as below: -/// BPFExternReloc Size -/// struct SecExternReloc for ELF section #1 -/// A number of struct BPFExternReloc for ELF section #1 -/// struct SecExternReloc for ELF section #2 -/// A number of struct BPFExternReloc for ELF section #2 -/// ... /// /// The section formats are also defined at /// https://github.com/torvalds/linux/blob/master/include/uapi/linux/btf.h @@ -63,7 +56,7 @@ enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 }; /// Sizes in bytes of various things in the BTF format. enum { HeaderSize = 24, - ExtHeaderSize = 40, + ExtHeaderSize = 32, CommonTypeSize = 12, BTFArraySize = 12, BTFEnumSize = 8, @@ -73,11 +66,9 @@ enum { SecFuncInfoSize = 8, SecLineInfoSize = 8, SecFieldRelocSize = 8, - SecExternRelocSize = 8, BPFFuncInfoSize = 8, BPFLineInfoSize = 16, BPFFieldRelocSize = 16, - BPFExternRelocSize = 8, }; /// The .BTF section header definition. @@ -215,8 +206,6 @@ struct ExtHeader { uint32_t LineInfoLen; ///< Length of line info section uint32_t FieldRelocOff; ///< Offset of offset reloc section uint32_t FieldRelocLen; ///< Length of offset reloc section - uint32_t ExternRelocOff; ///< Offset of extern reloc section - uint32_t ExternRelocLen; ///< Length of extern reloc section }; /// Specifying one function info. @@ -260,18 +249,6 @@ struct SecFieldReloc { uint32_t NumFieldReloc; ///< Number of offset reloc's in this section }; -/// Specifying one offset relocation. -struct BPFExternReloc { - uint32_t InsnOffset; ///< Byte offset in this section - uint32_t ExternNameOff; ///< The string for external variable -}; - -/// Specifying extern relocation's in one section. -struct SecExternReloc { - uint32_t SecNameOff; ///< Section name index in the .BTF string table - uint32_t NumExternReloc; ///< Number of extern reloc's in this section -}; - } // End namespace BTF. } // End namespace llvm. 
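The BTF.h change above drops the extern-relocation subsection, so the .BTF.ext extension header loses its two trailing 32-bit fields; that is all the ExtHeaderSize change from 40 to 32 amounts to. A minimal standalone sketch of the trimmed layout is below. It is illustrative only, not the project's header: the leading Magic/Version/Flags/HdrLen fields are assumed from the BTF format description rather than shown in this hunk, and the sketch namespace is made up.

#include <cstdint>

namespace sketch {
// Assumed mirror of the trimmed .BTF.ext extension header (leading fields
// taken from the BTF format description, not from the hunk above).
struct ExtHeader {
  uint16_t Magic;
  uint8_t Version;
  uint8_t Flags;
  uint32_t HdrLen;
  uint32_t FuncInfoOff;
  uint32_t FuncInfoLen;
  uint32_t LineInfoOff;
  uint32_t LineInfoLen;
  uint32_t FieldRelocOff;
  uint32_t FieldRelocLen;
  // ExternRelocOff and ExternRelocLen are gone: 40 - 2 * sizeof(uint32_t) == 32.
};
static_assert(sizeof(ExtHeader) == 32, "matches the new ExtHeaderSize");
} // namespace sketch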
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index a8f857343ec33..db551e739bd7c 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -752,9 +752,10 @@ void BTFDebug::emitBTFSection() { } void BTFDebug::emitBTFExtSection() { - // Do not emit section if empty FuncInfoTable and LineInfoTable. + // Do not emit section if empty FuncInfoTable and LineInfoTable + // and FieldRelocTable. if (!FuncInfoTable.size() && !LineInfoTable.size() && - !FieldRelocTable.size() && !ExternRelocTable.size()) + !FieldRelocTable.size()) return; MCContext &Ctx = OS.getContext(); @@ -766,8 +767,8 @@ void BTFDebug::emitBTFExtSection() { // Account for FuncInfo/LineInfo record size as well. uint32_t FuncLen = 4, LineLen = 4; - // Do not account for optional FieldReloc/ExternReloc. - uint32_t FieldRelocLen = 0, ExternRelocLen = 0; + // Do not account for optional FieldReloc. + uint32_t FieldRelocLen = 0; for (const auto &FuncSec : FuncInfoTable) { FuncLen += BTF::SecFuncInfoSize; FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize; @@ -780,15 +781,9 @@ void BTFDebug::emitBTFExtSection() { FieldRelocLen += BTF::SecFieldRelocSize; FieldRelocLen += FieldRelocSec.second.size() * BTF::BPFFieldRelocSize; } - for (const auto &ExternRelocSec : ExternRelocTable) { - ExternRelocLen += BTF::SecExternRelocSize; - ExternRelocLen += ExternRelocSec.second.size() * BTF::BPFExternRelocSize; - } if (FieldRelocLen) FieldRelocLen += 4; - if (ExternRelocLen) - ExternRelocLen += 4; OS.EmitIntValue(0, 4); OS.EmitIntValue(FuncLen, 4); @@ -796,8 +791,6 @@ void BTFDebug::emitBTFExtSection() { OS.EmitIntValue(LineLen, 4); OS.EmitIntValue(FuncLen + LineLen, 4); OS.EmitIntValue(FieldRelocLen, 4); - OS.EmitIntValue(FuncLen + LineLen + FieldRelocLen, 4); - OS.EmitIntValue(ExternRelocLen, 4); // Emit func_info table. OS.AddComment("FuncInfo"); @@ -848,22 +841,6 @@ void BTFDebug::emitBTFExtSection() { } } } - - // Emit extern reloc table. 
- if (ExternRelocLen) { - OS.AddComment("ExternReloc"); - OS.EmitIntValue(BTF::BPFExternRelocSize, 4); - for (const auto &ExternRelocSec : ExternRelocTable) { - OS.AddComment("Extern reloc section string offset=" + - std::to_string(ExternRelocSec.first)); - OS.EmitIntValue(ExternRelocSec.first, 4); - OS.EmitIntValue(ExternRelocSec.second.size(), 4); - for (const auto &ExternRelocInfo : ExternRelocSec.second) { - Asm->EmitLabelReference(ExternRelocInfo.Label, 4); - OS.EmitIntValue(ExternRelocInfo.ExternNameOff, 4); - } - } - } } void BTFDebug::beginFunctionImpl(const MachineFunction *MF) { @@ -1019,15 +996,6 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) { MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = dyn_cast(MDN); generateFieldReloc(MI, ORSym, Ty, GVar->getName()); - } else if (GVar && !GVar->hasInitializer() && GVar->hasExternalLinkage() && - GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) { - MCSymbol *ORSym = OS.getContext().createTempSymbol(); - OS.EmitLabel(ORSym); - - BTFExternReloc ExternReloc; - ExternReloc.Label = ORSym; - ExternReloc.ExternNameOff = addString(GVar->getName()); - ExternRelocTable[SecNameOff].push_back(ExternReloc); } } } @@ -1165,20 +1133,6 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) { OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); OutMI.addOperand(MCOperand::createImm(Imm)); return true; - } else if (GVar && !GVar->hasInitializer() && - GVar->hasExternalLinkage() && - GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) { - const IntegerType *IntTy = dyn_cast(GVar->getValueType()); - assert(IntTy); - // For patchable externals, emit "LD_imm64, ri, 0" if the external - // variable is 64bit width, emit "mov ri, 0" otherwise. - if (IntTy->getBitWidth() == 64) - OutMI.setOpcode(BPF::LD_imm64); - else - OutMI.setOpcode(BPF::MOV_ri); - OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); - OutMI.addOperand(MCOperand::createImm(0)); - return true; } } } diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index eec8614431663..c01e0d1d16128 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -231,12 +231,6 @@ struct BTFFieldReloc { uint32_t RelocKind; ///< What to patch the instruction }; -/// Represent one extern relocation. -struct BTFExternReloc { - const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc - uint32_t ExternNameOff; ///< The extern variable name -}; - /// Collect and emit BTF information. class BTFDebug : public DebugHandlerBase { MCStreamer &OS; @@ -251,7 +245,6 @@ class BTFDebug : public DebugHandlerBase { std::map> FuncInfoTable; std::map> LineInfoTable; std::map> FieldRelocTable; - std::map> ExternRelocTable; StringMap> FileContent; std::map> DataSecEntries; std::vector StructTypes; diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 65afb3650f800..c8313240a6789 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -129,9 +129,9 @@ void MipsSEDAGToDAGISel::emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB, MachineInstrBuilder MIB(MF, &MI); if (!Subtarget->isABI_O32()) { // N32, N64 // Save current return address. 
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR)) + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR64)) .addDef(Mips::AT_64) - .addUse(Mips::RA_64) + .addUse(Mips::RA_64, RegState::Undef) .addUse(Mips::ZERO_64); // Stops instruction above from being removed later on. MIB.addUse(Mips::AT_64, RegState::Implicit); @@ -139,7 +139,7 @@ void MipsSEDAGToDAGISel::emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB, // Save current return address. BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR)) .addDef(Mips::AT) - .addUse(Mips::RA) + .addUse(Mips::RA, RegState::Undef) .addUse(Mips::ZERO); // _mcount pops 2 words from stack. BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::ADDiu)) diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index b9e52a11274f0..da81d6d6019a2 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -78,7 +78,7 @@ namespace { class PPCAsmPrinter : public AsmPrinter { protected: - MapVector TOC; + MapVector TOC; const PPCSubtarget *Subtarget; StackMaps SM; @@ -89,7 +89,7 @@ class PPCAsmPrinter : public AsmPrinter { StringRef getPassName() const override { return "PowerPC Assembly Printer"; } - MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); + MCSymbol *lookUpOrCreateTOCEntry(const MCSymbol *Sym); bool doInitialization(Module &M) override { if (!TOC.empty()) @@ -338,7 +338,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. -MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { +MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym) { MCSymbol *&TOCEntry = TOC[Sym]; if (!TOCEntry) TOCEntry = createTempSymbol("C"); @@ -512,6 +512,22 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, .addExpr(SymVar)); } +/// Map the machine operand to its corresponding MCSymbol. +static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, AsmPrinter &AP) { + switch(MO.getType()) { + case MachineOperand::MO_GlobalAddress: + return AP.getSymbol(MO.getGlobal()); + case MachineOperand::MO_ConstantPoolIndex: + return AP.GetCPISymbol(MO.getIndex()); + case MachineOperand::MO_JumpTableIndex: + return AP.GetJTISymbol(MO.getIndex()); + case MachineOperand::MO_BlockAddress: + return AP.GetBlockAddressSymbol(MO.getBlockAddress()); + default: + llvm_unreachable("Unexpected operand type to get symbol."); + } +} + /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. /// @@ -668,16 +684,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Unexpected operand type for LWZtoc pseudo."); // Map the operand to its corresponding MCSymbol. - MCSymbol *MOSymbol = nullptr; - if (MO.isGlobal()) - MOSymbol = getSymbol(MO.getGlobal()); - else if (MO.isCPI()) - MOSymbol = GetCPISymbol(MO.getIndex()); - else if (MO.isJTI()) - MOSymbol = GetJTISymbol(MO.getIndex()); - else if (MO.isBlockAddress()) - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); const bool IsAIX = TM.getTargetTriple().isOSAIX(); // Create a reference to the GOT entry for the symbol. 
The GOT entry will be @@ -726,24 +733,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %x3 = LDtoc @min1, %x2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to LD, and the global address operand to be a - // reference to the TOC entry we will synthesize later. + // Change the opcode to LD. TmpInst.setOpcode(PPC::LD); - const MachineOperand &MO = MI->getOperand(1); - // Map symbol -> label of TOC entry - assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()); - MCSymbol *MOSymbol = nullptr; - if (MO.isGlobal()) - MOSymbol = getSymbol(MO.getGlobal()); - else if (MO.isCPI()) - MOSymbol = GetCPISymbol(MO.getIndex()); - else if (MO.isJTI()) - MOSymbol = GetJTISymbol(MO.getIndex()); - else if (MO.isBlockAddress()) - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand!"); - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + // Map the machine operand to its corresponding MCSymbol, then map the + // global address operand to be a reference to the TOC entry we will + // synthesize later. + MCSymbol *TOCEntry = + lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this)); const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, @@ -757,32 +758,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %xd = ADDIStocHA8 %x2, @sym LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to ADDIS8. If the global address is external, has - // common linkage, is a non-local function address, or is a jump table - // address, then generate a TOC entry and reference that. Otherwise - // reference the symbol directly. + // Change the opcode to ADDIS8. If the global address is the address of + // an external symbol, is a jump table address, is a block address, or is a + // constant pool index with large code model enabled, then generate a TOC + // entry and reference that. Otherwise, reference the symbol directly. TmpInst.setOpcode(PPC::ADDIS8); + const MachineOperand &MO = MI->getOperand(2); - assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || - MO.isBlockAddress()) && + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand for ADDIStocHA8!"); - MCSymbol *MOSymbol = nullptr; - bool GlobalToc = false; - - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - MOSymbol = getSymbol(GV); - GlobalToc = Subtarget->isGVIndirectSymbol(GV); - } else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - } else if (MO.isJTI()) { - MOSymbol = GetJTISymbol(MO.getIndex()); - } else if (MO.isBlockAddress()) { - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - } + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + const bool GlobalToc = + MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal()); if (GlobalToc || MO.isJTI() || MO.isBlockAddress() || - TM.getCodeModel() == CodeModel::Large) + (MO.isCPI() && TM.getCodeModel() == CodeModel::Large)) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = @@ -803,36 +794,26 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %xd = LDtocL @sym, %xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to LD. 
If the global address is external, has - // common linkage, or is a jump table address, then reference the - // associated TOC entry. Otherwise reference the symbol directly. + // Change the opcode to LD. If the global address is the address of + // an external symbol, is a jump table address, is a block address, or is + // a constant pool index with large code model enabled, then generate a + // TOC entry and reference that. Otherwise, reference the symbol directly. TmpInst.setOpcode(PPC::LD); + const MachineOperand &MO = MI->getOperand(1); assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand for LDtocL!"); - MCSymbol *MOSymbol = nullptr; - if (MO.isJTI()) - MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); - else if (MO.isBlockAddress()) { - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } - else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - if (TM.getCodeModel() == CodeModel::Large) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } - else if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - MOSymbol = getSymbol(GV); - LLVM_DEBUG( - assert((Subtarget->isGVIndirectSymbol(GV)) && - "LDtocL used on symbol that could be accessed directly is " - "invalid. Must match ADDIStocHA8.")); + LLVM_DEBUG(assert( + (!MO.isGlobal() || Subtarget->isGVIndirectSymbol(MO.getGlobal())) && + "LDtocL used on symbol that could be accessed directly is " + "invalid. Must match ADDIStocHA8.")); + + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, @@ -845,26 +826,21 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %xd = ADDItocL %xs, @sym LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to ADDI8. If the global address is external, then - // generate a TOC entry and reference that. Otherwise reference the + // Change the opcode to ADDI8. If the global address is external, then + // generate a TOC entry and reference that. Otherwise, reference the // symbol directly. 
TmpInst.setOpcode(PPC::ADDI8); + const MachineOperand &MO = MI->getOperand(2); - assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); - MCSymbol *MOSymbol = nullptr; - - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - LLVM_DEBUG(assert(!(Subtarget->isGVIndirectSymbol(GV)) && - "Interposable definitions must use indirect access.")); - MOSymbol = getSymbol(GV); - } else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - } + assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL."); + + LLVM_DEBUG( + assert(!(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) && + "Interposable definitions must use indirect access.")); const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, - OutContext); + MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this), + MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; @@ -1400,15 +1376,16 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); OutStreamer->SwitchSection(Section); - for (MapVector::iterator I = TOC.begin(), - E = TOC.end(); I != E; ++I) { - OutStreamer->EmitLabel(I->second); - MCSymbol *S = I->first; + for (const auto &TOCMapPair: TOC) { + const MCSymbol *const TOCEntryTarget = TOCMapPair.first; + MCSymbol *const TOCEntryLabel = TOCMapPair.second; + + OutStreamer->EmitLabel(TOCEntryLabel); if (isPPC64) { - TS.emitTCEntry(*S); + TS.emitTCEntry(*TOCEntryTarget); } else { OutStreamer->EmitValueToAlignment(4); - OutStreamer->EmitSymbolValue(S, 4); + OutStreamer->EmitSymbolValue(TOCEntryTarget, 4); } } } diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index 1de01f9bcbc3c..b1c0433641ddc 100644 --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -117,8 +117,6 @@ namespace { if (!AfterBBI->modifiesRegister(Reg, TRI)) continue; - assert(DeadOrKillToUnset && - "Shouldn't overwrite a register before it is killed"); // Finish scanning because Reg is overwritten by a non-load // instruction. if (AfterBBI->getOpcode() != Opc) @@ -134,12 +132,15 @@ namespace { // It loads same immediate value to the same Reg, which is redundant. // We would unset kill flag in previous Reg usage to extend live range // of Reg first, then remove the redundancy. 
- LLVM_DEBUG(dbgs() << " Unset dead/kill flag of " << *DeadOrKillToUnset - << " from " << *DeadOrKillToUnset->getParent()); - if (DeadOrKillToUnset->isDef()) - DeadOrKillToUnset->setIsDead(false); - else - DeadOrKillToUnset->setIsKill(false); + if (DeadOrKillToUnset) { + LLVM_DEBUG(dbgs() + << " Unset dead/kill flag of " << *DeadOrKillToUnset + << " from " << *DeadOrKillToUnset->getParent()); + if (DeadOrKillToUnset->isDef()) + DeadOrKillToUnset->setIsDead(false); + else + DeadOrKillToUnset->setIsKill(false); + } DeadOrKillToUnset = AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); if (DeadOrKillToUnset) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index bc63382aa8f9b..f7af38459ab67 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -52,7 +52,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // Print any additional variadic operands. const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - if (Desc.isVariadic()) + if (Desc.isVariadic()) { + if (Desc.getNumOperands() == 0 && MI->getNumOperands() > 0) + OS << "\t"; for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) { // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because // we have an extra flags operand which is not currently printed, for @@ -63,6 +65,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, OS << ", "; printOperand(MI, I, OS); } + } // Print any added annotation. printAnnotation(OS, Annot); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 86b3f46856065..5d8b873ce23bc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -332,43 +332,15 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { // These represent values which are live into the function entry, so there's // no instruction to emit. break; - case WebAssembly::FALLTHROUGH_RETURN_I32: - case WebAssembly::FALLTHROUGH_RETURN_I32_S: - case WebAssembly::FALLTHROUGH_RETURN_I64: - case WebAssembly::FALLTHROUGH_RETURN_I64_S: - case WebAssembly::FALLTHROUGH_RETURN_F32: - case WebAssembly::FALLTHROUGH_RETURN_F32_S: - case WebAssembly::FALLTHROUGH_RETURN_F64: - case WebAssembly::FALLTHROUGH_RETURN_F64_S: - case WebAssembly::FALLTHROUGH_RETURN_v16i8: - case WebAssembly::FALLTHROUGH_RETURN_v16i8_S: - case WebAssembly::FALLTHROUGH_RETURN_v8i16: - case WebAssembly::FALLTHROUGH_RETURN_v8i16_S: - case WebAssembly::FALLTHROUGH_RETURN_v4i32: - case WebAssembly::FALLTHROUGH_RETURN_v4i32_S: - case WebAssembly::FALLTHROUGH_RETURN_v2i64: - case WebAssembly::FALLTHROUGH_RETURN_v2i64_S: - case WebAssembly::FALLTHROUGH_RETURN_v4f32: - case WebAssembly::FALLTHROUGH_RETURN_v4f32_S: - case WebAssembly::FALLTHROUGH_RETURN_v2f64: - case WebAssembly::FALLTHROUGH_RETURN_v2f64_S: { + case WebAssembly::FALLTHROUGH_RETURN: { // These instructions represent the implicit return at the end of a - // function body. Always pops one value off the stack. + // function body. 
if (isVerbose()) { - OutStreamer->AddComment("fallthrough-return-value"); + OutStreamer->AddComment("fallthrough-return"); OutStreamer->AddBlankLine(); } break; } - case WebAssembly::FALLTHROUGH_RETURN_VOID: - case WebAssembly::FALLTHROUGH_RETURN_VOID_S: - // This instruction represents the implicit return at the end of a - // function body with no return value. - if (isVerbose()) { - OutStreamer->AddComment("fallthrough-return-void"); - OutStreamer->AddBlankLine(); - } - break; case WebAssembly::COMPILER_FENCE: // This is a compiler barrier that prevents instruction reordering during // backend compilation, and should not be emitted. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 10327091aea32..fbccc58b13a67 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -1227,11 +1227,11 @@ getDepth(const SmallVectorImpl &Stack, /// checks for such cases and fixes up the signatures. void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { const auto &MFI = *MF.getInfo(); - assert(MFI.getResults().size() <= 1); if (MFI.getResults().empty()) return; + // TODO: Generalize from value types to function types for multivalue WebAssembly::ExprType RetType; switch (MFI.getResults().front().SimpleTy) { case MVT::i32: @@ -1266,10 +1266,14 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { if (MI.isPosition() || MI.isDebugInstr()) continue; if (MI.getOpcode() == WebAssembly::END_BLOCK) { + if (MFI.getResults().size() > 1) + report_fatal_error("Multivalue block signatures not implemented yet"); EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; } if (MI.getOpcode() == WebAssembly::END_LOOP) { + if (MFI.getResults().size() > 1) + report_fatal_error("Multivalue loop signatures not implemented yet"); EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 48d19c0893b80..c932f985489ab 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -1302,51 +1302,33 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() == 0) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(WebAssembly::RETURN_VOID)); + TII.get(WebAssembly::RETURN)); return true; } + // TODO: support multiple return in FastISel + if (Ret->getNumOperands() > 1) + return false; + Value *RV = Ret->getOperand(0); if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy()) return false; - unsigned Opc; switch (getSimpleType(RV->getType())) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: - Opc = WebAssembly::RETURN_I32; - break; case MVT::i64: - Opc = WebAssembly::RETURN_I64; - break; case MVT::f32: - Opc = WebAssembly::RETURN_F32; - break; case MVT::f64: - Opc = WebAssembly::RETURN_F64; - break; case MVT::v16i8: - Opc = WebAssembly::RETURN_v16i8; - break; case MVT::v8i16: - Opc = WebAssembly::RETURN_v8i16; - break; case MVT::v4i32: - Opc = WebAssembly::RETURN_v4i32; - break; case MVT::v2i64: - Opc = WebAssembly::RETURN_v2i64; - break; case MVT::v4f32: - Opc = WebAssembly::RETURN_v4f32; - break; case MVT::v2f64: - Opc = WebAssembly::RETURN_v2f64; - break; case MVT::exnref: - Opc = WebAssembly::RETURN_EXNREF; break; default: return false; @@ -1363,7 +1345,9 @@ bool 
WebAssemblyFastISel::selectRet(const Instruction *I) { if (Reg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::RETURN)) + .addReg(Reg); return true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 538234384af15..f06afdbcea9eb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -852,8 +852,8 @@ bool WebAssemblyTargetLowering::CanLowerReturn( CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, const SmallVectorImpl &Outs, LLVMContext & /*Context*/) const { - // WebAssembly can't currently handle returning tuples. - return Outs.size() <= 1; + // WebAssembly can only handle returning tuples with multivalue enabled + return Subtarget->hasMultivalue() || Outs.size() <= 1; } SDValue WebAssemblyTargetLowering::LowerReturn( @@ -861,7 +861,8 @@ SDValue WebAssemblyTargetLowering::LowerReturn( const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); + assert((Subtarget->hasMultivalue() || Outs.size() <= 1) && + "MVP WebAssembly can only return up to one value"); if (!callingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 1870c5bc34b06..1afc9a8790dcd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -84,49 +84,19 @@ let isTerminator = 1, isBarrier = 1 in defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] -multiclass RETURN { - defm RETURN_#vt : I<(outs), (ins vt:$val), (outs), (ins), - [(WebAssemblyreturn vt:$val)], - "return \t$val", "return", 0x0f>; - // Equivalent to RETURN_#vt, for use at the end of a function when wasm - // semantics return by falling off the end of the block. - let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), (outs), (ins), []>; -} - -multiclass SIMD_RETURN { - defm RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins), - [(WebAssemblyreturn (vt V128:$val))], - "return \t$val", "return", 0x0f>, - Requires<[HasSIMD128]>; - // Equivalent to RETURN_#vt, for use at the end of a function when wasm - // semantics return by falling off the end of the block. - let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins), - []>, - Requires<[HasSIMD128]>; -} let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { let isReturn = 1 in { - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - - defm RETURN_VOID : NRI<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>; - - // This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt. 
- let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_VOID : NRI<(outs), (ins), []>; + +defm RETURN : I<(outs), (ins variable_ops), (outs), (ins), + [(WebAssemblyreturn)], + "return", "return", 0x0f>; +// Equivalent to RETURN, for use at the end of a function when wasm +// semantics return by falling off the end of the block. +let isCodeGenOnly = 1 in +defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>; + } // isReturn = 1 defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 73ddbe85d5511..0449014813812 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -106,7 +106,8 @@ def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE", def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT", SDT_WebAssemblyArgument>; def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN", - SDT_WebAssemblyReturn, [SDNPHasChain]>; + SDT_WebAssemblyReturn, + [SDNPHasChain, SDNPVariadic]>; def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", SDT_WebAssemblyWrapper>; def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC", diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index 94b2bac7e64ce..e4cc2389147bc 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -49,10 +49,12 @@ void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F, computeLegalValueVTs(F, TM, Ty->getReturnType(), Results); MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits()); - if (Results.size() > 1) { - // WebAssembly currently can't lower returns of multiple values without - // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So - // replace multiple return values with a pointer parameter. + if (Results.size() > 1 && + !TM.getSubtarget(F).hasMultivalue()) { + // WebAssembly can't lower returns of multiple values without demoting to + // sret unless multivalue is enabled (see + // WebAssemblyTargetLowering::CanLowerReturn). So replace multiple return + // values with a poitner parameter. Results.clear(); Params.push_back(PtrVT); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index 63b87d6bdc5e0..ea6cd09a604c5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -75,9 +75,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, const MachineFunction &MF, WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI, - const WebAssemblyInstrInfo &TII, - unsigned FallthroughOpc, - unsigned CopyLocalOpc) { + const WebAssemblyInstrInfo &TII) { if (DisableWebAssemblyFallthroughReturnOpt) return false; if (&MBB != &MF.back()) @@ -90,13 +88,36 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, if (&MI != &*End) return false; - if (FallthroughOpc != WebAssembly::FALLTHROUGH_RETURN_VOID) { - // If the operand isn't stackified, insert a COPY to read the operand and - // stackify it. - MachineOperand &MO = MI.getOperand(0); + for (auto &MO : MI.explicit_operands()) { + // If the operand isn't stackified, insert a COPY to read the operands and + // stackify them. 
Register Reg = MO.getReg(); if (!MFI.isVRegStackified(Reg)) { - Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + unsigned CopyLocalOpc; + const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); + switch (RegClass->getID()) { + case WebAssembly::I32RegClassID: + CopyLocalOpc = WebAssembly::COPY_I32; + break; + case WebAssembly::I64RegClassID: + CopyLocalOpc = WebAssembly::COPY_I64; + break; + case WebAssembly::F32RegClassID: + CopyLocalOpc = WebAssembly::COPY_F32; + break; + case WebAssembly::F64RegClassID: + CopyLocalOpc = WebAssembly::COPY_F64; + break; + case WebAssembly::V128RegClassID: + CopyLocalOpc = WebAssembly::COPY_V128; + break; + case WebAssembly::EXNREFRegClassID: + CopyLocalOpc = WebAssembly::COPY_EXNREF; + break; + default: + llvm_unreachable("Unexpected register class for return operand"); + } + Register NewReg = MRI.createVirtualRegister(RegClass); BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg) .addReg(Reg); MO.setReg(NewReg); @@ -104,8 +125,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, } } - // Rewrite the return. - MI.setDesc(TII.get(FallthroughOpc)); + MI.setDesc(TII.get(WebAssembly::FALLTHROUGH_RETURN)); return true; } @@ -157,60 +177,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { break; } // Optimize away an explicit void return at the end of the function. - case WebAssembly::RETURN_I32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32, - WebAssembly::COPY_I32); - break; - case WebAssembly::RETURN_I64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64, - WebAssembly::COPY_I64); - break; - case WebAssembly::RETURN_F32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32, - WebAssembly::COPY_F32); - break; - case WebAssembly::RETURN_F64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64, - WebAssembly::COPY_F64); - break; - case WebAssembly::RETURN_v16i8: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v16i8, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v8i16: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v8i16, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v4i32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v2i64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v4f32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v2f64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_VOID: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID, - WebAssembly::INSTRUCTION_LIST_END); + case WebAssembly::RETURN: + Changed |= maybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII); break; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp index 54ca3721cf59b..ad0f7134c255b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4859,6 +4859,8 @@ bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const { + assert(cast(Load)->isSimple() && "illegal to narrow"); + // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF // relocation target a movq or addq instruction: don't let the load shrink. SDValue BasePtr = cast(Load)->getBasePtr(); @@ -7724,7 +7726,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) { if (ISD::isNON_EXTLoad(Elt.getNode())) { auto *BaseLd = cast(Elt); - if (BaseLd->getMemOperand()->getFlags() & MachineMemOperand::MOVolatile) + if (!BaseLd->isSimple()) return false; Ld = BaseLd; ByteOffset = 0; @@ -7878,8 +7880,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { auto MMOFlags = LDBase->getMemOperand()->getFlags(); - assert(!(MMOFlags & MachineMemOperand::MOVolatile) && - "Cannot merge volatile loads."); + assert(LDBase->isSimple() && + "Cannot merge volatile or atomic loads."); SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags); @@ -39828,6 +39830,9 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) { static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { + if (!Subtarget.hasSSE2()) + return SDValue(); + EVT SVT = VT.getScalarType(); EVT InVT = In.getValueType(); EVT InSVT = InVT.getScalarType(); @@ -39839,21 +39844,49 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); } + + // If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is + // split across two registers. We can use a packusdw+perm to clamp to 0-65535 + // and concatenate at the same time. Then we can use a final vpmovuswb to + // clip to 0-255. + if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && + InVT == MVT::v16i32 && VT == MVT::v16i8) { + if (auto USatVal = detectSSatPattern(In, VT, true)) { + // Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB. + SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal, + DL, DAG, Subtarget); + assert(Mid && "Failed to pack!"); + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid); + } + } + + // vXi32 truncate instructions are available with AVX512F. + // vXi16 truncate instructions are only available with AVX512BW. + // For 256-bit or smaller vectors, we require VLX. + // FIXME: We could widen truncates to 512 to remove the VLX restriction. 
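// [Editor's sketch -- illustrative, not part of the patch; the helper name is
// hypothetical.] The saturation pattern detectSSatPattern() is looking for is
// the scalar clamp below: a signed 32-bit value clamped to 0..255 and then
// truncated. The vector combines express the same operation with
// PACKSSDW/PACKUSDW/VPMOVUSWB sequences instead.
static inline unsigned char clampS32ToU8(int X) {
  return (unsigned char)(X < 0 ? 0 : (X > 255 ? 255 : X));
}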
+ bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) || + (Subtarget.hasBWI() && InSVT == MVT::i16)) && + (Subtarget.hasVLX() || InVT.getSizeInBits() > 256); + if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) && - !(Subtarget.hasAVX512() && InSVT == MVT::i32) && - !(Subtarget.hasBWI() && InSVT == MVT::i16) && + !PreferAVX512 && (SVT == MVT::i8 || SVT == MVT::i16) && (InSVT == MVT::i16 || InSVT == MVT::i32)) { if (auto USatVal = detectSSatPattern(In, VT, true)) { // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW). - if (SVT == MVT::i8 && InSVT == MVT::i32) { + // Only do this when the result is at least 64 bits or we'll leaving + // dangling PACKSSDW nodes. + if (SVT == MVT::i8 && InSVT == MVT::i32 && + VT.getVectorNumElements() >= 8) { EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements()); SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL, DAG, Subtarget); - if (Mid) - return truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, - Subtarget); + assert(Mid && "Failed to pack!"); + SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, + Subtarget); + assert(V && "Failed to pack!"); + return V; } else if (SVT == MVT::i8 || Subtarget.hasSSE41()) return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG, Subtarget); @@ -40415,6 +40448,19 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, MVT::v16i8, St->getMemOperand()); } + // Try to fold a vpmovuswb 256->128 into a truncating store. + // FIXME: Generalize this to other types. + // FIXME: Do the same for signed saturation. + if (!St->isTruncatingStore() && VT == MVT::v16i8 && + St->getValue().getOpcode() == X86ISD::VTRUNCUS && + St->getValue().getOperand(0).getValueType() == MVT::v16i16 && + TLI.isTruncStoreLegal(MVT::v16i16, MVT::v16i8) && + St->getValue().hasOneUse()) { + return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), + dl, St->getValue().getOperand(0), St->getBasePtr(), + MVT::v16i8, St->getMemOperand(), DAG); + } + // Optimize trunc store (of multiple scalars) to shuffle and store. // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. @@ -41208,10 +41254,14 @@ static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG) { /// In this case we go though all bitcasts. /// This also recognizes splat of a negated value and returns the splat of that /// value. -static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { +static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) { if (N->getOpcode() == ISD::FNEG) return N->getOperand(0); + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return SDValue(); + unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits(); SDValue Op = peekThroughBitcasts(SDValue(N, 0)); @@ -41225,7 +41275,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here. if (!SVOp->getOperand(1).isUndef()) return SDValue(); - if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode())) + if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode(), Depth + 1)) if (NegOp0.getValueType() == VT) // FIXME: Can we do better? 
return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT), SVOp->getMask()); @@ -41239,7 +41289,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { SDValue InsVal = Op.getOperand(1); if (!InsVector.isUndef()) return SDValue(); - if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode())) + if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1)) if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector, NegInsVal, Op.getOperand(2)); @@ -41378,6 +41428,101 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, return SDValue(); } +char X86TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg patterns are removable even if they have multiple uses. + if (isFNEG(DAG, Op.getNode(), Depth)) + return 2; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + EVT VT = Op.getValueType(); + EVT SVT = VT.getScalarType(); + switch (Op.getOpcode()) { + case ISD::FMA: + case X86ISD::FMSUB: + case X86ISD::FNMADD: + case X86ISD::FNMSUB: + case X86ISD::FMADD_RND: + case X86ISD::FMSUB_RND: + case X86ISD::FNMADD_RND: + case X86ISD::FNMSUB_RND: { + if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || + !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations) + break; + + // This is always negatible for free but we might be able to remove some + // extra operand negations as well. + for (int i = 0; i != 3; ++i) { + char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V == 2) + return V; + } + return 1; + } + } + + return TargetLowering::isNegatibleForFree(Op, DAG, LegalOperations, + ForCodeSize, Depth); +} + +SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg patterns are removable even if they have multiple uses. + if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) + return DAG.getBitcast(Op.getValueType(), Arg); + + EVT VT = Op.getValueType(); + EVT SVT = VT.getScalarType(); + unsigned Opc = Op.getOpcode(); + switch (Opc) { + case ISD::FMA: + case X86ISD::FMSUB: + case X86ISD::FNMADD: + case X86ISD::FNMSUB: + case X86ISD::FMADD_RND: + case X86ISD::FMSUB_RND: + case X86ISD::FNMADD_RND: + case X86ISD::FNMSUB_RND: { + if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || + !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations) + break; + + // This is always negatible for free but we might be able to remove some + // extra operand negations as well. + SmallVector NewOps(Op.getNumOperands(), SDValue()); + for (int i = 0; i != 3; ++i) { + char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V == 2) + NewOps[i] = getNegatedExpression(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + bool NegA = !!NewOps[0]; + bool NegB = !!NewOps[1]; + bool NegC = !!NewOps[2]; + unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true); + + // Fill in the non-negated ops with the original values. 
+ for (int i = 0, e = Op.getNumOperands(); i != e; ++i) + if (!NewOps[i]) + NewOps[i] = Op.getOperand(i); + return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps); + } + } + + return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, + ForCodeSize, Depth); +} + static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = N->getSimpleValueType(0); @@ -42183,12 +42328,14 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, } static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); // Let legalize expand this if it isn't a legal type yet. - if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VT)) return SDValue(); EVT ScalarVT = VT.getScalarType(); @@ -42199,17 +42346,21 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, SDValue B = N->getOperand(1); SDValue C = N->getOperand(2); - auto invertIfNegative = [&DAG](SDValue &V) { - if (SDValue NegVal = isFNEG(DAG, V.getNode())) { - V = DAG.getBitcast(V.getValueType(), NegVal); + auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { + bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool LegalOperations = !DCI.isBeforeLegalizeOps(); + if (TLI.isNegatibleForFree(V, DAG, LegalOperations, CodeSize) == 2) { + V = TLI.getNegatedExpression(V, DAG, LegalOperations, CodeSize); return true; } // Look through extract_vector_elts. If it comes from an FNEG, create a // new extract from the FNEG input. if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isNullConstant(V.getOperand(1))) { - if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) { - NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal); + SDValue Vec = V.getOperand(0); + if (TLI.isNegatibleForFree(Vec, DAG, LegalOperations, CodeSize) == 2) { + SDValue NegVal = + TLI.getNegatedExpression(Vec, DAG, LegalOperations, CodeSize); V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), NegVal, V.getOperand(1)); return true; @@ -42239,25 +42390,25 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C) // Combine FMSUBADD(A, B, FNEG(C)) -> FMADDSUB(A, B, C) static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { + TargetLowering::DAGCombinerInfo &DCI) { SDLoc dl(N); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool LegalOperations = !DCI.isBeforeLegalizeOps(); - SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode()); - if (!NegVal) - return SDValue(); - - // FIXME: Should we bitcast instead? 
- if (NegVal.getValueType() != VT) + SDValue N2 = N->getOperand(2); + if (TLI.isNegatibleForFree(N2, DAG, LegalOperations, CodeSize) != 2) return SDValue(); + SDValue NegN2 = TLI.getNegatedExpression(N2, DAG, LegalOperations, CodeSize); unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false); if (N->getNumOperands() == 4) return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), - NegVal, N->getOperand(3)); + NegN2, N->getOperand(3)); return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), - NegVal); + NegN2); } static SDValue combineZext(SDNode *N, SelectionDAG &DAG, @@ -44623,11 +44774,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FNMADD_RND: case X86ISD::FNMSUB: case X86ISD::FNMSUB_RND: - case ISD::FMA: return combineFMA(N, DAG, Subtarget); + case ISD::FMA: return combineFMA(N, DAG, DCI, Subtarget); case X86ISD::FMADDSUB_RND: case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: - case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget); + case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI); case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget); case X86ISD::MGATHER: case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 5c967ca1eca39..33a8e379e133d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -802,6 +802,17 @@ namespace llvm { /// and some i16 instructions are slow. bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; + /// Return 1 if we can compute the negated form of the specified expression + /// for the same cost as the expression itself, or 2 if we can compute the + /// negated form more cheaply than the expression itself. Else return 0. + char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, + bool ForCodeSize, unsigned Depth) const override; + + /// If isNegatibleForFree returns true, return the newly negated expression. 
+ SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; diff --git a/llvm/lib/TextAPI/MachO/Target.cpp b/llvm/lib/TextAPI/MachO/Target.cpp index 3052aa53ac233..aee8ef421425f 100644 --- a/llvm/lib/TextAPI/MachO/Target.cpp +++ b/llvm/lib/TextAPI/MachO/Target.cpp @@ -17,6 +17,36 @@ namespace llvm { namespace MachO { +Expected Target::create(StringRef TargetValue) { + auto Result = TargetValue.split('-'); + auto ArchitectureStr = Result.first; + auto Architecture = getArchitectureFromName(ArchitectureStr); + auto PlatformStr = Result.second; + PlatformKind Platform; + Platform = StringSwitch(PlatformStr) + .Case("macos", PlatformKind::macOS) + .Case("ios", PlatformKind::iOS) + .Case("tvos", PlatformKind::tvOS) + .Case("watchos", PlatformKind::watchOS) + .Case("bridgeos", PlatformKind::bridgeOS) + .Case("maccatalyst", PlatformKind::macCatalyst) + .Case("ios-simulator", PlatformKind::iOSSimulator) + .Case("tvos-simulator", PlatformKind::tvOSSimulator) + .Case("watchos-simulator", PlatformKind::watchOSSimulator) + .Default(PlatformKind::unknown); + + if (Platform == PlatformKind::unknown) { + if (PlatformStr.startswith("<") && PlatformStr.endswith(">")) { + PlatformStr = PlatformStr.drop_front().drop_back(); + unsigned long long RawValue; + if (!PlatformStr.getAsInteger(10, RawValue)) + Platform = (PlatformKind)RawValue; + } + } + + return Target{Architecture, Platform}; +} + Target::operator std::string() const { return (getArchitectureName(Arch) + " (" + getPlatformName(Platform) + ")") .str(); @@ -42,4 +72,4 @@ ArchitectureSet mapToArchitectureSet(ArrayRef Targets) { } } // end namespace MachO. -} // end namespace llvm. \ No newline at end of file +} // end namespace llvm. diff --git a/llvm/lib/TextAPI/MachO/TextStub.cpp b/llvm/lib/TextAPI/MachO/TextStub.cpp index 78c9f54ba22dd..0584e43d5893f 100644 --- a/llvm/lib/TextAPI/MachO/TextStub.cpp +++ b/llvm/lib/TextAPI/MachO/TextStub.cpp @@ -147,6 +147,58 @@ Each undefineds section is defined as following: objc-ivars: [] # Optional: List of Objective C Instance Variables weak-ref-symbols: [] # Optional: List of weak defined symbols */ + +/* + + YAML Format specification. + +--- !tapi-tbd +tbd-version: 4 # The tbd version for format +targets: [ armv7-ios, x86_64-maccatalyst ] # The list of applicable tapi supported target triples +uuids: # Optional: List of target and UUID pairs. + - target: armv7-ios + value: ... + - target: x86_64-maccatalyst + value: ... +flags: [] # Optional: +install-name: /u/l/libfoo.dylib # +current-version: 1.2.3 # Optional: defaults to 1.0 +compatibility-version: 1.0 # Optional: defaults to 1.0 +swift-abi-version: 0 # Optional: defaults to 0 +parent-umbrella: # Optional: +allowable-clients: + - targets: [ armv7-ios ] # Optional: + clients: [ clientA ] +exports: # List of export sections +... +re-exports: # List of reexport sections +... +undefineds: # List of undefineds sections +... 
+ +Each export and reexport section is defined as following: + +- targets: [ arm64-macos ] # The list of target triples associated with symbols + symbols: [ _symA ] # Optional: List of symbols + objc-classes: [] # Optional: List of Objective-C classes + objc-eh-types: [] # Optional: List of Objective-C classes + # with EH + objc-ivars: [] # Optional: List of Objective C Instance + # Variables + weak-symbols: [] # Optional: List of weak defined symbols + thread-local-symbols: [] # Optional: List of thread local symbols +- targets: [ arm64-macos, x86_64-maccatalyst ] # Optional: Targets for applicable additional symbols + symbols: [ _symB ] # Optional: List of symbols + +Each undefineds section is defined as following: +- targets: [ arm64-macos ] # The list of target triples associated with symbols + symbols: [ _symC ] # Optional: List of symbols + objc-classes: [] # Optional: List of Objective-C classes + objc-eh-types: [] # Optional: List of Objective-C classes + # with EH + objc-ivars: [] # Optional: List of Objective C Instance Variables + weak-symbols: [] # Optional: List of weak defined symbols +*/ // clang-format on using namespace llvm; @@ -175,6 +227,38 @@ struct UndefinedSection { std::vector WeakRefSymbols; }; +// Sections for direct target mapping in TBDv4 +struct SymbolSection { + TargetList Targets; + std::vector Symbols; + std::vector Classes; + std::vector ClassEHs; + std::vector Ivars; + std::vector WeakSymbols; + std::vector TlvSymbols; +}; + +struct MetadataSection { + enum Option { Clients, Libraries }; + std::vector Targets; + std::vector Values; +}; + +struct UmbrellaSection { + std::vector Targets; + std::string Umbrella; +}; + +// UUID's for TBDv4 are mapped to target not arch +struct UUIDv4 { + Target TargetID; + std::string Value; + + UUIDv4() = default; + UUIDv4(const Target &TargetID, const std::string &Value) + : TargetID(TargetID), Value(Value) {} +}; + // clang-format off enum TBDFlags : unsigned { None = 0U, @@ -189,6 +273,12 @@ enum TBDFlags : unsigned { LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Architecture) LLVM_YAML_IS_SEQUENCE_VECTOR(ExportSection) LLVM_YAML_IS_SEQUENCE_VECTOR(UndefinedSection) +// Specific to TBDv4 +LLVM_YAML_IS_SEQUENCE_VECTOR(SymbolSection) +LLVM_YAML_IS_SEQUENCE_VECTOR(MetadataSection) +LLVM_YAML_IS_SEQUENCE_VECTOR(UmbrellaSection) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Target) +LLVM_YAML_IS_SEQUENCE_VECTOR(UUIDv4) namespace llvm { namespace yaml { @@ -231,6 +321,49 @@ template <> struct MappingTraits { } }; +template <> struct MappingTraits { + static void mapping(IO &IO, SymbolSection &Section) { + IO.mapRequired("targets", Section.Targets); + IO.mapOptional("symbols", Section.Symbols); + IO.mapOptional("objc-classes", Section.Classes); + IO.mapOptional("objc-eh-types", Section.ClassEHs); + IO.mapOptional("objc-ivars", Section.Ivars); + IO.mapOptional("weak-symbols", Section.WeakSymbols); + IO.mapOptional("thread-local-symbols", Section.TlvSymbols); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, UmbrellaSection &Section) { + IO.mapRequired("targets", Section.Targets); + IO.mapRequired("umbrella", Section.Umbrella); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, UUIDv4 &UUID) { + IO.mapRequired("target", UUID.TargetID); + IO.mapRequired("value", UUID.Value); + } +}; + +template <> +struct MappingContextTraits { + static void mapping(IO &IO, MetadataSection &Section, + MetadataSection::Option &OptionKind) { + IO.mapRequired("targets", Section.Targets); + switch (OptionKind) { + case 
MetadataSection::Option::Clients: + IO.mapRequired("clients", Section.Values); + return; + case MetadataSection::Option::Libraries: + IO.mapRequired("libraries", Section.Values); + return; + } + llvm_unreachable("unexpected option for metadata"); + } +}; + template <> struct ScalarBitSetTraits { static void bitset(IO &IO, TBDFlags &Flags) { IO.bitSetCase(Flags, "flat_namespace", TBDFlags::FlatNamespace); @@ -240,6 +373,60 @@ template <> struct ScalarBitSetTraits { } }; +template <> struct ScalarTraits { + static void output(const Target &Value, void *, raw_ostream &OS) { + OS << Value.Arch << "-"; + switch (Value.Platform) { + default: + OS << "unknown"; + break; + case PlatformKind::macOS: + OS << "macos"; + break; + case PlatformKind::iOS: + OS << "ios"; + break; + case PlatformKind::tvOS: + OS << "tvos"; + break; + case PlatformKind::watchOS: + OS << "watchos"; + break; + case PlatformKind::bridgeOS: + OS << "bridgeos"; + break; + case PlatformKind::macCatalyst: + OS << "maccatalyst"; + break; + case PlatformKind::iOSSimulator: + OS << "ios-simulator"; + break; + case PlatformKind::tvOSSimulator: + OS << "tvos-simulator"; + break; + case PlatformKind::watchOSSimulator: + OS << "watchos-simulator"; + break; + } + } + + static StringRef input(StringRef Scalar, void *, Target &Value) { + auto Result = Target::create(Scalar); + if (!Result) + return toString(Result.takeError()); + + Value = *Result; + if (Value.Arch == AK_unknown) + return "unknown architecture"; + if (Value.Platform == PlatformKind::unknown) + return "unknown platform"; + + return {}; + } + + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + template <> struct MappingTraits { struct NormalizedTBD { explicit NormalizedTBD(IO &IO) {} @@ -555,71 +742,336 @@ template <> struct MappingTraits { std::vector Undefineds; }; + static void setFileTypeForInput(TextAPIContext *Ctx, IO &IO) { + if (IO.mapTag("!tapi-tbd", false)) + Ctx->FileKind = FileType::TBD_V4; + else if (IO.mapTag("!tapi-tbd-v3", false)) + Ctx->FileKind = FileType::TBD_V3; + else if (IO.mapTag("!tapi-tbd-v2", false)) + Ctx->FileKind = FileType::TBD_V2; + else if (IO.mapTag("!tapi-tbd-v1", false) || + IO.mapTag("tag:yaml.org,2002:map", false)) + Ctx->FileKind = FileType::TBD_V1; + else { + Ctx->FileKind = FileType::Invalid; + return; + } + } + static void mapping(IO &IO, const InterfaceFile *&File) { auto *Ctx = reinterpret_cast(IO.getContext()); assert((!Ctx || !IO.outputting() || (Ctx && Ctx->FileKind != FileType::Invalid)) && "File type is not set in YAML context"); - MappingNormalization Keys(IO, File); - // prope file type when reading. if (!IO.outputting()) { - if (IO.mapTag("!tapi-tbd-v3", false)) - Ctx->FileKind = FileType::TBD_V3; - else if (IO.mapTag("!tapi-tbd-v2", false)) - Ctx->FileKind = FileType::TBD_V2; - else if (IO.mapTag("!tapi-tbd-v1", false) || - IO.mapTag("tag:yaml.org,2002:map", false)) - Ctx->FileKind = FileType::TBD_V1; - else { + setFileTypeForInput(Ctx, IO); + switch (Ctx->FileKind) { + default: + break; + case FileType::TBD_V4: + mapKeysToValuesV4(IO, File); + return; + case FileType::Invalid: IO.setError("unsupported file type"); return; } - } - - // Set file type when writing. - if (IO.outputting()) { + } else { + // Set file type when writing. switch (Ctx->FileKind) { default: llvm_unreachable("unexpected file type"); - case FileType::TBD_V1: - // Don't write the tag into the .tbd file for TBD v1. 
+ case FileType::TBD_V4: + mapKeysToValuesV4(IO, File); + return; + case FileType::TBD_V3: + IO.mapTag("!tapi-tbd-v3", true); break; case FileType::TBD_V2: IO.mapTag("!tapi-tbd-v2", true); break; - case FileType::TBD_V3: - IO.mapTag("!tapi-tbd-v3", true); + case FileType::TBD_V1: + // Don't write the tag into the .tbd file for TBD v1 break; } } + mapKeysToValues(Ctx->FileKind, IO, File); + } + + using SectionList = std::vector; + struct NormalizedTBD_V4 { + explicit NormalizedTBD_V4(IO &IO) {} + NormalizedTBD_V4(IO &IO, const InterfaceFile *&File) { + auto Ctx = reinterpret_cast(IO.getContext()); + assert(Ctx); + TBDVersion = Ctx->FileKind >> 1; + Targets.insert(Targets.begin(), File->targets().begin(), + File->targets().end()); + for (const auto &IT : File->uuids()) + UUIDs.emplace_back(IT.first, IT.second); + InstallName = File->getInstallName(); + CurrentVersion = File->getCurrentVersion(); + CompatibilityVersion = File->getCompatibilityVersion(); + SwiftABIVersion = File->getSwiftABIVersion(); + + Flags = TBDFlags::None; + if (!File->isApplicationExtensionSafe()) + Flags |= TBDFlags::NotApplicationExtensionSafe; + + if (!File->isTwoLevelNamespace()) + Flags |= TBDFlags::FlatNamespace; + + if (File->isInstallAPI()) + Flags |= TBDFlags::InstallAPI; + + { + std::map valueToTargetList; + for (const auto &it : File->umbrellas()) + valueToTargetList[it.second].emplace_back(it.first); + + for (const auto &it : valueToTargetList) { + UmbrellaSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + it.second.begin(), it.second.end()); + CurrentSection.Umbrella = it.first; + ParentUmbrellas.emplace_back(std::move(CurrentSection)); + } + } + + assignTargetsToLibrary(File->allowableClients(), AllowableClients); + assignTargetsToLibrary(File->reexportedLibraries(), ReexportedLibraries); + + auto handleSymbols = + [](SectionList &CurrentSections, + InterfaceFile::const_filtered_symbol_range Symbols, + std::function Pred) { + std::set TargetSet; + std::map SymbolToTargetList; + for (const auto *Symbol : Symbols) { + if (!Pred(Symbol)) + continue; + TargetList Targets(Symbol->targets()); + SymbolToTargetList[Symbol] = Targets; + TargetSet.emplace(std::move(Targets)); + } + for (const auto &TargetIDs : TargetSet) { + SymbolSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + TargetIDs.begin(), TargetIDs.end()); + + for (const auto &IT : SymbolToTargetList) { + if (IT.second != TargetIDs) + continue; + + const auto *Symbol = IT.first; + switch (Symbol->getKind()) { + case SymbolKind::GlobalSymbol: + if (Symbol->isWeakDefined()) + CurrentSection.WeakSymbols.emplace_back(Symbol->getName()); + else if (Symbol->isThreadLocalValue()) + CurrentSection.TlvSymbols.emplace_back(Symbol->getName()); + else + CurrentSection.Symbols.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCClass: + CurrentSection.Classes.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCClassEHType: + CurrentSection.ClassEHs.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCInstanceVariable: + CurrentSection.Ivars.emplace_back(Symbol->getName()); + break; + } + } + sort(CurrentSection.Symbols); + sort(CurrentSection.Classes); + sort(CurrentSection.ClassEHs); + sort(CurrentSection.Ivars); + sort(CurrentSection.WeakSymbols); + sort(CurrentSection.TlvSymbols); + CurrentSections.emplace_back(std::move(CurrentSection)); + } + }; + + handleSymbols(Exports, File->exports(), [](const Symbol *Symbol) { + return 
!Symbol->isReexported(); + }); + handleSymbols(Reexports, File->exports(), [](const Symbol *Symbol) { + return Symbol->isReexported(); + }); + handleSymbols(Undefineds, File->undefineds(), + [](const Symbol *Symbol) { return true; }); + } + + const InterfaceFile *denormalize(IO &IO) { + auto Ctx = reinterpret_cast(IO.getContext()); + assert(Ctx); + auto *File = new InterfaceFile; + File->setPath(Ctx->Path); + File->setFileType(Ctx->FileKind); + for (auto &id : UUIDs) + File->addUUID(id.TargetID, id.Value); + File->addTargets(Targets); + File->setInstallName(InstallName); + File->setCurrentVersion(CurrentVersion); + File->setCompatibilityVersion(CompatibilityVersion); + File->setSwiftABIVersion(SwiftABIVersion); + for (const auto &CurrentSection : ParentUmbrellas) + for (const auto &target : CurrentSection.Targets) + File->addParentUmbrella(target, CurrentSection.Umbrella); + File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace)); + File->setApplicationExtensionSafe( + !(Flags & TBDFlags::NotApplicationExtensionSafe)); + File->setInstallAPI(Flags & TBDFlags::InstallAPI); + + for (const auto &CurrentSection : AllowableClients) { + for (const auto &lib : CurrentSection.Values) + for (const auto &Target : CurrentSection.Targets) + File->addAllowableClient(lib, Target); + } + + for (const auto &CurrentSection : ReexportedLibraries) { + for (const auto &Lib : CurrentSection.Values) + for (const auto &Target : CurrentSection.Targets) + File->addReexportedLibrary(Lib, Target); + } + + auto handleSymbols = [File](const SectionList &CurrentSections, + SymbolFlags Flag = SymbolFlags::None) { + for (const auto &CurrentSection : CurrentSections) { + for (auto &sym : CurrentSection.Symbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets, Flag); + + for (auto &sym : CurrentSection.Classes) + File->addSymbol(SymbolKind::ObjectiveCClass, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.ClassEHs) + File->addSymbol(SymbolKind::ObjectiveCClassEHType, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.Ivars) + File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.WeakSymbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets); + for (auto &sym : CurrentSection.TlvSymbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets, + SymbolFlags::ThreadLocalValue); + } + }; + + handleSymbols(Exports); + handleSymbols(Reexports, SymbolFlags::Rexported); + handleSymbols(Undefineds, SymbolFlags::Undefined); + + return File; + } + + unsigned TBDVersion; + std::vector UUIDs; + TargetList Targets; + StringRef InstallName; + PackedVersion CurrentVersion; + PackedVersion CompatibilityVersion; + SwiftVersion SwiftABIVersion{0}; + std::vector AllowableClients; + std::vector ReexportedLibraries; + TBDFlags Flags{TBDFlags::None}; + std::vector ParentUmbrellas; + SectionList Exports; + SectionList Reexports; + SectionList Undefineds; + + private: + void assignTargetsToLibrary(const std::vector &Libraries, + std::vector &Section) { + std::set targetSet; + std::map valueToTargetList; + for (const auto &library : Libraries) { + TargetList targets(library.targets()); + valueToTargetList[&library] = targets; + targetSet.emplace(std::move(targets)); + } + + for (const auto &targets : targetSet) { + MetadataSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + targets.begin(), targets.end()); + + for (const auto &it : 
valueToTargetList) { + if (it.second != targets) + continue; + + CurrentSection.Values.emplace_back(it.first->getInstallName()); + } + llvm::sort(CurrentSection.Values); + Section.emplace_back(std::move(CurrentSection)); + } + } + }; + + static void mapKeysToValues(FileType FileKind, IO &IO, + const InterfaceFile *&File) { + MappingNormalization Keys(IO, File); IO.mapRequired("archs", Keys->Architectures); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("uuids", Keys->UUIDs); IO.mapRequired("platform", Keys->Platforms); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("flags", Keys->Flags, TBDFlags::None); IO.mapRequired("install-name", Keys->InstallName); IO.mapOptional("current-version", Keys->CurrentVersion, PackedVersion(1, 0, 0)); IO.mapOptional("compatibility-version", Keys->CompatibilityVersion, PackedVersion(1, 0, 0)); - if (Ctx->FileKind != FileType::TBD_V3) + if (FileKind != FileType::TBD_V3) IO.mapOptional("swift-version", Keys->SwiftABIVersion, SwiftVersion(0)); else IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion, SwiftVersion(0)); IO.mapOptional("objc-constraint", Keys->ObjCConstraint, - (Ctx->FileKind == FileType::TBD_V1) + (FileKind == FileType::TBD_V1) ? ObjCConstraintType::None : ObjCConstraintType::Retain_Release); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("parent-umbrella", Keys->ParentUmbrella, StringRef()); IO.mapOptional("exports", Keys->Exports); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("undefineds", Keys->Undefineds); } + + static void mapKeysToValuesV4(IO &IO, const InterfaceFile *&File) { + MappingNormalization Keys(IO, + File); + IO.mapTag("!tapi-tbd", true); + IO.mapRequired("tbd-version", Keys->TBDVersion); + IO.mapRequired("targets", Keys->Targets); + IO.mapOptional("uuids", Keys->UUIDs); + IO.mapOptional("flags", Keys->Flags, TBDFlags::None); + IO.mapRequired("install-name", Keys->InstallName); + IO.mapOptional("current-version", Keys->CurrentVersion, + PackedVersion(1, 0, 0)); + IO.mapOptional("compatibility-version", Keys->CompatibilityVersion, + PackedVersion(1, 0, 0)); + IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion, SwiftVersion(0)); + IO.mapOptional("parent-umbrella", Keys->ParentUmbrellas); + auto OptionKind = MetadataSection::Option::Clients; + IO.mapOptionalWithContext("allowable-clients", Keys->AllowableClients, + OptionKind); + OptionKind = MetadataSection::Option::Libraries; + IO.mapOptionalWithContext("reexported-libraries", Keys->ReexportedLibraries, + OptionKind); + IO.mapOptional("exports", Keys->Exports); + IO.mapOptional("reexports", Keys->Reexports); + IO.mapOptional("undefineds", Keys->Undefineds); + } }; template <> diff --git a/llvm/lib/TextAPI/MachO/TextStubCommon.cpp b/llvm/lib/TextAPI/MachO/TextStubCommon.cpp index cfd9ac8d0cf6f..183c5d5a93b03 100644 --- a/llvm/lib/TextAPI/MachO/TextStubCommon.cpp +++ b/llvm/lib/TextAPI/MachO/TextStubCommon.cpp @@ -172,14 +172,25 @@ void ScalarTraits::output(const SwiftVersion &Value, void *, break; } } -StringRef ScalarTraits::input(StringRef Scalar, void *, +StringRef ScalarTraits::input(StringRef Scalar, void *IO, SwiftVersion &Value) { - Value = StringSwitch(Scalar) - .Case("1.0", 1) - .Case("1.1", 2) - .Case("2.0", 3) - .Case("3.0", 4) - .Default(0); + const auto *Ctx = reinterpret_cast(IO); + assert((!Ctx || Ctx->FileKind != FileType::Invalid) && + "File type is not set in context"); + + 
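// [Editor's note -- illustrative, not part of the patch.] What the two input
// paths below accept for the Swift ABI version key:
//   swift-abi-version: 5    (TBD v4) -> SwiftVersion(5), parsed directly as an integer
//   swift-abi-version: 3.0  (TBD v3) -> SwiftVersion(4), via the StringSwitch table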
if (Ctx->FileKind == FileType::TBD_V4) { + if (Scalar.getAsInteger(10, Value)) + return "invalid Swift ABI version."; + return {}; + } else { + Value = StringSwitch(Scalar) + .Case("1.0", 1) + .Case("1.1", 2) + .Case("2.0", 3) + .Case("3.0", 4) + .Default(0); + } + if (Value != SwiftVersion(0)) return {}; diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 6e2867f5708ce..a24de3ca213f6 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -250,6 +250,72 @@ static bool foldAnyOrAllBitsSet(Instruction &I) { return true; } +// Try to recognize below function as popcount intrinsic. +// This is the "best" algorithm from +// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel +// Also used in TargetLowering::expandCTPOP(). +// +// int popcount(unsigned int i) { +// i = i - ((i >> 1) & 0x55555555); +// i = (i & 0x33333333) + ((i >> 2) & 0x33333333); +// i = ((i + (i >> 4)) & 0x0F0F0F0F); +// return (i * 0x01010101) >> 24; +// } +static bool tryToRecognizePopCount(Instruction &I) { + if (I.getOpcode() != Instruction::LShr) + return false; + + Type *Ty = I.getType(); + if (!Ty->isIntOrIntVectorTy()) + return false; + + unsigned Len = Ty->getScalarSizeInBits(); + // FIXME: fix Len == 8 and other irregular type lengths. + if (!(Len <= 128 && Len > 8 && Len % 8 == 0)) + return false; + + APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55)); + APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33)); + APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F)); + APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01)); + APInt MaskShift = APInt(Len, Len - 8); + + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *MulOp0; + // Matching "(i * 0x01010101...) >> 24". + if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) && + match(Op1, m_SpecificInt(MaskShift))) { + Value *ShiftOp0; + // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)". + if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)), + m_Deferred(ShiftOp0)), + m_SpecificInt(Mask0F)))) { + Value *AndOp0; + // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)". + if (match(ShiftOp0, + m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)), + m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)), + m_SpecificInt(Mask33))))) { + Value *Root, *SubOp1; + // Matching "i - ((i >> 1) & 0x55555555...)". + if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) && + match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)), + m_SpecificInt(Mask55)))) { + LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n"); + IRBuilder<> Builder(&I); + Function *Func = Intrinsic::getDeclaration( + I.getModule(), Intrinsic::ctpop, I.getType()); + I.replaceAllUsesWith(Builder.CreateCall(Func, {Root})); + return true; + } + } + } + } + + return false; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. 
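// [Editor's sketch -- illustrative, not part of the patch.] The exact 32-bit
// source pattern tryToRecognizePopCount() matches, written as standalone C++
// so it can be cross-checked against the compiler builtin (GCC/Clang assumed):
#include <cstdint>
static unsigned popcount32(uint32_t I) {
  I = I - ((I >> 1) & 0x55555555u);
  I = (I & 0x33333333u) + ((I >> 2) & 0x33333333u);
  I = (I + (I >> 4)) & 0x0F0F0F0Fu;
  return (I * 0x01010101u) >> 24;
}
static bool matchesBuiltin(uint32_t V) {
  return popcount32(V) == (unsigned)__builtin_popcount(V);
}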
@@ -268,6 +334,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { for (Instruction &I : make_range(BB.rbegin(), BB.rend())) { MadeChange |= foldAnyOrAllBitsSet(I); MadeChange |= foldGuardedRotateToFunnelShift(I); + MadeChange |= tryToRecognizePopCount(I); } } diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 4943721c46986..4b393539813da 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -139,8 +139,8 @@ static cl::opt DepRecInterval( static cl::opt EnableHeapToStack("enable-heap-to-stack-conversion", cl::init(true), cl::Hidden); -static cl::opt MaxHeapToStackSize("max-heap-to-stack-size", - cl::init(128), cl::Hidden); +static cl::opt MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), + cl::Hidden); /// Logic operators for the change status enum class. /// @@ -712,8 +712,8 @@ struct AACallSiteReturnedFromReturned : public Base { /// Base class is required to have `followUse` method. /// bool followUse(Attributor &A, const Use *U, const Instruction *I) -/// \param U Underlying use. -/// \param I The user of the \p U. +/// U - Underlying use. +/// I - The user of the \p U. /// `followUse` returns true if the value should be tracked transitively. template (getAnchorValue())) + Value &Val = getAssociatedValue(); + if (isa(Val)) + indicateOptimisticFixpoint(); + if (isa(Val) && + Val.getType()->getPointerAddressSpace() == 0) indicateOptimisticFixpoint(); } @@ -1971,8 +1975,12 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { // check only uses possibly executed before this callsite. auto &NoCaptureAA = A.getAAFor(*this, IRP); - if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) + if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + LLVM_DEBUG( + dbgs() << "[Attributor][AANoAliasCSArg] " << V + << " cannot be noalias as it is potentially captured\n"); return indicatePessimisticFixpoint(); + } // (iii) Check there is no other pointer argument which could alias with the // value. @@ -1986,13 +1994,15 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { if (const Function *F = getAnchorScope()) { if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) { + bool IsAliasing = AAR->isNoAlias(&getAssociatedValue(), ArgOp); LLVM_DEBUG(dbgs() << "[Attributor][NoAliasCSArg] Check alias between " "callsite arguments " << AAR->isNoAlias(&getAssociatedValue(), ArgOp) << " " - << getAssociatedValue() << " " << *ArgOp << "\n"); + << getAssociatedValue() << " " << *ArgOp << " => " + << (IsAliasing ? "" : "no-") << "alias \n"); - if (AAR->isNoAlias(&getAssociatedValue(), ArgOp)) + if (IsAliasing) continue; } } @@ -2881,6 +2891,13 @@ struct AANoCaptureImpl : public AANoCapture { void initialize(Attributor &A) override { AANoCapture::initialize(A); + // You cannot "capture" null in the default address space. + if (isa(getAssociatedValue()) && + getAssociatedValue().getType()->getPointerAddressSpace() == 0) { + indicateOptimisticFixpoint(); + return; + } + const IRPosition &IRP = getIRPosition(); const Function *F = getArgNo() >= 0 ? 
IRP.getAssociatedFunction() : IRP.getAnchorScope(); diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp index 86b7f3e49ee6f..0b14229ac6201 100644 --- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -17,9 +17,11 @@ #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CtorUtils.h" @@ -29,10 +31,15 @@ using namespace llvm; #define DEBUG_TYPE "globaldce" +static cl::opt + ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore, + cl::desc("Enable virtual function elimination")); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs, "Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); +STATISTIC(NumVFuncs, "Number of virtual functions removed"); namespace { class GlobalDCELegacyPass : public ModulePass { @@ -118,6 +125,15 @@ void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) { ComputeDependencies(User, Deps); Deps.erase(&GV); // Remove self-reference. for (GlobalValue *GVU : Deps) { + // If this is a dep from a vtable to a virtual function, and we have + // complete information about all virtual call sites which could call + // through this vtable, then skip it, because the call site information will + // be more precise. + if (VFESafeVTables.count(GVU) && isa(&GV)) { + LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> " + << GV.getName() << "\n"); + continue; + } GVDependencies[GVU].insert(&GV); } } @@ -132,12 +148,133 @@ void GlobalDCEPass::MarkLive(GlobalValue &GV, if (Updates) Updates->push_back(&GV); if (Comdat *C = GV.getComdat()) { - for (auto &&CM : make_range(ComdatMembers.equal_range(C))) + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { MarkLive(*CM.second, Updates); // Recursion depth is only two because only // globals in the same comdat are visited. + } + } +} + +void GlobalDCEPass::ScanVTables(Module &M) { + SmallVector Types; + LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n"); + + auto *LTOPostLinkMD = + cast_or_null(M.getModuleFlag("LTOPostLink")); + bool LTOPostLink = + LTOPostLinkMD && + (cast(LTOPostLinkMD->getValue())->getZExtValue() != 0); + + for (GlobalVariable &GV : M.globals()) { + Types.clear(); + GV.getMetadata(LLVMContext::MD_type, Types); + if (GV.isDeclaration() || Types.empty()) + continue; + + // Use the typeid metadata on the vtable to build a mapping from typeids to + // the list of (GV, offset) pairs which are the possible vtables for that + // typeid. + for (MDNode *Type : Types) { + Metadata *TypeID = Type->getOperand(1).get(); + + uint64_t Offset = + cast( + cast(Type->getOperand(0))->getValue()) + ->getZExtValue(); + + TypeIdMap[TypeID].insert(std::make_pair(&GV, Offset)); + } + + // If the type corresponding to the vtable is private to this translation + // unit, we know that we can see all virtual functions which might use it, + // so VFE is safe.
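// [Editor's sketch -- illustrative, not part of the patch; whether elimination
// actually fires depends on front-end flags and LTO setup, which are assumptions
// here.] The C++ shape this targets: a type whose vtable is only visible inside
// this translation unit, so every possible virtual call site is in view and
// unreferenced slots can be dropped.
namespace {
struct Impl {
  virtual ~Impl() = default;
  virtual void used() {}
  virtual void neverCalled() {} // no call site anywhere -> candidate for VFE
};
} // namespace
static void touch(Impl &I) { I.used(); } // only used() is ever invoked virtually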
+ if (auto GO = dyn_cast(&GV)) { + GlobalObject::VCallVisibility TypeVis = GV.getVCallVisibility(); + if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit || + (LTOPostLink && + TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) { + LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n"); + VFESafeVTables.insert(&GV); + } + } + } +} + +void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId, + uint64_t CallOffset) { + for (auto &VTableInfo : TypeIdMap[TypeId]) { + GlobalVariable *VTable = VTableInfo.first; + uint64_t VTableOffset = VTableInfo.second; + + Constant *Ptr = + getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset, + *Caller->getParent()); + if (!Ptr) { + LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n"); + VFESafeVTables.erase(VTable); + return; + } + + auto Callee = dyn_cast(Ptr->stripPointerCasts()); + if (!Callee) { + LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n"); + VFESafeVTables.erase(VTable); + return; + } + + LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> " + << Callee->getName() << "\n"); + GVDependencies[Caller].insert(Callee); } } +void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) { + LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n"); + Function *TypeCheckedLoadFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); + + if (!TypeCheckedLoadFunc) + return; + + for (auto U : TypeCheckedLoadFunc->users()) { + auto CI = dyn_cast(U); + if (!CI) + continue; + + auto *Offset = dyn_cast(CI->getArgOperand(1)); + Value *TypeIdValue = CI->getArgOperand(2); + auto *TypeId = cast(TypeIdValue)->getMetadata(); + + if (Offset) { + ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue()); + } else { + // type.checked.load with a non-constant offset, so assume every entry in + // every matching vtable is used. + for (auto &VTableInfo : TypeIdMap[TypeId]) { + VFESafeVTables.erase(VTableInfo.first); + } + } + } +} + +void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) { + if (!ClEnableVFE) + return; + + ScanVTables(M); + + if (VFESafeVTables.empty()) + return; + + ScanTypeCheckedLoadIntrinsics(M); + + LLVM_DEBUG( + dbgs() << "VFE safe vtables:\n"; + for (auto *VTable : VFESafeVTables) + dbgs() << " " << VTable->getName() << "\n"; + ); +} + PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { bool Changed = false; @@ -163,6 +300,10 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { if (Comdat *C = GA.getComdat()) ComdatMembers.insert(std::make_pair(C, &GA)); + // Add dependencies between virtual call sites and the virtual functions they + // might call, if we have that information. + AddVirtualFunctionDependencies(M); + // Loop over the module, adding globals which are obviously necessary. for (GlobalObject &GO : M.global_objects()) { Changed |= RemoveUnusedGlobalValue(GO); @@ -257,8 +398,17 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { }; NumFunctions += DeadFunctions.size(); - for (Function *F : DeadFunctions) + for (Function *F : DeadFunctions) { + if (!F->use_empty()) { + // Virtual functions might still be referenced by one or more vtables, + // but if we've proven them to be unused then it's safe to replace the + // virtual function pointers with null, allowing us to remove the + // function itself. 
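// [Editor's note -- illustrative, not part of the patch.] Conceptually the
// vtable initializer goes from
//   @vtbl = { ..., i8* bitcast (void (%Impl*)* @neverCalled to i8*), ... }
// to
//   @vtbl = { ..., i8* null, ... }
// after which the proven-unused virtual function has no remaining uses and is
// erased like any other dead function (names reuse the sketch above).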
+ ++NumVFuncs; + F->replaceAllUsesWith(ConstantPointerNull::get(F->getType())); + } EraseUnusedGlobalValue(F); + } NumVariables += DeadGlobalVars.size(); for (GlobalVariable *GV : DeadGlobalVars) @@ -277,6 +427,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { ConstantDependenciesCache.clear(); GVDependencies.clear(); ComdatMembers.clear(); + TypeIdMap.clear(); + VFESafeVTables.clear(); if (Changed) return PreservedAnalyses::none(); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index c9e7a19c380a2..aa22ac3b449c3 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1682,7 +1682,7 @@ bool SampleProfileLoader::doInitialization(Module &M) { return false; } Reader = std::move(ReaderOrErr.get()); - Reader->collectFuncsToUse(M); + Reader->collectFuncsFrom(M); ProfileIsValid = (Reader->read() == sampleprof_error::success); PSL = Reader->getProfileSymbolList(); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 4055fe049999b..52a7dae533be1 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -496,7 +496,6 @@ struct DevirtModule { void buildTypeIdentifierMap( std::vector &Bits, DenseMap> &TypeIdMap); - Constant *getPointerAtOffset(Constant *I, uint64_t Offset); bool tryFindVirtualCallTargets(std::vector &TargetsForSlot, const std::set &TypeMemberInfos, @@ -813,38 +812,6 @@ void DevirtModule::buildTypeIdentifierMap( } } -Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) { - if (I->getType()->isPointerTy()) { - if (Offset == 0) - return I; - return nullptr; - } - - const DataLayout &DL = M.getDataLayout(); - - if (auto *C = dyn_cast(I)) { - const StructLayout *SL = DL.getStructLayout(C->getType()); - if (Offset >= SL->getSizeInBytes()) - return nullptr; - - unsigned Op = SL->getElementContainingOffset(Offset); - return getPointerAtOffset(cast(I->getOperand(Op)), - Offset - SL->getElementOffset(Op)); - } - if (auto *C = dyn_cast(I)) { - ArrayType *VTableTy = C->getType(); - uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); - - unsigned Op = Offset / ElemSize; - if (Op >= C->getNumOperands()) - return nullptr; - - return getPointerAtOffset(cast(I->getOperand(Op)), - Offset % ElemSize); - } - return nullptr; -} - bool DevirtModule::tryFindVirtualCallTargets( std::vector &TargetsForSlot, const std::set &TypeMemberInfos, uint64_t ByteOffset) { @@ -853,7 +820,7 @@ bool DevirtModule::tryFindVirtualCallTargets( return false; Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), - TM.Offset + ByteOffset); + TM.Offset + ByteOffset, M); if (!Ptr) return false; @@ -1941,6 +1908,12 @@ bool DevirtModule::run() { for (VTableBits &B : Bits) rebuildGlobal(B); + // We have lowered or deleted the type checked load intrinsics, so we no + // longer have enough information to reason about the liveness of virtual + // function pointers in GlobalDCE. + for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + return true; } diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index f9354069da326..69c9020e060ba 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -462,16 +462,9 @@ namespace { /// the module. 
class MemorySanitizer { public: - MemorySanitizer(Module &M, MemorySanitizerOptions Options) { - this->CompileKernel = - ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : Options.Kernel; - if (ClTrackOrigins.getNumOccurrences() > 0) - this->TrackOrigins = ClTrackOrigins; - else - this->TrackOrigins = this->CompileKernel ? 2 : Options.TrackOrigins; - this->Recover = ClKeepGoing.getNumOccurrences() > 0 - ? ClKeepGoing - : (this->CompileKernel | Options.Recover); + MemorySanitizer(Module &M, MemorySanitizerOptions Options) + : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins), + Recover(Options.Recover) { initializeModule(M); } @@ -594,10 +587,26 @@ class MemorySanitizer { /// An empty volatile inline asm that prevents callback merge. InlineAsm *EmptyAsm; - - Function *MsanCtorFunction; }; +void insertModuleCtor(Module &M) { + getOrCreateSanitizerCtorAndInitFunctions( + M, kMsanModuleCtorName, kMsanInitName, + /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. Hook them into the global ctors list in that case: + [&](Function *Ctor, FunctionCallee) { + if (!ClWithComdat) { + appendToGlobalCtors(M, Ctor, 0); + return; + } + Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); + Ctor->setComdat(MsanCtorComdat); + appendToGlobalCtors(M, Ctor, 0, Ctor); + }); +} + /// A legacy function pass for msan instrumentation. /// /// Instruments functions to detect unitialized reads. @@ -623,8 +632,17 @@ struct MemorySanitizerLegacyPass : public FunctionPass { MemorySanitizerOptions Options; }; +template T getOptOrDefault(const cl::opt &Opt, T Default) { + return (Opt.getNumOccurrences() > 0) ? Opt : Default; +} + } // end anonymous namespace +MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K) + : Kernel(getOptOrDefault(ClEnableKmsan, K)), + TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)), + Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {} + PreservedAnalyses MemorySanitizerPass::run(Function &F, FunctionAnalysisManager &FAM) { MemorySanitizer Msan(*F.getParent(), Options); @@ -633,6 +651,14 @@ PreservedAnalyses MemorySanitizerPass::run(Function &F, return PreservedAnalyses::all(); } +PreservedAnalyses MemorySanitizerPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (Options.Kernel) + return PreservedAnalyses::all(); + insertModuleCtor(M); + return PreservedAnalyses::none(); +} + char MemorySanitizerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan", @@ -918,23 +944,6 @@ void MemorySanitizer::initializeModule(Module &M) { OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000); if (!CompileKernel) { - std::tie(MsanCtorFunction, std::ignore) = - getOrCreateSanitizerCtorAndInitFunctions( - M, kMsanModuleCtorName, kMsanInitName, - /*InitArgTypes=*/{}, - /*InitArgs=*/{}, - // This callback is invoked when the functions are created the first - // time. 
Hook them into the global ctors list in that case: - [&](Function *Ctor, FunctionCallee) { - if (!ClWithComdat) { - appendToGlobalCtors(M, Ctor, 0); - return; - } - Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); - Ctor->setComdat(MsanCtorComdat); - appendToGlobalCtors(M, Ctor, 0, Ctor); - }); - if (TrackOrigins) M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] { return new GlobalVariable( @@ -952,6 +961,8 @@ void MemorySanitizer::initializeModule(Module &M) { } bool MemorySanitizerLegacyPass::doInitialization(Module &M) { + if (!Options.Kernel) + insertModuleCtor(M); MSan.emplace(M, Options); return true; } @@ -4576,8 +4587,9 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, } bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) { - if (!CompileKernel && (&F == MsanCtorFunction)) + if (!CompileKernel && F.getName() == kMsanModuleCtorName) return false; + MemorySanitizerVisitor Visitor(F, *this, TLI); // Clear out readonly/readnone attributes. diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 643a03783e7a5..ac274a155a803 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -92,11 +92,10 @@ namespace { /// ensures the __tsan_init function is in the list of global constructors for /// the module. struct ThreadSanitizer { - ThreadSanitizer(Module &M); bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI); private: - void initializeCallbacks(Module &M); + void initialize(Module &M); bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); bool instrumentAtomic(Instruction *I, const DataLayout &DL); bool instrumentMemIntrinsic(Instruction *I); @@ -108,8 +107,6 @@ struct ThreadSanitizer { void InsertRuntimeIgnores(Function &F); Type *IntptrTy; - IntegerType *OrdTy; - // Callbacks to run-time library are computed in doInitialization. FunctionCallee TsanFuncEntry; FunctionCallee TsanFuncExit; FunctionCallee TsanIgnoreBegin; @@ -130,7 +127,6 @@ struct ThreadSanitizer { FunctionCallee TsanVptrUpdate; FunctionCallee TsanVptrLoad; FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; - Function *TsanCtorFunction; }; struct ThreadSanitizerLegacyPass : FunctionPass { @@ -143,16 +139,32 @@ struct ThreadSanitizerLegacyPass : FunctionPass { private: Optional TSan; }; + +void insertModuleCtor(Module &M) { + getOrCreateSanitizerCtorAndInitFunctions( + M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. 
Hook them into the global ctors list in that case: + [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); }); +} + } // namespace PreservedAnalyses ThreadSanitizerPass::run(Function &F, FunctionAnalysisManager &FAM) { - ThreadSanitizer TSan(*F.getParent()); + ThreadSanitizer TSan; if (TSan.sanitizeFunction(F, FAM.getResult(F))) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } +PreservedAnalyses ThreadSanitizerPass::run(Module &M, + ModuleAnalysisManager &MAM) { + insertModuleCtor(M); + return PreservedAnalyses::none(); +} + char ThreadSanitizerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan", "ThreadSanitizer: detects data races.", false, false) @@ -169,7 +181,8 @@ void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { } bool ThreadSanitizerLegacyPass::doInitialization(Module &M) { - TSan.emplace(M); + insertModuleCtor(M); + TSan.emplace(); return true; } @@ -183,7 +196,10 @@ FunctionPass *llvm::createThreadSanitizerLegacyPassPass() { return new ThreadSanitizerLegacyPass(); } -void ThreadSanitizer::initializeCallbacks(Module &M) { +void ThreadSanitizer::initialize(Module &M) { + const DataLayout &DL = M.getDataLayout(); + IntptrTy = DL.getIntPtrType(M.getContext()); + IRBuilder<> IRB(M.getContext()); AttributeList Attr; Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex, @@ -197,7 +213,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { IRB.getVoidTy()); TsanIgnoreEnd = M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy()); - OrdTy = IRB.getInt32Ty(); + IntegerType *OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const unsigned ByteSize = 1U << i; const unsigned BitSize = ByteSize * 8; @@ -280,20 +296,6 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy); } -ThreadSanitizer::ThreadSanitizer(Module &M) { - const DataLayout &DL = M.getDataLayout(); - IntptrTy = DL.getIntPtrType(M.getContext()); - std::tie(TsanCtorFunction, std::ignore) = - getOrCreateSanitizerCtorAndInitFunctions( - M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, - /*InitArgs=*/{}, - // This callback is invoked when the functions are created the first - // time. Hook them into the global ctors list in that case: - [&](Function *Ctor, FunctionCallee) { - appendToGlobalCtors(M, Ctor, 0); - }); -} - static bool isVtableAccess(Instruction *I) { if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) return Tag->isTBAAVtableAccess(); @@ -436,9 +438,9 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, const TargetLibraryInfo &TLI) { // This is required to prevent instrumenting call to __tsan_init from within // the module constructor. 
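// Because the constructor is now created by the module-level entry points
// (insertModuleCtor, reached from ThreadSanitizerPass::run(Module &, ...) and
// from the legacy pass's doInitialization), the function pass no longer holds
// a Function* to it and instead recognizes it by its well-known name, as the
// change just below shows. A rough sketch of how the two overloads could be
// scheduled together under the new pass manager (illustrative only, not taken
// from this patch; the actual registration lives elsewhere):
//
//   ModulePassManager MPM;
//   MPM.addPass(ThreadSanitizerPass());   // module part: inserts the ctor
//   FunctionPassManager FPM;
//   FPM.addPass(ThreadSanitizerPass());   // function part: instruments bodies
//   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));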
- if (&F == TsanCtorFunction) + if (F.getName() == kTsanModuleCtorName) return false; - initializeCallbacks(*F.getParent()); + initialize(*F.getParent()); SmallVector AllLoadsAndStores; SmallVector LocalLoadsAndStores; SmallVector AtomicAccesses; diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 343cc740ac355..728f228837cd7 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -63,6 +63,7 @@ STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumSExt, "Number of sext converted to zext"); +STATISTIC(NumAnd, "Number of ands removed"); STATISTIC(NumOverflows, "Number of overflow checks removed"); STATISTIC(NumSaturating, "Number of saturating arithmetics converted to normal arithmetics"); @@ -700,6 +701,29 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { return Changed; } +static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { + if (BinOp->getType()->isVectorTy()) + return false; + + // Pattern match (and lhs, C) where C includes a superset of bits which might + // be set in lhs. This is a common truncation idiom created by instcombine. + BasicBlock *BB = BinOp->getParent(); + Value *LHS = BinOp->getOperand(0); + ConstantInt *RHS = dyn_cast(BinOp->getOperand(1)); + if (!RHS || !RHS->getValue().isMask()) + return false; + + ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp); + if (!LRange.getUnsignedMax().ule(RHS->getValue())) + return false; + + BinOp->replaceAllUsesWith(LHS); + BinOp->eraseFromParent(); + NumAnd++; + return true; +} + + static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { if (Constant *C = LVI->getConstant(V, At->getParent(), At)) return C; @@ -774,6 +798,9 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, case Instruction::Sub: BBChanged |= processBinOp(cast(II), LVI); break; + case Instruction::And: + BBChanged |= processAnd(cast(II), LVI); + break; } } diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 461a5e2cba0d6..361b559ac02e5 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1915,11 +1915,9 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(FastMathFlags::getFast()); - Function *ArgFn = Arg->getCalledFunction(); - StringRef ArgNm = ArgFn->getName(); - Intrinsic::ID ArgID = ArgFn->getIntrinsicID(); + Intrinsic::ID ArgID = Arg->getIntrinsicID(); LibFunc ArgLb = NotLibFunc; - TLI->getLibFunc(ArgNm, ArgLb); + TLI->getLibFunc(Arg, ArgLb); // log(pow(x,y)) -> y*log(x) if (ArgLb == PowLb || ArgID == Intrinsic::pow) { @@ -1934,9 +1932,10 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) { substituteInParent(Arg, MulY); return MulY; } + // log(exp{,2,10}(y)) -> y*log({e,2,10}) // TODO: There is no exp10() intrinsic yet. 
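  // Spelled out, the folds above are just the logarithm power rule (applied
  // here under fast-math): with b the base of the outer log{,2,10} call and
  // k in {e, 2, 10} the base of the inner exponential,
  //   log_b(x^y) = y * log_b(x)   and   log_b(k^y) = y * log_b(k).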
- else if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb || + if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb || ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) { Constant *Eul; if (ArgLb == ExpLb || ArgID == Intrinsic::exp) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7e95038a5ebcf..df64ad8279026 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -355,6 +355,29 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { : ConstantFP::get(Ty, C); } +/// Returns "best known" trip count for the specified loop \p L as defined by +/// the following procedure: +/// 1) Returns exact trip count if it is known. +/// 2) Returns expected trip count according to profile data if any. +/// 3) Returns upper bound estimate if it is known. +/// 4) Returns None if all of the above failed. +static Optional getSmallBestKnownTC(ScalarEvolution &SE, Loop *L) { + // Check if exact trip count is known. + if (unsigned ExpectedTC = SE.getSmallConstantTripCount(L)) + return ExpectedTC; + + // Check if there is an expected trip count available from profile data. + if (LoopVectorizeWithBlockFrequency) + if (auto EstimatedTC = getLoopEstimatedTripCount(L)) + return EstimatedTC; + + // Check if upper bound estimate is known. + if (unsigned ExpectedTC = SE.getSmallConstantMaxTripCount(L)) + return ExpectedTC; + + return None; +} + namespace llvm { /// InnerLoopVectorizer vectorizes loops which contain only one basic @@ -7483,36 +7506,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { ORE, BFI, PSI, Hints); assert(L->empty() && "Inner loop expected."); + // Check the loop for a trip count threshold: vectorize loops with a tiny trip // count by optimizing for size, to minimize overheads. - // Prefer constant trip counts over profile data, over upper bound estimate. - unsigned ExpectedTC = 0; - bool HasExpectedTC = false; - if (const SCEVConstant *ConstExits = - dyn_cast(SE->getBackedgeTakenCount(L))) { - const APInt &ExitsCount = ConstExits->getAPInt(); - // We are interested in small values for ExpectedTC. Skip over those that - // can't fit an unsigned. - if (ExitsCount.ult(std::numeric_limits::max())) { - ExpectedTC = static_cast(ExitsCount.getZExtValue()) + 1; - HasExpectedTC = true; - } - } - // ExpectedTC may be large because it's bound by a variable. Check - // profiling information to validate we should vectorize. - if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) { - auto EstimatedTC = getLoopEstimatedTripCount(L); - if (EstimatedTC) { - ExpectedTC = *EstimatedTC; - HasExpectedTC = true; - } - } - if (!HasExpectedTC) { - ExpectedTC = SE->getSmallConstantMaxTripCount(L); - HasExpectedTC = (ExpectedTC > 0); - } - - if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) { + auto ExpectedTC = getSmallBestKnownTC(*SE, L); + if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. 
" << "This loop is worth vectorizing only if no scalar " << "iteration overheads are incurred."); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 14adb478cd863..f2420f6f65624 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -736,7 +736,7 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, auto NewIGIter = Old2New.find(IG); if (NewIGIter == Old2New.end()) Old2New[IG] = new InterleaveGroup( - IG->getFactor(), IG->isReverse(), IG->getAlignment()); + IG->getFactor(), IG->isReverse(), Align(IG->getAlignment())); if (Inst == IG->getInsertPos()) Old2New[IG]->setInsertPos(VPInst); @@ -744,7 +744,8 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, InterleaveGroupMap[VPInst] = Old2New[IG]; InterleaveGroupMap[VPInst]->insertMember( VPInst, IG->getIndex(Inst), - IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor()); + Align(IG->isReverse() ? (-1) * int(IG->getFactor()) + : IG->getFactor())); } } else if (VPRegionBlock *Region = dyn_cast(Block)) visitRegion(Region, Old2New, IAI); diff --git a/llvm/test/Analysis/MemorySSA/pr43426.ll b/llvm/test/Analysis/MemorySSA/pr43426.ll new file mode 100644 index 0000000000000..f603c32cf8da6 --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/pr43426.ll @@ -0,0 +1,40 @@ +; RUN: opt -licm -enable-mssa-loop-dependency -S %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @d() +define dso_local void @d() { +entry: + br label %header + +header: + store i32 1, i32* null, align 4 + br i1 true, label %cleanup53, label %body + +body: + br i1 undef, label %cleanup31, label %for.cond11 + +for.cond11: ; Needs branch as is + br i1 undef, label %unreachable, label %latch + +cleanup31: + br label %unreachable + +deadblock: + br i1 undef, label %unreachable, label %deadblock + +cleanup53: + %val = load i32, i32* null, align 4 + %cmpv = icmp eq i32 %val, 0 + br i1 %cmpv, label %cleanup63, label %latch + +latch: + br label %header + +cleanup63: + ret void + +unreachable: + unreachable +} + diff --git a/llvm/test/Analysis/MemorySSA/pr43541.ll b/llvm/test/Analysis/MemorySSA/pr43541.ll new file mode 100644 index 0000000000000..3f6b2e26bce86 --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/pr43541.ll @@ -0,0 +1,50 @@ +; RUN: opt -gvn-hoist -enable-mssa-loop-dependency -S < %s | FileCheck %s +; REQUIRES: asserts +%struct.job_pool.6.7 = type { i32 } + +; CHECK-LABEL: @f() +define dso_local void @f() { +entry: + br label %for.cond + +for.cond: ; preds = %for.end, %entry + br label %for.body + +for.body: ; preds = %for.cond + br label %if.end + +if.then: ; No predecessors! 
+ br label %if.end + +if.end: ; preds = %if.then, %for.body + br i1 false, label %for.body12.lr.ph, label %for.end + +for.body12.lr.ph: ; preds = %if.end + br label %for.body12 + +for.body12: ; preds = %if.end40, %for.body12.lr.ph + br label %if.then23 + +if.then23: ; preds = %for.body12 + br i1 undef, label %if.then24, label %if.else + +if.then24: ; preds = %if.then23 + %0 = load %struct.job_pool.6.7*, %struct.job_pool.6.7** undef, align 8 + br label %if.end40 + +if.else: ; preds = %if.then23 + %1 = load %struct.job_pool.6.7*, %struct.job_pool.6.7** undef, align 8 + br label %if.end40 + +if.end40: ; preds = %if.else, %if.then24 + br i1 false, label %for.body12, label %for.cond9.for.end_crit_edge + +for.cond9.for.end_crit_edge: ; preds = %if.end40 + br label %for.end + +for.end: ; preds = %for.cond9.for.end_crit_edge, %if.end + br i1 true, label %if.then45, label %for.cond + +if.then45: ; preds = %for.end + ret void +} diff --git a/llvm/test/Assembler/asm-path-writer.ll b/llvm/test/Assembler/asm-path-writer.ll index f22639566721c..a0e63ef2040f6 100644 --- a/llvm/test/Assembler/asm-path-writer.ll +++ b/llvm/test/Assembler/asm-path-writer.ll @@ -1,6 +1,6 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s -; CHECK: ^0 = module: (path: ".\5Cf4folder\5Cabc.o", hash: (0, 0, 0, 0, 0)) +; CHECK: ^0 = module: (path: ".\\f4folder\\abc.o", hash: (0, 0, 0, 0, 0)) -^0 = module: (path: ".\5Cf4folder\5Cabc.o", hash: (0, 0, 0, 0, 0)) +^0 = module: (path: ".\5Cf4folder\\abc.o", hash: (0, 0, 0, 0, 0)) ^1 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 2))) diff --git a/llvm/test/Assembler/source-filename-backslash.ll b/llvm/test/Assembler/source-filename-backslash.ll index 8669401cf93df..c4b25c6053d84 100644 --- a/llvm/test/Assembler/source-filename-backslash.ll +++ b/llvm/test/Assembler/source-filename-backslash.ll @@ -1,8 +1,7 @@ - ; Make sure that llvm-as/llvm-dis properly assemble/disassemble the ; source_filename. 
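; Note on the escaping exercised here: in LLVM assembly string literals a
; backslash can be written either as "\\" or as the hex escape "\5C" (0x5C is
; ASCII '\'), and both parse to the same byte. The updated CHECK lines in these
; two backslash tests expect llvm-dis to print the "\\" spelling, while the
; inputs still use a mix of both forms.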
; RUN: llvm-as < %s | llvm-dis | FileCheck %s -; CHECK: source_filename = "C:\5Cpath\5Cwith\5Cbackslashes\5Ctest.cc" -source_filename = "C:\5Cpath\5Cwith\5Cbackslashes\5Ctest.cc" +; CHECK: source_filename = "C:\\path\\with\\backslashes\\test.cc" +source_filename = "C:\\path\\with\5Cbackslashes\\test.cc" diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll index 9651796ff937c..7cbf4e3321aa0 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll @@ -39,14 +39,13 @@ define i64 @func2(i64 %x, i64 %y) nounwind { define i16 @func16(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: adds w10, w8, w1, lsl #16 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: cmp w10, #0 // =0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: adds w8, w8, w1, lsl #16 -; CHECK-NEXT: csel w8, w9, w8, vs -; CHECK-NEXT: asr w0, w8, #16 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: mov w9, #32767 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: mov w9, #-32768 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y); ret i16 %tmp; @@ -55,14 +54,13 @@ define i16 @func16(i16 %x, i16 %y) nounwind { define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #24 -; CHECK-NEXT: adds w10, w8, w1, lsl #24 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: cmp w10, #0 // =0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: adds w8, w8, w1, lsl #24 -; CHECK-NEXT: csel w8, w9, w8, vs -; CHECK-NEXT: asr w0, w8, #24 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: mov w9, #127 +; CHECK-NEXT: cmp w8, #127 // =127 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: cmn w8, #128 // =128 +; CHECK-NEXT: mov w9, #-128 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y); ret i8 %tmp; @@ -71,14 +69,13 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-LABEL: func3: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #28 -; CHECK-NEXT: adds w10, w8, w1, lsl #28 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: cmp w10, #0 // =0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: adds w8, w8, w1, lsl #28 -; CHECK-NEXT: csel w8, w9, w8, vs -; CHECK-NEXT: asr w0, w8, #28 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: cmp w8, #7 // =7 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: cmn w8, #8 // =8 +; CHECK-NEXT: mov w9, #-8 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y); ret i4 %tmp; diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 5a2a24ee9c8fe..7c713c62d07f4 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -236,30 +236,23 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-NEXT: ldrb w9, [x1] ; CHECK-NEXT: ldrb w10, [x0, #1] ; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: ldrb w12, [x0, #2] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x1, #2] ; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldrb w8, [x0, #2] +; CHECK-NEXT: ldrb w9, [x1, #2] ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: ldrb w9, [x0, #3] -; CHECK-NEXT: ldrb w10, [x1, #3] ; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: mov v0.h[2], w12 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: mov 
v0.h[3], w9 -; CHECK-NEXT: mov v1.h[3], w10 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: add v3.4h, v0.4h, v1.4h -; CHECK-NEXT: cmlt v4.4h, v3.4h, #0 -; CHECK-NEXT: mvni v2.4h, #128, lsl #8 -; CHECK-NEXT: cmlt v1.4h, v1.4h, #0 -; CHECK-NEXT: cmgt v0.4h, v0.4h, v3.4h -; CHECK-NEXT: mvn v5.8b, v4.8b -; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b -; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b -; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-NEXT: ldrb w10, [x0, #3] +; CHECK-NEXT: ldrb w11, [x1, #3] +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v1.h[3], w11 +; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: movi v1.4h, #127 +; CHECK-NEXT: smin v0.4h, v0.4h, v1.4h +; CHECK-NEXT: mvni v1.4h, #127 +; CHECK-NEXT: smax v0.4h, v0.4h, v1.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -278,21 +271,14 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-NEXT: ldrb w10, [x0, #1] ; CHECK-NEXT: ldrb w11, [x1, #1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v2.s[1], w11 -; CHECK-NEXT: shl v2.2s, v2.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: add v3.2s, v0.2s, v2.2s -; CHECK-NEXT: cmlt v4.2s, v3.2s, #0 -; CHECK-NEXT: mvni v1.2s, #128, lsl #24 -; CHECK-NEXT: cmlt v2.2s, v2.2s, #0 -; CHECK-NEXT: cmgt v0.2s, v0.2s, v3.2s -; CHECK-NEXT: mvn v5.8b, v4.8b -; CHECK-NEXT: eor v0.8b, v2.8b, v0.8b -; CHECK-NEXT: bsl v1.8b, v4.8b, v5.8b -; CHECK-NEXT: bsl v0.8b, v1.8b, v3.8b -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2s, #127 +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mvni v1.2s, #127 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strb w8, [x2, #1] @@ -336,21 +322,14 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-NEXT: ldrh w10, [x0, #2] ; CHECK-NEXT: ldrh w11, [x1, #2] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v2.s[1], w11 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: add v3.2s, v0.2s, v2.2s -; CHECK-NEXT: cmlt v4.2s, v3.2s, #0 -; CHECK-NEXT: mvni v1.2s, #128, lsl #24 -; CHECK-NEXT: cmlt v2.2s, v2.2s, #0 -; CHECK-NEXT: cmgt v0.2s, v0.2s, v3.2s -; CHECK-NEXT: mvn v5.8b, v4.8b -; CHECK-NEXT: eor v0.8b, v2.8b, v0.8b -; CHECK-NEXT: bsl v1.8b, v4.8b, v5.8b -; CHECK-NEXT: bsl v0.8b, v1.8b, v3.8b -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2s, #127, msl #8 +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mvni v1.2s, #127, msl #8 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strh w8, [x2, #2] @@ -462,18 +441,11 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 -; CHECK-NEXT: shl v0.16b, v0.16b, #4 -; CHECK-NEXT: add v3.16b, v0.16b, v1.16b -; CHECK-NEXT: cmlt v4.16b, v3.16b, #0 -; CHECK-NEXT: movi v2.16b, #127 -; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 -; CHECK-NEXT: cmgt v0.16b, 
v0.16b, v3.16b -; CHECK-NEXT: mvn v5.16b, v4.16b -; CHECK-NEXT: bsl v2.16b, v4.16b, v5.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b -; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b -; CHECK-NEXT: sshr v0.16b, v0.16b, #4 +; CHECK-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.16b, #7 +; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.16b, #248 +; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y) ret <16 x i4> %z @@ -482,18 +454,11 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-NEXT: add v3.16b, v0.16b, v1.16b -; CHECK-NEXT: cmlt v4.16b, v3.16b, #0 -; CHECK-NEXT: movi v2.16b, #127 -; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 -; CHECK-NEXT: cmgt v0.16b, v0.16b, v3.16b -; CHECK-NEXT: mvn v5.16b, v4.16b -; CHECK-NEXT: bsl v2.16b, v4.16b, v5.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b -; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll index bd1635a1e6b48..292a934d2806d 100644 --- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -118,3 +118,98 @@ define i32 @PR31175(i32 %x, i32 %y) { ret i32 %sel } +define i8 @sel_shift_bool_i8(i1 %t) { +; CHECK-LABEL: sel_shift_bool_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: mov w8, #-128 +; CHECK-NEXT: csel w0, w8, wzr, ne +; CHECK-NEXT: ret + %shl = select i1 %t, i8 128, i8 0 + ret i8 %shl +} + +define i16 @sel_shift_bool_i16(i1 %t) { +; CHECK-LABEL: sel_shift_bool_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: mov w8, #128 +; CHECK-NEXT: csel w0, w8, wzr, ne +; CHECK-NEXT: ret + %shl = select i1 %t, i16 128, i16 0 + ret i16 %shl +} + +define i32 @sel_shift_bool_i32(i1 %t) { +; CHECK-LABEL: sel_shift_bool_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: mov w8, #64 +; CHECK-NEXT: csel w0, w8, wzr, ne +; CHECK-NEXT: ret + %shl = select i1 %t, i32 64, i32 0 + ret i32 %shl +} + +define i64 @sel_shift_bool_i64(i1 %t) { +; CHECK-LABEL: sel_shift_bool_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: mov w8, #65536 +; CHECK-NEXT: csel x0, x8, xzr, ne +; CHECK-NEXT: ret + %shl = select i1 %t, i64 65536, i64 0 + ret i64 %shl +} + +define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { +; CHECK-LABEL: sel_shift_bool_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: shl v0.16b, v0.16b, #7 +; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: movi v1.16b, #128 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %shl = select <16 x i1> %t, <16 x i8> , <16 x i8> zeroinitializer + ret <16 x i8> %shl +} + +define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) { +; CHECK-LABEL: sel_shift_bool_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: shl v0.8h, v0.8h, #15 +; CHECK-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-NEXT: movi v1.8h, #128 +; CHECK-NEXT: and 
v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %shl= select <8 x i1> %t, <8 x i16> , <8 x i16> zeroinitializer + ret <8 x i16> %shl +} + +define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) { +; CHECK-LABEL: sel_shift_bool_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: movi v1.4s, #64 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %shl = select <4 x i1> %t, <4 x i32> , <4 x i32> zeroinitializer + ret <4 x i32> %shl +} + +define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) { +; CHECK-LABEL: sel_shift_bool_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: mov w8, #65536 +; CHECK-NEXT: shl v0.2d, v0.2d, #63 +; CHECK-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %shl = select <2 x i1> %t, <2 x i64> , <2 x i64> zeroinitializer + ret <2 x i64> %shl +} diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll index 0fbe3c4a71cde..f934c8d3b23f3 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll @@ -39,14 +39,13 @@ define i64 @func2(i64 %x, i64 %y) nounwind { define i16 @func16(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: subs w10, w8, w1, lsl #16 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: cmp w10, #0 // =0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: subs w8, w8, w1, lsl #16 -; CHECK-NEXT: csel w8, w9, w8, vs -; CHECK-NEXT: asr w0, w8, #16 +; CHECK-NEXT: sub w8, w0, w1 +; CHECK-NEXT: mov w9, #32767 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: mov w9, #-32768 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y); ret i16 %tmp; @@ -55,14 +54,13 @@ define i16 @func16(i16 %x, i16 %y) nounwind { define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #24 -; CHECK-NEXT: subs w10, w8, w1, lsl #24 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: cmp w10, #0 // =0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: subs w8, w8, w1, lsl #24 -; CHECK-NEXT: csel w8, w9, w8, vs -; CHECK-NEXT: asr w0, w8, #24 +; CHECK-NEXT: sub w8, w0, w1 +; CHECK-NEXT: mov w9, #127 +; CHECK-NEXT: cmp w8, #127 // =127 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: cmn w8, #128 // =128 +; CHECK-NEXT: mov w9, #-128 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y); ret i8 %tmp; @@ -71,14 +69,13 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-LABEL: func3: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #28 -; CHECK-NEXT: subs w10, w8, w1, lsl #28 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: cmp w10, #0 // =0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: subs w8, w8, w1, lsl #28 -; CHECK-NEXT: csel w8, w9, w8, vs -; CHECK-NEXT: asr w0, w8, #28 +; CHECK-NEXT: sub w8, w0, w1 +; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: cmp w8, #7 // =7 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: cmn w8, #8 // =8 +; CHECK-NEXT: mov w9, #-8 +; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y); ret i4 %tmp; diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 664793bba2776..c33104da2aaad 100644 --- 
a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -237,30 +237,23 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-NEXT: ldrb w9, [x1] ; CHECK-NEXT: ldrb w10, [x0, #1] ; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: ldrb w12, [x0, #2] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x1, #2] ; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldrb w8, [x0, #2] +; CHECK-NEXT: ldrb w9, [x1, #2] ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: ldrb w9, [x0, #3] -; CHECK-NEXT: ldrb w10, [x1, #3] ; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: mov v0.h[2], w12 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: mov v0.h[3], w9 -; CHECK-NEXT: mov v1.h[3], w10 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sub v3.4h, v0.4h, v1.4h -; CHECK-NEXT: cmlt v4.4h, v3.4h, #0 -; CHECK-NEXT: mvni v2.4h, #128, lsl #8 -; CHECK-NEXT: cmgt v1.4h, v1.4h, #0 -; CHECK-NEXT: cmgt v0.4h, v0.4h, v3.4h -; CHECK-NEXT: mvn v5.8b, v4.8b -; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b -; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b -; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-NEXT: ldrb w10, [x0, #3] +; CHECK-NEXT: ldrb w11, [x1, #3] +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v1.h[3], w11 +; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: movi v1.4h, #127 +; CHECK-NEXT: smin v0.4h, v0.4h, v1.4h +; CHECK-NEXT: mvni v1.4h, #127 +; CHECK-NEXT: smax v0.4h, v0.4h, v1.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -279,21 +272,14 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-NEXT: ldrb w10, [x0, #1] ; CHECK-NEXT: ldrb w11, [x1, #1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v2.s[1], w11 -; CHECK-NEXT: shl v2.2s, v2.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sub v3.2s, v0.2s, v2.2s -; CHECK-NEXT: cmlt v4.2s, v3.2s, #0 -; CHECK-NEXT: mvni v1.2s, #128, lsl #24 -; CHECK-NEXT: cmgt v2.2s, v2.2s, #0 -; CHECK-NEXT: cmgt v0.2s, v0.2s, v3.2s -; CHECK-NEXT: mvn v5.8b, v4.8b -; CHECK-NEXT: eor v0.8b, v2.8b, v0.8b -; CHECK-NEXT: bsl v1.8b, v4.8b, v5.8b -; CHECK-NEXT: bsl v0.8b, v1.8b, v3.8b -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2s, #127 +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mvni v1.2s, #127 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strb w8, [x2, #1] @@ -337,21 +323,14 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-NEXT: ldrh w10, [x0, #2] ; CHECK-NEXT: ldrh w11, [x1, #2] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v2.s[1], w11 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sub v3.2s, v0.2s, v2.2s -; CHECK-NEXT: cmlt v4.2s, v3.2s, #0 -; CHECK-NEXT: mvni v1.2s, #128, lsl #24 -; CHECK-NEXT: cmgt v2.2s, v2.2s, #0 -; CHECK-NEXT: cmgt v0.2s, v0.2s, v3.2s -; CHECK-NEXT: mvn v5.8b, v4.8b -; CHECK-NEXT: eor v0.8b, v2.8b, v0.8b -; CHECK-NEXT: bsl v1.8b, v4.8b, v5.8b -; CHECK-NEXT: bsl v0.8b, v1.8b, v3.8b -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2s, 
#127, msl #8 +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: mvni v1.2s, #127, msl #8 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strh w8, [x2, #2] @@ -463,18 +442,11 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 -; CHECK-NEXT: shl v0.16b, v0.16b, #4 -; CHECK-NEXT: sub v3.16b, v0.16b, v1.16b -; CHECK-NEXT: cmlt v4.16b, v3.16b, #0 -; CHECK-NEXT: movi v2.16b, #127 -; CHECK-NEXT: cmgt v1.16b, v1.16b, #0 -; CHECK-NEXT: cmgt v0.16b, v0.16b, v3.16b -; CHECK-NEXT: mvn v5.16b, v4.16b -; CHECK-NEXT: bsl v2.16b, v4.16b, v5.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b -; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b -; CHECK-NEXT: sshr v0.16b, v0.16b, #4 +; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.16b, #7 +; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.16b, #248 +; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4> %x, <16 x i4> %y) ret <16 x i4> %z @@ -483,18 +455,11 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-NEXT: sub v3.16b, v0.16b, v1.16b -; CHECK-NEXT: cmlt v4.16b, v3.16b, #0 -; CHECK-NEXT: movi v2.16b, #127 -; CHECK-NEXT: cmgt v1.16b, v1.16b, #0 -; CHECK-NEXT: cmgt v0.16b, v0.16b, v3.16b -; CHECK-NEXT: mvn v5.16b, v4.16b -; CHECK-NEXT: bsl v2.16b, v4.16b, v5.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b -; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/llvm/test/CodeGen/AArch64/uadd_sat.ll b/llvm/test/CodeGen/AArch64/uadd_sat.ll index 61c40bc56665c..f2bbc3e5de688 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat.ll @@ -30,10 +30,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind { define i16 @func16(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: adds w8, w8, w1, lsl #16 -; CHECK-NEXT: csinv w8, w8, wzr, lo -; CHECK-NEXT: lsr w0, w8, #16 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: mov w9, #65535 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y); ret i16 %tmp; @@ -42,10 +42,10 @@ define i16 @func16(i16 %x, i16 %y) nounwind { define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #24 -; CHECK-NEXT: adds w8, w8, w1, lsl #24 -; CHECK-NEXT: csinv w8, w8, wzr, lo -; CHECK-NEXT: lsr w0, w8, #24 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w8, #255 // =255 +; CHECK-NEXT: mov w9, #255 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y); ret i8 %tmp; @@ -54,10 +54,10 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-LABEL: func3: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl 
w8, w0, #28 -; CHECK-NEXT: adds w8, w8, w1, lsl #28 -; CHECK-NEXT: csinv w8, w8, wzr, lo -; CHECK-NEXT: lsr w0, w8, #28 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w8, #15 // =15 +; CHECK-NEXT: mov w9, #15 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y); ret i4 %tmp; diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index 55b42e7905320..604207a5ff6af 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -142,28 +142,25 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w9, [x1] ; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w11, [x1, #1] +; CHECK-NEXT: ldrb w9, [x1] ; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrb w9, [x1, #2] +; CHECK-NEXT: ldrb w11, [x1, #1] ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: ldrb w8, [x0, #2] -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: ldrb w11, [x1, #3] +; CHECK-NEXT: ldrb w9, [x1, #2] ; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: mov v1.h[1], w11 ; CHECK-NEXT: ldrb w10, [x0, #3] -; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: ldrb w11, [x1, #3] ; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: mov v1.h[3], w11 +; CHECK-NEXT: mov v1.h[2], w9 ; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: mvn v2.8b, v1.8b -; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h +; CHECK-NEXT: mov v1.h[3], w11 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ushr v0.4h, v0.4h, #8 +; CHECK-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -177,20 +174,17 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w9, [x1] ; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w11, [x1, #1] +; CHECK-NEXT: ldrb w9, [x1] ; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldrb w11, [x1, #1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: mvn v2.8b, v1.8b -; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s +; CHECK-NEXT: mov v1.s[1], w11 ; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-NEXT: movi d1, #0x0000ff000000ff +; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strb w8, [x2, #1] @@ -223,20 +217,17 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x1] ; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w11, [x1, #2] +; CHECK-NEXT: ldrh w9, [x1] ; CHECK-NEXT: ldrh w10, [x0, #2] -; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldrh w11, [x1, #2] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: mvn v2.8b, v1.8b -; CHECK-NEXT: umin v0.2s, v0.2s, 
v2.2s +; CHECK-NEXT: mov v1.s[1], w11 ; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strh w8, [x2, #2] @@ -318,12 +309,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 -; CHECK-NEXT: shl v0.16b, v0.16b, #4 -; CHECK-NEXT: mvn v2.16b, v1.16b -; CHECK-NEXT: umin v0.16b, v0.16b, v2.16b ; CHECK-NEXT: add v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ushr v0.16b, v0.16b, #4 +; CHECK-NEXT: movi v1.16b, #15 +; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y) ret <16 x i4> %z @@ -332,12 +320,9 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-NEXT: mvn v2.16b, v1.16b -; CHECK-NEXT: umin v0.16b, v0.16b, v2.16b ; CHECK-NEXT: add v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ushr v0.16b, v0.16b, #7 +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/llvm/test/CodeGen/AArch64/usub_sat.ll b/llvm/test/CodeGen/AArch64/usub_sat.ll index 0238c263d6c75..dd969bdec1aab 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat.ll @@ -30,10 +30,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind { define i16 @func16(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: subs w8, w8, w1, lsl #16 -; CHECK-NEXT: csel w8, wzr, w8, lo -; CHECK-NEXT: lsr w0, w8, #16 +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: csel w8, w0, w1, hi +; CHECK-NEXT: sub w0, w8, w1 ; CHECK-NEXT: ret %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y); ret i16 %tmp; @@ -42,10 +41,9 @@ define i16 @func16(i16 %x, i16 %y) nounwind { define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #24 -; CHECK-NEXT: subs w8, w8, w1, lsl #24 -; CHECK-NEXT: csel w8, wzr, w8, lo -; CHECK-NEXT: lsr w0, w8, #24 +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: csel w8, w0, w1, hi +; CHECK-NEXT: sub w0, w8, w1 ; CHECK-NEXT: ret %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y); ret i8 %tmp; @@ -54,10 +52,9 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-LABEL: func3: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #28 -; CHECK-NEXT: subs w8, w8, w1, lsl #28 -; CHECK-NEXT: csel w8, wzr, w8, lo -; CHECK-NEXT: lsr w0, w8, #28 +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: csel w8, w0, w1, hi +; CHECK-NEXT: sub w0, w8, w1 ; CHECK-NEXT: ret %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y); ret i4 %tmp; diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index b7a8be433ccc1..f0cceb3621e2e 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -144,11 +144,8 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-NEXT: mov v1.h[2], w9 ; CHECK-NEXT: mov v0.h[3], w10 ; CHECK-NEXT: mov v1.h[3], w11 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; 
CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: umax v0.4h, v0.4h, v1.4h ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ushr v0.4h, v0.4h, #8 ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -170,11 +167,8 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: umax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strb w8, [x2, #1] @@ -214,11 +208,8 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-NEXT: umax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strh w8, [x2, #2] @@ -295,11 +286,8 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 -; CHECK-NEXT: shl v0.16b, v0.16b, #4 ; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ushr v0.16b, v0.16b, #4 ; CHECK-NEXT: ret %z = call <16 x i4> @llvm.usub.sat.v16i4(<16 x i4> %x, <16 x i4> %y) ret <16 x i4> %z @@ -308,11 +296,8 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind { ; CHECK-LABEL: v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #7 -; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b ; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ushr v0.16b, v0.16b, #7 ; CHECK-NEXT: ret %z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y) ret <16 x i1> %z diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir index d97118bf14864..5763a878708a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir @@ -143,7 +143,7 @@ body: | ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir index 72face22e929d..d335ce8b8d7bd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir @@ -146,7 +146,7 @@ body: | ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1 ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 - ; GCN: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s16>) = COPY $sgpr0 %1:sgpr(<2 x s16>) = COPY $sgpr1 @@ -192,7 +192,7 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v8s16_s_v4s16_s_v4s16 ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3 @@ -350,8 +350,8 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-LABEL: name: test_concat_vectors_s_v12s16_s_v8s16_s_v8s16 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] %0:sgpr(<8 x s16>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 @@ -461,7 +461,7 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v4s32_s_v2s32_s_v2s32 ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3 @@ -524,8 +524,8 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-LABEL: name: test_concat_vectors_s_v8s32_s_v4s32_s_v4s32 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 @@ -596,8 +596,8 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-LABEL: name: test_concat_vectors_s_v4s64_s_v2s64_s_v2s64 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN: 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 @@ -656,10 +656,10 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-LABEL: name: test_concat_vectors_s_v8s64_s_v2s64_s_v2s64_s_v2s64_s_v2s64 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN: [[COPY2:%[0-9]+]]:sreg_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 - ; GCN: [[COPY3:%[0-9]+]]:sreg_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7, [[COPY2]], %subreg.sub8_sub9_sub10_sub11, [[COPY3]], %subreg.sub12_sub13_sub14_sub15 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 @@ -680,8 +680,8 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-LABEL: name: test_concat_vectors_s_v4p1_s_v2p1_s_v2p1 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 @@ -702,7 +702,7 @@ body: | ; GCN-LABEL: name: test_concat_vectors_s_v4p3_s_v2p3_s_v2p3 ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] %0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir index c120c96174128..7e39fa705eabc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -256,9 +256,9 @@ body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 ; CHECK-LABEL: name: insert_s_s128_s_s64_0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1 ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -291,9 +291,9 @@ body: | bb.0: 
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5 ; CHECK-LABEL: name: insert_s_s128_s_s64_64 - ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3 ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 @@ -368,7 +368,7 @@ body: | ; CHECK-LABEL: name: insert_s_s128_s_s96_0 ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2 ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -388,7 +388,7 @@ body: | ; CHECK-LABEL: name: insert_s_s128_s_s96_32 ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3 ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8 @@ -468,7 +468,7 @@ body: | ; CHECK-LABEL: name: insert_s_s256_s_s128_0 ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11 ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2_sub3 ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir index 2c53482178d42..e48716cc32a26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -208,23 +208,23 @@ body: | ; GFX6-LABEL: name: load_constant_v4s32_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v4s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: 
load_constant_v4s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v4s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) @@ -322,23 +322,23 @@ body: | ; GFX6-LABEL: name: load_constant_v2s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v2s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v2s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v2s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) @@ -360,22 +360,22 @@ body: | ; GFX6-LABEL: name: load_constant_v2p1 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:sreg_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX7-LABEL: name: load_constant_v2p1 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:sreg_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX7: 
[[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_constant_v2p1 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sreg_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_constant_v2p1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sreg_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) @@ -397,22 +397,22 @@ body: | ; GFX6-LABEL: name: load_constant_s128_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:sreg_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) ; GFX7-LABEL: name: load_constant_s128_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:sreg_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX7: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_constant_s128_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sreg_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_constant_s128_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sreg_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 4) @@ -660,22 +660,22 @@ body: | ; GFX6-LABEL: name: load_constant_v8s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:sreg_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX7-LABEL: name: load_constant_v8s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:sreg_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX7: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX8-LABEL: name: load_constant_v8s16 ; GFX8: 
liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sreg_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX10-LABEL: name: load_constant_v8s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sreg_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir index 8332de8992bd0..cb4ebabb586ca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-merge-values.mir @@ -198,7 +198,7 @@ body: | ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1 ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2 ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -248,7 +248,7 @@ body: | ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -420,8 +420,8 @@ body: | ; GCN-LABEL: name: test_merge_values_s_s256_s_s128_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir index 390f5d2e209d2..d5e7bc4353127 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.mir @@ -128,7 +128,7 @@ body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-LABEL: name: trunc_sgpr_s128_to_s16 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: 
[[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub0 ; GCN: S_ENDPGM 0, implicit [[COPY1]] %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 @@ -163,7 +163,7 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-LABEL: name: trunc_sgpr_s256_to_s128 ; GCN: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY [[COPY]].sub0_sub1_sub2_sub3 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[COPY]].sub0_sub1_sub2_sub3 ; GCN: S_ENDPGM 0, implicit [[COPY1]] %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s128) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index 8bc4e08c59a70..ad6b23c9413b2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -141,7 +141,7 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s32_s_s32_s32_s_s32_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub0 ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1 ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub2 @@ -164,7 +164,7 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s_s128 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]].sub0_sub1 ; GCN: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]].sub2_sub3 ; GCN: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY2]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index 9d0210c782330..8819030007848 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -623,25 +623,28 @@ body: | ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[AND1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[ASHR4]], [[AND2]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ASHR3]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ASHR5]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: 
[[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR5]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_ashr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -672,13 +675,13 @@ body: | ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC3]](s16) ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -771,8 +774,10 @@ body: | ; SI: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[ASHR6]], [[AND3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_ashr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -799,8 +804,10 @@ body: | ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR 
[[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16), [[ASHR2]](s16), [[ASHR3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR]](s16), [[ASHR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR2]](s16), [[ASHR3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_ashr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir index 5b53a8c342644..760caa395b799 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir @@ -822,3 +822,303 @@ body: | %4:_(<4 x s128>) = G_BUILD_VECTOR %0, %1, %2, %3 S_NOP 0, implicit %4 ... + +--- +name: build_vector_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: build_vector_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3 + S_NOP 0, implicit %4 +... + +--- +name: build_vector_v3s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: build_vector_v3s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CHECK: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s16) = G_TRUNC %0 + %4:_(s16) = G_TRUNC %1 + %5:_(s16) = G_TRUNC %2 + %6:_(<3 x s16>) = G_BUILD_VECTOR %3, %4, %5 + S_NOP 0, implicit %6 +... 
+ +--- +name: build_vector_v4s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: build_vector_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s16) = G_TRUNC %0 + %5:_(s16) = G_TRUNC %1 + %6:_(s16) = G_TRUNC %2 + %7:_(s16) = G_TRUNC %3 + %8:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %6, %7 + S_NOP 0, implicit %8 +... + +--- +name: build_vector_v5s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; CHECK-LABEL: name: build_vector_v5s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: S_NOP 0, implicit [[EXTRACT]](<5 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s16) = G_TRUNC %0 + %6:_(s16) = G_TRUNC %1 + %7:_(s16) = G_TRUNC %2 + %8:_(s16) = G_TRUNC %3 + %9:_(s16) = G_TRUNC %4 + %10:_(<5 x s16>) = G_BUILD_VECTOR %5, %6, %7, %8, %9 + S_NOP 0, implicit %10 +... 
+ +--- +name: build_vector_v7s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + + ; CHECK-LABEL: name: build_vector_v7s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<7 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<8 x s16>), 0 + ; CHECK: S_NOP 0, implicit [[EXTRACT]](<7 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s16) = G_TRUNC %0 + %8:_(s16) = G_TRUNC %1 + %9:_(s16) = G_TRUNC %2 + %10:_(s16) = G_TRUNC %3 + %11:_(s16) = G_TRUNC %4 + %12:_(s16) = G_TRUNC %5 + %13:_(s16) = G_TRUNC %6 + %14:_(<7 x s16>) = G_BUILD_VECTOR %7, %8, %9, %10, %11, %12, %13 + S_NOP 0, implicit %14 +... 
+ +--- +name: build_vector_v8s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + + ; CHECK-LABEL: name: build_vector_v8s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s16) = G_TRUNC %0 + %9:_(s16) = G_TRUNC %1 + %10:_(s16) = G_TRUNC %2 + %11:_(s16) = G_TRUNC %3 + %12:_(s16) = G_TRUNC %4 + %13:_(s16) = G_TRUNC %5 + %14:_(s16) = G_TRUNC %6 + %15:_(s16) = G_TRUNC %7 + %16:_(<8 x s16>) = G_BUILD_VECTOR %8, %9, %10, %11, %12, %13, %14, %15 + S_NOP 0, implicit %16 +... 
+ +--- +name: build_vector_v16s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + + ; CHECK-LABEL: name: build_vector_v16s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16) + ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16) + ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC12]](s16), [[TRUNC13]](s16) + ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC14]](s16), [[TRUNC15]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) + ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<16 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s32) = COPY $vgpr8 + %9:_(s32) = COPY $vgpr9 + %10:_(s32) = COPY $vgpr10 + %11:_(s32) = COPY $vgpr11 + %12:_(s32) 
= COPY $vgpr12 + %13:_(s32) = COPY $vgpr13 + %14:_(s32) = COPY $vgpr14 + %15:_(s32) = COPY $vgpr15 + %16:_(s16) = G_TRUNC %0 + %17:_(s16) = G_TRUNC %1 + %18:_(s16) = G_TRUNC %2 + %19:_(s16) = G_TRUNC %3 + %20:_(s16) = G_TRUNC %4 + %21:_(s16) = G_TRUNC %5 + %22:_(s16) = G_TRUNC %6 + %23:_(s16) = G_TRUNC %7 + %24:_(s16) = G_TRUNC %8 + %25:_(s16) = G_TRUNC %9 + %26:_(s16) = G_TRUNC %10 + %27:_(s16) = G_TRUNC %11 + %28:_(s16) = G_TRUNC %12 + %29:_(s16) = G_TRUNC %13 + %30:_(s16) = G_TRUNC %14 + %31:_(s16) = G_TRUNC %15 + %32:_(<16 x s16>) = G_BUILD_VECTOR %16, %17, %18, %19, %20, %21, %22, %23, %24, %25, %26, %27, %28, %29, %30, %31 + S_NOP 0, implicit %32 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index b3a14ce947d97..521ec195405f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -513,15 +513,18 @@ body: | ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[TRUNC1]](<3 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[DEF1]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 @@ -538,18 +541,22 @@ body: | ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s1>) = G_TRUNC [[DEF]](<6 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s1>) = G_EXTRACT [[TRUNC]](<6 x s1>), 0 ; CHECK: [[UV:%[0-9]+]]:_(s1), [[UV1:%[0-9]+]]:_(s1), [[UV2:%[0-9]+]]:_(s1), [[UV3:%[0-9]+]]:_(s1), [[UV4:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[EXTRACT]](<5 x s1>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s1) - ; 
CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<5 x s16>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>) - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC2:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC2]], [[TRUNC1]](<5 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s1) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s1) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s1) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s1) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s1) + ; CHECK: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[ANYEXT3]](s16) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ANYEXT4]](s16), [[DEF1]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF2]](<6 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT1]](<5 x s16>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT5]](s32) %0:_(<5 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_EXTRACT %0, 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir index 8824553bf1934..f223805cdfdc9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -358,12 +358,12 @@ body: | ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fadd_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; 
VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -393,12 +393,12 @@ body: | ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC3]] ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC4]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fadd_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -479,8 +479,10 @@ body: | ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fadd_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -507,8 +509,10 @@ body: | ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC5]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC6]] ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fadd_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir index 4a9ce6193846f..69b6f598d2ecf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ 
-235,12 +235,12 @@ body: | ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fcanonicalize_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -258,12 +258,12 @@ body: | ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -321,8 +321,10 @@ body: | ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: 
test_fcanonicalize_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -339,8 +341,10 @@ body: | ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16), [[FCANONICALIZE2]](s16), [[FCANONICALIZE3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE]](s16), [[FCANONICALIZE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FCANONICALIZE2]](s16), [[FCANONICALIZE3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir index 5b57702e0c606..d08a1ac6403bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -344,12 +344,12 @@ body: | ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fcos_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -374,12 +374,12 @@ body: | ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; VI: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 
0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fcos_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -401,12 +401,12 @@ body: | ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16) ; GFX9: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[DEF2]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FCOS %0 S_NOP 0, implicit %1 @@ -451,8 +451,10 @@ body: | ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s32) ; SI: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](s32) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fcos_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -478,8 +480,10 @@ body: | ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; VI: [[INT6:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s16) ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT6]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16), [[INT5]](s16), [[INT7]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), 
[[INT3]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fcos_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -501,8 +505,10 @@ body: | ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16) ; GFX9: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16), [[INT2]](s16), [[INT3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[INT3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FCOS %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir index c3575696d067e..c15d76640c0cc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir @@ -257,12 +257,12 @@ body: | ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_ffloor_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -280,12 +280,12 @@ body: | ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC 
[[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_ffloor_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -303,12 +303,12 @@ body: | ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[DEF2]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FFLOOR %0 S_NOP 0, implicit %1 @@ -344,8 +344,10 @@ body: | ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FFLOOR3:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_ffloor_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -362,8 +364,10 @@ body: | ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] ; VI: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16), [[FFLOOR2]](s16), [[FFLOOR3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[FFLOOR3]](s16) + ; VI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_ffloor_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -380,8 +384,10 @@ body: | ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] ; GFX9: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16), [[FFLOOR2]](s16), [[FFLOOR3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR]](s16), [[FFLOOR1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FFLOOR2]](s16), [[FFLOOR3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir index e1ffcd5e8ee96..601d9b2b51f56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -436,12 +436,12 @@ body: | ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC9]](<3 x s16>) + ; SI: [[DEF6:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF6]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) ; VI-LABEL: name: test_fma_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -483,12 +483,12 @@ body: | ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]] ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC9]](<3 x s16>) + ; VI: [[DEF6:%[0-9]+]]:_(s16) = 
G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA2]](s16), [[DEF6]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) ; GFX9-LABEL: name: test_fma_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -592,8 +592,10 @@ body: | ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fma_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -630,8 +632,10 @@ body: | ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] ; VI: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16), [[FMA2]](s16), [[FMA3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA]](s16), [[FMA1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMA2]](s16), [[FMA3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fma_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir index 4faf397946389..460b5d60b5f48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir @@ -312,8 +312,10 @@ body: | ; SI-F16DENORM: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; SI-F16DENORM: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] ; SI-F16DENORM: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16), [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; SI-F16DENORM: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; SI-F16DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) + ; SI-F16DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x 
s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI-F16DENORM: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; SI-F16FLUSH-LABEL: name: test_fmad_v4s16 ; SI-F16FLUSH: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI-F16FLUSH: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -378,8 +380,10 @@ body: | ; SI-F16FLUSH: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) ; SI-F16FLUSH: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT14]], [[FPEXT15]] ; SI-F16FLUSH: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16), [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) - ; SI-F16FLUSH: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC1]](s16), [[FPTRUNC3]](s16) + ; SI-F16FLUSH: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC5]](s16), [[FPTRUNC7]](s16) + ; SI-F16FLUSH: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI-F16FLUSH: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-F16DENORM-LABEL: name: test_fmad_v4s16 ; VI-F16DENORM: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI-F16DENORM: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -412,8 +416,10 @@ body: | ; VI-F16DENORM: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; VI-F16DENORM: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; VI-F16DENORM: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16), %18(s16), %19(s16) - ; VI-F16DENORM: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI-F16DENORM: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16) + ; VI-F16DENORM: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR %18(s16), %19(s16) + ; VI-F16DENORM: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI-F16DENORM: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-F16DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] ; VI-F16DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC11]] ; VI-F16DENORM: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] @@ -458,8 +464,10 @@ body: | ; VI-F16FLUSH: [[FMAD1:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] ; VI-F16FLUSH: [[FMAD2:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] ; VI-F16FLUSH: [[FMAD3:%[0-9]+]]:_(s16) = G_FMAD [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16), [[FMAD2]](s16), [[FMAD3]](s16) - ; VI-F16FLUSH: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI-F16FLUSH: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD]](s16), [[FMAD1]](s16) + ; VI-F16FLUSH: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMAD2]](s16), [[FMAD3]](s16) + ; VI-F16FLUSH: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI-F16FLUSH: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-LABEL: name: test_fmad_v4s16 ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -500,8 +508,10 @@ body: | ; GFX10: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[FMUL2]], [[TRUNC10]] ; GFX10: 
[[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] ; GFX10: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[FMUL3]], [[TRUNC11]] - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) - ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index ff3dcd28605b7..c5c09c270bdc7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -420,13 +420,13 @@ body: | ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -463,13 +463,13 @@ body: | ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), 
[[FMINNUM_IEEE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -561,8 +561,10 @@ body: | ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -597,8 +599,10 @@ body: | ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16), [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index ff3dcd28605b7..c5c09c270bdc7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -420,13 +420,13 @@ body: | ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: 
[[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -463,13 +463,13 @@ body: | ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -561,8 +561,10 @@ body: | ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -597,8 +599,10 @@ body: | ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = 
G_FCANONICALIZE [[TRUNC7]] ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16), [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s16), [[FMINNUM_IEEE1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMINNUM_IEEE2]](s16), [[FMINNUM_IEEE3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir index dc2dfa978ae95..de0283d6d91ab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -357,12 +357,12 @@ body: | ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fmul_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -392,12 +392,12 @@ body: | ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC3]] ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC4]] ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT 
[[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fmul_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -478,8 +478,10 @@ body: | ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) ; SI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fmul_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -506,8 +508,10 @@ body: | ; VI: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC5]] ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC6]] ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16), [[FMUL2]](s16), [[FMUL3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL]](s16), [[FMUL1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FMUL2]](s16), [[FMUL3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fmul_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir index 5ca81b17b5c91..f7b6d1cdb59ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-frint.mir @@ -56,18 +56,14 @@ body: | ; SI-LABEL: name: test_frint_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x4330000000000000 - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[C]], [[C2]] - ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[C]], [[C1]] - ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[OR]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[OR]] + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4841369599423283200 + ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY1]] + ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FADD]], [[FNEG]] - ; SI: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF + ; SI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF ; SI: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[COPY]] - ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C3]] + ; SI: 
[[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C1]] ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FADD1]] ; SI: [[FRINT:%[0-9]+]]:_(s64) = G_FRINT [[COPY]] ; SI: $vgpr0_vgpr1 = COPY [[FRINT]](s64) @@ -131,26 +127,22 @@ body: | ; SI-LABEL: name: test_frint_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x4330000000000000 - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[C]], [[C2]] - ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[C]], [[C1]] - ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[OR]](s64) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4841369599423283200 + ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64) ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[COPY1]] ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[FADD]], [[FNEG]] - ; SI: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF + ; SI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x432FFFFFFFFFFFFF ; SI: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[UV]] - ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C3]] + ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS]](s64), [[C1]] ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FADD1]] ; SI: [[FRINT:%[0-9]+]]:_(s64) = G_FRINT [[UV]] - ; SI: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[OR]] - ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[OR]] + ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; SI: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[COPY2]] + ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[COPY2]] ; SI: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[FADD2]], [[FNEG1]] ; SI: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[UV1]] - ; SI: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS1]](s64), [[C3]] + ; SI: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[FABS1]](s64), [[C1]] ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FADD3]] ; SI: [[FRINT1:%[0-9]+]]:_(s64) = G_FRINT [[UV1]] ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FRINT]](s64), [[FRINT1]](s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir index d4f32b4f4c6c2..a651dace6256c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -344,12 +344,12 @@ body: | ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32) ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32) ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fsin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -374,12 +374,12 @@ body: | ; VI: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; VI: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16) ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fsin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -401,12 +401,12 @@ body: | ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16) ; GFX9: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]] ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[DEF2]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FSIN %0 S_NOP 0, implicit %1 @@ -451,8 +451,10 @@ body: | ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s32) ; SI: [[INT7:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](s32) ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; 
SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -478,8 +480,10 @@ body: | ; VI: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; VI: [[INT6:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL3]](s16) ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT6]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16), [[INT5]](s16), [[INT7]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT1]](s16), [[INT3]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT5]](s16), [[INT7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -501,8 +505,10 @@ body: | ; GFX9: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16) ; GFX9: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[C1]] ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL3]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16), [[INT2]](s16), [[INT3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT]](s16), [[INT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[INT2]](s16), [[INT3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FSIN %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir index 03fb288d515d6..9c302db0d20fd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -257,12 +257,12 @@ body: | ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; VI-LABEL: name: test_fsqrt_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -280,12 +280,12 @@ body: | ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) ; GFX9-LABEL: name: test_fsqrt_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -303,12 +303,12 @@ body: | ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC3]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[DEF2]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FSQRT %0 S_NOP 0, implicit %1 @@ -344,8 +344,10 @@ body: | ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsqrt_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -362,8 +364,10 @@ body: | ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[FSQRT3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsqrt_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -380,8 +384,10 @@ body: | ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT2]](s16), [[FSQRT3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FSQRT %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir index 7beb00ad9e72b..67530f5f51bd9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -405,12 +405,12 @@ body: | ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) ; SI: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]] ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x 
s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_fsub_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -443,12 +443,12 @@ body: | ; VI: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] ; VI: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_fsub_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -481,12 +481,12 @@ body: | ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC5]] ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; GFX9: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF4]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FSUB %0, %1 @@ -541,8 +541,10 @@ body: | ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) ; SI: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT6]], [[FPEXT7]] ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsub_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -573,8 +575,10 @@ body: | ; VI: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] ; VI: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] ; VI: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsub_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -605,8 +609,10 @@ body: | ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] ; GFX9: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC7]] ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16), [[FADD2]](s16), [[FADD3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[FADD3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_FSUB %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 5bf60e2ce82c9..a544d0edf156c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -5205,87 +5205,102 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: 
[[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (load 2, addrspace 4) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: 
[[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (load 2, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 4) @@ -5653,8 +5668,10 @@ body: | ; CI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) @@ -5671,8 +5688,10 @@ body: | ; VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) @@ -5689,8 +5708,10 @@ body: | ; GFX9: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: 
[[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v4s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) @@ -5707,8 +5728,10 @@ body: | ; CI-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v4s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) @@ -5725,8 +5748,10 @@ body: | ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p4) :: (load 2, addrspace 4) ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 @@ -6047,8 +6072,12 @@ body: | ; CI: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), 
[[TRUNC3]](s16) + ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; VI-LABEL: name: test_load_constant_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) @@ -6081,8 +6110,12 @@ body: | ; VI: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-LABEL: name: test_load_constant_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) @@ -6115,8 +6148,12 @@ body: | ; GFX9: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) @@ -6149,8 +6186,12 @@ body: | ; CI-MESA: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC 
[[LOAD7]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 8, addrspace 4) @@ -6183,8 +6224,12 @@ body: | ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p4) :: (load 2, addrspace 4) ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index 5fb5ad65673a1..389b9391f4f0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -5295,87 +5295,102 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 2) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD 
[[GEP]](p0) :: (load 2) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 2) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 0) @@ -5743,8 +5758,10 @@ body: | ; CI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) @@ -5761,8 +5778,10 @@ body: | ; VI: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) @@ -5779,8 
+5798,10 @@ body: | ; GFX9: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v4s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) @@ -5797,8 +5818,10 @@ body: | ; CI-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v4s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) @@ -5815,8 +5838,10 @@ body: | ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 2) ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -6137,8 +6162,12 @@ body: | ; CI: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; CI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), 
[[TRUNC3]](s16) + ; CI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; VI-LABEL: name: test_load_flat_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) @@ -6171,8 +6200,12 @@ body: | ; VI: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-LABEL: name: test_load_flat_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) @@ -6205,8 +6238,12 @@ body: | ; GFX9: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) @@ -6239,8 +6276,12 @@ body: | ; CI-MESA: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR 
[[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 8) @@ -6273,8 +6314,12 @@ body: | ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p0) :: (load 2) ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index 9e7d3338d7867..aa52a344c9d62 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -5021,19 +5021,22 @@ body: | ; SI-LABEL: name: test_load_global_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; 
SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5044,36 +5047,42 @@ body: | ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) + ; VI: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5084,19 +5093,22 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64) ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C1]](s64) ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 2, addrspace 1) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9-MESA: 
$vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 1) @@ -5434,8 +5446,10 @@ body: | ; SI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v4s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) @@ -5456,8 +5470,10 @@ body: | ; CI-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_global_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) @@ -5474,8 +5490,10 @@ body: | ; VI: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) @@ -5496,8 +5514,10 @@ body: | ; GFX9-MESA: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C2]](s64) ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 2, addrspace 1) ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x 
s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 @@ -5776,8 +5796,12 @@ body: | ; SI: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; SI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-HSA-LABEL: name: test_load_global_v8s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5810,8 +5834,12 @@ body: | ; CI-HSA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; CI-HSA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; CI-HSA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-HSA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_global_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5844,8 +5872,12 @@ body: | ; CI-MESA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; CI-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), 
[[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CI-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; VI-LABEL: name: test_load_global_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5878,8 +5910,12 @@ body: | ; VI: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; VI: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5912,8 +5948,12 @@ body: | ; GFX9-HSA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-HSA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; GFX9-HSA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-HSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-HSA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-HSA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 8, addrspace 1) @@ -5946,8 +5986,12 @@ body: | ; GFX9-MESA: [[GEP6:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C6]](s64) ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 2, addrspace 1) ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16) - ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9-MESA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9-MESA: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX9-MESA: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index d5b869a560e91..26dee2ea77525 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -6325,87 +6325,102 @@ body: | ; SI-LABEL: name: test_load_local_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + 
; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_local_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v3s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI-DS128: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI-DS128: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR 
[[TRUNC2]](s16), [[DEF]](s16) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_local_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p3) :: (load 2, addrspace 3) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p3) :: (load 2, addrspace 3) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: 
[[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 3) @@ -6777,8 +6792,10 @@ body: | ; SI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_local_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -6795,8 +6812,10 @@ body: | ; CI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v4s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -6813,8 +6832,10 @@ body: | ; CI-DS128: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; CI-DS128: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_local_v4s16_align2 ; VI: 
[[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -6831,8 +6852,10 @@ body: | ; VI: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) @@ -6849,8 +6872,10 @@ body: | ; GFX9: [[GEP2:%[0-9]+]]:_(p3) = G_GEP [[COPY]], [[C2]](s32) ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p3) :: (load 2, addrspace 3) ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p3) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index ed819e4658b26..5aeb8f7593f54 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -5143,70 +5143,82 @@ body: | ; SI-LABEL: name: test_load_private_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, align 4, addrspace 5) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: 
[[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, align 4, addrspace 5) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, align 4, addrspace 5) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF 
- ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 8, addrspace 5) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, align 4, addrspace 5) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 5) @@ -5224,70 +5236,82 @@ body: | ; SI-LABEL: name: test_load_private_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY 
[[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: 
(load 2, addrspace 5) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C]](s32) ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 2, addrspace 5) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[COPY]], [[C1]](s32) ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (load 2, addrspace 5) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s16>), 0 + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 5) @@ -5344,13 +5368,13 @@ body: | ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT 
[[OR2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC6]](<3 x s16>), 0 + ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5394,13 +5418,13 @@ body: | ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY4]](s32) ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] - ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; CI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC6]](<3 x s16>), 0 + ; CI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5438,13 +5462,13 @@ body: | ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), 
[[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5482,13 +5506,13 @@ body: | ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[TRUNC6]](<3 x s16>), 0 + ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16) + ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR2]](s16), [[DEF]](s16) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index 7c09ce8123f82..4c5d64d8bb6d3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -609,23 +609,26 @@ body: | ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC]](<3 x s16>), 0 + ; SI: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -656,13 +659,13 @@ body: | ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR6]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -751,8 +754,10 @@ body: | ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[AND6]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_lshr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -779,8 +784,10 @@ body: | ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16), [[LSHR6]](s16), 
[[LSHR7]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR4]](s16), [[LSHR5]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR6]](s16), [[LSHR7]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index b8c330a08f6c3..55a201c7f8c68 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -144,25 +144,28 @@ body: | ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY3]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ADD]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ADD1]](s32) ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY6]], [[COPY7]] - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ADD2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[TRUNC]](<3 x s16>), 0 + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ADD2]](s32) + ; CHECK: [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF3]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[PHI]](<4 x s16>), 0 - ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[PHI]](<4 x s16>), 0 + ; CHECK: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT2]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -236,10 +239,12 @@ body: | ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; CHECK: 
[[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY8]], [[COPY9]] ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ADD3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[BUILD_VECTOR]](<4 x s16>), %bb.1 + ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[CONCAT_VECTORS]](<4 x s16>), %bb.1 ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](<4 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: @@ -708,8 +713,10 @@ body: | ; CHECK: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] ; CHECK: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] ; CHECK: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32), [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32), [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) - ; CHECK: [[UV128:%[0-9]+]]:_(<16 x s32>), [[UV129:%[0-9]+]]:_(<16 x s32>), [[UV130:%[0-9]+]]:_(<16 x s32>), [[UV131:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s32>) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32), [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) 
+ ; CHECK: [[UV128:%[0-9]+]]:_(<16 x s32>), [[UV129:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) + ; CHECK: [[UV130:%[0-9]+]]:_(<16 x s32>), [[UV131:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<32 x s32>) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[UV128]](<16 x s32>), %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index f4615982e54c3..9b5721882de9f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -596,21 +596,24 @@ body: | ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32) + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[AND1]](s32) + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[AND2]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC]](<3 x s16>), 0 + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF2]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; VI-LABEL: name: test_shl_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -641,13 +644,13 @@ body: | ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[TRUNC6]](<3 x s16>), 0 + ; VI: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL2]](s16), [[DEF2]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -732,8 +735,10 @@ body: | ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY9]], [[AND3]](s32) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_shl_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -760,8 +765,10 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16), [[SHL2]](s16), [[SHL3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL]](s16), [[SHL1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL2]](s16), [[SHL3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_shl_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index 2bd357c5b846f..41f43c1b6ff2c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -349,8 +349,10 @@ body: | ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR9]](s32), [[ASHR10]](s32), [[ASHR11]](s32) ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR3]](<3 x s32>), 0 ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[EXTRACT5]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR4]](<4 x s16>) + ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir index 2a52412aa8ba7..3010bff87ac40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -355,6 +355,7 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -362,6 +363,7 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[ASHR2]], [[ASHR3]] + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) @@ -369,12 +371,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[ASHR4]], [[ASHR5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMAX1]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SMAX2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC]](<3 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX2]](s32) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_smax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -404,12 +407,12 @@ body: | ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16) + ; VI: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -484,8 +487,10 @@ body: | ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) ; SI: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[ASHR6]], [[ASHR7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -512,8 +517,10 @@ body: | ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] ; VI: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16), [[SMAX2]](s16), [[SMAX3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX2]](s16), [[SMAX3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir index 4146df9b06c06..63411ddfc1791 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -355,6 +355,7 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32) @@ -362,6 +363,7 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) ; SI: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32) ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[ASHR2]], [[ASHR3]] + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) ; SI: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL4]], [[C]](s32) @@ -369,12 +371,13 @@ body: | ; 
SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32) ; SI: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[C]](s32) ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[ASHR4]], [[ASHR5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SMIN1]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SMIN2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC]](<3 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN2]](s32) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_smin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -404,12 +407,12 @@ body: | ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -484,8 +487,10 @@ body: | ; SI: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[C]](s32) ; SI: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[ASHR6]], [[ASHR7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -512,8 +517,10 @@ body: | ; VI: 
[[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] ; VI: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16), [[SMIN2]](s16), [[SMIN3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN2]](s16), [[SMIN3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir index d18ab9b6b14c4..35e3ecf845b69 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -337,22 +337,25 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMAX1]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UMAX2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC]](<3 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX2]](s32) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_umax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -382,12 +385,12 @@ body: | ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT 
[[UMAX2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_umax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -455,8 +458,10 @@ body: | ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_umax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -483,8 +488,10 @@ body: | ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] ; VI: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16), [[UMAX2]](s16), [[UMAX3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX2]](s16), [[UMAX3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir index b8de3fd8e7761..1d1557ccfd89f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -337,22 +337,25 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN 
[[AND2]], [[AND3]] + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UMIN1]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UMIN2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; SI: S_NOP 0, implicit [[TRUNC]](<3 x s16>) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN2]](s32) + ; SI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF4]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; VI-LABEL: name: test_umin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -382,12 +385,12 @@ body: | ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: [[TRUNC6:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; VI: S_NOP 0, implicit [[TRUNC6]](<3 x s16>) + ; VI: [[DEF4:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN2]](s16), [[DEF4]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) ; GFX9-LABEL: name: test_umin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 @@ -455,8 +458,10 @@ body: | ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: 
name: test_umin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -483,8 +488,10 @@ body: | ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] ; VI: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16), [[UMIN2]](s16), [[UMIN3]](s16) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16) + ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN2]](s16), [[UMIN3]](s16) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index e5428803965c5..ea012b37ac278 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -13,7 +13,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 @@ -26,7 +26,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -43,7 +43,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: 
[[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 @@ -55,7 +55,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) @@ -73,7 +73,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec @@ -90,7 +90,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -109,7 +109,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; 
UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec @@ -129,7 +129,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 @@ -172,7 +172,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -210,7 +210,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], 
%subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -234,7 +234,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] @@ -251,7 +251,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 @@ -269,7 +269,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] @@ -286,7 +286,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; 
PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 @@ -305,7 +305,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: %11:vgpr_32, dead %21:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -324,7 +324,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -346,7 +346,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -366,7 +366,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED: 
%11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -388,7 +388,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -408,7 +408,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -458,7 +458,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -499,7 +499,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll index ae881bb0a9dc2..1793bbdcdcec7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -13,7 +13,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -30,7 +30,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) @@ -49,7 +49,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -70,7 +70,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -92,7 +92,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -131,7 +131,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -156,7 +156,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) @@ -176,7 +176,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) @@ -197,7 +197,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] @@ -221,7 +221,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, 
[[COPY5]], %subreg.sub1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] @@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] @@ -293,7 +293,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll index 64fd2929a0d72..6e6a7d056d784 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -33,7 
+33,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr6 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr7 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) @@ -70,7 +70,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -97,7 +97,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) @@ -144,7 +144,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc @@ -172,7 +172,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) @@ -190,7 +190,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) @@ -208,7 +208,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3) @@ -226,7 +226,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = 
COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) @@ -244,7 +244,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) @@ -262,7 +262,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) @@ -280,7 +280,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) @@ -299,7 +299,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -320,7 +320,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -342,7 +342,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -361,7 +361,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4) ; CHECK: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 @@ -380,7 +380,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 @@ -399,7 +399,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -417,7 +417,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -436,7 +436,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -473,7 +473,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec 
= REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -497,7 +497,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) @@ -514,7 +514,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -533,7 +533,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -555,7 +555,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -577,7 +577,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -598,7 +598,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -616,7 +616,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 @@ -635,7 +635,7 @@ define 
amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -657,7 +657,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -679,7 +679,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec @@ -721,7 +721,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], 
implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -764,7 +764,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 49ac13f666687..d8bd4f777b4b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -69,6 +69,8 @@ define amdgpu_kernel void @load_constant_i32_uniform_align2() {ret void} define amdgpu_kernel void @load_constant_i32_uniform_align1() {ret void} define amdgpu_kernel void @load_private_uniform_sgpr_i32() {ret void} + define amdgpu_kernel void @load_constant_v8i32_vgpr_crash() { ret void } + define amdgpu_kernel void @load_constant_v8i32_vgpr_crash_loop_phi() { ret void } declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone } @@ -652,3 +654,47 @@ body: | %0:_(p5) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5, align 4) ... + +--- +name: load_constant_v8i32_vgpr_crash +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash + ; CHECK: %0:vgpr(p4) = COPY $vgpr0_vgpr1 + ; CHECK: vgpr(<4 x s32>) = G_LOAD %0(p4) + ; CHECK: vgpr(<4 x s32>) = G_LOAD + ; CHECK: G_CONCAT_VECTORS + %0:_(p4) = COPY $vgpr0_vgpr1 + %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) +... + +--- +name: load_constant_v8i32_vgpr_crash_loop_phi +legalized: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + + ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi + ; CHECK: G_PHI + ; CHECK: vgpr(<4 x s32>) = G_LOAD + ; CHECK: vgpr(<4 x s32>) = G_LOAD + ; CHECK: G_CONCAT_VECTORS + + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(p4) = COPY $sgpr2_sgpr3 + G_BR %bb.1 + + bb.1: + %2:_(p4) = G_PHI %0, %bb.0, %4, %bb.1 + %3:_(<8 x s32>) = G_LOAD %2 :: (load 32, addrspace 4) + %4:_(p4) = COPY %1 + G_BR %bb.1 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll index d5dc9ce17331b..95322379386a5 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll @@ -9,7 +9,7 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 - ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sreg_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6) + ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6) ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 16, align 1, addrspace 4) ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir index 599cacb826155..7fff7ca70dc74 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir @@ -12,10 +12,10 @@ body: | %1:vgpr_32 = COPY %0 INLINEASM &"; %1", 1, 327690, def %1, 2147483657, %1(tied-def 3) %2:sreg_64 = V_CMP_NE_U32_e64 0, %1, implicit $exec - undef %3.sub0:sreg_128 = COPY %0 - %3.sub1:sreg_128 = COPY %0 - %3.sub2:sreg_128 = COPY %0 - %4:sreg_128 = COPY %3 + undef %3.sub0:sgpr_128 = COPY %0 + %3.sub1:sgpr_128 = COPY %0 + %3.sub2:sgpr_128 = COPY %0 + %4:sgpr_128 = COPY %3 %5:vgpr_32 = V_MOV_B32_e32 -64, implicit $exec %6:vreg_128 = COPY %4 %7:sreg_32_xm0 = S_AND_B32 target-flags(amdgpu-gotprel) 1, %2.sub0, implicit-def dead $scc @@ -30,7 +30,7 @@ body: | %14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec %15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec %16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec - BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4) + BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4) S_ENDPGM 0 bb.2: @@ -44,10 +44,10 @@ body: | bb.3: successors: %bb.4 - undef %22.sub0:sreg_128 = COPY %8 - %22.sub1:sreg_128 = COPY %8 - %22.sub2:sreg_128 = COPY %8 - %23:sreg_128 = COPY %22 + undef %22.sub0:sgpr_128 = COPY %8 + %22.sub1:sgpr_128 = COPY %8 + %22.sub2:sgpr_128 = COPY %8 + %23:sgpr_128 = COPY %22 %24:vreg_128 = COPY %23 %10:vreg_128 = COPY %24 @@ -78,7 +78,7 @@ body: | bb.8: successors: %bb.10 - %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 
4) + %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) %34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec %35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec %28:vgpr_32 = COPY %35 diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-identical-values-undef.mir b/llvm/test/CodeGen/AMDGPU/coalescer-identical-values-undef.mir index f0f46b4f31188..280c82b3352c8 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-identical-values-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-identical-values-undef.mir @@ -10,18 +10,18 @@ body: | bb.0: successors: %bb.1, %bb.2 liveins: $sgpr4 - undef %0.sub2:sreg_128 = COPY $sgpr4 + undef %0.sub2:sgpr_128 = COPY $sgpr4 %3 = IMPLICIT_DEF S_CBRANCH_SCC1 %bb.2, implicit undef $scc bb.1: successors: %bb.2 - %0.sub0:sreg_128 = COPY %0.sub2 - %0.sub1:sreg_128 = COPY %0.sub2 - %1:sreg_128 = COPY %0 - %2:sreg_128 = COPY %0 - %0:sreg_128 = COPY %2 - %3:sreg_128 = COPY %1 + %0.sub0:sgpr_128 = COPY %0.sub2 + %0.sub1:sgpr_128 = COPY %0.sub2 + %1:sgpr_128 = COPY %0 + %2:sgpr_128 = COPY %0 + %0:sgpr_128 = COPY %2 + %3:sgpr_128 = COPY %1 bb.2: $sgpr1 = COPY %3 diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir index bc549f7bb87b4..083e9ce67b47f 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir @@ -42,10 +42,10 @@ body: | bb.3: successors: %bb.5(0x80000000) %8:sreg_32_xm0 = S_MOV_B32 0 - undef %9.sub0:sreg_128 = COPY %8 - %9.sub1:sreg_128 = COPY %8 - %9.sub2:sreg_128 = COPY %8 - %9.sub3:sreg_128 = COPY killed %8 + undef %9.sub0:sgpr_128 = COPY %8 + %9.sub1:sgpr_128 = COPY %8 + %9.sub2:sgpr_128 = COPY %8 + %9.sub3:sgpr_128 = COPY killed %8 %10:vreg_128 = COPY killed %9 %7:vreg_128 = COPY killed %10 S_BRANCH %bb.5 @@ -53,11 +53,11 @@ body: | bb.4: successors: %bb.5(0x80000000) %11:sreg_32_xm0 = S_MOV_B32 0 - undef %12.sub0:sreg_128 = COPY %11 - %12.sub1:sreg_128 = COPY %11 - %12.sub2:sreg_128 = COPY %11 - %12.sub3:sreg_128 = COPY killed %11 - %13:sreg_128 = COPY killed %12 + undef %12.sub0:sgpr_128 = COPY %11 + %12.sub1:sgpr_128 = COPY %11 + %12.sub2:sgpr_128 = COPY %11 + %12.sub3:sgpr_128 = COPY killed %11 + %13:sgpr_128 = COPY killed %12 %14:vreg_128 = COPY killed %13 %7:vreg_128 = COPY killed %14 @@ -83,7 +83,7 @@ body: | bb.9: successors: %bb.10(0x80000000) - %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) %21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec %22:sreg_64 = COPY $exec, implicit-def $exec %23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir index 67399883ae07d..b2ac4e96c95f6 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir @@ -25,16 +25,16 @@ body: | successors: 
%bb.2(0x40000000), %bb.1(0x40000000) %0:sreg_64 = COPY $exec %1:sgpr_32 = S_MOV_B32 0 - undef %2.sub0:sreg_128 = COPY %1 - %2.sub1:sreg_128 = COPY %1 - %2.sub2:sreg_128 = COPY %1 - %2.sub3:sreg_128 = COPY %1 + undef %2.sub0:sgpr_128 = COPY %1 + %2.sub1:sgpr_128 = COPY %1 + %2.sub2:sgpr_128 = COPY %1 + %2.sub3:sgpr_128 = COPY %1 $exec = S_WQM_B64 $exec, implicit-def dead $scc S_CBRANCH_SCC0 %bb.2, implicit undef $scc bb.1: successors: %bb.3(0x80000000) - %3:sreg_128 = COPY killed %2 + %3:sgpr_128 = COPY killed %2 %4:vreg_128 = COPY killed %3 %5:vreg_128 = COPY killed %4 S_BRANCH %bb.3 @@ -47,7 +47,7 @@ body: | %10:vgpr_32 = V_CVT_U32_F32_e32 killed %9, implicit $exec %11:vgpr_32 = V_LSHLREV_B32_e32 1, killed %10, implicit $exec %12:sreg_64 = S_MOV_B64 0 - %13:sreg_128 = COPY killed %2 + %13:sgpr_128 = COPY killed %2 %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %15:vreg_128 = COPY killed %13 %16:sreg_64 = COPY killed %12 @@ -68,7 +68,7 @@ body: | %23:vreg_128 = COPY killed %17 %24:sreg_64 = COPY killed %16 %25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec - %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) %28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec %29:vreg_128 = COPY killed %21 %29.sub0:vreg_128 = COPY %1 @@ -257,7 +257,7 @@ body: | %109.sub5:sreg_256 = COPY %108 %109.sub6:sreg_256 = COPY %108 %109.sub7:sreg_256 = COPY killed %108 - %110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sreg_128, 8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) %112:vgpr_32 = V_MUL_F32_e32 0, killed %110, implicit $exec %113:vgpr_32 = V_MUL_F32_e32 0, killed %112, implicit $exec %114:vgpr_32 = V_MAD_F32 0, killed %113, 0, 0, 0, 0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir index 3d1b98714c541..c5d7628233e20 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir @@ -12,9 +12,9 @@ registers: - { id: 1, class: vgpr_32 } - { id: 2, class: vgpr_32 } - { id: 3, class: sreg_256 } - - { id: 4, class: sreg_128 } + - { id: 4, class: sgpr_128 } - { id: 5, class: sreg_256 } - - { id: 6, class: sreg_128 } + - { id: 6, class: sgpr_128 } - { id: 7, class: sreg_512 } - { id: 9, class: vreg_512 } - { id: 11, class: vreg_512 } diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir index 773466af7adb2..c3a945716f77d 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir @@ -67,8 +67,8 @@ body: | successors: %bb.7(0x40000000), %bb.18(0x40000000) %9:vreg_128 = COPY killed %6 %10:sreg_64 = COPY killed %5 - undef %11.sub2:sreg_128 = COPY %4 - %11.sub3:sreg_128 = COPY %3 + undef %11.sub2:sgpr_128 = COPY %4 + %11.sub3:sgpr_128 = COPY %3 %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, 0, implicit $exec 
undef %13.sub1:vreg_128 = COPY %9.sub1 %13.sub2:vreg_128 = COPY %9.sub2 @@ -161,7 +161,7 @@ body: | bb.18: successors: %bb.7(0x80000000) dead %59:vgpr_32 = V_FMA_F32 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $exec - dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, 0, 0, implicit $exec + dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sgpr_128, undef %65:sreg_32, 0, 0, 0, 0, 0, 0, implicit $exec undef %66.sub1:vreg_128 = COPY %13.sub1 %66.sub2:vreg_128 = COPY %13.sub2 %67:sreg_64 = V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir index 4c532e89398e1..83b63fe23aaeb 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir @@ -109,11 +109,11 @@ body: | bb.9: successors: %bb.11(0x80000000) %32:sreg_32_xm0 = S_MOV_B32 0 - undef %33.sub0:sreg_128 = COPY %32 - %33.sub1:sreg_128 = COPY %32 - %33.sub2:sreg_128 = COPY %32 - %33.sub3:sreg_128 = COPY killed %32 - %34:sreg_128 = COPY killed %33 + undef %33.sub0:sgpr_128 = COPY %32 + %33.sub1:sgpr_128 = COPY %32 + %33.sub2:sgpr_128 = COPY %32 + %33.sub3:sgpr_128 = COPY killed %32 + %34:sgpr_128 = COPY killed %33 %35:vreg_128 = COPY killed %34 %31:vreg_128 = COPY killed %35 S_BRANCH %bb.11 @@ -145,10 +145,10 @@ body: | %40:vgpr_32 = V_MAD_F32 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $exec %41:vgpr_32 = V_MAD_F32 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $exec %42:vgpr_32 = V_CVT_I32_F32_e32 killed %41, implicit $exec - %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0, 0 :: (dereferenceable invariant load 4) + %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0, 0 :: (dereferenceable invariant load 4) %45:vgpr_32 = V_MUL_LO_I32 killed %42, killed %43, implicit $exec %46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec - %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) + %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) %49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec %50:sreg_64 = COPY $exec, implicit-def $exec %51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir index 92e29f3a52909..a1590becf4939 100644 --- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -18,7 +18,7 @@ registers: - { id: 3, class: sreg_32_xm0 } - { id: 4, class: sreg_32_xm0 } - { id: 5, class: sreg_32_xm0 } - - { id: 6, class: sreg_128 } + - { id: 6, class: sgpr_128 } - { id: 7, class: sreg_32_xm0 } - { id: 8, class: sreg_32_xm0 } - { id: 9, class: sreg_32_xm0 } @@ -185,7 +185,7 @@ registers: - { id: 7, class: sreg_32_xm0 } - { id: 8, class: sreg_32_xm0 } - { id: 9, class: sreg_32_xm0 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: sreg_32_xm0 } - { id: 12, class: sreg_32_xm0 } - { id: 13, class: vgpr_32 } @@ -385,7 +385,7 @@ registers: 
- { id: 7, class: sreg_32_xm0 } - { id: 8, class: sreg_32_xm0 } - { id: 9, class: sreg_32_xm0 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: sreg_32_xm0 } - { id: 12, class: sreg_32_xm0 } - { id: 13, class: vgpr_32 } @@ -593,7 +593,7 @@ registers: - { id: 7, class: sreg_32_xm0 } - { id: 8, class: sreg_32_xm0 } - { id: 9, class: sreg_32_xm0 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: sreg_32_xm0 } - { id: 12, class: sreg_32_xm0 } - { id: 13, class: vgpr_32 } diff --git a/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir b/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir index bba41584bc97f..e14420080c5fb 100644 --- a/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir +++ b/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir @@ -158,15 +158,15 @@ selected: false failedISel: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_128, preferred-register: '' } + - { id: 0, class: sgpr_128, preferred-register: '' } - { id: 1, class: sreg_32_xm0, preferred-register: '%5' } - - { id: 2, class: sreg_128, preferred-register: '' } + - { id: 2, class: sgpr_128, preferred-register: '' } - { id: 3, class: sreg_32_xm0, preferred-register: '' } - - { id: 4, class: sreg_128, preferred-register: '' } + - { id: 4, class: sgpr_128, preferred-register: '' } - { id: 5, class: sreg_32_xm0, preferred-register: '%1' } - - { id: 6, class: sreg_128, preferred-register: '' } - - { id: 7, class: sreg_128, preferred-register: '' } - - { id: 8, class: sreg_128, preferred-register: '' } + - { id: 6, class: sgpr_128, preferred-register: '' } + - { id: 7, class: sgpr_128, preferred-register: '' } + - { id: 8, class: sgpr_128, preferred-register: '' } - { id: 9, class: sreg_32_xm0, preferred-register: '' } - { id: 10, class: vgpr_32, preferred-register: '' } - { id: 11, class: vgpr_32, preferred-register: '' } @@ -176,23 +176,23 @@ registers: - { id: 15, class: sreg_32, preferred-register: '' } - { id: 16, class: sreg_32_xm0, preferred-register: '' } - { id: 17, class: sreg_32_xm0, preferred-register: '' } - - { id: 18, class: sreg_128, preferred-register: '' } + - { id: 18, class: sgpr_128, preferred-register: '' } - { id: 19, class: sreg_32_xm0, preferred-register: '' } - { id: 20, class: sreg_32_xm0, preferred-register: '' } - { id: 21, class: sreg_32_xm0, preferred-register: '' } - { id: 22, class: vreg_128, preferred-register: '' } - { id: 23, class: vgpr_32, preferred-register: '' } - - { id: 24, class: sreg_128, preferred-register: '' } + - { id: 24, class: sgpr_128, preferred-register: '' } - { id: 25, class: sreg_32_xm0, preferred-register: '' } - { id: 26, class: sreg_32_xm0, preferred-register: '' } - - { id: 27, class: sreg_128, preferred-register: '' } + - { id: 27, class: sgpr_128, preferred-register: '' } - { id: 28, class: sreg_32_xm0, preferred-register: '' } - { id: 29, class: sreg_32_xm0, preferred-register: '' } - { id: 30, class: sreg_32_xm0, preferred-register: '' } - { id: 31, class: sreg_32_xm0, preferred-register: '' } - { id: 32, class: sreg_32_xm0, preferred-register: '' } - { id: 33, class: sreg_32_xm0, preferred-register: '' } - - { id: 34, class: sreg_128, preferred-register: '' } + - { id: 34, class: sgpr_128, preferred-register: '' } - { id: 35, class: sreg_64, preferred-register: '' } - { id: 36, class: sreg_64, preferred-register: '' } - { id: 37, class: vgpr_32, preferred-register: '' } @@ -291,7 +291,7 @@ body: | bb.3..lr.ph3410.preheader: successors: %bb.4(0x80000000) - dead %22:vreg_128 = 
BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec %36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir b/llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir index 8c32546693be7..b333e5dbb4099 100644 --- a/llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir +++ b/llvm/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir @@ -11,8 +11,8 @@ body: | bb.0: liveins: $sgpr0_sgpr1 - %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0 - S_NOP 0, implicit-def %4:sreg_128, implicit %10.sub1:sreg_128 + %10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0 + S_NOP 0, implicit-def %4:sgpr_128, implicit %10.sub1:sgpr_128 S_CBRANCH_SCC0 %bb.3, implicit undef $scc S_BRANCH %bb.1 @@ -21,14 +21,14 @@ body: | S_BRANCH %bb.3 bb.2: - %8:sreg_32_xm0 = COPY %4.sub1:sreg_128 - %7:sreg_32_xm0 = COPY %10.sub1:sreg_128 + %8:sreg_32_xm0 = COPY %4.sub1:sgpr_128 + %7:sreg_32_xm0 = COPY %10.sub1:sgpr_128 S_BRANCH %bb.4 bb.3: - %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0 - %7:sreg_32_xm0 = COPY %10.sub1:sreg_128 - %8:sreg_32_xm0 = COPY %10.sub2:sreg_128 + %10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0 + %7:sreg_32_xm0 = COPY %10.sub1:sgpr_128 + %8:sreg_32_xm0 = COPY %10.sub2:sgpr_128 bb.4: S_NOP 0, implicit %10 diff --git a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir index b035977a78ab9..5908fc4b88312 100644 --- a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir +++ b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir @@ -6,7 +6,7 @@ # CHECK: S_NOP 0, implicit-def %0 # CHECK: S_NOP 0, implicit-def %1 # CHECK: S_NOP 0, implicit-def dead %2 -# CHECK: %3:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, undef %2, %subreg.sub3 +# CHECK: %3:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, undef %2, %subreg.sub3 # CHECK: S_NOP 0, implicit %3.sub0 # CHECK: S_NOP 0, implicit %3.sub1 # CHECK: S_NOP 0, implicit undef %3.sub2 @@ -20,7 +20,7 @@ registers: - { id: 0, class: sreg_32_xm0 } - { id: 1, class: sreg_32_xm0 } - { id: 2, class: sreg_32_xm0 } - - { id: 3, class: sreg_128 } + - { id: 3, class: sgpr_128 } - { id: 4, class: sreg_64 } - { id: 5, class: sreg_64 } body: | @@ -42,8 +42,8 @@ body: | # Check defined lanes transfer; Includes checking for some special cases like # undef operands or IMPLICIT_DEF definitions. 
# CHECK-LABEL: name: test1 -# CHECK: %0:sreg_128 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr0, %subreg.sub2 -# CHECK: %1:sreg_128 = INSERT_SUBREG %0, $sgpr1, %subreg.sub3 +# CHECK: %0:sgpr_128 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr0, %subreg.sub2 +# CHECK: %1:sgpr_128 = INSERT_SUBREG %0, $sgpr1, %subreg.sub3 # CHECK: %2:sreg_64 = INSERT_SUBREG %0.sub2_sub3, $sgpr42, %subreg.sub0 # CHECK: S_NOP 0, implicit %1.sub0 # CHECK: S_NOP 0, implicit undef %1.sub1 @@ -53,7 +53,7 @@ body: | # CHECK: S_NOP 0, implicit undef %2.sub1 # CHECK: %3:sreg_32_xm0 = IMPLICIT_DEF -# CHECK: %4:sreg_128 = INSERT_SUBREG %0, undef %3, %subreg.sub0 +# CHECK: %4:sgpr_128 = INSERT_SUBREG %0, undef %3, %subreg.sub0 # CHECK: S_NOP 0, implicit undef %4.sub0 # CHECK: S_NOP 0, implicit undef %4.sub1 # CHECK: S_NOP 0, implicit %4.sub2 @@ -70,21 +70,21 @@ body: | # CHECK: %9:sreg_32_xm0 = EXTRACT_SUBREG undef %8, %subreg.sub1 # CHECK: S_NOP 0, implicit undef %9 -# CHECK: %10:sreg_128 = EXTRACT_SUBREG undef %0, %subreg.sub2_sub3 +# CHECK: %10:sgpr_128 = EXTRACT_SUBREG undef %0, %subreg.sub2_sub3 # CHECK: S_NOP 0, implicit undef %10 name: test1 registers: - - { id: 0, class: sreg_128 } - - { id: 1, class: sreg_128 } + - { id: 0, class: sgpr_128 } + - { id: 1, class: sgpr_128 } - { id: 2, class: sreg_64 } - { id: 3, class: sreg_32_xm0 } - - { id: 4, class: sreg_128 } + - { id: 4, class: sgpr_128 } - { id: 5, class: sreg_64 } - { id: 6, class: sreg_32_xm0 } - { id: 7, class: sreg_32_xm0 } - { id: 8, class: sreg_64 } - { id: 9, class: sreg_32_xm0 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } body: | bb.0: %0 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr0, %subreg.sub2 @@ -125,7 +125,7 @@ body: | # CHECK: S_NOP 0, implicit-def dead %0 # CHECK: S_NOP 0, implicit-def %1 # CHECK: S_NOP 0, implicit-def %2 -# CHECK: %3:sreg_128 = REG_SEQUENCE undef %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2_sub3 +# CHECK: %3:sgpr_128 = REG_SEQUENCE undef %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2_sub3 # CHECK: S_NOP 0, implicit %3.sub1 # CHECK: S_NOP 0, implicit %3.sub3 @@ -136,17 +136,17 @@ body: | # CHECK: S_NOP 0, implicit-def dead %7 # CHECK: S_NOP 0, implicit-def %8 -# CHECK: %9:sreg_128 = INSERT_SUBREG undef %7, %8, %subreg.sub2_sub3 +# CHECK: %9:sgpr_128 = INSERT_SUBREG undef %7, %8, %subreg.sub2_sub3 # CHECK: S_NOP 0, implicit %9.sub2 # CHECK: S_NOP 0, implicit-def %10 # CHECK: S_NOP 0, implicit-def dead %11 -# CHECK: %12:sreg_128 = INSERT_SUBREG %10, undef %11, %subreg.sub0_sub1 +# CHECK: %12:sgpr_128 = INSERT_SUBREG %10, undef %11, %subreg.sub0_sub1 # CHECK: S_NOP 0, implicit %12.sub3 # CHECK: S_NOP 0, implicit-def %13 # CHECK: S_NOP 0, implicit-def dead %14 -# CHECK: %15:sreg_128 = REG_SEQUENCE %13, %subreg.sub0_sub1, undef %14, %subreg.sub2_sub3 +# CHECK: %15:sgpr_128 = REG_SEQUENCE %13, %subreg.sub0_sub1, undef %14, %subreg.sub2_sub3 # CHECK: %16:sreg_64 = EXTRACT_SUBREG %15, %subreg.sub0_sub1 # CHECK: S_NOP 0, implicit %16.sub1 @@ -155,19 +155,19 @@ registers: - { id: 0, class: sreg_32_xm0 } - { id: 1, class: sreg_32_xm0 } - { id: 2, class: sreg_64 } - - { id: 3, class: sreg_128 } + - { id: 3, class: sgpr_128 } - { id: 4, class: sreg_32_xm0 } - { id: 5, class: sreg_32_xm0 } - { id: 6, class: sreg_64 } - - { id: 7, class: sreg_128 } + - { id: 7, class: sgpr_128 } - { id: 8, class: sreg_64 } - - { id: 9, class: sreg_128 } - - { id: 10, class: sreg_128 } + - { id: 9, class: sgpr_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: sreg_64 } - - { id: 12, class: sreg_128 } + - { id: 12, class: 
sgpr_128 } - { id: 13, class: sreg_64 } - { id: 14, class: sreg_64 } - - { id: 15, class: sreg_128 } + - { id: 15, class: sgpr_128 } - { id: 16, class: sreg_64 } body: | bb.0: @@ -265,10 +265,10 @@ body: | # CHECK: S_NOP 0, implicit-def %0 # CHECK: S_NOP 0, implicit-def dead %1 # CHECK: S_NOP 0, implicit-def dead %2 -# CHECK: %3:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, undef %1, %subreg.sub1, undef %2, %subreg.sub2 +# CHECK: %3:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, undef %1, %subreg.sub1, undef %2, %subreg.sub2 # CHECK: bb.1: -# CHECK: %4:sreg_128 = PHI %3, %bb.0, %5, %bb.1 +# CHECK: %4:sgpr_128 = PHI %3, %bb.0, %5, %bb.1 # CHECK: bb.2: # CHECK: S_NOP 0, implicit %4.sub0 @@ -279,9 +279,9 @@ registers: - { id: 0, class: sreg_32_xm0 } - { id: 1, class: sreg_32_xm0 } - { id: 2, class: sreg_32_xm0 } - - { id: 3, class: sreg_128 } - - { id: 4, class: sreg_128 } - - { id: 5, class: sreg_128 } + - { id: 3, class: sgpr_128 } + - { id: 4, class: sgpr_128 } + - { id: 5, class: sgpr_128 } body: | bb.0: S_NOP 0, implicit-def %0 @@ -315,12 +315,12 @@ body: | # CHECK: S_NOP 0, implicit-def %1 # CHECK: S_NOP 0, implicit-def dead %2 # CHECK: S_NOP 0, implicit-def %3 -# CHECK: %4:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, undef %2, %subreg.sub2, %3, %subreg.sub3 +# CHECK: %4:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, undef %2, %subreg.sub2, %3, %subreg.sub3 # CHECK: bb.1: -# CHECK: %5:sreg_128 = PHI %4, %bb.0, %6, %bb.1 +# CHECK: %5:sgpr_128 = PHI %4, %bb.0, %6, %bb.1 -# CHECK: %6:sreg_128 = REG_SEQUENCE %5.sub1, %subreg.sub0, %5.sub3, %subreg.sub1, undef %5.sub2, %subreg.sub2, %5.sub0, %subreg.sub3 +# CHECK: %6:sgpr_128 = REG_SEQUENCE %5.sub1, %subreg.sub0, %5.sub3, %subreg.sub1, undef %5.sub2, %subreg.sub2, %5.sub0, %subreg.sub3 # CHECK: bb.2: # CHECK: S_NOP 0, implicit %6.sub3 @@ -331,9 +331,9 @@ registers: - { id: 1, class: sreg_32_xm0 } - { id: 2, class: sreg_32_xm0 } - { id: 3, class: sreg_32_xm0 } - - { id: 4, class: sreg_128 } - - { id: 5, class: sreg_128 } - - { id: 6, class: sreg_128 } + - { id: 4, class: sgpr_128 } + - { id: 5, class: sgpr_128 } + - { id: 6, class: sgpr_128 } body: | bb.0: S_NOP 0, implicit-def %0 @@ -361,12 +361,12 @@ body: | # CHECK-LABEL: name: loop2 # CHECK: bb.0: # CHECK: S_NOP 0, implicit-def %0 -# CHECK: %1:sreg_128 = REG_SEQUENCE %0, %subreg.sub0 +# CHECK: %1:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0 # CHECK: bb.1: -# CHECK: %2:sreg_128 = PHI %1, %bb.0, %3, %bb.1 +# CHECK: %2:sgpr_128 = PHI %1, %bb.0, %3, %bb.1 -# CHECK: %3:sreg_128 = REG_SEQUENCE %2.sub3, %subreg.sub0, undef %2.sub1, %subreg.sub1, %2.sub0, %subreg.sub2, %2.sub2, %subreg.sub3 +# CHECK: %3:sgpr_128 = REG_SEQUENCE %2.sub3, %subreg.sub0, undef %2.sub1, %subreg.sub1, %2.sub0, %subreg.sub2, %2.sub2, %subreg.sub3 # CHECK: bb.2: # CHECK: S_NOP 0, implicit %2.sub0 @@ -377,9 +377,9 @@ name: loop2 tracksRegLiveness: true registers: - { id: 0, class: sreg_32_xm0 } - - { id: 1, class: sreg_128 } - - { id: 2, class: sreg_128 } - - { id: 3, class: sreg_128 } + - { id: 1, class: sgpr_128 } + - { id: 2, class: sgpr_128 } + - { id: 3, class: sgpr_128 } body: | bb.0: S_NOP 0, implicit-def %0 diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir index c43fb037d074f..af08c9added6d 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 
-run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --- # old is undefined: only combine when masks are fully enabled and @@ -512,7 +512,7 @@ body: | ... # CHECK-LABEL: name: add_old_subreg_undef -# CHECK: %5:vgpr_32 = V_ADD_U32_dpp %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec +# CHECK: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec name: add_old_subreg_undef tracksRegLiveness: true @@ -526,3 +526,39 @@ body: | %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec +... + +# Test instruction which does not have modifiers in VOP1 form but does in DPP form. +# CHECK-LABEL: name: dpp_vop1 +# CHECK: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $exec +name: dpp_vop1 +tracksRegLiveness: true +body: | + bb.0: + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec + %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $exec +... + +# Test instruction which does not have modifiers in VOP2 form but does in DPP form. +# CHECK-LABEL: name: dpp_min +# CHECK: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec +name: dpp_min +tracksRegLiveness: true +body: | + bb.0: + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec + %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $exec +... + +# Test an undef old operand +# CHECK-LABEL: name: dpp_undef_old +# CHECK: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $exec +name: dpp_undef_old +tracksRegLiveness: true +body: | + bb.0: + %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec + %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $exec +... 
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll index 70e5df5788aee..03ffd5cc93483 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll @@ -11,7 +11,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) { ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] - ; GCN: [[DEF1:%[0-9]+]]:sreg_128 = IMPLICIT_DEF + ; GCN: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 @@ -20,7 +20,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) { ; GCN: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]] ; GCN: [[DEF2:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] - ; GCN: [[DEF3:%[0-9]+]]:sreg_128 = IMPLICIT_DEF + ; GCN: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4) ; GCN: S_ENDPGM 0 main_body: diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir index f2d423a707851..b2521bb745005 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir @@ -12,7 +12,7 @@ body: | liveins: $vgpr0, $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %1:sgpr_64 = COPY $sgpr0_sgpr1 - %2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0, 0 + %2:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 9, 0, 0 %3:sreg_32_xm0 = S_MOV_B32 2 %4:vgpr_32 = V_LSHLREV_B32_e64 killed %3, %0, implicit $exec %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir index 1e596b79016ab..e76f1be6c485b 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir @@ -131,7 +131,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } @@ -193,7 +193,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } @@ -259,7 +259,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } @@ -329,7 +329,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } @@ -398,7 +398,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, 
class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } @@ -463,7 +463,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } @@ -535,7 +535,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } @@ -602,7 +602,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } @@ -668,7 +668,7 @@ registers: - { id: 7, class: sreg_32 } - { id: 8, class: sreg_32 } - { id: 9, class: sreg_32 } - - { id: 10, class: sreg_128 } + - { id: 10, class: sgpr_128 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } diff --git a/llvm/test/CodeGen/AMDGPU/fold-multiple.mir b/llvm/test/CodeGen/AMDGPU/fold-multiple.mir index d8c396c9d4a4f..1134b9a84302d 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-multiple.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-multiple.mir @@ -25,7 +25,7 @@ registers: - { id: 2, class: vgpr_32 } - { id: 3, class: sreg_32 } - { id: 4, class: vgpr_32 } - - { id: 5, class: sreg_128 } + - { id: 5, class: sgpr_128 } body: | bb.0 (%ir-block.0): %0 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir b/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir index 5aa9b41cf57e2..9b6b086c5c42f 100644 --- a/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir +++ b/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir @@ -238,7 +238,7 @@ body: | GLOBAL_STORE_DWORDX2 %11, %77, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_UMAX_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - %79:sreg_128 = REG_SEQUENCE %4, %subreg.sub0, %4, %subreg.sub1, %4, %subreg.sub2, %4, %subreg.sub3 + %79:sgpr_128 = REG_SEQUENCE %4, %subreg.sub0, %4, %subreg.sub1, %4, %subreg.sub2, %4, %subreg.sub3 %80:vreg_128 = COPY %79 %78:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN %11, %80, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.mir b/llvm/test/CodeGen/AMDGPU/memory_clause.mir index b46cfb16b7ba5..1a8d695c31bef 100644 --- a/llvm/test/CodeGen/AMDGPU/memory_clause.mir +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.mir @@ -304,10 +304,10 @@ body: | ... 
# GCN-LABEL: {{^}}name: image_clause{{$}} -# GCN: early-clobber %4:vreg_128, early-clobber %3:vreg_128, early-clobber %5:vreg_128 = BUNDLE %0, undef %2:sreg_128, %1, implicit $exec { -# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec -# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec -# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec +# GCN: early-clobber %4:vreg_128, early-clobber %3:vreg_128, early-clobber %5:vreg_128 = BUNDLE %0, undef %2:sgpr_128, %1, implicit $exec { +# GCN-NEXT: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec # GCN-NEXT: } # GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec @@ -317,7 +317,7 @@ tracksRegLiveness: true registers: - { id: 0, class: vreg_64 } - { id: 1, class: sreg_256 } - - { id: 2, class: sreg_128 } + - { id: 2, class: sgpr_128 } - { id: 3, class: vreg_128 } - { id: 4, class: vreg_128 } - { id: 5, class: vreg_128 } @@ -325,9 +325,9 @@ body: | bb.0: %0 = IMPLICIT_DEF %1 = IMPLICIT_DEF - %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec + %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec + %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec + %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec @@ -346,7 +346,7 @@ tracksRegLiveness: true registers: - { id: 0, class: vreg_64 } - { id: 1, class: sreg_256 } - - { id: 2, class: sreg_128 } + - { id: 2, class: sgpr_128 } - { id: 3, class: vreg_128 } - { id: 4, class: vreg_128 } - { id: 5, class: vgpr_32 } diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir index 6bff48467b594..14fd6201a14dd 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir @@ -154,7 +154,7 @@ body: | %6:sreg_32_xm0_xexec = S_MOV_B32 0 %7:sreg_32_xm0 = S_MOV_B32 0 %8:sreg_64_xexec = REG_SEQUENCE killed %6, %subreg.sub0, %7, %subreg.sub1 - %9:sreg_128 = S_LOAD_DWORDX4_IMM killed %8, 0, 0, 0 :: (invariant load 16, addrspace 6) + %9:sgpr_128 = S_LOAD_DWORDX4_IMM killed %8, 0, 0, 0 :: (invariant load 16, addrspace 6) %31:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %9, 0, 0, 0 :: (dereferenceable invariant load 4) %10:sreg_32_xm0_xexec = COPY %31.sub0 %11:sreg_32_xm0_xexec = COPY killed %31.sub1 @@ -179,7 +179,7 @@ body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - %0:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 4) %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 1, 0, 0 :: (dereferenceable invariant load 4) %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4) diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir index ccff6bb51275e..b7a757832a107 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -20,7 +20,7 @@ # W64: [[SRSRC1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub1, implicit $exec # W64: [[SRSRC2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub2, implicit $exec # W64: [[SRSRC3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub3, implicit $exec -# W64: [[SRSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 +# W64: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 # W64: [[CMP0:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub0_sub1, [[VRSRC]].sub0_sub1, implicit $exec # W64: [[CMP1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc @@ -42,7 +42,7 @@ # W32: [[SRSRC1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub1, implicit $exec # W32: [[SRSRC2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub2, implicit $exec # W32: [[SRSRC3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub3, implicit $exec -# W32: [[SRSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 +# W32: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 # W32: [[CMP0:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub0_sub1, [[VRSRC]].sub0_sub1, implicit $exec # W32: [[CMP1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc @@ -71,7 +71,7 @@ body: | %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 - %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 + %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 @@ -89,7 +89,7 @@ body: | # W64: [[SRSRC1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub1, implicit $exec # W64: [[SRSRC2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub2, implicit $exec # W64: [[SRSRC3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub3, implicit $exec -# W64: [[SRSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 +# W64: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 # W64: [[CMP0:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub0_sub1, [[VRSRC]].sub0_sub1, implicit $exec # W64: 
[[CMP1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc @@ -111,7 +111,7 @@ body: | # W32: [[SRSRC1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub1, implicit $exec # W32: [[SRSRC2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub2, implicit $exec # W32: [[SRSRC3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub3, implicit $exec -# W32: [[SRSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 +# W32: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 # W32: [[CMP0:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub0_sub1, [[VRSRC]].sub0_sub1, implicit $exec # W32: [[CMP1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc @@ -140,7 +140,7 @@ body: | %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 - %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 + %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 @@ -158,7 +158,7 @@ body: | # W64: [[SRSRC1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub1, implicit $exec # W64: [[SRSRC2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub2, implicit $exec # W64: [[SRSRC3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub3, implicit $exec -# W64: [[SRSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 +# W64: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 # W64: [[CMP0:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub0_sub1, [[VRSRC]].sub0_sub1, implicit $exec # W64: [[CMP1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc @@ -180,7 +180,7 @@ body: | # W32: [[SRSRC1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub1, implicit $exec # W32: [[SRSRC2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub2, implicit $exec # W32: [[SRSRC3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub3, implicit $exec -# W32: [[SRSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 +# W32: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 # W32: [[CMP0:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub0_sub1, [[VRSRC]].sub0_sub1, implicit $exec # W32: [[CMP1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc @@ -209,7 +209,7 @@ body: | %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 - %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, 
%2, %subreg.sub2, %3, %subreg.sub3 + %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 @@ -222,7 +222,7 @@ body: | # ADDR64: %15:sreg_64 = S_MOV_B64 0 # ADDR64: %16:sgpr_32 = S_MOV_B32 0 # ADDR64: %17:sgpr_32 = S_MOV_B32 61440 -# ADDR64: %18:sreg_128 = REG_SEQUENCE %15, %subreg.sub0_sub1, %16, %subreg.sub2, %17, %subreg.sub3 +# ADDR64: %18:sgpr_128 = REG_SEQUENCE %15, %subreg.sub0_sub1, %16, %subreg.sub2, %17, %subreg.sub3 # ADDR64: %9:vgpr_32, %12:sreg_64_xexec = V_ADD_I32_e64 %14.sub0, %4.sub0, 0, implicit $exec # ADDR64: %10:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %14.sub1, %4.sub1, killed %12, 0, implicit $exec # ADDR64: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1 @@ -245,7 +245,7 @@ body: | %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 - %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 + %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 @@ -264,7 +264,7 @@ body: | # W64-NO-ADDR64: [[SRSRC1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub1, implicit $exec # W64-NO-ADDR64: [[SRSRC2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub2, implicit $exec # W64-NO-ADDR64: [[SRSRC3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub3, implicit $exec -# W64-NO-ADDR64: [[SRSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 +# W64-NO-ADDR64: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 # W64-NO-ADDR64: [[CMP0:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub0_sub1, [[VRSRC]].sub0_sub1, implicit $exec # W64-NO-ADDR64: [[CMP1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # W64-NO-ADDR64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc @@ -284,7 +284,7 @@ body: | # W32: [[SRSRC1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub1, implicit $exec # W32: [[SRSRC2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub2, implicit $exec # W32: [[SRSRC3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[VRSRC]].sub3, implicit $exec -# W32: [[SRSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 +# W32: [[SRSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[SRSRC0]], %subreg.sub0, [[SRSRC1]], %subreg.sub1, [[SRSRC2]], %subreg.sub2, [[SRSRC3]], %subreg.sub3 # W32: [[CMP0:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub0_sub1, [[VRSRC]].sub0_sub1, implicit $exec # W32: [[CMP1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc @@ -301,7 +301,7 @@ body: | # ADDR64: [[ZERO64:%[0-9]+]]:sreg_64 = S_MOV_B64 0 # ADDR64: [[RSRCFMTLO:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 # ADDR64: [[RSRCFMTHI:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 -# ADDR64: [[ZERORSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[ZERO64]], %subreg.sub0_sub1, [[RSRCFMTLO]], %subreg.sub2, 
[[RSRCFMTHI]], %subreg.sub3 +# ADDR64: [[ZERORSRC:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[ZERO64]], %subreg.sub0_sub1, [[RSRCFMTLO]], %subreg.sub2, [[RSRCFMTHI]], %subreg.sub3 # ADDR64: [[VADDR64:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[RSRCPTR]].sub0, %subreg.sub0, [[RSRCPTR]].sub1, %subreg.sub1 # ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec @@ -323,7 +323,7 @@ body: | %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 - %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 + %6:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir index 4986f5153b6ea..c70474bf8c390 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking.mir @@ -465,17 +465,17 @@ body: | ... # GCN: name: negated_cond_subreg -# GCN: %0.sub0_sub1:sreg_128 = IMPLICIT_DEF +# GCN: %0.sub0_sub1:sgpr_128 = IMPLICIT_DEF # GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0.sub0_sub1, implicit-def $scc # GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc --- name: negated_cond_subreg body: | bb.0: - %0.sub0_sub1:sreg_128 = IMPLICIT_DEF + %0.sub0_sub1:sgpr_128 = IMPLICIT_DEF %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0.sub0_sub1, implicit $exec - %2.sub0_sub1:sreg_128 = V_CMP_NE_U32_e64 %1, 1, implicit $exec - $vcc = S_AND_B64 $exec, killed %2.sub0_sub1:sreg_128, implicit-def dead $scc + %2.sub0_sub1:sgpr_128 = V_CMP_NE_U32_e64 %1, 1, implicit $exec + $vcc = S_AND_B64 $exec, killed %2.sub0_sub1:sgpr_128, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir index 807029a92f348..8944ef86f6209 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir @@ -45,7 +45,7 @@ body: | %4:vgpr_32 = PHI %19, %bb.3, %3, %bb.2, %18, %bb.0 %15:sreg_32_xm0 = S_MOV_B32 61440 %16:sreg_32_xm0 = S_MOV_B32 -1 - %17:sreg_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3 + %17:sgpr_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3 BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) %19:vgpr_32 = COPY %4 %20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir index cf95723ac9a85..aa4bdfe238d68 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir @@ -9,7 +9,7 @@ body: | bb.0.entry: %0:sgpr_64 = COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 - %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %4:sreg_32_xm0 = COPY $sgpr101 %5:sreg_32_xm0 = S_MOV_B32 0 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 @@ -62,7 
+62,7 @@ body: | bb.0.entry: %0:sgpr_64 = COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 - %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %4:sreg_32_xm0 = COPY $sgpr101 %5:sreg_32_xm0 = S_MOV_B32 0 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 @@ -115,7 +115,7 @@ body: | bb.0.entry: %0:sgpr_64 = COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 - %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %4:sreg_32_xm0 = COPY $sgpr101 %5:sreg_32_xm0 = S_MOV_B32 0 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 @@ -160,7 +160,7 @@ body: | bb.0.entry: %0:sgpr_64 = COPY $sgpr0_sgpr1 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 - %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %4:sreg_32_xm0 = COPY $sgpr101 %5:sreg_32_xm0 = S_MOV_B32 0 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir index 450439fbdc61d..6a7402574dab2 100644 --- a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir +++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir @@ -325,7 +325,7 @@ body: | name: smem_bundle tracksRegLiveness: true registers: - - { id: 0, class: sreg_128, preferred-register: '$sgpr0_sgpr1_sgpr2_sgpr3' } + - { id: 0, class: sgpr_128, preferred-register: '$sgpr0_sgpr1_sgpr2_sgpr3' } - { id: 1, class: sreg_32_xm0_xexec, preferred-register: '$sgpr16' } - { id: 2, class: sreg_32_xm0_xexec, preferred-register: '$sgpr17' } - { id: 3, class: sreg_32_xm0_xexec, preferred-register: '$sgpr4' } diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir index 1d9ab685c5320..9b1bb7f2fb7e5 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir @@ -15,36 +15,36 @@ name: regcoal-subrange-join-seg tracksRegLiveness: true registers: - - { id: 0, class: sreg_128 } - - { id: 1, class: sreg_128 } - - { id: 2, class: sreg_128 } - - { id: 3, class: sreg_128 } - - { id: 4, class: sreg_128 } - - { id: 5, class: sreg_128 } - - { id: 6, class: sreg_128 } - - { id: 7, class: sreg_128 } - - { id: 8, class: sreg_128 } + - { id: 0, class: sgpr_128 } + - { id: 1, class: sgpr_128 } + - { id: 2, class: sgpr_128 } + - { id: 3, class: sgpr_128 } + - { id: 4, class: sgpr_128 } + - { id: 5, class: sgpr_128 } + - { id: 6, class: sgpr_128 } + - { id: 7, class: sgpr_128 } + - { id: 8, class: sgpr_128 } - { id: 9, class: sreg_32_xm0 } - { id: 10, class: sreg_32_xm0 } - { id: 11, class: vgpr_32 } - { id: 12, class: vgpr_32 } - { id: 13, class: vgpr_32 } - { id: 14, class: sreg_32_xm0_xexec } - - { id: 15, class: sreg_128 } + - { id: 15, class: sgpr_128 } - { id: 16, class: sreg_32 } - { id: 17, class: sreg_32_xm0 } - { id: 18, class: sreg_32_xm0 } - { id: 19, class: sreg_32_xm0 } - { id: 20, class: sreg_32_xm0 } - { id: 21, class: sreg_32_xm0_xexec } - - { id: 22, class: sreg_128 } + - { id: 22, class: sgpr_128 } - { id: 23, class: sreg_32_xm0 } - { id: 24, class: vgpr_32 } - { id: 25, class: sreg_64_xexec } - { id: 26, class: vgpr_32 } - { id: 27, class: sreg_32_xm0 } - { id: 28, class: sreg_32 } - - { id: 29, class: sreg_128 } + - { id: 29, class: sgpr_128 } - { id: 30, class: sreg_32_xm0 } - { id: 31, class: sreg_32_xm0 } - { id: 32, class: vgpr_32 } diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir 
index 1c9099fca6603..ad56ba08583ef 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir @@ -44,7 +44,7 @@ registers: - { id: 22, class: sreg_32_xm0_xexec } - { id: 23, class: sreg_32_xm0 } - { id: 24, class: sreg_64_xexec } - - { id: 25, class: sreg_128 } + - { id: 25, class: sgpr_128 } - { id: 26, class: sreg_64_xexec } - { id: 27, class: sreg_32_xm0_xexec } - { id: 28, class: sreg_32_xm0 } @@ -57,7 +57,7 @@ registers: - { id: 35, class: vgpr_32 } - { id: 36, class: vgpr_32 } - { id: 37, class: vgpr_32 } - - { id: 38, class: sreg_128 } + - { id: 38, class: sgpr_128 } - { id: 39, class: sreg_64_xexec } - { id: 40, class: sreg_32_xm0_xexec } - { id: 41, class: sreg_32_xm0 } @@ -70,7 +70,7 @@ registers: - { id: 48, class: vgpr_32 } - { id: 49, class: vgpr_32 } - { id: 50, class: vgpr_32 } - - { id: 51, class: sreg_128 } + - { id: 51, class: sgpr_128 } - { id: 52, class: vgpr_32 } - { id: 53, class: vgpr_32 } - { id: 54, class: vgpr_32 } diff --git a/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir b/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir index 8c29dbc61e960..9693f61a45ff0 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir @@ -16,7 +16,7 @@ body: | %23:vgpr_32 = V_CVT_U32_F32_e32 killed %21, implicit $exec %108:vgpr_32 = V_LSHRREV_B32_e32 4, killed %23, implicit $exec undef %109.sub1:vreg_128 = COPY %108 - %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sreg_128, 3044, 0, 0 :: (dereferenceable invariant load 4) + %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sgpr_128, 3044, 0, 0 :: (dereferenceable invariant load 4) S_CMP_EQ_U32 killed %28, 0, implicit-def $scc S_CBRANCH_SCC0 %bb.2, implicit killed $scc @@ -47,7 +47,7 @@ body: | S_BRANCH %bb.6 bb.6: - %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sreg_128, 2708, 0, 0 :: (dereferenceable invariant load 4) + %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sgpr_128, 2708, 0, 0 :: (dereferenceable invariant load 4) %39:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec %40:vgpr_32 = V_MAD_F32 0, %111.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec %41:vgpr_32 = V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $exec @@ -83,7 +83,7 @@ body: | S_BRANCH %bb.8 bb.8: - dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sreg_128, 2704, 0, 0 :: (dereferenceable invariant load 4) + dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sgpr_128, 2704, 0, 0 :: (dereferenceable invariant load 4) %138:vreg_128 = COPY killed %111 bb.9: diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir index 650d65f3163a4..789b155564556 100644 --- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir +++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir @@ -18,7 +18,7 @@ # CHECK: S_NOP 0, implicit %0 name: test0 registers: - - { id: 0, class: sreg_128 } + - { id: 0, class: sgpr_128 } body: | bb.0: S_NOP 0, implicit-def undef %0.sub0 @@ -46,8 +46,8 @@ body: | # CHECK: S_NOP 0, implicit %2.sub name: test1 registers: - - { id: 0, class: sreg_128 } - - { id: 1, class: sreg_128 } + - { id: 0, class: sgpr_128 } + - { id: 1, class: sgpr_128 } body: | bb.0: S_NOP 0, implicit-def 
undef %0.sub2 diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir index cd9a909ac7cd6..02db08899236c 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir +++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir @@ -17,7 +17,7 @@ regBankSelected: false selected: false tracksRegLiveness: true registers: - - { id: 0, class: sreg_128 } + - { id: 0, class: sgpr_128 } - { id: 1, class: sgpr_64 } - { id: 2, class: sreg_32_xm0 } - { id: 3, class: sgpr_32 } diff --git a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir index 82b5cbae8b533..c56387918719e 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir @@ -14,28 +14,28 @@ machineFunctionInfo: stackPtrOffsetReg: $sgpr32 registers: - { id: 0, class: sreg_64 } - - { id: 1100, class: sreg_128 } - - { id: 1101, class: sreg_128 } - - { id: 1102, class: sreg_128 } - - { id: 1103, class: sreg_128 } - - { id: 1104, class: sreg_128 } - - { id: 1105, class: sreg_128 } - - { id: 1106, class: sreg_128 } - - { id: 1107, class: sreg_128 } - - { id: 1108, class: sreg_128 } - - { id: 1109, class: sreg_128 } - - { id: 1110, class: sreg_128 } - - { id: 1111, class: sreg_128 } - - { id: 1112, class: sreg_128 } - - { id: 1113, class: sreg_128 } - - { id: 1114, class: sreg_128 } - - { id: 1115, class: sreg_128 } - - { id: 1116, class: sreg_128 } - - { id: 1117, class: sreg_128 } - - { id: 1118, class: sreg_128 } - - { id: 1119, class: sreg_128 } - - { id: 1120, class: sreg_128 } - - { id: 1121, class: sreg_128 } + - { id: 1100, class: sgpr_128 } + - { id: 1101, class: sgpr_128 } + - { id: 1102, class: sgpr_128 } + - { id: 1103, class: sgpr_128 } + - { id: 1104, class: sgpr_128 } + - { id: 1105, class: sgpr_128 } + - { id: 1106, class: sgpr_128 } + - { id: 1107, class: sgpr_128 } + - { id: 1108, class: sgpr_128 } + - { id: 1109, class: sgpr_128 } + - { id: 1110, class: sgpr_128 } + - { id: 1111, class: sgpr_128 } + - { id: 1112, class: sgpr_128 } + - { id: 1113, class: sgpr_128 } + - { id: 1114, class: sgpr_128 } + - { id: 1115, class: sgpr_128 } + - { id: 1116, class: sgpr_128 } + - { id: 1117, class: sgpr_128 } + - { id: 1118, class: sgpr_128 } + - { id: 1119, class: sgpr_128 } + - { id: 1120, class: sgpr_128 } + - { id: 1121, class: sgpr_128 } body: | bb.0: successors: %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit.mir b/llvm/test/CodeGen/AMDGPU/splitkit.mir index 3f9aeacc40936..6f3aac5891f99 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit.mir @@ -18,8 +18,8 @@ name: func0 body: | bb.0: - S_NOP 0, implicit-def undef %0.sub0 : sreg_128 - S_NOP 0, implicit-def %0.sub3 : sreg_128 + S_NOP 0, implicit-def undef %0.sub0 : sgpr_128 + S_NOP 0, implicit-def %0.sub3 : sgpr_128 ; Clobber registers S_NOP 0, implicit-def dead $sgpr0, implicit-def dead $sgpr1, implicit-def dead $sgpr2, implicit-def dead $sgpr3, implicit-def dead $sgpr4, implicit-def dead $sgpr5, implicit-def dead $sgpr6, implicit-def dead $sgpr7, implicit-def dead $sgpr8, implicit-def dead $sgpr9, implicit-def dead $sgpr10, implicit-def dead $sgpr11 @@ -49,7 +49,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2 - undef %0.sub0 : sreg_128 = COPY $sgpr0 + undef %0.sub0 : sgpr_128 = COPY $sgpr0 %0.sub2 = COPY $sgpr2 S_NOP 0, implicit-def dead $sgpr0, implicit-def dead $sgpr1 @@ -75,8 +75,8 @@ tracksRegLiveness: true body: | bb.0: successors: %bb.1, %bb.2 - S_NOP 0, 
implicit-def undef %0.sub0 : sreg_128 - S_NOP 0, implicit-def %0.sub3 : sreg_128 + S_NOP 0, implicit-def undef %0.sub0 : sgpr_128 + S_NOP 0, implicit-def %0.sub3 : sgpr_128 S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc S_BRANCH %bb.2 diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll index 7a2085fa36153..00ae166a6ce51 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -14,8 +14,8 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, < ; GCN-NEXT: s_mov_b64 s[0:1], s[36:37] ; GCN-NEXT: v_mov_b32_e32 v1, 0x2000 ; GCN-NEXT: v_mov_b32_e32 v2, 0x4000 -; GCN-NEXT: s_mov_b64 s[2:3], s[38:39] ; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: s_mov_b64 s[2:3], s[38:39] ; GCN-NEXT: v_mov_b32_e32 v4, 0x400000 ; GCN-NEXT: s_add_u32 s32, s33, 0xc0000 ; GCN-NEXT: v_add_nc_u32_e64 v32, 4, 0x4000 diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir index f4932d6987cb5..05ddadad86bbb 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -98,7 +98,7 @@ body: | %11.sub5:sreg_256 = COPY %11.sub0 %11.sub6:sreg_256 = COPY %11.sub0 %11.sub7:sreg_256 = COPY %11.sub0 - %12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) %14:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec %15:vreg_128 = IMPLICIT_DEF S_CBRANCH_SCC1 %bb.8, implicit undef $scc @@ -164,12 +164,12 @@ body: | %18:vgpr_32 = V_MAD_F32 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $exec %19:vgpr_32 = V_MAD_F32 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec - %20:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sreg_128, 1040, 0, 0 :: (dereferenceable invariant load 16) + %20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0, 0 :: (dereferenceable invariant load 16) %22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $exec %23:vgpr_32 = V_MAD_F32 0, %18, 0, 0, 0, 0, 0, 0, implicit $exec %24:vgpr_32 = COPY %20.sub3 %25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $exec - %26:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sreg_128, 1056, 0, 0 :: (dereferenceable invariant load 16) + %26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0, 0 :: (dereferenceable invariant load 16) %28:vgpr_32 = V_MAD_F32 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $exec %29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $exec %30:vgpr_32 = V_RCP_F32_e32 %29, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir index e8478a8e48790..abcff10234969 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir @@ -21,9 +21,9 @@ name: func0 body: | bb.0: - S_NOP 0, implicit-def undef %0.sub0 : sreg_128 + S_NOP 0, implicit-def undef %0.sub0 : sgpr_128 S_NOP 0, implicit-def %0.sub3 - S_NOP 0, implicit-def undef %1.sub1 : sreg_128 + S_NOP 0, implicit-def undef %1.sub1 : 
sgpr_128 S_NOP 0, implicit-def %1.sub2 diff --git a/llvm/test/CodeGen/AMDGPU/subvector-test.mir b/llvm/test/CodeGen/AMDGPU/subvector-test.mir index 1e742f6b2c763..508731a75e1b3 100644 --- a/llvm/test/CodeGen/AMDGPU/subvector-test.mir +++ b/llvm/test/CodeGen/AMDGPU/subvector-test.mir @@ -16,7 +16,7 @@ body: | successors: %bb.1, %bb.2 %1:sgpr_64 = COPY $sgpr0_sgpr1 - %4:sreg_128 = S_LOAD_DWORDX4_IMM %1, 36, 0, 0 + %4:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 36, 0, 0 %11:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4.sub2_sub3, 0, 0, 0 undef %15.sub0:vreg_64 = COPY %4.sub0 %15.sub1:vreg_64 = COPY %4.sub1 diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/unroll-n-jam-smlad.ll b/llvm/test/CodeGen/ARM/ParallelDSP/unroll-n-jam-smlad.ll index c72d458453798..16d0216df7e3b 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/unroll-n-jam-smlad.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/unroll-n-jam-smlad.ll @@ -45,7 +45,6 @@ entry: ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp ; CHECK-REG-PRESSURE: ldr{{.*}}, [sp -; CHECK-REG-PRESSURE: ldr{{.*}}, [sp ; CHECK-REG-PRESSURE: bne .LBB0_1 for.body: diff --git a/llvm/test/CodeGen/ARM/ifcvt-size.mir b/llvm/test/CodeGen/ARM/ifcvt-size.mir new file mode 100644 index 0000000000000..a5c31cbab4ae7 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ifcvt-size.mir @@ -0,0 +1,559 @@ +# RUN: llc %s -o - -run-pass=if-converter -debug-only=if-converter 2>%t| FileCheck %s +# RUN: FileCheck %s < %t --check-prefix=DEBUG +# REQUIRES: asserts + +# When optimising for size, we use a different set of heuristics for +# if-conversion, which take into account the size of the instructions, not the +# time taken to execute them. This is more complicated for Thumb, where it is +# also affected by selection of narrow branch instructions, insertion of IT +# instructions, and selection of the CB(N)Z instructions. + +--- | + target triple = "thumbv7-unknown-linux-gnueabi" + + define void @fn1() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn2() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn3() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn4() minsize "target-features"="-thumb-mode" { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn5() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn6() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if2.then: + unreachable + if2.else: + unreachable + } + + define void @fn7() minsize "target-features"="-thumb-mode" { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn8() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn9() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + lab1: + unreachable + } +... +--- +name: fn1 +alignment: 1 +tracksRegLiveness: true + +# If-conversion is profitable here because it will remove two branches of 2 +# bytes each (assuming they can become narrow branches later), and will only +# add 2 bytes with the IT instruction.
+ +# CHECK-LABEL: name: fn1 +# CHECK: t2CMPri +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: t2MOVi + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn1' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + t2B %bb.3, 14, $noreg + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r1, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = t2MOVi 0, 14, $noreg, $noreg + t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn2 +alignment: 1 +tracksRegLiveness: true + +# If-conversion is not profitable here, because the 5 conditional instructions +# would require 2 IT instructions. + +# CHECK-LABEL: name: fn2 +# CHECK: t2CMPri +# CHECK-NEXT: t2Bcc + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn2' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=4) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + t2B %bb.3, 14, $noreg + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r1, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = t2MOVi 0, 14, $noreg, $noreg + t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn3 +alignment: 1 +tracksRegLiveness: true + +# Here, the true and false blocks both end in a tBX_RET instruction. One of +# these will be removed, saving 2 bytes, and the remaining one isn't +# conditional, so doesn't push us over the limit of 4 instructions in an IT +# block. 
+ +# CHECK-LABEL: name: fn3 +# CHECK: t2CMPri +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: tBX_RET + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn3' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + + bb.2.if.else: + liveins: $r1, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn4 +alignment: 1 +tracksRegLiveness: true + +# This is the same as fn2, but compiled for ARM, which doesn't need IT +# instructions, so if-conversion is profitable. + +# CHECK-LABEL: name: fn4 +# CHECK: CMPri +# CHECK-NEXT: LDRi12 +# CHECK-NEXT: LDRi12 +# CHECK-NEXT: LDRSH +# CHECK-NEXT: LDRi12 +# CHECK-NEXT: LDRi12 +# CHECK-NEXT: MOVi + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn4' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=8, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=0) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r1, $r3 + + renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = MOVi 0, 14, $noreg, $noreg + STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + BX_RET 14, $noreg, implicit $r0 + +--- +name: fn5 +alignment: 1 +tracksRegLiveness: true + +# Here, the compare and conditional branch can be turned into a CBZ, so we +# don't want to if-convert. + +# CHECK-LABEL: name: fn5 +# CHECK: t2CMPri +# CHECK: t2Bcc + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn5' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=0, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $r0, $r1, $r2 + + t2CMPri killed renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.1.if.then: + liveins: $r0 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + + bb.2.if.else: + liveins: $r1 + + renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn6 +alignment: 1 +tracksRegLiveness: true + +# This is a forked-diamond pattern, we recognise that the conditional branches +# at the ends of the true and false blocks are the same, and can be shared. 
+ +# CHECK-LABEL: name: fn6 +# CHECK: t2CMPri +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2CMPri +# CHECK-NEXT: t2Bcc + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn6' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=12, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 4, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x30000000), %bb.4(0x50000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.3.if2.then, 1, killed $cpsr + t2B %bb.4.if2.else, 14, $noreg + + bb.2.if.else: + successors: %bb.3(0x30000000), %bb.4(0x50000000) + liveins: $r0, $r1, $r3 + + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.3.if2.then, 1, killed $cpsr + t2B %bb.4.if2.else, 14, $noreg + + bb.3.if2.then: + liveins: $r0, $r1, $r3 + + t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + + bb.4.if2.else: + liveins: $r0 + + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn7 +alignment: 1 +tracksRegLiveness: true + +# When compiling for ARM, it would be good for code size to generate very long +# runs of conditional instructions, but we put an (arbitrary) limit on this to +# avoid generating code which is very bad for performance, and only saves a few +# bytes of code size. 
+ +# CHECK-LABEL: name: fn7 +# CHECK: CMPri +# CHECK-NEXT: Bcc + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r1, $r3 + + renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = MOVi 0, 14, $noreg, $noreg + STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + BX_RET 14, $noreg, implicit $r0 + +--- +name: fn8 +alignment: 1 +tracksRegLiveness: true + +# The first t2LDRi12 instruction in each branch is the same, so one copy of it +# will be removed, and it doesn't need to be predicated, keeping us under the 4 +# instruction IT block limit. + +# CHECK-LABEL: name: fn8 +# CHECK: t2CMPri +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: t2MOVi + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn8' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=4, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 4, 14, $noreg + t2B %bb.3, 14, $noreg + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = t2MOVi 0, 14, $noreg, $noreg + t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn9 +alignment: 2 +tracksRegLiveness: true + +# The INLINEASM_BR instructions aren't analyzable, but they are identical so we +# can still do diamond if-conversion. From a code-size POV, they are common +# instructions, so one will be removed, and they don't need an IT block slot. 
+ +# CHECK-LABEL: name: fn9 +# CHECK: tCMPi8 +# CHECK-NEXT: tLDRi +# CHECK-NEXT: tLDRi +# CHECK-NEXT: tLDRi +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: INLINEASM_BR + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn9' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=6, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.3(0x50000000) + liveins: $r0, $r1, $r2 + + tCMPi8 killed renamable $r2, 42, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.3, 1, killed $cpsr + + bb.1.if.then: + successors: %bb.5(0x7fffffff) + liveins: $r0 + + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg + INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1) + + bb.3.if.else: + successors: %bb.5(0x7fffffff) + liveins: $r1 + + renamable $r0 = tLDRi killed renamable $r1, 0, 14, $noreg + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1) + + bb.5.lab1 (address-taken): + liveins: $r0 + + renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 +... diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll index 387850c831ff8..b10b0b5e6bea2 100644 --- a/llvm/test/CodeGen/ARM/sadd_sat.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat.ll @@ -210,67 +210,51 @@ define i64 @func2(i64 %x, i64 %y) nounwind { define i16 @func16(i16 %x, i16 %y) nounwind { ; CHECK-T1-LABEL: func16: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r3, r1, #16 -; CHECK-T1-NEXT: lsls r1, r0, #16 -; CHECK-T1-NEXT: movs r2, #1 -; CHECK-T1-NEXT: adds r0, r1, r3 -; CHECK-T1-NEXT: mov r3, r2 -; CHECK-T1-NEXT: bmi .LBB2_2 +; CHECK-T1-NEXT: adds r0, r0, r1 +; CHECK-T1-NEXT: ldr r1, .LCPI2_0 +; CHECK-T1-NEXT: cmp r0, r1 +; CHECK-T1-NEXT: blt .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r3, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB2_2: -; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: bne .LBB2_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: lsls r2, r2, #31 +; CHECK-T1-NEXT: ldr r1, .LCPI2_1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bvs .LBB2_5 -; CHECK-T1-NEXT: b .LBB2_6 +; CHECK-T1-NEXT: bgt .LBB2_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB2_4: -; CHECK-T1-NEXT: ldr r2, .LCPI2_0 -; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bvc .LBB2_6 -; CHECK-T1-NEXT: .LBB2_5: -; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: .LBB2_6: -; CHECK-T1-NEXT: asrs r0, r0, #16 ; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 -; CHECK-T1-NEXT: @ %bb.7: +; CHECK-T1-NEXT: @ %bb.5: ; CHECK-T1-NEXT: .LCPI2_0: -; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff +; CHECK-T1-NEXT: .long 32767 @ 0x7fff +; CHECK-T1-NEXT: .LCPI2_1: +; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-T2-LABEL: func16: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r2, r0, #16 -; CHECK-T2-NEXT: add.w r1, r2, r1, lsl #16 -; CHECK-T2-NEXT: movs r2, #0 -; CHECK-T2-NEXT: cmp r1, #0 -; CHECK-T2-NEXT: mov.w r3, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r2, #1 -; CHECK-T2-NEXT: cmp r2, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r3, #-2147483648 -; CHECK-T2-NEXT: cmp.w r1, r0, lsl #16 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r3, r1 -; CHECK-T2-NEXT: asrs r0, r3, #16 +; CHECK-T2-NEXT: add r0, r1 +; CHECK-T2-NEXT: movw r1, #32767 +; CHECK-T2-NEXT: cmp r0, r1 +; CHECK-T2-NEXT: it lt +; CHECK-T2-NEXT: movlt r1, r0 +; CHECK-T2-NEXT: movw r0, 
#32768 +; CHECK-T2-NEXT: cmn.w r1, #32768 +; CHECK-T2-NEXT: movt r0, #65535 +; CHECK-T2-NEXT: it gt +; CHECK-T2-NEXT: movgt r0, r1 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r2, r0, #16 -; CHECK-ARM-NEXT: add r1, r2, r1, lsl #16 -; CHECK-ARM-NEXT: mov r2, #0 -; CHECK-ARM-NEXT: cmp r1, #0 -; CHECK-ARM-NEXT: movwmi r2, #1 -; CHECK-ARM-NEXT: mov r3, #-2147483648 -; CHECK-ARM-NEXT: cmp r2, #0 -; CHECK-ARM-NEXT: mvnne r3, #-2147483648 -; CHECK-ARM-NEXT: cmp r1, r0, lsl #16 -; CHECK-ARM-NEXT: movvc r3, r1 -; CHECK-ARM-NEXT: asr r0, r3, #16 +; CHECK-ARM-NEXT: add r0, r0, r1 +; CHECK-ARM-NEXT: movw r1, #32767 +; CHECK-ARM-NEXT: cmp r0, r1 +; CHECK-ARM-NEXT: movlt r1, r0 +; CHECK-ARM-NEXT: movw r0, #32768 +; CHECK-ARM-NEXT: movt r0, #65535 +; CHECK-ARM-NEXT: cmn r1, #32768 +; CHECK-ARM-NEXT: movgt r0, r1 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -279,67 +263,39 @@ define i16 @func16(i16 %x, i16 %y) nounwind { define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-T1-LABEL: func8: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r3, r1, #24 -; CHECK-T1-NEXT: lsls r1, r0, #24 -; CHECK-T1-NEXT: movs r2, #1 -; CHECK-T1-NEXT: adds r0, r1, r3 -; CHECK-T1-NEXT: mov r3, r2 -; CHECK-T1-NEXT: bmi .LBB3_2 +; CHECK-T1-NEXT: adds r0, r0, r1 +; CHECK-T1-NEXT: movs r1, #127 +; CHECK-T1-NEXT: cmp r0, #127 +; CHECK-T1-NEXT: blt .LBB3_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r3, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB3_2: -; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: bne .LBB3_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: lsls r2, r2, #31 +; CHECK-T1-NEXT: mvns r1, r1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bvs .LBB3_5 -; CHECK-T1-NEXT: b .LBB3_6 +; CHECK-T1-NEXT: bgt .LBB3_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB3_4: -; CHECK-T1-NEXT: ldr r2, .LCPI3_0 -; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bvc .LBB3_6 -; CHECK-T1-NEXT: .LBB3_5: -; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: .LBB3_6: -; CHECK-T1-NEXT: asrs r0, r0, #24 ; CHECK-T1-NEXT: bx lr -; CHECK-T1-NEXT: .p2align 2 -; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: .LCPI3_0: -; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; ; CHECK-T2-LABEL: func8: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r2, r0, #24 -; CHECK-T2-NEXT: add.w r1, r2, r1, lsl #24 -; CHECK-T2-NEXT: movs r2, #0 -; CHECK-T2-NEXT: cmp r1, #0 -; CHECK-T2-NEXT: mov.w r3, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r2, #1 -; CHECK-T2-NEXT: cmp r2, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r3, #-2147483648 -; CHECK-T2-NEXT: cmp.w r1, r0, lsl #24 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r3, r1 -; CHECK-T2-NEXT: asrs r0, r3, #24 +; CHECK-T2-NEXT: add r0, r1 +; CHECK-T2-NEXT: cmp r0, #127 +; CHECK-T2-NEXT: it ge +; CHECK-T2-NEXT: movge r0, #127 +; CHECK-T2-NEXT: cmn.w r0, #128 +; CHECK-T2-NEXT: it le +; CHECK-T2-NEXT: mvnle r0, #127 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r2, r0, #24 -; CHECK-ARM-NEXT: add r1, r2, r1, lsl #24 -; CHECK-ARM-NEXT: mov r2, #0 -; CHECK-ARM-NEXT: cmp r1, #0 -; CHECK-ARM-NEXT: movwmi r2, #1 -; CHECK-ARM-NEXT: mov r3, #-2147483648 -; CHECK-ARM-NEXT: cmp r2, #0 -; CHECK-ARM-NEXT: mvnne r3, #-2147483648 -; CHECK-ARM-NEXT: cmp r1, r0, lsl #24 -; CHECK-ARM-NEXT: movvc r3, r1 -; CHECK-ARM-NEXT: asr r0, r3, #24 +; CHECK-ARM-NEXT: add r0, r0, r1 +; CHECK-ARM-NEXT: cmp r0, #127 +; CHECK-ARM-NEXT: movge r0, #127 +; CHECK-ARM-NEXT: cmn r0, 
#128 +; CHECK-ARM-NEXT: mvnle r0, #127 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y) ret i8 %tmp @@ -348,67 +304,39 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-T1-LABEL: func3: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r3, r1, #28 -; CHECK-T1-NEXT: lsls r1, r0, #28 -; CHECK-T1-NEXT: movs r2, #1 -; CHECK-T1-NEXT: adds r0, r1, r3 -; CHECK-T1-NEXT: mov r3, r2 -; CHECK-T1-NEXT: bmi .LBB4_2 +; CHECK-T1-NEXT: adds r0, r0, r1 +; CHECK-T1-NEXT: movs r1, #7 +; CHECK-T1-NEXT: cmp r0, #7 +; CHECK-T1-NEXT: blt .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r3, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB4_2: -; CHECK-T1-NEXT: cmp r3, #0 -; CHECK-T1-NEXT: bne .LBB4_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: lsls r2, r2, #31 +; CHECK-T1-NEXT: mvns r1, r1 ; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bvs .LBB4_5 -; CHECK-T1-NEXT: b .LBB4_6 +; CHECK-T1-NEXT: bgt .LBB4_4 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB4_4: -; CHECK-T1-NEXT: ldr r2, .LCPI4_0 -; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bvc .LBB4_6 -; CHECK-T1-NEXT: .LBB4_5: -; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: .LBB4_6: -; CHECK-T1-NEXT: asrs r0, r0, #28 ; CHECK-T1-NEXT: bx lr -; CHECK-T1-NEXT: .p2align 2 -; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: .LCPI4_0: -; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; ; CHECK-T2-LABEL: func3: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r2, r0, #28 -; CHECK-T2-NEXT: add.w r1, r2, r1, lsl #28 -; CHECK-T2-NEXT: movs r2, #0 -; CHECK-T2-NEXT: cmp r1, #0 -; CHECK-T2-NEXT: mov.w r3, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r2, #1 -; CHECK-T2-NEXT: cmp r2, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r3, #-2147483648 -; CHECK-T2-NEXT: cmp.w r1, r0, lsl #28 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r3, r1 -; CHECK-T2-NEXT: asrs r0, r3, #28 +; CHECK-T2-NEXT: add r0, r1 +; CHECK-T2-NEXT: cmp r0, #7 +; CHECK-T2-NEXT: it ge +; CHECK-T2-NEXT: movge r0, #7 +; CHECK-T2-NEXT: cmn.w r0, #8 +; CHECK-T2-NEXT: it le +; CHECK-T2-NEXT: mvnle r0, #7 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func3: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r2, r0, #28 -; CHECK-ARM-NEXT: add r1, r2, r1, lsl #28 -; CHECK-ARM-NEXT: mov r2, #0 -; CHECK-ARM-NEXT: cmp r1, #0 -; CHECK-ARM-NEXT: movwmi r2, #1 -; CHECK-ARM-NEXT: mov r3, #-2147483648 -; CHECK-ARM-NEXT: cmp r2, #0 -; CHECK-ARM-NEXT: mvnne r3, #-2147483648 -; CHECK-ARM-NEXT: cmp r1, r0, lsl #28 -; CHECK-ARM-NEXT: movvc r3, r1 -; CHECK-ARM-NEXT: asr r0, r3, #28 +; CHECK-ARM-NEXT: add r0, r0, r1 +; CHECK-ARM-NEXT: cmp r0, #7 +; CHECK-ARM-NEXT: movge r0, #7 +; CHECK-ARM-NEXT: cmn r0, #8 +; CHECK-ARM-NEXT: mvnle r0, #7 ; CHECK-ARM-NEXT: bx lr %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y) ret i4 %tmp diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll index b31dc0f1686e5..161d88c1a047f 100644 --- a/llvm/test/CodeGen/ARM/ssub_sat.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat.ll @@ -212,69 +212,51 @@ define i64 @func2(i64 %x, i64 %y) nounwind { define i16 @func16(i16 %x, i16 %y) nounwind { ; CHECK-T1-LABEL: func16: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: .save {r4, lr} -; CHECK-T1-NEXT: push {r4, lr} -; CHECK-T1-NEXT: lsls r1, r1, #16 -; CHECK-T1-NEXT: lsls r2, r0, #16 -; CHECK-T1-NEXT: movs r3, #1 -; CHECK-T1-NEXT: subs r0, r2, r1 -; CHECK-T1-NEXT: mov r4, r3 -; CHECK-T1-NEXT: bmi .LBB2_2 +; CHECK-T1-NEXT: subs r0, r0, r1 +; CHECK-T1-NEXT: ldr r1, .LCPI2_0 +; CHECK-T1-NEXT: cmp r0, r1 +; 
CHECK-T1-NEXT: blt .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r4, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB2_2: -; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: bne .LBB2_4 +; CHECK-T1-NEXT: ldr r1, .LCPI2_1 +; CHECK-T1-NEXT: cmp r0, r1 +; CHECK-T1-NEXT: bgt .LBB2_4 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: lsls r3, r3, #31 -; CHECK-T1-NEXT: cmp r2, r1 -; CHECK-T1-NEXT: bvs .LBB2_5 -; CHECK-T1-NEXT: b .LBB2_6 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB2_4: -; CHECK-T1-NEXT: ldr r3, .LCPI2_0 -; CHECK-T1-NEXT: cmp r2, r1 -; CHECK-T1-NEXT: bvc .LBB2_6 -; CHECK-T1-NEXT: .LBB2_5: -; CHECK-T1-NEXT: mov r0, r3 -; CHECK-T1-NEXT: .LBB2_6: -; CHECK-T1-NEXT: asrs r0, r0, #16 -; CHECK-T1-NEXT: pop {r4, pc} +; CHECK-T1-NEXT: bx lr ; CHECK-T1-NEXT: .p2align 2 -; CHECK-T1-NEXT: @ %bb.7: +; CHECK-T1-NEXT: @ %bb.5: ; CHECK-T1-NEXT: .LCPI2_0: -; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff +; CHECK-T1-NEXT: .long 32767 @ 0x7fff +; CHECK-T1-NEXT: .LCPI2_1: +; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-T2-LABEL: func16: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r0, r0, #16 -; CHECK-T2-NEXT: sub.w r12, r0, r1, lsl #16 -; CHECK-T2-NEXT: movs r3, #0 -; CHECK-T2-NEXT: cmp.w r12, #0 -; CHECK-T2-NEXT: mov.w r2, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r3, #1 -; CHECK-T2-NEXT: cmp r3, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r2, #-2147483648 -; CHECK-T2-NEXT: cmp.w r0, r1, lsl #16 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r2, r12 -; CHECK-T2-NEXT: asrs r0, r2, #16 +; CHECK-T2-NEXT: subs r0, r0, r1 +; CHECK-T2-NEXT: movw r1, #32767 +; CHECK-T2-NEXT: cmp r0, r1 +; CHECK-T2-NEXT: it lt +; CHECK-T2-NEXT: movlt r1, r0 +; CHECK-T2-NEXT: movw r0, #32768 +; CHECK-T2-NEXT: cmn.w r1, #32768 +; CHECK-T2-NEXT: movt r0, #65535 +; CHECK-T2-NEXT: it gt +; CHECK-T2-NEXT: movgt r0, r1 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r0, r0, #16 -; CHECK-ARM-NEXT: sub r12, r0, r1, lsl #16 -; CHECK-ARM-NEXT: mov r3, #0 -; CHECK-ARM-NEXT: cmp r12, #0 -; CHECK-ARM-NEXT: movwmi r3, #1 -; CHECK-ARM-NEXT: mov r2, #-2147483648 -; CHECK-ARM-NEXT: cmp r3, #0 -; CHECK-ARM-NEXT: mvnne r2, #-2147483648 -; CHECK-ARM-NEXT: cmp r0, r1, lsl #16 -; CHECK-ARM-NEXT: movvc r2, r12 -; CHECK-ARM-NEXT: asr r0, r2, #16 +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: movw r1, #32767 +; CHECK-ARM-NEXT: cmp r0, r1 +; CHECK-ARM-NEXT: movlt r1, r0 +; CHECK-ARM-NEXT: movw r0, #32768 +; CHECK-ARM-NEXT: movt r0, #65535 +; CHECK-ARM-NEXT: cmn r1, #32768 +; CHECK-ARM-NEXT: movgt r0, r1 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -283,69 +265,39 @@ define i16 @func16(i16 %x, i16 %y) nounwind { define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-T1-LABEL: func8: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: .save {r4, lr} -; CHECK-T1-NEXT: push {r4, lr} -; CHECK-T1-NEXT: lsls r1, r1, #24 -; CHECK-T1-NEXT: lsls r2, r0, #24 -; CHECK-T1-NEXT: movs r3, #1 -; CHECK-T1-NEXT: subs r0, r2, r1 -; CHECK-T1-NEXT: mov r4, r3 -; CHECK-T1-NEXT: bmi .LBB3_2 +; CHECK-T1-NEXT: subs r0, r0, r1 +; CHECK-T1-NEXT: movs r1, #127 +; CHECK-T1-NEXT: cmp r0, #127 +; CHECK-T1-NEXT: blt .LBB3_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r4, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB3_2: -; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: bne .LBB3_4 +; CHECK-T1-NEXT: mvns r1, r1 +; CHECK-T1-NEXT: cmp r0, r1 +; CHECK-T1-NEXT: bgt .LBB3_4 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: lsls r3, r3, #31 -; CHECK-T1-NEXT: 
cmp r2, r1 -; CHECK-T1-NEXT: bvs .LBB3_5 -; CHECK-T1-NEXT: b .LBB3_6 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB3_4: -; CHECK-T1-NEXT: ldr r3, .LCPI3_0 -; CHECK-T1-NEXT: cmp r2, r1 -; CHECK-T1-NEXT: bvc .LBB3_6 -; CHECK-T1-NEXT: .LBB3_5: -; CHECK-T1-NEXT: mov r0, r3 -; CHECK-T1-NEXT: .LBB3_6: -; CHECK-T1-NEXT: asrs r0, r0, #24 -; CHECK-T1-NEXT: pop {r4, pc} -; CHECK-T1-NEXT: .p2align 2 -; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: .LCPI3_0: -; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff +; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func8: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r0, r0, #24 -; CHECK-T2-NEXT: sub.w r12, r0, r1, lsl #24 -; CHECK-T2-NEXT: movs r3, #0 -; CHECK-T2-NEXT: cmp.w r12, #0 -; CHECK-T2-NEXT: mov.w r2, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r3, #1 -; CHECK-T2-NEXT: cmp r3, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r2, #-2147483648 -; CHECK-T2-NEXT: cmp.w r0, r1, lsl #24 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r2, r12 -; CHECK-T2-NEXT: asrs r0, r2, #24 +; CHECK-T2-NEXT: subs r0, r0, r1 +; CHECK-T2-NEXT: cmp r0, #127 +; CHECK-T2-NEXT: it ge +; CHECK-T2-NEXT: movge r0, #127 +; CHECK-T2-NEXT: cmn.w r0, #128 +; CHECK-T2-NEXT: it le +; CHECK-T2-NEXT: mvnle r0, #127 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r0, r0, #24 -; CHECK-ARM-NEXT: sub r12, r0, r1, lsl #24 -; CHECK-ARM-NEXT: mov r3, #0 -; CHECK-ARM-NEXT: cmp r12, #0 -; CHECK-ARM-NEXT: movwmi r3, #1 -; CHECK-ARM-NEXT: mov r2, #-2147483648 -; CHECK-ARM-NEXT: cmp r3, #0 -; CHECK-ARM-NEXT: mvnne r2, #-2147483648 -; CHECK-ARM-NEXT: cmp r0, r1, lsl #24 -; CHECK-ARM-NEXT: movvc r2, r12 -; CHECK-ARM-NEXT: asr r0, r2, #24 +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: cmp r0, #127 +; CHECK-ARM-NEXT: movge r0, #127 +; CHECK-ARM-NEXT: cmn r0, #128 +; CHECK-ARM-NEXT: mvnle r0, #127 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y) ret i8 %tmp @@ -354,69 +306,39 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-T1-LABEL: func3: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: .save {r4, lr} -; CHECK-T1-NEXT: push {r4, lr} -; CHECK-T1-NEXT: lsls r1, r1, #28 -; CHECK-T1-NEXT: lsls r2, r0, #28 -; CHECK-T1-NEXT: movs r3, #1 -; CHECK-T1-NEXT: subs r0, r2, r1 -; CHECK-T1-NEXT: mov r4, r3 -; CHECK-T1-NEXT: bmi .LBB4_2 +; CHECK-T1-NEXT: subs r0, r0, r1 +; CHECK-T1-NEXT: movs r1, #7 +; CHECK-T1-NEXT: cmp r0, #7 +; CHECK-T1-NEXT: blt .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r4, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB4_2: -; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: bne .LBB4_4 +; CHECK-T1-NEXT: mvns r1, r1 +; CHECK-T1-NEXT: cmp r0, r1 +; CHECK-T1-NEXT: bgt .LBB4_4 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: lsls r3, r3, #31 -; CHECK-T1-NEXT: cmp r2, r1 -; CHECK-T1-NEXT: bvs .LBB4_5 -; CHECK-T1-NEXT: b .LBB4_6 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB4_4: -; CHECK-T1-NEXT: ldr r3, .LCPI4_0 -; CHECK-T1-NEXT: cmp r2, r1 -; CHECK-T1-NEXT: bvc .LBB4_6 -; CHECK-T1-NEXT: .LBB4_5: -; CHECK-T1-NEXT: mov r0, r3 -; CHECK-T1-NEXT: .LBB4_6: -; CHECK-T1-NEXT: asrs r0, r0, #28 -; CHECK-T1-NEXT: pop {r4, pc} -; CHECK-T1-NEXT: .p2align 2 -; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: .LCPI4_0: -; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff +; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func3: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r0, r0, #28 -; CHECK-T2-NEXT: sub.w r12, r0, r1, lsl #28 -; CHECK-T2-NEXT: movs r3, #0 -; CHECK-T2-NEXT: cmp.w r12, #0 -; 
CHECK-T2-NEXT: mov.w r2, #-2147483648 -; CHECK-T2-NEXT: it mi -; CHECK-T2-NEXT: movmi r3, #1 -; CHECK-T2-NEXT: cmp r3, #0 -; CHECK-T2-NEXT: it ne -; CHECK-T2-NEXT: mvnne r2, #-2147483648 -; CHECK-T2-NEXT: cmp.w r0, r1, lsl #28 -; CHECK-T2-NEXT: it vc -; CHECK-T2-NEXT: movvc r2, r12 -; CHECK-T2-NEXT: asrs r0, r2, #28 +; CHECK-T2-NEXT: subs r0, r0, r1 +; CHECK-T2-NEXT: cmp r0, #7 +; CHECK-T2-NEXT: it ge +; CHECK-T2-NEXT: movge r0, #7 +; CHECK-T2-NEXT: cmn.w r0, #8 +; CHECK-T2-NEXT: it le +; CHECK-T2-NEXT: mvnle r0, #7 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func3: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r0, r0, #28 -; CHECK-ARM-NEXT: sub r12, r0, r1, lsl #28 -; CHECK-ARM-NEXT: mov r3, #0 -; CHECK-ARM-NEXT: cmp r12, #0 -; CHECK-ARM-NEXT: movwmi r3, #1 -; CHECK-ARM-NEXT: mov r2, #-2147483648 -; CHECK-ARM-NEXT: cmp r3, #0 -; CHECK-ARM-NEXT: mvnne r2, #-2147483648 -; CHECK-ARM-NEXT: cmp r0, r1, lsl #28 -; CHECK-ARM-NEXT: movvc r2, r12 -; CHECK-ARM-NEXT: asr r0, r2, #28 +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: cmp r0, #7 +; CHECK-ARM-NEXT: movge r0, #7 +; CHECK-ARM-NEXT: cmn r0, #8 +; CHECK-ARM-NEXT: mvnle r0, #7 ; CHECK-ARM-NEXT: bx lr %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y) ret i4 %tmp diff --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll index 2843f85af5135..04491d3602834 100644 --- a/llvm/test/CodeGen/ARM/uadd_sat.ll +++ b/llvm/test/CodeGen/ARM/uadd_sat.ll @@ -93,34 +93,34 @@ define i64 @func2(i64 %x, i64 %y) nounwind { define i16 @func16(i16 %x, i16 %y) nounwind { ; CHECK-T1-LABEL: func16: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r1, r1, #16 -; CHECK-T1-NEXT: lsls r0, r0, #16 ; CHECK-T1-NEXT: adds r0, r0, r1 +; CHECK-T1-NEXT: ldr r1, .LCPI2_0 +; CHECK-T1-NEXT: cmp r0, r1 ; CHECK-T1-NEXT: blo .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r0, #0 -; CHECK-T1-NEXT: mvns r0, r0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB2_2: -; CHECK-T1-NEXT: lsrs r0, r0, #16 ; CHECK-T1-NEXT: bx lr +; CHECK-T1-NEXT: .p2align 2 +; CHECK-T1-NEXT: @ %bb.3: +; CHECK-T1-NEXT: .LCPI2_0: +; CHECK-T1-NEXT: .long 65535 @ 0xffff ; ; CHECK-T2-LABEL: func16: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r2, r0, #16 -; CHECK-T2-NEXT: add.w r1, r2, r1, lsl #16 -; CHECK-T2-NEXT: cmp.w r1, r0, lsl #16 +; CHECK-T2-NEXT: add r1, r0 +; CHECK-T2-NEXT: movw r0, #65535 +; CHECK-T2-NEXT: cmp r1, r0 ; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo.w r1, #-1 -; CHECK-T2-NEXT: lsrs r0, r1, #16 +; CHECK-T2-NEXT: movlo r0, r1 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r2, r0, #16 -; CHECK-ARM-NEXT: add r1, r2, r1, lsl #16 -; CHECK-ARM-NEXT: cmp r1, r0, lsl #16 -; CHECK-ARM-NEXT: mvnlo r1, #0 -; CHECK-ARM-NEXT: lsr r0, r1, #16 +; CHECK-ARM-NEXT: add r1, r0, r1 +; CHECK-ARM-NEXT: movw r0, #65535 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movlo r0, r1 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -129,34 +129,27 @@ define i16 @func16(i16 %x, i16 %y) nounwind { define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-T1-LABEL: func8: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r1, r1, #24 -; CHECK-T1-NEXT: lsls r0, r0, #24 ; CHECK-T1-NEXT: adds r0, r0, r1 +; CHECK-T1-NEXT: cmp r0, #255 ; CHECK-T1-NEXT: blo .LBB3_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r0, #0 -; CHECK-T1-NEXT: mvns r0, r0 +; CHECK-T1-NEXT: movs r0, #255 ; CHECK-T1-NEXT: .LBB3_2: -; CHECK-T1-NEXT: lsrs r0, r0, #24 ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func8: ; CHECK-T2: @ %bb.0: -; 
CHECK-T2-NEXT: lsls r2, r0, #24 -; CHECK-T2-NEXT: add.w r1, r2, r1, lsl #24 -; CHECK-T2-NEXT: cmp.w r1, r0, lsl #24 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo.w r1, #-1 -; CHECK-T2-NEXT: lsrs r0, r1, #24 +; CHECK-T2-NEXT: add r0, r1 +; CHECK-T2-NEXT: cmp r0, #255 +; CHECK-T2-NEXT: it hs +; CHECK-T2-NEXT: movhs r0, #255 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r2, r0, #24 -; CHECK-ARM-NEXT: add r1, r2, r1, lsl #24 -; CHECK-ARM-NEXT: cmp r1, r0, lsl #24 -; CHECK-ARM-NEXT: mvnlo r1, #0 -; CHECK-ARM-NEXT: lsr r0, r1, #24 +; CHECK-ARM-NEXT: add r0, r0, r1 +; CHECK-ARM-NEXT: cmp r0, #255 +; CHECK-ARM-NEXT: movhs r0, #255 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y) ret i8 %tmp @@ -165,34 +158,27 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-T1-LABEL: func3: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r1, r1, #28 -; CHECK-T1-NEXT: lsls r0, r0, #28 ; CHECK-T1-NEXT: adds r0, r0, r1 +; CHECK-T1-NEXT: cmp r0, #15 ; CHECK-T1-NEXT: blo .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r0, #0 -; CHECK-T1-NEXT: mvns r0, r0 +; CHECK-T1-NEXT: movs r0, #15 ; CHECK-T1-NEXT: .LBB4_2: -; CHECK-T1-NEXT: lsrs r0, r0, #28 ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func3: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r2, r0, #28 -; CHECK-T2-NEXT: add.w r1, r2, r1, lsl #28 -; CHECK-T2-NEXT: cmp.w r1, r0, lsl #28 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo.w r1, #-1 -; CHECK-T2-NEXT: lsrs r0, r1, #28 +; CHECK-T2-NEXT: add r0, r1 +; CHECK-T2-NEXT: cmp r0, #15 +; CHECK-T2-NEXT: it hs +; CHECK-T2-NEXT: movhs r0, #15 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func3: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r2, r0, #28 -; CHECK-ARM-NEXT: add r1, r2, r1, lsl #28 -; CHECK-ARM-NEXT: cmp r1, r0, lsl #28 -; CHECK-ARM-NEXT: mvnlo r1, #0 -; CHECK-ARM-NEXT: lsr r0, r1, #28 +; CHECK-ARM-NEXT: add r0, r0, r1 +; CHECK-ARM-NEXT: cmp r0, #15 +; CHECK-ARM-NEXT: movhs r0, #15 ; CHECK-ARM-NEXT: bx lr %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y) ret i4 %tmp diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll index 5587cef25c316..1809c48672b14 100644 --- a/llvm/test/CodeGen/ARM/usub_sat.ll +++ b/llvm/test/CodeGen/ARM/usub_sat.ll @@ -93,33 +93,27 @@ define i64 @func2(i64 %x, i64 %y) nounwind { define i16 @func16(i16 %x, i16 %y) nounwind { ; CHECK-T1-LABEL: func16: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r1, r1, #16 -; CHECK-T1-NEXT: lsls r0, r0, #16 -; CHECK-T1-NEXT: subs r0, r0, r1 -; CHECK-T1-NEXT: bhs .LBB2_2 +; CHECK-T1-NEXT: cmp r0, r1 +; CHECK-T1-NEXT: bhi .LBB2_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r0, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB2_2: -; CHECK-T1-NEXT: lsrs r0, r0, #16 +; CHECK-T1-NEXT: subs r0, r0, r1 ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func16: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r0, r0, #16 -; CHECK-T2-NEXT: sub.w r2, r0, r1, lsl #16 -; CHECK-T2-NEXT: cmp.w r0, r1, lsl #16 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r2, #0 -; CHECK-T2-NEXT: lsrs r0, r2, #16 +; CHECK-T2-NEXT: cmp r0, r1 +; CHECK-T2-NEXT: it ls +; CHECK-T2-NEXT: movls r0, r1 +; CHECK-T2-NEXT: subs r0, r0, r1 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r0, r0, #16 -; CHECK-ARM-NEXT: sub r2, r0, r1, lsl #16 -; CHECK-ARM-NEXT: cmp r0, r1, lsl #16 -; CHECK-ARM-NEXT: movlo r2, #0 -; CHECK-ARM-NEXT: lsr r0, r2, #16 +; CHECK-ARM-NEXT: cmp r0, r1 +; CHECK-ARM-NEXT: movls r0, r1 +; 
CHECK-ARM-NEXT: sub r0, r0, r1 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -128,33 +122,27 @@ define i16 @func16(i16 %x, i16 %y) nounwind { define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-T1-LABEL: func8: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r1, r1, #24 -; CHECK-T1-NEXT: lsls r0, r0, #24 -; CHECK-T1-NEXT: subs r0, r0, r1 -; CHECK-T1-NEXT: bhs .LBB3_2 +; CHECK-T1-NEXT: cmp r0, r1 +; CHECK-T1-NEXT: bhi .LBB3_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r0, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB3_2: -; CHECK-T1-NEXT: lsrs r0, r0, #24 +; CHECK-T1-NEXT: subs r0, r0, r1 ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func8: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r0, r0, #24 -; CHECK-T2-NEXT: sub.w r2, r0, r1, lsl #24 -; CHECK-T2-NEXT: cmp.w r0, r1, lsl #24 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r2, #0 -; CHECK-T2-NEXT: lsrs r0, r2, #24 +; CHECK-T2-NEXT: cmp r0, r1 +; CHECK-T2-NEXT: it ls +; CHECK-T2-NEXT: movls r0, r1 +; CHECK-T2-NEXT: subs r0, r0, r1 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r0, r0, #24 -; CHECK-ARM-NEXT: sub r2, r0, r1, lsl #24 -; CHECK-ARM-NEXT: cmp r0, r1, lsl #24 -; CHECK-ARM-NEXT: movlo r2, #0 -; CHECK-ARM-NEXT: lsr r0, r2, #24 +; CHECK-ARM-NEXT: cmp r0, r1 +; CHECK-ARM-NEXT: movls r0, r1 +; CHECK-ARM-NEXT: sub r0, r0, r1 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y) ret i8 %tmp @@ -163,33 +151,27 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-T1-LABEL: func3: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: lsls r1, r1, #28 -; CHECK-T1-NEXT: lsls r0, r0, #28 -; CHECK-T1-NEXT: subs r0, r0, r1 -; CHECK-T1-NEXT: bhs .LBB4_2 +; CHECK-T1-NEXT: cmp r0, r1 +; CHECK-T1-NEXT: bhi .LBB4_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: movs r0, #0 +; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: .LBB4_2: -; CHECK-T1-NEXT: lsrs r0, r0, #28 +; CHECK-T1-NEXT: subs r0, r0, r1 ; CHECK-T1-NEXT: bx lr ; ; CHECK-T2-LABEL: func3: ; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: lsls r0, r0, #28 -; CHECK-T2-NEXT: sub.w r2, r0, r1, lsl #28 -; CHECK-T2-NEXT: cmp.w r0, r1, lsl #28 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r2, #0 -; CHECK-T2-NEXT: lsrs r0, r2, #28 +; CHECK-T2-NEXT: cmp r0, r1 +; CHECK-T2-NEXT: it ls +; CHECK-T2-NEXT: movls r0, r1 +; CHECK-T2-NEXT: subs r0, r0, r1 ; CHECK-T2-NEXT: bx lr ; ; CHECK-ARM-LABEL: func3: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: lsl r0, r0, #28 -; CHECK-ARM-NEXT: sub r2, r0, r1, lsl #28 -; CHECK-ARM-NEXT: cmp r0, r1, lsl #28 -; CHECK-ARM-NEXT: movlo r2, #0 -; CHECK-ARM-NEXT: lsr r0, r2, #28 +; CHECK-ARM-NEXT: cmp r0, r1 +; CHECK-ARM-NEXT: movls r0, r1 +; CHECK-ARM-NEXT: sub r0, r0, r1 ; CHECK-ARM-NEXT: bx lr %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y) ret i4 %tmp diff --git a/llvm/test/CodeGen/BPF/BTF/binary-format.ll b/llvm/test/CodeGen/BPF/BTF/binary-format.ll index bc561560caa12..9701983679132 100644 --- a/llvm/test/CodeGen/BPF/BTF/binary-format.ll +++ b/llvm/test/CodeGen/BPF/BTF/binary-format.ll @@ -28,20 +28,18 @@ entry: ; CHECK: 0x00000060 696e7420 6628696e 74206129 207b2072 ; CHECK: 0x00000070 65747572 6e20613b 207d00 ; CHECK: '.BTF.ext' -; CHECK-EL: 0x00000000 9feb0100 28000000 00000000 14000000 +; CHECK-EL: 0x00000000 9feb0100 20000000 00000000 14000000 ; CHECK-EL: 0x00000010 14000000 2c000000 40000000 00000000 -; CHECK-EL: 0x00000020 40000000 00000000 08000000 09000000 -; CHECK-EL: 0x00000030 01000000 00000000 03000000 10000000 -; CHECK-EL: 0x00000040 
09000000 02000000 00000000 0f000000 -; CHECK-EL: 0x00000050 18000000 00040000 08000000 0f000000 -; CHECK-EL: 0x00000060 18000000 10040000 -; CHECK-EB: 0x00000000 eb9f0100 00000028 00000000 00000014 +; CHECK-EL: 0x00000020 08000000 09000000 01000000 00000000 +; CHECK-EL: 0x00000030 03000000 10000000 09000000 02000000 +; CHECK-EL: 0x00000040 00000000 0f000000 18000000 00040000 +; CHECK-EL: 0x00000050 08000000 0f000000 18000000 10040000 +; CHECK-EB: 0x00000000 eb9f0100 00000020 00000000 00000014 ; CHECK-EB: 0x00000010 00000014 0000002c 00000040 00000000 -; CHECK-EB: 0x00000020 00000040 00000000 00000008 00000009 -; CHECK-EB: 0x00000030 00000001 00000000 00000003 00000010 -; CHECK-EB: 0x00000040 00000009 00000002 00000000 0000000f -; CHECK-EB: 0x00000050 00000018 00000400 00000008 0000000f -; CHECK-EB: 0x00000060 00000018 00000410 +; CHECK-EB: 0x00000020 00000008 00000009 00000001 00000000 +; CHECK-EB: 0x00000030 00000003 00000010 00000009 00000002 +; CHECK-EB: 0x00000040 00000000 0000000f 00000018 00000400 +; CHECK-EB: 0x00000050 00000008 0000000f 00000018 00000410 ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.value(metadata, metadata, metadata) #1 diff --git a/llvm/test/CodeGen/BPF/BTF/filename.ll b/llvm/test/CodeGen/BPF/BTF/filename.ll index fd96720f041f8..4c6a3a0a4196e 100644 --- a/llvm/test/CodeGen/BPF/BTF/filename.ll +++ b/llvm/test/CodeGen/BPF/BTF/filename.ll @@ -43,15 +43,13 @@ define dso_local i32 @test() local_unnamed_addr #0 !dbg !7 { ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 28 ; CHECK-NEXT: .long 48 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 48 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK-NEXT: .long 10 # FuncInfo section string offset=10 ; CHECK-NEXT: .long 1 diff --git a/llvm/test/CodeGen/BPF/BTF/func-func-ptr.ll b/llvm/test/CodeGen/BPF/BTF/func-func-ptr.ll index e61459125b4b6..d9f677cd85f5c 100644 --- a/llvm/test/CodeGen/BPF/BTF/func-func-ptr.ll +++ b/llvm/test/CodeGen/BPF/BTF/func-func-ptr.ll @@ -74,15 +74,13 @@ entry: ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 28 ; CHECK-NEXT: .long 48 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 48 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK-NEXT: .long 11 # FuncInfo section string offset=11 ; CHECK-NEXT: .long 1 diff --git a/llvm/test/CodeGen/BPF/BTF/func-non-void.ll b/llvm/test/CodeGen/BPF/BTF/func-non-void.ll index 5593ea888dde0..c09ee9a77c16f 100644 --- a/llvm/test/CodeGen/BPF/BTF/func-non-void.ll +++ b/llvm/test/CodeGen/BPF/BTF/func-non-void.ll @@ -48,15 +48,13 @@ define dso_local i32 @f1(i32 returned) local_unnamed_addr #0 !dbg !7 { ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 44 ; CHECK-NEXT: .long 64 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 64 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK-NEXT: .long 11 # FuncInfo section string offset=11 ; CHECK-NEXT: .long 1 diff --git a/llvm/test/CodeGen/BPF/BTF/func-source.ll b/llvm/test/CodeGen/BPF/BTF/func-source.ll index 0d6e098b358da..48e161ad966ec 100644 --- a/llvm/test/CodeGen/BPF/BTF/func-source.ll 
+++ b/llvm/test/CodeGen/BPF/BTF/func-source.ll @@ -43,15 +43,13 @@ entry: ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 28 ; CHECK-NEXT: .long 48 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 48 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK-NEXT: .long 3 # FuncInfo section string offset=3 ; CHECK-NEXT: .long 1 diff --git a/llvm/test/CodeGen/BPF/BTF/func-typedef.ll b/llvm/test/CodeGen/BPF/BTF/func-typedef.ll index 48fcb3362962a..46fc883ec282e 100644 --- a/llvm/test/CodeGen/BPF/BTF/func-typedef.ll +++ b/llvm/test/CodeGen/BPF/BTF/func-typedef.ll @@ -61,15 +61,13 @@ entry: ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 44 ; CHECK-NEXT: .long 64 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 64 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK-NEXT: .long 20 # FuncInfo section string offset=20 ; CHECK-NEXT: .long 1 diff --git a/llvm/test/CodeGen/BPF/BTF/func-unused-arg.ll b/llvm/test/CodeGen/BPF/BTF/func-unused-arg.ll index ea94fb7463808..c104a765876db 100644 --- a/llvm/test/CodeGen/BPF/BTF/func-unused-arg.ll +++ b/llvm/test/CodeGen/BPF/BTF/func-unused-arg.ll @@ -48,15 +48,13 @@ define dso_local i32 @f1(i32) local_unnamed_addr #0 !dbg !7 { ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 28 ; CHECK-NEXT: .long 48 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 48 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK-NEXT: .long 11 # FuncInfo section string offset=11 ; CHECK-NEXT: .long 1 diff --git a/llvm/test/CodeGen/BPF/BTF/func-void.ll b/llvm/test/CodeGen/BPF/BTF/func-void.ll index 42a24d1884df1..4979f401ccc8e 100644 --- a/llvm/test/CodeGen/BPF/BTF/func-void.ll +++ b/llvm/test/CodeGen/BPF/BTF/func-void.ll @@ -37,15 +37,13 @@ define dso_local void @f1() local_unnamed_addr #0 !dbg !7 { ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 28 ; CHECK-NEXT: .long 48 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 48 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK-NEXT: .long 4 # FuncInfo section string offset=4 ; CHECK-NEXT: .long 1 diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-basic.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-basic.ll index 6c4bbf14ce794..310a07a079c2f 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-basic.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-basic.ll @@ -103,21 +103,19 @@ define dso_local i32 @bpf_prog(%struct.sk_buff*) local_unnamed_addr #0 !dbg !15 ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 124 ; CHECK-NEXT: .long 144 ; CHECK-NEXT: .long 28 -; CHECK-NEXT: .long 172 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 43 # Field reloc section string offset=43 ; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp2 +; CHECK-NEXT: .long 
.Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 86 ; CHECK-NEXT: .long 0 diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-multilevel.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-multilevel.ll index 120b85d8687b8..105ec16180690 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-multilevel.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-multilevel.ll @@ -111,21 +111,19 @@ define dso_local i32 @bpf_prog(%struct.sk_buff*) local_unnamed_addr #0 !dbg !15 ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 76 ; CHECK-NEXT: .long 96 ; CHECK-NEXT: .long 28 -; CHECK-NEXT: .long 124 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 57 # Field reloc section string offset=57 ; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp2 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 100 ; CHECK-NEXT: .long 0 diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-struct-anonymous.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-struct-anonymous.ll index f77152b448b34..72c60f2e6cb01 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-struct-anonymous.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-struct-anonymous.ll @@ -121,15 +121,13 @@ define dso_local i32 @bpf_prog(%struct.sk_buff*) local_unnamed_addr #0 !dbg !15 ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 76 ; CHECK-NEXT: .long 96 ; CHECK-NEXT: .long 28 -; CHECK-NEXT: .long 124 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK: .long 16 # FieldReloc diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-struct-array.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-struct-array.ll index a56b8fd84095e..d4590bb8a59e9 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-struct-array.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-struct-array.ll @@ -124,21 +124,19 @@ define dso_local i32 @bpf_prog(%struct.sk_buff*) local_unnamed_addr #0 !dbg !15 ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 76 ; CHECK-NEXT: .long 96 ; CHECK-NEXT: .long 28 -; CHECK-NEXT: .long 124 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo ; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 77 # Field reloc section string offset=77 ; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp2 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 120 ; CHECK-NEXT: .long 0 diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-union.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-union.ll index cb60c81d58a15..44f687b1a1e9b 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-union.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-union.ll @@ -127,21 +127,19 @@ define dso_local i32 @bpf_prog(%union.sk_buff*) local_unnamed_addr #0 !dbg !15 { ; CHECK-NEXT: .short 60319 # 0xeb9f ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 32 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 20 ; CHECK-NEXT: .long 76 ; CHECK-NEXT: .long 96 ; CHECK-NEXT: .long 28 -; CHECK-NEXT: .long 124 -; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 8 # FuncInfo 
; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 54 # Field reloc section string offset=54 ; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp2 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 97 ; CHECK-NEXT: .long 0 diff --git a/llvm/test/CodeGen/BPF/CORE/patchable-extern-char.ll b/llvm/test/CodeGen/BPF/CORE/patchable-extern-char.ll deleted file mode 100644 index fb30fd5a070df..0000000000000 --- a/llvm/test/CodeGen/BPF/CORE/patchable-extern-char.ll +++ /dev/null @@ -1,107 +0,0 @@ -; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s -; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s -; Source code: -; extern __attribute__((section(".BPF.patchable_externs"))) char a; -; int foo() { return a; } -; Compilation flag: -; clang -target bpf -O2 -g -S -emit-llvm test.c - -@a = external dso_local local_unnamed_addr global i8, section ".BPF.patchable_externs", align 1 - -; Function Attrs: norecurse nounwind readonly -define dso_local i32 @foo() local_unnamed_addr #0 !dbg !7 { - %1 = load i8, i8* @a, align 1, !dbg !11, !tbaa !12 - %2 = sext i8 %1 to i32, !dbg !11 -; CHECK: r0 = 0 -; CHECK-NEXT: r0 <<= 56 -; CHECK-NEXT: r0 s>>= 56 - ret i32 %2, !dbg !15 -} - -; CHECK: .section .BTF,"",@progbits -; CHECK-NEXT: .short 60319 # 0xeb9f -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 24 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 54 -; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) -; CHECK-NEXT: .long 218103808 # 0xd000000 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 1 # BTF_KIND_INT(id = 2) -; CHECK-NEXT: .long 16777216 # 0x1000000 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 16777248 # 0x1000020 -; CHECK-NEXT: .long 5 # BTF_KIND_FUNC(id = 3) -; CHECK-NEXT: .long 201326592 # 0xc000000 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .byte 0 # string offset=0 -; CHECK-NEXT: .ascii "int" # string offset=1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "foo" # string offset=5 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii ".text" # string offset=9 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .byte 97 # string offset=15 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "/tmp/home/yhs/work/tests/llvm/test.c" # string offset=17 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .section .BTF.ext,"",@progbits -; CHECK-NEXT: .short 60319 # 0xeb9f -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 44 -; CHECK-NEXT: .long 64 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 64 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 8 # FuncInfo -; CHECK-NEXT: .long 9 # FuncInfo section string offset=9 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Lfunc_begin0 -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long 16 # LineInfo -; CHECK-NEXT: .long 9 # LineInfo section string offset=9 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 17 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 2068 # Line 2 Col 20 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 17 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 2061 # Line 2 Col 13 -; CHECK-NEXT: .long 8 # ExternReloc -; CHECK-NEXT: .long 9 # Extern reloc section string offset=9 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 15 - -attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.20181009 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) -!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/llvm") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 8.0.20181009 "} -!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: true, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{!10} -!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!11 = !DILocation(line: 2, column: 20, scope: !7) -!12 = !{!13, !13, i64 0} -!13 = !{!"omnipotent char", !14, i64 0} -!14 = !{!"Simple C/C++ TBAA"} -!15 = !DILocation(line: 2, column: 13, scope: !7) diff --git a/llvm/test/CodeGen/BPF/CORE/patchable-extern-uint.ll b/llvm/test/CodeGen/BPF/CORE/patchable-extern-uint.ll deleted file mode 100644 index ba3770b47368b..0000000000000 --- a/llvm/test/CodeGen/BPF/CORE/patchable-extern-uint.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s -; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s -; Source code: -; extern __attribute__((section(".BPF.patchable_externs"))) unsigned a; -; int foo() { return a; } -; Compilation flag: -; clang -target bpf -O2 -g -S -emit-llvm test.c - -@a = external dso_local local_unnamed_addr global i32, section ".BPF.patchable_externs", align 4 - -; Function Attrs: norecurse nounwind readonly -define dso_local i32 @foo() local_unnamed_addr #0 !dbg !7 { - %1 = load i32, i32* @a, align 4, !dbg !11, !tbaa !12 -; CHECK: r0 = 0 -; CHECK-NEXT: exit - ret i32 %1, !dbg !16 -} - -; CHECK: .section .BTF,"",@progbits -; CHECK-NEXT: .short 60319 # 0xeb9f -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 24 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 49 -; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) -; CHECK-NEXT: .long 218103808 # 0xd000000 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 1 # BTF_KIND_INT(id = 2) -; CHECK-NEXT: .long 16777216 # 0x1000000 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 16777248 # 0x1000020 -; CHECK-NEXT: .long 5 # BTF_KIND_FUNC(id = 3) -; CHECK-NEXT: .long 201326592 # 0xc000000 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .byte 0 # string offset=0 -; CHECK-NEXT: .ascii "int" # string offset=1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "foo" # string offset=5 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii ".text" # string offset=9 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .byte 97 # string offset=15 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "/tmp/yhs/work/tests/llvm/test.c" # string offset=17 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .section .BTF.ext,"",@progbits -; CHECK-NEXT: .short 60319 # 0xeb9f -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 28 -; CHECK-NEXT: .long 48 -; 
CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 48 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 8 # FuncInfo -; CHECK-NEXT: .long 9 # FuncInfo section string offset=9 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Lfunc_begin0 -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long 16 # LineInfo -; CHECK-NEXT: .long 9 # LineInfo section string offset=9 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 17 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 2061 # Line 2 Col 13 -; CHECK-NEXT: .long 8 # ExternReloc -; CHECK-NEXT: .long 9 # Extern reloc section string offset=9 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 15 - -attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.20181009 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) -!1 = !DIFile(filename: "test.c", directory: "/tmp/yhs/work/tests/llvm") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 8.0.20181009 "} -!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: true, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{!10} -!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!11 = !DILocation(line: 2, column: 20, scope: !7) -!12 = !{!13, !13, i64 0} -!13 = !{!"int", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !DILocation(line: 2, column: 13, scope: !7) diff --git a/llvm/test/CodeGen/BPF/CORE/patchable-extern-ulonglong.ll b/llvm/test/CodeGen/BPF/CORE/patchable-extern-ulonglong.ll deleted file mode 100644 index c483cceadc8e5..0000000000000 --- a/llvm/test/CodeGen/BPF/CORE/patchable-extern-ulonglong.ll +++ /dev/null @@ -1,103 +0,0 @@ -; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s -; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s -; Source code: -; extern __attribute__((section(".BPF.patchable_externs"))) unsigned long long a; -; int foo() { return a; } -; Compilation flag: -; clang -target bpf -O2 -g -S -emit-llvm test.c - -@a = external dso_local local_unnamed_addr global i64, section ".BPF.patchable_externs", align 8 - -; Function Attrs: norecurse nounwind readonly -define dso_local i32 @foo() local_unnamed_addr #0 !dbg !7 { - %1 = load i64, i64* @a, align 8, !dbg !11, !tbaa !12 - %2 = trunc i64 %1 to i32, !dbg !11 -; CHECK: r0 = 0 ll -; CHECK-NEXT: exit - ret i32 %2, !dbg !16 -} - -; CHECK: .section .BTF,"",@progbits -; CHECK-NEXT: .short 60319 # 0xeb9f -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 24 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 54 -; CHECK-NEXT: .long 0 # BTF_KIND_FUNC_PROTO(id = 1) -; CHECK-NEXT: .long 218103808 # 0xd000000 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: 
.long 1 # BTF_KIND_INT(id = 2) -; CHECK-NEXT: .long 16777216 # 0x1000000 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 16777248 # 0x1000020 -; CHECK-NEXT: .long 5 # BTF_KIND_FUNC(id = 3) -; CHECK-NEXT: .long 201326592 # 0xc000000 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .byte 0 # string offset=0 -; CHECK-NEXT: .ascii "int" # string offset=1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "foo" # string offset=5 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii ".text" # string offset=9 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .byte 97 # string offset=15 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "/tmp/home/yhs/work/tests/llvm/test.c" # string offset=17 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .section .BTF.ext,"",@progbits -; CHECK-NEXT: .short 60319 # 0xeb9f -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .long 40 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 28 -; CHECK-NEXT: .long 48 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 48 -; CHECK-NEXT: .long 20 -; CHECK-NEXT: .long 8 # FuncInfo -; CHECK-NEXT: .long 9 # FuncInfo section string offset=9 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Lfunc_begin0 -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long 16 # LineInfo -; CHECK-NEXT: .long 9 # LineInfo section string offset=9 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 17 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 2061 # Line 2 Col 13 -; CHECK-NEXT: .long 8 # ExternReloc -; CHECK-NEXT: .long 9 # Extern reloc section string offset=9 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 15 - -attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.20181009 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) -!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/llvm") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 8.0.20181009 "} -!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: true, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{!10} -!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!11 = !DILocation(line: 2, column: 20, scope: !7) -!12 = !{!13, !13, i64 0} -!13 = !{!"long long", !14, i64 0} -!14 = !{!"omnipotent char", !15, i64 0} -!15 = !{!"Simple C/C++ TBAA"} -!16 = !DILocation(line: 2, column: 13, scope: !7) diff --git a/llvm/test/CodeGen/MIR/WebAssembly/int-type-register-class-name.mir b/llvm/test/CodeGen/MIR/WebAssembly/int-type-register-class-name.mir index e5d7076eedeff..60dd27363949f 100644 --- a/llvm/test/CodeGen/MIR/WebAssembly/int-type-register-class-name.mir +++ b/llvm/test/CodeGen/MIR/WebAssembly/int-type-register-class-name.mir @@ -9,5 +9,5 @@ body: | liveins: $arguments %0:i32 = CONST_I32 0, implicit-def dead $arguments ; 
CHECK: %0:i32 = CONST_I32 0, implicit-def dead $arguments - RETURN_VOID implicit-def dead $arguments + RETURN implicit-def dead $arguments ... diff --git a/llvm/test/CodeGen/MIR/WebAssembly/typed-immediate-operand-invalid0.mir b/llvm/test/CodeGen/MIR/WebAssembly/typed-immediate-operand-invalid0.mir index b9f138c14b003..72908711e9ee5 100644 --- a/llvm/test/CodeGen/MIR/WebAssembly/typed-immediate-operand-invalid0.mir +++ b/llvm/test/CodeGen/MIR/WebAssembly/typed-immediate-operand-invalid0.mir @@ -9,5 +9,5 @@ body: | liveins: $arguments ; CHECK: [[@LINE+1]]:24: expected integers after 'i'/'s'/'p' type character %0:i32 = CONST_I32 i 0, implicit-def dead $arguments - RETURN_VOID implicit-def dead $arguments + RETURN implicit-def dead $arguments ... diff --git a/llvm/test/CodeGen/MIR/WebAssembly/typed-immediate-operand-invalid1.mir b/llvm/test/CodeGen/MIR/WebAssembly/typed-immediate-operand-invalid1.mir index 03e722f6c3c29..f5c16b52553bf 100644 --- a/llvm/test/CodeGen/MIR/WebAssembly/typed-immediate-operand-invalid1.mir +++ b/llvm/test/CodeGen/MIR/WebAssembly/typed-immediate-operand-invalid1.mir @@ -9,5 +9,5 @@ body: | liveins: $arguments ; CHECK: [[@LINE+1]]:24: a typed immediate operand should start with one of 'i', 's', or 'p' %0:i32 = CONST_I32 abc 0, implicit-def dead $arguments - RETURN_VOID implicit-def dead $arguments + RETURN implicit-def dead $arguments ... diff --git a/llvm/test/CodeGen/MIR/X86/global-value-operands.mir b/llvm/test/CodeGen/MIR/X86/global-value-operands.mir index 6287f99e39830..7b55cb4f14ea4 100644 --- a/llvm/test/CodeGen/MIR/X86/global-value-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/global-value-operands.mir @@ -103,7 +103,7 @@ body: | name: test2 body: | bb.0.entry: - ; CHECK: , @"\01Hello@$%09 \5C World,", + ; CHECK: , @"\01Hello@$%09 \\ World,", $rax = MOV64rm $rip, 1, _, @"\01Hello@$%09 \\ World,", _ $eax = MOV32rm killed $rax, 1, _, 0, _ RETQ $eax diff --git a/llvm/test/CodeGen/Mips/mcount.ll b/llvm/test/CodeGen/Mips/mcount.ll index e136ae03da566..fe8cee9d78d80 100644 --- a/llvm/test/CodeGen/Mips/mcount.ll +++ b/llvm/test/CodeGen/Mips/mcount.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=MIPS32 -; RUN: llc -march=mips -relocation-model=pic < %s | FileCheck %s -check-prefix=MIPS32-PIC -; RUN: llc -march=mips64 < %s | FileCheck %s -check-prefix=MIPS64 -; RUN: llc -march=mips64 -relocation-model=pic < %s | FileCheck %s -check-prefix=MIPS64-PIC -; RUN: llc -march=mips -mattr=+micromips < %s | FileCheck %s -check-prefix=MIPS32-MM -; RUN: llc -march=mips -relocation-model=pic -mattr=+micromips < %s | FileCheck %s -check-prefix=MIPS32-MM-PIC +; RUN: llc -march=mips -verify-machineinstrs \ +; RUN: < %s | FileCheck %s -check-prefix=MIPS32 +; RUN: llc -march=mips -verify-machineinstrs -relocation-model=pic \ +; RUN: < %s | FileCheck %s -check-prefix=MIPS32-PIC +; RUN: llc -march=mips64 -verify-machineinstrs \ +; RUN: < %s | FileCheck %s -check-prefix=MIPS64 +; RUN: llc -march=mips64 -verify-machineinstrs -relocation-model=pic \ +; RUN: < %s | FileCheck %s -check-prefix=MIPS64-PIC +; RUN: llc -march=mips -verify-machineinstrs -mattr=+micromips \ +; RUN: < %s | FileCheck %s -check-prefix=MIPS32-MM +; RUN: llc -march=mips -verify-machineinstrs -relocation-model=pic -mattr=+micromips \ +; RUN: < %s | FileCheck %s -check-prefix=MIPS32-MM-PIC ; Test that checks ABI for _mcount calls. 
@@ -49,7 +55,7 @@ define void @foo() #0 { ; MIPS64-NEXT: .cfi_def_cfa_offset 16 ; MIPS64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill ; MIPS64-NEXT: .cfi_offset 31, -8 -; MIPS64-NEXT: or $1, $ra, $zero +; MIPS64-NEXT: move $1, $ra ; MIPS64-NEXT: jal _mcount ; MIPS64-NEXT: nop ; MIPS64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload @@ -68,7 +74,7 @@ define void @foo() #0 { ; MIPS64-PIC-NEXT: daddu $1, $1, $25 ; MIPS64-PIC-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(foo))) ; MIPS64-PIC-NEXT: ld $25, %call16(_mcount)($gp) -; MIPS64-PIC-NEXT: or $1, $ra, $zero +; MIPS64-PIC-NEXT: move $1, $ra ; MIPS64-PIC-NEXT: .reloc .Ltmp0, R_MIPS_JALR, _mcount ; MIPS64-PIC-NEXT: .Ltmp0: ; MIPS64-PIC-NEXT: jalr $25 diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll new file mode 100644 index 0000000000000..593d276f95bd7 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -0,0 +1,753 @@ +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s + +; test_no_prep: +; unsigned long test_no_prep(char *p, int count) { +; unsigned long i=0, res=0; +; int DISP1 = 4001; +; int DISP2 = 4002; +; int DISP3 = 4003; +; int DISP4 = 4004; +; for (; i < count ; i++) { +; unsigned long x1 = *(unsigned long *)(p + i + DISP1); +; unsigned long x2 = *(unsigned long *)(p + i + DISP2); +; unsigned long x3 = *(unsigned long *)(p + i + DISP3); +; unsigned long x4 = *(unsigned long *)(p + i + DISP4); +; res += x1*x2*x3*x4; +; } +; return res + count; +; } + +define i64 @test_no_prep(i8* %0, i32 signext %1) { +; CHECK-LABEL: test_no_prep: +; CHECK: addi r3, r3, 4004 +; CHECK: .LBB0_2: # +; CHECK-NEXT: ldx r9, r3, r6 +; CHECK-NEXT: ldx r10, r3, r7 +; CHECK-NEXT: mulld r9, r10, r9 +; CHECK-NEXT: ldx r11, r3, r8 +; CHECK-NEXT: mulld r9, r9, r11 +; CHECK-NEXT: ld r12, 0(r3) +; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: maddld r5, r9, r12, r5 +; CHECK-NEXT: bdnz .LBB0_2 + %3 = sext i32 %1 to i64 + %4 = icmp eq i32 %1, 0 + br i1 %4, label %27, label %5 + +5: ; preds = %2, %5 + %6 = phi i64 [ %25, %5 ], [ 0, %2 ] + %7 = phi i64 [ %24, %5 ], [ 0, %2 ] + %8 = getelementptr inbounds i8, i8* %0, i64 %6 + %9 = getelementptr inbounds i8, i8* %8, i64 4001 + %10 = bitcast i8* %9 to i64* + %11 = load i64, i64* %10, align 8 + %12 = getelementptr inbounds i8, i8* %8, i64 4002 + %13 = bitcast i8* %12 to i64* + %14 = load i64, i64* %13, align 8 + %15 = getelementptr inbounds i8, i8* %8, i64 4003 + %16 = bitcast i8* %15 to i64* + %17 = load i64, i64* %16, align 8 + %18 = getelementptr inbounds i8, i8* %8, i64 4004 + %19 = bitcast i8* %18 to i64* + %20 = load i64, i64* %19, align 8 + %21 = mul i64 %14, %11 + %22 = mul i64 %21, %17 + %23 = mul i64 %22, %20 + %24 = add i64 %23, %7 + %25 = add nuw i64 %6, 1 + %26 = icmp ult i64 %25, %3 + br i1 %26, label %5, label %27 + +27: ; preds = %5, %2 + %28 = phi i64 [ 0, %2 ], [ %24, %5 ] + %29 = add i64 %28, %3 + ret i64 %29 +} + +; test_ds_prep: +; unsigned long test_ds_prep(char *p, int count) { +; unsigned long i=0, res=0; +; int DISP1 = 4001; +; int DISP2 = 4002; +; int DISP3 = 4003; +; int DISP4 = 4006; +; for (; i < count ; i++) { +; unsigned long x1 = *(unsigned long *)(p + i + DISP1); +; unsigned long x2 = *(unsigned long *)(p + i + DISP2); +; unsigned long x3 = *(unsigned long *)(p + i + DISP3); +; unsigned long x4 = *(unsigned long *)(p + i + DISP4); +; res += x1*x2*x3*x4; +; } +; return res + count; +; } + +define i64 @test_ds_prep(i8* %0, i32 signext %1) 
{ +; CHECK-LABEL: test_ds_prep: +; CHECK: addi r6, r3, 4001 +; CHECK: .LBB1_2: # +; CHECK-NEXT: ld r10, 0(r6) +; CHECK-NEXT: ldx r11, r6, r5 +; CHECK-NEXT: mulld r10, r11, r10 +; CHECK-NEXT: ldx r12, r6, r7 +; CHECK-NEXT: mulld r10, r10, r12 +; CHECK-NEXT: addi r9, r6, 1 +; CHECK-NEXT: ldx r6, r6, r8 +; CHECK-NEXT: maddld r3, r10, r6, r3 +; CHECK-NEXT: mr r6, r9 +; CHECK-NEXT: bdnz .LBB1_2 + %3 = sext i32 %1 to i64 + %4 = icmp eq i32 %1, 0 + br i1 %4, label %27, label %5 + +5: ; preds = %2, %5 + %6 = phi i64 [ %25, %5 ], [ 0, %2 ] + %7 = phi i64 [ %24, %5 ], [ 0, %2 ] + %8 = getelementptr inbounds i8, i8* %0, i64 %6 + %9 = getelementptr inbounds i8, i8* %8, i64 4001 + %10 = bitcast i8* %9 to i64* + %11 = load i64, i64* %10, align 8 + %12 = getelementptr inbounds i8, i8* %8, i64 4002 + %13 = bitcast i8* %12 to i64* + %14 = load i64, i64* %13, align 8 + %15 = getelementptr inbounds i8, i8* %8, i64 4003 + %16 = bitcast i8* %15 to i64* + %17 = load i64, i64* %16, align 8 + %18 = getelementptr inbounds i8, i8* %8, i64 4006 + %19 = bitcast i8* %18 to i64* + %20 = load i64, i64* %19, align 8 + %21 = mul i64 %14, %11 + %22 = mul i64 %21, %17 + %23 = mul i64 %22, %20 + %24 = add i64 %23, %7 + %25 = add nuw i64 %6, 1 + %26 = icmp ult i64 %25, %3 + br i1 %26, label %5, label %27 + +27: ; preds = %5, %2 + %28 = phi i64 [ 0, %2 ], [ %24, %5 ] + %29 = add i64 %28, %3 + ret i64 %29 +} + +; test_max_number_reminder: +; unsigned long test_max_number_reminder(char *p, int count) { +; unsigned long i=0, res=0; +; int DISP1 = 4001; +; int DISP2 = 4002; +; int DISP3 = 4003; +; int DISP4 = 4005; +; int DISP5 = 4006; +; int DISP6 = 4007; +; int DISP7 = 4014; +; int DISP8 = 4010; +; int DISP9 = 4011; +; for (; i < count ; i++) { +; unsigned long x1 = *(unsigned long *)(p + i + DISP1); +; unsigned long x2 = *(unsigned long *)(p + i + DISP2); +; unsigned long x3 = *(unsigned long *)(p + i + DISP3); +; unsigned long x4 = *(unsigned long *)(p + i + DISP4); +; unsigned long x5 = *(unsigned long *)(p + i + DISP5); +; unsigned long x6 = *(unsigned long *)(p + i + DISP6); +; unsigned long x7 = *(unsigned long *)(p + i + DISP7); +; unsigned long x8 = *(unsigned long *)(p + i + DISP8); +; unsigned long x9 = *(unsigned long *)(p + i + DISP9); +; res += x1*x2*x3*x4*x5*x6*x7*x8*x9; +; } +; return res + count; +;} + +define i64 @test_max_number_reminder(i8* %0, i32 signext %1) { +; CHECK-LABEL: test_max_number_reminder: +; CHECK: addi r8, r3, 4001 +; CHECK: .LBB2_2: # +; CHECK-NEXT: ld r30, 0(r8) +; CHECK-NEXT: ldx r29, r8, r5 +; CHECK-NEXT: mulld r30, r29, r30 +; CHECK-NEXT: addi r0, r8, 1 +; CHECK-NEXT: ld r28, 4(r8) +; CHECK-NEXT: ldx r27, r8, r7 +; CHECK-NEXT: ldx r26, r8, r9 +; CHECK-NEXT: ldx r25, r8, r10 +; CHECK-NEXT: ldx r24, r8, r11 +; CHECK-NEXT: ldx r23, r8, r12 +; CHECK-NEXT: ldx r8, r8, r6 +; CHECK-NEXT: mulld r8, r30, r8 +; CHECK-NEXT: mulld r8, r8, r28 +; CHECK-NEXT: mulld r8, r8, r27 +; CHECK-NEXT: mulld r8, r8, r26 +; CHECK-NEXT: mulld r8, r8, r25 +; CHECK-NEXT: mulld r8, r8, r24 +; CHECK-NEXT: maddld r3, r8, r23, r3 +; CHECK-NEXT: mr r8, r0 +; CHECK-NEXT: bdnz .LBB2_2 + %3 = sext i32 %1 to i64 + %4 = icmp eq i32 %1, 0 + br i1 %4, label %47, label %5 + +5: ; preds = %2, %5 + %6 = phi i64 [ %45, %5 ], [ 0, %2 ] + %7 = phi i64 [ %44, %5 ], [ 0, %2 ] + %8 = getelementptr inbounds i8, i8* %0, i64 %6 + %9 = getelementptr inbounds i8, i8* %8, i64 4001 + %10 = bitcast i8* %9 to i64* + %11 = load i64, i64* %10, align 8 + %12 = getelementptr inbounds i8, i8* %8, i64 4002 + %13 = bitcast i8* %12 to i64* + %14 = load 
i64, i64* %13, align 8 + %15 = getelementptr inbounds i8, i8* %8, i64 4003 + %16 = bitcast i8* %15 to i64* + %17 = load i64, i64* %16, align 8 + %18 = getelementptr inbounds i8, i8* %8, i64 4005 + %19 = bitcast i8* %18 to i64* + %20 = load i64, i64* %19, align 8 + %21 = getelementptr inbounds i8, i8* %8, i64 4006 + %22 = bitcast i8* %21 to i64* + %23 = load i64, i64* %22, align 8 + %24 = getelementptr inbounds i8, i8* %8, i64 4007 + %25 = bitcast i8* %24 to i64* + %26 = load i64, i64* %25, align 8 + %27 = getelementptr inbounds i8, i8* %8, i64 4014 + %28 = bitcast i8* %27 to i64* + %29 = load i64, i64* %28, align 8 + %30 = getelementptr inbounds i8, i8* %8, i64 4010 + %31 = bitcast i8* %30 to i64* + %32 = load i64, i64* %31, align 8 + %33 = getelementptr inbounds i8, i8* %8, i64 4011 + %34 = bitcast i8* %33 to i64* + %35 = load i64, i64* %34, align 8 + %36 = mul i64 %14, %11 + %37 = mul i64 %36, %17 + %38 = mul i64 %37, %20 + %39 = mul i64 %38, %23 + %40 = mul i64 %39, %26 + %41 = mul i64 %40, %29 + %42 = mul i64 %41, %32 + %43 = mul i64 %42, %35 + %44 = add i64 %43, %7 + %45 = add nuw i64 %6, 1 + %46 = icmp ult i64 %45, %3 + br i1 %46, label %5, label %47 + +47: ; preds = %5, %2 + %48 = phi i64 [ 0, %2 ], [ %44, %5 ] + %49 = add i64 %48, %3 + ret i64 %49 +} + +; test_update_ds_prep_interact: +; unsigned long test_update_ds_prep_interact(char *p, int count) { +; unsigned long i=0, res=0; +; int DISP1 = 4001; +; int DISP2 = 4002; +; int DISP3 = 4003; +; int DISP4 = 4006; +; for (; i < count ; i++) { +; unsigned long x1 = *(unsigned long *)(p + 4 * i + DISP1); +; unsigned long x2 = *(unsigned long *)(p + 4 * i + DISP2); +; unsigned long x3 = *(unsigned long *)(p + 4 * i + DISP3); +; unsigned long x4 = *(unsigned long *)(p + 4 * i + DISP4); +; res += x1*x2*x3*x4; +; } +; return res + count; +; } + +define dso_local i64 @test_update_ds_prep_interact(i8* %0, i32 signext %1) { +; CHECK-LABEL: test_update_ds_prep_interact: +; CHECK: addi r3, r3, 3997 +; CHECK: .LBB3_2: # +; CHECK-NEXT: ldu r9, 4(r3) +; CHECK-NEXT: ldx r10, r3, r6 +; CHECK-NEXT: mulld r9, r10, r9 +; CHECK-NEXT: ldx r11, r3, r7 +; CHECK-NEXT: mulld r9, r9, r11 +; CHECK-NEXT: ldx r12, r3, r8 +; CHECK-NEXT: maddld r5, r9, r12, r5 +; CHECK-NEXT: bdnz .LBB3_2 + %3 = sext i32 %1 to i64 + %4 = icmp eq i32 %1, 0 + br i1 %4, label %28, label %5 + +5: ; preds = %2, %5 + %6 = phi i64 [ %26, %5 ], [ 0, %2 ] + %7 = phi i64 [ %25, %5 ], [ 0, %2 ] + %8 = shl i64 %6, 2 + %9 = getelementptr inbounds i8, i8* %0, i64 %8 + %10 = getelementptr inbounds i8, i8* %9, i64 4001 + %11 = bitcast i8* %10 to i64* + %12 = load i64, i64* %11, align 8 + %13 = getelementptr inbounds i8, i8* %9, i64 4002 + %14 = bitcast i8* %13 to i64* + %15 = load i64, i64* %14, align 8 + %16 = getelementptr inbounds i8, i8* %9, i64 4003 + %17 = bitcast i8* %16 to i64* + %18 = load i64, i64* %17, align 8 + %19 = getelementptr inbounds i8, i8* %9, i64 4006 + %20 = bitcast i8* %19 to i64* + %21 = load i64, i64* %20, align 8 + %22 = mul i64 %15, %12 + %23 = mul i64 %22, %18 + %24 = mul i64 %23, %21 + %25 = add i64 %24, %7 + %26 = add nuw i64 %6, 1 + %27 = icmp ult i64 %26, %3 + br i1 %27, label %5, label %28 + +28: ; preds = %5, %2 + %29 = phi i64 [ 0, %2 ], [ %25, %5 ] + %30 = add i64 %29, %3 + ret i64 %30 +} + +; test_update_ds_prep_nointeract: +; unsigned long test_update_ds_prep_nointeract(char *p, int count) { +; unsigned long i=0, res=0; +; int DISP1 = 4001; +; int DISP2 = 4002; +; int DISP3 = 4003; +; int DISP4 = 4007; +; for (; i < count ; i++) { +; char x1 = *(p + i + 
DISP1); +; unsigned long x2 = *(unsigned long *)(p + i + DISP2); +; unsigned long x3 = *(unsigned long *)(p + i + DISP3); +; unsigned long x4 = *(unsigned long *)(p + i + DISP4); +; res += (unsigned long)x1*x2*x3*x4; +; } +; return res + count; +; } + +define i64 @test_update_ds_prep_nointeract(i8* %0, i32 signext %1) { +; CHECK-LABEL: test_update_ds_prep_nointeract: +; CHECK: addi r3, r3, 4000 +; CHECK: .LBB4_2: # +; CHECK-NEXT: lbzu r9, 1(r3) +; CHECK-NEXT: ldx r10, r3, r6 +; CHECK-NEXT: mulld r9, r10, r9 +; CHECK-NEXT: ldx r11, r3, r7 +; CHECK-NEXT: mulld r9, r9, r11 +; CHECK-NEXT: ldx r12, r3, r8 +; CHECK-NEXT: maddld r5, r9, r12, r5 +; CHECK-NEXT: bdnz .LBB4_2 + %3 = sext i32 %1 to i64 + %4 = icmp eq i32 %1, 0 + br i1 %4, label %27, label %5 + +5: ; preds = %2, %5 + %6 = phi i64 [ %25, %5 ], [ 0, %2 ] + %7 = phi i64 [ %24, %5 ], [ 0, %2 ] + %8 = getelementptr inbounds i8, i8* %0, i64 %6 + %9 = getelementptr inbounds i8, i8* %8, i64 4001 + %10 = load i8, i8* %9, align 1 + %11 = getelementptr inbounds i8, i8* %8, i64 4002 + %12 = bitcast i8* %11 to i64* + %13 = load i64, i64* %12, align 8 + %14 = getelementptr inbounds i8, i8* %8, i64 4003 + %15 = bitcast i8* %14 to i64* + %16 = load i64, i64* %15, align 8 + %17 = getelementptr inbounds i8, i8* %8, i64 4007 + %18 = bitcast i8* %17 to i64* + %19 = load i64, i64* %18, align 8 + %20 = zext i8 %10 to i64 + %21 = mul i64 %13, %20 + %22 = mul i64 %21, %16 + %23 = mul i64 %22, %19 + %24 = add i64 %23, %7 + %25 = add nuw i64 %6, 1 + %26 = icmp ult i64 %25, %3 + br i1 %26, label %5, label %27 + +27: ; preds = %5, %2 + %28 = phi i64 [ 0, %2 ], [ %24, %5 ] + %29 = add i64 %28, %3 + ret i64 %29 +} + +; test_ds_multiple_chains: +; unsigned long test_ds_multiple_chains(char *p, char *q, int count) { +; unsigned long i=0, res=0; +; int DISP1 = 4001; +; int DISP2 = 4010; +; int DISP3 = 4005; +; int DISP4 = 4009; +; for (; i < count ; i++) { +; unsigned long x1 = *(unsigned long *)(p + i + DISP1); +; unsigned long x2 = *(unsigned long *)(p + i + DISP2); +; unsigned long x3 = *(unsigned long *)(p + i + DISP3); +; unsigned long x4 = *(unsigned long *)(p + i + DISP4); +; unsigned long x5 = *(unsigned long *)(q + i + DISP1); +; unsigned long x6 = *(unsigned long *)(q + i + DISP2); +; unsigned long x7 = *(unsigned long *)(q + i + DISP3); +; unsigned long x8 = *(unsigned long *)(q + i + DISP4); +; res += x1*x2*x3*x4*x5*x6*x7*x8; +; } +; return res + count; +; } + +define dso_local i64 @test_ds_multiple_chains(i8* %0, i8* %1, i32 signext %2) { +; CHECK-LABEL: test_ds_multiple_chains: +; CHECK: addi r3, r3, 4010 +; CHECK: addi r4, r4, 4010 +; CHECK: .LBB5_2: # +; CHECK-NEXT: ldx r10, r3, r7 +; CHECK-NEXT: ld r11, 0(r3) +; CHECK-NEXT: mulld r10, r11, r10 +; CHECK-NEXT: ldx r11, r3, r8 +; CHECK-NEXT: mulld r10, r10, r11 +; CHECK-NEXT: ldx r12, r3, r9 +; CHECK-NEXT: addi r3, r3, 1 +; CHECK-NEXT: mulld r10, r10, r12 +; CHECK-NEXT: ldx r0, r4, r7 +; CHECK-NEXT: mulld r10, r10, r0 +; CHECK-NEXT: ld r30, 0(r4) +; CHECK-NEXT: mulld r10, r10, r30 +; CHECK-NEXT: ldx r29, r4, r8 +; CHECK-NEXT: mulld r10, r10, r29 +; CHECK-NEXT: ldx r28, r4, r9 +; CHECK-NEXT: addi r4, r4, 1 +; CHECK-NEXT: maddld r6, r10, r28, r6 +; CHECK-NEXT: bdnz .LBB5_2 + %4 = sext i32 %2 to i64 + %5 = icmp eq i32 %2, 0 + br i1 %5, label %45, label %6 + +6: ; preds = %3, %6 + %7 = phi i64 [ %43, %6 ], [ 0, %3 ] + %8 = phi i64 [ %42, %6 ], [ 0, %3 ] + %9 = getelementptr inbounds i8, i8* %0, i64 %7 + %10 = getelementptr inbounds i8, i8* %9, i64 4001 + %11 = bitcast i8* %10 to i64* + %12 = load i64, i64* 
%11, align 8 + %13 = getelementptr inbounds i8, i8* %9, i64 4010 + %14 = bitcast i8* %13 to i64* + %15 = load i64, i64* %14, align 8 + %16 = getelementptr inbounds i8, i8* %9, i64 4005 + %17 = bitcast i8* %16 to i64* + %18 = load i64, i64* %17, align 8 + %19 = getelementptr inbounds i8, i8* %9, i64 4009 + %20 = bitcast i8* %19 to i64* + %21 = load i64, i64* %20, align 8 + %22 = getelementptr inbounds i8, i8* %1, i64 %7 + %23 = getelementptr inbounds i8, i8* %22, i64 4001 + %24 = bitcast i8* %23 to i64* + %25 = load i64, i64* %24, align 8 + %26 = getelementptr inbounds i8, i8* %22, i64 4010 + %27 = bitcast i8* %26 to i64* + %28 = load i64, i64* %27, align 8 + %29 = getelementptr inbounds i8, i8* %22, i64 4005 + %30 = bitcast i8* %29 to i64* + %31 = load i64, i64* %30, align 8 + %32 = getelementptr inbounds i8, i8* %22, i64 4009 + %33 = bitcast i8* %32 to i64* + %34 = load i64, i64* %33, align 8 + %35 = mul i64 %15, %12 + %36 = mul i64 %35, %18 + %37 = mul i64 %36, %21 + %38 = mul i64 %37, %25 + %39 = mul i64 %38, %28 + %40 = mul i64 %39, %31 + %41 = mul i64 %40, %34 + %42 = add i64 %41, %8 + %43 = add nuw i64 %7, 1 + %44 = icmp ult i64 %43, %4 + br i1 %44, label %6, label %45 + +45: ; preds = %6, %3 + %46 = phi i64 [ 0, %3 ], [ %42, %6 ] + %47 = add i64 %46, %4 + ret i64 %47 +} + +; test_ds_cross_basic_blocks: +;extern char *arr; +;unsigned long foo(char *p, int count) +;{ +; unsigned long i=0, res=0; +; int DISP1 = 4000; +; int DISP2 = 4001; +; int DISP3 = 4002; +; int DISP4 = 4003; +; int DISP5 = 4005; +; int DISP6 = 4009; +; unsigned long x1, x2, x3, x4, x5, x6; +; x1=x2=x3=x4=x5=x6=1; +; for (; i < count ; i++) { +; if (arr[i] % 3 == 1) { +; x1 += *(unsigned long *)(p + i + DISP1); +; x2 += *(unsigned long *)(p + i + DISP2); +; } +; else if (arr[i] % 3 == 2) { +; x3 += *(unsigned long *)(p + i + DISP3); +; x4 += *(unsigned long *)(p + i + DISP5); +; } +; else { +; x5 += *(unsigned long *)(p + i + DISP4); +; x6 += *(unsigned long *)(p + i + DISP6); +; } +; res += x1*x2*x3*x4*x5*x6; +; } +; return res; +;} + +@arr = external local_unnamed_addr global i8*, align 8 + +define i64 @test_ds_cross_basic_blocks(i8* %0, i32 signext %1) { +; CHECK-LABEL: test_ds_cross_basic_blocks: +; CHECK: addi r5, r3, 4000 +; CHECK: .LBB6_2: # +; CHECK-NEXT: ld r0, 0(r5) +; CHECK-NEXT: add r26, r0, r26 +; CHECK-NEXT: ldx r0, r5, r7 +; CHECK-NEXT: add r27, r0, r27 +; CHECK-NEXT: .LBB6_3: # +; CHECK-NEXT: mulld r0, r27, r26 +; CHECK-NEXT: mulld r0, r0, r28 +; CHECK-NEXT: mulld r0, r0, r29 +; CHECK-NEXT: mulld r0, r0, r30 +; CHECK-NEXT: maddld r3, r0, r12, r3 +; CHECK-NEXT: addi r5, r5, 1 +; CHECK-NEXT: bdz .LBB6_9 +; CHECK-NEXT: .LBB6_4: # +; CHECK-NEXT: lbzu r0, 1(r6) +; CHECK-NEXT: clrldi r25, r0, 32 +; CHECK-NEXT: mulld r25, r25, r4 +; CHECK-NEXT: rldicl r25, r25, 31, 33 +; CHECK-NEXT: slwi r24, r25, 1 +; CHECK-NEXT: add r25, r25, r24 +; CHECK-NEXT: subf r0, r25, r0 +; CHECK-NEXT: cmplwi r0, 1 +; CHECK-NEXT: beq cr0, .LBB6_2 +; CHECK-NEXT: # %bb.5: # +; CHECK-NEXT: clrlwi r0, r0, 24 +; CHECK-NEXT: cmplwi r0, 2 +; CHECK-NEXT: bne cr0, .LBB6_7 +; CHECK-NEXT: # %bb.6: # +; CHECK-NEXT: ldx r0, r5, r8 +; CHECK-NEXT: add r28, r0, r28 +; CHECK-NEXT: ldx r0, r5, r9 +; CHECK-NEXT: add r29, r0, r29 +; CHECK-NEXT: b .LBB6_3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB6_7: # +; CHECK-NEXT: ldx r0, r5, r10 +; CHECK-NEXT: add r30, r0, r30 +; CHECK-NEXT: ldx r0, r5, r11 +; CHECK-NEXT: add r12, r0, r12 + %3 = sext i32 %1 to i64 + %4 = icmp eq i32 %1, 0 + br i1 %4, label %66, label %5 + +5: ; preds = %2 + %6 = load i8*, i8** 
@arr, align 8 + br label %7 + +7: ; preds = %5, %51 + %8 = phi i64 [ 1, %5 ], [ %57, %51 ] + %9 = phi i64 [ 1, %5 ], [ %56, %51 ] + %10 = phi i64 [ 1, %5 ], [ %55, %51 ] + %11 = phi i64 [ 1, %5 ], [ %54, %51 ] + %12 = phi i64 [ 1, %5 ], [ %53, %51 ] + %13 = phi i64 [ 1, %5 ], [ %52, %51 ] + %14 = phi i64 [ 0, %5 ], [ %64, %51 ] + %15 = phi i64 [ 0, %5 ], [ %63, %51 ] + %16 = getelementptr inbounds i8, i8* %6, i64 %14 + %17 = load i8, i8* %16, align 1 + %18 = urem i8 %17, 3 + %19 = icmp eq i8 %18, 1 + br i1 %19, label %20, label %30 + +20: ; preds = %7 + %21 = getelementptr inbounds i8, i8* %0, i64 %14 + %22 = getelementptr inbounds i8, i8* %21, i64 4000 + %23 = bitcast i8* %22 to i64* + %24 = load i64, i64* %23, align 8 + %25 = add i64 %24, %13 + %26 = getelementptr inbounds i8, i8* %21, i64 4001 + %27 = bitcast i8* %26 to i64* + %28 = load i64, i64* %27, align 8 + %29 = add i64 %28, %12 + br label %51 + +30: ; preds = %7 + %31 = icmp eq i8 %18, 2 + %32 = getelementptr inbounds i8, i8* %0, i64 %14 + br i1 %31, label %33, label %42 + +33: ; preds = %30 + %34 = getelementptr inbounds i8, i8* %32, i64 4002 + %35 = bitcast i8* %34 to i64* + %36 = load i64, i64* %35, align 8 + %37 = add i64 %36, %11 + %38 = getelementptr inbounds i8, i8* %32, i64 4005 + %39 = bitcast i8* %38 to i64* + %40 = load i64, i64* %39, align 8 + %41 = add i64 %40, %10 + br label %51 + +42: ; preds = %30 + %43 = getelementptr inbounds i8, i8* %32, i64 4003 + %44 = bitcast i8* %43 to i64* + %45 = load i64, i64* %44, align 8 + %46 = add i64 %45, %9 + %47 = getelementptr inbounds i8, i8* %32, i64 4009 + %48 = bitcast i8* %47 to i64* + %49 = load i64, i64* %48, align 8 + %50 = add i64 %49, %8 + br label %51 + +51: ; preds = %33, %42, %20 + %52 = phi i64 [ %25, %20 ], [ %13, %33 ], [ %13, %42 ] + %53 = phi i64 [ %29, %20 ], [ %12, %33 ], [ %12, %42 ] + %54 = phi i64 [ %11, %20 ], [ %37, %33 ], [ %11, %42 ] + %55 = phi i64 [ %10, %20 ], [ %41, %33 ], [ %10, %42 ] + %56 = phi i64 [ %9, %20 ], [ %9, %33 ], [ %46, %42 ] + %57 = phi i64 [ %8, %20 ], [ %8, %33 ], [ %50, %42 ] + %58 = mul i64 %53, %52 + %59 = mul i64 %58, %54 + %60 = mul i64 %59, %55 + %61 = mul i64 %60, %56 + %62 = mul i64 %61, %57 + %63 = add i64 %62, %15 + %64 = add nuw i64 %14, 1 + %65 = icmp ult i64 %64, %3 + br i1 %65, label %7, label %66 + +66: ; preds = %51, %2 + %67 = phi i64 [ 0, %2 ], [ %63, %51 ] + ret i64 %67 +} + +; test_ds_float: +;float test_ds_float(char *p, int count) { +; int i=0 ; +; float res=0; +; int DISP1 = 4001; +; int DISP2 = 4002; +; int DISP3 = 4022; +; int DISP4 = 4062; +; for (; i < count ; i++) { +; float x1 = *(float *)(p + i + DISP1); +; float x2 = *(float *)(p + i + DISP2); +; float x3 = *(float *)(p + i + DISP3); +; float x4 = *(float *)(p + i + DISP4); +; res += x1*x2*x3*x4; +; } +; return res; +;} + +define float @test_ds_float(i8* %0, i32 signext %1) { +; CHECK-LABEL: test_ds_float: +; CHECK: addi r3, r3, 4000 +; CHECK: .LBB7_2: # +; CHECK-NEXT: lfsu f0, 1(r3) +; CHECK-NEXT: lfsx f2, r3, r4 +; CHECK-NEXT: lfsx f3, r3, r5 +; CHECK-NEXT: xsmulsp f0, f0, f2 +; CHECK-NEXT: lfsx f4, r3, r6 +; CHECK-NEXT: xsmulsp f0, f0, f3 +; CHECK-NEXT: xsmulsp f0, f0, f4 +; CHECK-NEXT: xsaddsp f1, f1, f0 +; CHECK-NEXT: bdnz .LBB7_2 + %3 = icmp sgt i32 %1, 0 + br i1 %3, label %4, label %28 + +4: ; preds = %2 + %5 = zext i32 %1 to i64 + br label %6 + +6: ; preds = %6, %4 + %7 = phi i64 [ 0, %4 ], [ %26, %6 ] + %8 = phi float [ 0.000000e+00, %4 ], [ %25, %6 ] + %9 = getelementptr inbounds i8, i8* %0, i64 %7 + %10 = getelementptr inbounds i8, i8* %9, 
i64 4001 + %11 = bitcast i8* %10 to float* + %12 = load float, float* %11, align 4 + %13 = getelementptr inbounds i8, i8* %9, i64 4002 + %14 = bitcast i8* %13 to float* + %15 = load float, float* %14, align 4 + %16 = getelementptr inbounds i8, i8* %9, i64 4022 + %17 = bitcast i8* %16 to float* + %18 = load float, float* %17, align 4 + %19 = getelementptr inbounds i8, i8* %9, i64 4062 + %20 = bitcast i8* %19 to float* + %21 = load float, float* %20, align 4 + %22 = fmul float %12, %15 + %23 = fmul float %22, %18 + %24 = fmul float %23, %21 + %25 = fadd float %8, %24 + %26 = add nuw nsw i64 %7, 1 + %27 = icmp eq i64 %26, %5 + br i1 %27, label %28, label %6 + +28: ; preds = %6, %2 + %29 = phi float [ 0.000000e+00, %2 ], [ %25, %6 ] + ret float %29 +} + +; test_ds_combine_float_int: +;float test_ds_combine_float_int(char *p, int count) { +; int i=0 ; +; float res=0; +; int DISP1 = 4001; +; int DISP2 = 4002; +; int DISP3 = 4022; +; int DISP4 = 4062; +; for (; i < count ; i++) { +; float x1 = *(float *)(p + i + DISP1); +; unsigned long x2 = *(unsigned long*)(p + i + DISP2); +; float x3 = *(float *)(p + i + DISP3); +; float x4 = *(float *)(p + i + DISP4); +; res += x1*x2*x3*x4; +; } +; return res; +;} + +define float @test_ds_combine_float_int(i8* %0, i32 signext %1) { +; CHECK-LABEL: test_ds_combine_float_int: +; CHECK: addi r4, r3, 4001 +; CHECK: addi r3, r3, 4000 +; CHECK: .LBB8_2: # +; CHECK-NEXT: lfdu f4, 1(r4) +; CHECK-NEXT: lfsu f0, 1(r3) +; CHECK-NEXT: xscvuxdsp f4, f4 +; CHECK-NEXT: lfsx f2, r3, r5 +; CHECK-NEXT: lfsx f3, r3, r6 +; CHECK-NEXT: xsmulsp f0, f0, f4 +; CHECK-NEXT: xsmulsp f0, f2, f0 +; CHECK-NEXT: xsmulsp f0, f3, f0 +; CHECK-NEXT: xsaddsp f1, f1, f0 +; CHECK-NEXT: bdnz .LBB8_2 + %3 = icmp sgt i32 %1, 0 + br i1 %3, label %4, label %29 + +4: ; preds = %2 + %5 = zext i32 %1 to i64 + br label %6 + +6: ; preds = %6, %4 + %7 = phi i64 [ 0, %4 ], [ %27, %6 ] + %8 = phi float [ 0.000000e+00, %4 ], [ %26, %6 ] + %9 = getelementptr inbounds i8, i8* %0, i64 %7 + %10 = getelementptr inbounds i8, i8* %9, i64 4001 + %11 = bitcast i8* %10 to float* + %12 = load float, float* %11, align 4 + %13 = getelementptr inbounds i8, i8* %9, i64 4002 + %14 = bitcast i8* %13 to i64* + %15 = load i64, i64* %14, align 8 + %16 = getelementptr inbounds i8, i8* %9, i64 4022 + %17 = bitcast i8* %16 to float* + %18 = load float, float* %17, align 4 + %19 = getelementptr inbounds i8, i8* %9, i64 4062 + %20 = bitcast i8* %19 to float* + %21 = load float, float* %20, align 4 + %22 = uitofp i64 %15 to float + %23 = fmul float %12, %22 + %24 = fmul float %18, %23 + %25 = fmul float %21, %24 + %26 = fadd float %8, %25 + %27 = add nuw nsw i64 %7, 1 + %28 = icmp eq i64 %27, %5 + br i1 %28, label %29, label %6 + +29: ; preds = %6, %2 + %30 = phi float [ 0.000000e+00, %2 ], [ %26, %6 ] + ret float %30 +} diff --git a/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.mir b/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.mir index 02db2caa1e7a2..1dd37955452f0 100644 --- a/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.mir +++ b/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.mir @@ -346,3 +346,25 @@ body: | BLR8 implicit $lr8, implicit $rm ... 
+--- +name: overwrite_reg_before_killed +alignment: 16 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: overwrite_reg_before_killed + ; CHECK: liveins: $x1 + ; CHECK: renamable $x3 = LI8 0 + ; CHECK: STD renamable $x3, 16, $x1 + ; CHECK: STD killed renamable $x3, 8, $x1 + ; CHECK: BLR8 implicit $lr8, implicit $rm + renamable $x3 = LI8 0 + STD renamable $x3, 16, $x1 + renamable $x3 = LI8 0 + STD killed renamable $x3, 8, $x1 + BLR8 implicit $lr8, implicit $rm + +... diff --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll index f6a9af42f6af6..3610c2a656532 100644 --- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll @@ -4,21 +4,7 @@ define arm_aapcs_vfpcc <16 x i8> @sadd_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: sadd_int8_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vadd.i8 q2, q0, q1 -; CHECK-NEXT: vmov.i8 q3, #0x80 -; CHECK-NEXT: vcmp.s8 lt, q2, zr -; CHECK-NEXT: vmov.i8 q4, #0x7f -; CHECK-NEXT: vpsel q3, q4, q3 -; CHECK-NEXT: vcmp.s8 gt, q0, q2 -; CHECK-NEXT: vmrs r0, p0 -; CHECK-NEXT: vcmp.s8 lt, q1, zr -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: eors r0, r1 -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpsel q0, q3, q2 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vqadd.s8 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2) @@ -28,21 +14,7 @@ entry: define arm_aapcs_vfpcc <8 x i16> @sadd_int16_t(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: sadd_int16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vadd.i16 q2, q0, q1 -; CHECK-NEXT: vmov.i16 q3, #0x8000 -; CHECK-NEXT: vcmp.s16 lt, q2, zr -; CHECK-NEXT: vmvn.i16 q4, #0x8000 -; CHECK-NEXT: vpsel q3, q4, q3 -; CHECK-NEXT: vcmp.s16 gt, q0, q2 -; CHECK-NEXT: vmrs r0, p0 -; CHECK-NEXT: vcmp.s16 lt, q1, zr -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: eors r0, r1 -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpsel q0, q3, q2 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vqadd.s16 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2) @@ -52,21 +24,7 @@ entry: define arm_aapcs_vfpcc <4 x i32> @sadd_int32_t(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: sadd_int32_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vadd.i32 q2, q0, q1 -; CHECK-NEXT: vmov.i32 q3, #0x80000000 -; CHECK-NEXT: vcmp.s32 lt, q2, zr -; CHECK-NEXT: vmvn.i32 q4, #0x80000000 -; CHECK-NEXT: vpsel q3, q4, q3 -; CHECK-NEXT: vcmp.s32 gt, q0, q2 -; CHECK-NEXT: vmrs r0, p0 -; CHECK-NEXT: vcmp.s32 lt, q1, zr -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: eors r0, r1 -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpsel q0, q3, q2 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vqadd.s32 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2) @@ -156,9 +114,7 @@ entry: define arm_aapcs_vfpcc <16 x i8> @uadd_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: uadd_int8_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmvn q2, q1 -; CHECK-NEXT: vmin.u8 q0, q0, q2 -; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vqadd.u8 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2) @@ -168,9 +124,7 @@ entry: define 
arm_aapcs_vfpcc <8 x i16> @uadd_int16_t(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: uadd_int16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmvn q2, q1 -; CHECK-NEXT: vmin.u16 q0, q0, q2 -; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vqadd.u16 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2) @@ -180,9 +134,7 @@ entry: define arm_aapcs_vfpcc <4 x i32> @uadd_int32_t(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: uadd_int32_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmvn q2, q1 -; CHECK-NEXT: vmin.u32 q0, q0, q2 -; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vqadd.u32 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2) @@ -239,21 +191,7 @@ entry: define arm_aapcs_vfpcc <16 x i8> @ssub_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: ssub_int8_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vsub.i8 q2, q0, q1 -; CHECK-NEXT: vmov.i8 q3, #0x80 -; CHECK-NEXT: vcmp.s8 lt, q2, zr -; CHECK-NEXT: vmov.i8 q4, #0x7f -; CHECK-NEXT: vpsel q3, q4, q3 -; CHECK-NEXT: vcmp.s8 gt, q0, q2 -; CHECK-NEXT: vmrs r0, p0 -; CHECK-NEXT: vcmp.s8 gt, q1, zr -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: eors r0, r1 -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpsel q0, q3, q2 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vqsub.s8 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2) @@ -263,21 +201,7 @@ entry: define arm_aapcs_vfpcc <8 x i16> @ssub_int16_t(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: ssub_int16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vsub.i16 q2, q0, q1 -; CHECK-NEXT: vmov.i16 q3, #0x8000 -; CHECK-NEXT: vcmp.s16 lt, q2, zr -; CHECK-NEXT: vmvn.i16 q4, #0x8000 -; CHECK-NEXT: vpsel q3, q4, q3 -; CHECK-NEXT: vcmp.s16 gt, q0, q2 -; CHECK-NEXT: vmrs r0, p0 -; CHECK-NEXT: vcmp.s16 gt, q1, zr -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: eors r0, r1 -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpsel q0, q3, q2 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vqsub.s16 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2) @@ -287,21 +211,7 @@ entry: define arm_aapcs_vfpcc <4 x i32> @ssub_int32_t(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: ssub_int32_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vsub.i32 q2, q0, q1 -; CHECK-NEXT: vmov.i32 q3, #0x80000000 -; CHECK-NEXT: vcmp.s32 lt, q2, zr -; CHECK-NEXT: vmvn.i32 q4, #0x80000000 -; CHECK-NEXT: vpsel q3, q4, q3 -; CHECK-NEXT: vcmp.s32 gt, q0, q2 -; CHECK-NEXT: vmrs r0, p0 -; CHECK-NEXT: vcmp.s32 gt, q1, zr -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: eors r0, r1 -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpsel q0, q3, q2 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vqsub.s32 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2) @@ -406,8 +316,7 @@ entry: define arm_aapcs_vfpcc <16 x i8> @usub_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: usub_int8_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmax.u8 q0, q0, q1 -; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vqsub.u8 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %src1, <16 x i8> %src2) @@ -417,8 +326,7 @@ entry: define arm_aapcs_vfpcc <8 x i16> @usub_int16_t(<8 x i16> 
%src1, <8 x i16> %src2) { ; CHECK-LABEL: usub_int16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmax.u16 q0, q0, q1 -; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vqsub.u16 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %src1, <8 x i16> %src2) @@ -428,8 +336,7 @@ entry: define arm_aapcs_vfpcc <4 x i32> @usub_int32_t(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: usub_int32_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmax.u32 q0, q0, q1 -; CHECK-NEXT: vsub.i32 q0, q0, q1 +; CHECK-NEXT: vqsub.u32 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %src1, <4 x i32> %src2) diff --git a/llvm/test/CodeGen/WebAssembly/atomic-fence.mir b/llvm/test/CodeGen/WebAssembly/atomic-fence.mir index 46f38a52f30f4..ea0f3931eb25c 100644 --- a/llvm/test/CodeGen/WebAssembly/atomic-fence.mir +++ b/llvm/test/CodeGen/WebAssembly/atomic-fence.mir @@ -39,7 +39,7 @@ body: | COMPILER_FENCE implicit-def $arguments %2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- @@ -63,6 +63,5 @@ body: | ATOMIC_FENCE 0, implicit-def $arguments %2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... - diff --git a/llvm/test/CodeGen/WebAssembly/eh-labels.mir b/llvm/test/CodeGen/WebAssembly/eh-labels.mir index 015276475cc32..ca8378906de15 100644 --- a/llvm/test/CodeGen/WebAssembly/eh-labels.mir +++ b/llvm/test/CodeGen/WebAssembly/eh-labels.mir @@ -42,5 +42,5 @@ body: | bb.2: ; predecessors: %bb.0, %bb.1 - RETURN_VOID implicit-def dead $arguments + RETURN implicit-def dead $arguments ... diff --git a/llvm/test/CodeGen/WebAssembly/explicit-locals.mir b/llvm/test/CodeGen/WebAssembly/explicit-locals.mir index 7718dddb1fd0c..084766c5573c2 100644 --- a/llvm/test/CodeGen/WebAssembly/explicit-locals.mir +++ b/llvm/test/CodeGen/WebAssembly/explicit-locals.mir @@ -19,5 +19,5 @@ body: | ; CHECK-NOT: dead %{{[0-9]+}} ; CHECK: DROP_I32 killed %{{[0-9]+}} dead %0:i32 = CONST_I32 0, implicit-def dead $arguments, implicit $sp32, implicit $sp64 - RETURN_VOID implicit-def dead $arguments + RETURN implicit-def dead $arguments ... diff --git a/llvm/test/CodeGen/WebAssembly/function-info.mir b/llvm/test/CodeGen/WebAssembly/function-info.mir index e40bda5ebff6a..fd60773c2afd6 100644 --- a/llvm/test/CodeGen/WebAssembly/function-info.mir +++ b/llvm/test/CodeGen/WebAssembly/function-info.mir @@ -8,5 +8,5 @@ liveins: - { reg: '$arguments' } body: | bb.0: - RETURN_VOID implicit-def dead $arguments + RETURN implicit-def dead $arguments ... 
diff --git a/llvm/test/CodeGen/WebAssembly/llround-conv-i32.ll b/llvm/test/CodeGen/WebAssembly/llround-conv-i32.ll index eb2768cb940fc..5c84b33de78d3 100644 --- a/llvm/test/CodeGen/WebAssembly/llround-conv-i32.ll +++ b/llvm/test/CodeGen/WebAssembly/llround-conv-i32.ll @@ -7,7 +7,7 @@ define i64 @testmsxs_builtin(float %x) { ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i64.call llroundf -; CHECK-NEXT: # fallthrough-return-value +; CHECK-NEXT: # fallthrough-return ; CHECK-NEXT: end_function entry: %0 = tail call i64 @llvm.llround.f32(float %x) @@ -20,7 +20,7 @@ define i64 @testmsxd_builtin(double %x) { ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i64.call llround -; CHECK-NEXT: # fallthrough-return-value +; CHECK-NEXT: # fallthrough-return ; CHECK-NEXT: end_function entry: %0 = tail call i64 @llvm.llround.f64(double %x) diff --git a/llvm/test/CodeGen/WebAssembly/multivalue.ll b/llvm/test/CodeGen/WebAssembly/multivalue.ll index cbf8d4e0a0d0c..483e45b687a53 100644 --- a/llvm/test/CodeGen/WebAssembly/multivalue.ll +++ b/llvm/test/CodeGen/WebAssembly/multivalue.ll @@ -9,15 +9,17 @@ target triple = "wasm32-unknown-unknown" %pair = type { i32, i32 } %packed_pair = type <{ i32, i32 }> -; CHECK-LABEL: sret: -; CHECK-NEXT: sret (i32, i32, i32) -> () -define %pair @sret(%pair %p) { +; CHECK-LABEL: pair_ident: +; CHECK-NEXT: pair_ident (i32, i32) -> (i32, i32) +; CHECK-NEXT: return $0, $1{{$}} +define %pair @pair_ident(%pair %p) { ret %pair %p } -; CHECK-LABEL: packed_sret: -; CHECK-NEXT: packed_sret (i32, i32, i32) -> () -define %packed_pair @packed_sret(%packed_pair %p) { +; CHECK-LABEL: packed_pair_ident: +; CHECK-NEXT: packed_pair_ident (i32, i32) -> (i32, i32) +; CHECK-NEXT: return $0, $1{{$}} +define %packed_pair @packed_pair_ident(%packed_pair %p) { ret %packed_pair %p } diff --git a/llvm/test/CodeGen/WebAssembly/reg-argument.mir b/llvm/test/CodeGen/WebAssembly/reg-argument.mir index 70c033f7f8f0f..1fb62b573dfb2 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-argument.mir +++ b/llvm/test/CodeGen/WebAssembly/reg-argument.mir @@ -11,7 +11,7 @@ body: | bb.0: %0:i32 = CONST_I32 0, implicit-def $arguments %1:i32 = ARGUMENT_i32 0, implicit $arguments - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: argument_i64 @@ -22,7 +22,7 @@ body: | bb.0: %0:i32 = CONST_I32 0, implicit-def $arguments %1:i64 = ARGUMENT_i64 0, implicit $arguments - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: argument_f32 @@ -33,7 +33,7 @@ body: | bb.0: %0:i32 = CONST_I32 0, implicit-def $arguments %1:f32 = ARGUMENT_f32 0, implicit $arguments - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: argument_f64 @@ -44,7 +44,7 @@ body: | bb.0: %0:i32 = CONST_I32 0, implicit-def $arguments %1:f64 = ARGUMENT_f64 0, implicit $arguments - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: argument_exnref @@ -55,5 +55,5 @@ body: | bb.0: %0:i32 = CONST_I32 0, implicit-def $arguments %1:exnref = ARGUMENT_exnref 0, implicit $arguments - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ...
diff --git a/llvm/test/CodeGen/WebAssembly/reg-copy.mir b/llvm/test/CodeGen/WebAssembly/reg-copy.mir index a077c347efdaa..e07c6acb8b663 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-copy.mir +++ b/llvm/test/CodeGen/WebAssembly/reg-copy.mir @@ -6,10 +6,10 @@ name: copy_i32 body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:i32 = COPY_I32 %1:i32 - ; CHECK-NEXT: RETURN_VOID + ; CHECK-NEXT: RETURN bb.0: %0:i32 = COPY %1:i32 - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: copy_i64 @@ -17,10 +17,10 @@ name: copy_i64 body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:i64 = COPY_I64 %1:i64 - ; CHECK-NEXT: RETURN_VOID + ; CHECK-NEXT: RETURN bb.0: %0:i64 = COPY %1:i64 - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: copy_f32 @@ -28,10 +28,10 @@ name: copy_f32 body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:f32 = COPY_F32 %1:f32 - ; CHECK-NEXT: RETURN_VOID + ; CHECK-NEXT: RETURN bb.0: %0:f32 = COPY %1:f32 - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: copy_f64 @@ -39,10 +39,10 @@ name: copy_f64 body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:f64 = COPY_F64 %1:f64 - ; CHECK-NEXT: RETURN_VOID + ; CHECK-NEXT: RETURN bb.0: %0:f64 = COPY %1:f64 - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: copy_v128 @@ -50,10 +50,10 @@ name: copy_v128 body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:v128 = COPY_V128 %1:v128 - ; CHECK-NEXT: RETURN_VOID + ; CHECK-NEXT: RETURN bb.0: %0:v128 = COPY %1:v128 - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... --- name: copy_exnref @@ -61,8 +61,8 @@ name: copy_exnref body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:exnref = COPY_EXNREF %1:exnref - ; CHECK-NEXT: RETURN_VOID + ; CHECK-NEXT: RETURN bb.0: %0:exnref = COPY %1:exnref - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments ... 
diff --git a/llvm/test/CodeGen/X86/bswap_tree.ll b/llvm/test/CodeGen/X86/bswap_tree.ll index 79a45050b98f0..b136263b179e7 100644 --- a/llvm/test/CodeGen/X86/bswap_tree.ll +++ b/llvm/test/CodeGen/X86/bswap_tree.ll @@ -79,30 +79,15 @@ define i32 @test3(i32 %x) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: andl $16711680, %ecx # imm = 0xFF0000 -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: andl $-16777216, %edx # imm = 0xFF000000 -; CHECK-NEXT: shll $8, %ecx -; CHECK-NEXT: shrl $8, %edx -; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: bswapl %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: roll $16, %eax ; CHECK-NEXT: retl ; ; CHECK64-LABEL: test3: ; CHECK64: # %bb.0: ; CHECK64-NEXT: movl %edi, %eax -; CHECK64-NEXT: andl $16711680, %eax # imm = 0xFF0000 -; CHECK64-NEXT: movl %edi, %ecx -; CHECK64-NEXT: andl $-16777216, %ecx # imm = 0xFF000000 -; CHECK64-NEXT: shll $8, %eax -; CHECK64-NEXT: shrl $8, %ecx -; CHECK64-NEXT: addl %ecx, %eax -; CHECK64-NEXT: bswapl %edi -; CHECK64-NEXT: shrl $16, %edi -; CHECK64-NEXT: orl %edi, %eax +; CHECK64-NEXT: bswapl %eax +; CHECK64-NEXT: roll $16, %eax ; CHECK64-NEXT: retq %byte2 = and i32 %x, 16711680 ; 0x00ff0000 %byte3 = and i32 %x, 4278190080 ; 0xff000000 diff --git a/llvm/test/CodeGen/X86/debug-loclists.ll b/llvm/test/CodeGen/X86/debug-loclists.ll index 0c2ab3dfad5a9..30cab3b01e198 100644 --- a/llvm/test/CodeGen/X86/debug-loclists.ll +++ b/llvm/test/CodeGen/X86/debug-loclists.ll @@ -12,7 +12,7 @@ ; CHECK: .debug_loclists contents: ; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000015, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 -; CHECK-NEXT: 0x00000000: +; CHECK-NEXT: 0x0000000c: ; CHECK-NEXT: [0x0000000000000000, 0x0000000000000004): DW_OP_breg5 RDI+0 ; CHECK-NEXT: [0x0000000000000004, 0x0000000000000012): DW_OP_breg3 RBX+0 diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll index 83bf33c4f7f1e..c170e079454f6 100644 --- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll @@ -4594,11 +4594,8 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528] -; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB11_1 @@ -4665,11 +4662,8 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftld $24, %k0, %k0 ; AVX512BW-NEXT: kshiftrd $24, %k0, %k1 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528] -; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -4977,11 +4971,9 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask) ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127] -; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168] -; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB12_1 @@ -5048,11 +5040,9 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask) ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $56, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $56, %k0, %k1 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127] -; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168] -; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5192,10 +5182,6 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767] -; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528] -; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al @@ -5235,10 +5221,6 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftld $28, %k0, %k0 ; AVX512BW-NEXT: kshiftrd $28, %k0, %k1 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767] -; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528] -; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper @@ -7302,9 +7284,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kmovw %k0, %k1 -; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, 
(%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -7601,8 +7582,6 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask) ; AVX512BW-NEXT: vptestmw %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $56, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $56, %k0, %k1 -; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index e3c66e83c83f6..49dae99268dc2 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1079,3 +1079,169 @@ define void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, <16 x i32> store <16 x i32> %b, <16 x i32>* %r ret void } + +define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32>* %p) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_packus_v16i32_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 +; CHECK-NEXT: vpackusdw 32(%rdi), %ymm0, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %a = load <16 x i32>, <16 x i32>* %p + %b = icmp slt <16 x i32> %a, + %c = select <16 x i1> %b, <16 x i32> %a, <16 x i32> + %d = icmp sgt <16 x i32> %c, zeroinitializer + %e = select <16 x i1> %d, <16 x i32> %c, <16 x i32> zeroinitializer + %f = trunc <16 x i32> %e to <16 x i8> + ret <16 x i8> %f +} + +define void @trunc_packus_v16i32_v16i8_store(<16 x i32>* %p, <16 x i8>* %q) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_packus_v16i32_v16i8_store: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 +; CHECK-NEXT: vpackusdw 32(%rdi), %ymm0, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; CHECK-NEXT: vpmovuswb %ymm0, (%rsi) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %a = load <16 x i32>, <16 x i32>* %p + %b = icmp slt <16 x i32> %a, + %c = select <16 x i1> %b, <16 x i32> %a, <16 x i32> + %d = icmp sgt <16 x i32> %c, zeroinitializer + %e = select <16 x i1> %d, <16 x i32> %c, <16 x i32> zeroinitializer + %f = trunc <16 x i32> %e to <16 x i8> + store <16 x i8> %f, <16 x i8>* %q + ret void +} + +define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32>* %p) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_packus_v32i32_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpmaxsd 96(%rdi), %ymm0, %ymm1 +; CHECK-NEXT: vpmovusdb %ymm1, %xmm1 +; CHECK-NEXT: vpmaxsd 64(%rdi), %ymm0, %ymm2 +; CHECK-NEXT: vpmovusdb %ymm2, %xmm2 +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; CHECK-NEXT: vpmaxsd 32(%rdi), %ymm0, %ymm2 +; CHECK-NEXT: vpmovusdb %ymm2, %xmm2 +; CHECK-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 +; CHECK-NEXT: vpmovusdb %ymm0, %xmm0 +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: retq + %a = load <32 x i32>, <32 x i32>* %p + %b = icmp slt <32 x i32> %a, + %c = select <32 x i1> %b, <32 x i32> %a, <32 x i32> + %d = icmp sgt <32 x i32> %c, zeroinitializer + %e = select <32 x i1> %d, <32 x i32> %c, <32 x i32> zeroinitializer + %f = trunc <32 x i32> %e to <32 x i8> + ret <32 x i8> %f +} + +define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_packus_v8i64_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpxor %xmm2, 
%xmm2, %xmm2 +; CHECK-NEXT: vpmaxsq %ymm2, %ymm1, %ymm1 +; CHECK-NEXT: vpmovusqb %ymm1, %xmm1 +; CHECK-NEXT: vpmaxsq %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %1 = icmp slt <8 x i64> %a0, + %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> + %3 = icmp sgt <8 x i64> %2, zeroinitializer + %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer + %5 = trunc <8 x i64> %4 to <8 x i8> + ret <8 x i8> %5 +} + +define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_packus_v8i64_v8i8_store: +; CHECK: # %bb.0: +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpmaxsq %ymm2, %ymm1, %ymm1 +; CHECK-NEXT: vpmovusqb %ymm1, %xmm1 +; CHECK-NEXT: vpmaxsq %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: vmovq %xmm0, (%rdi) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %1 = icmp slt <8 x i64> %a0, + %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> + %3 = icmp sgt <8 x i64> %2, zeroinitializer + %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer + %5 = trunc <8 x i64> %4 to <8 x i8> + store <8 x i8> %5, <8 x i8> *%p1 + ret void +} + +define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_ssat_v8i64_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsqb %ymm1, %xmm1 +; CHECK-NEXT: vpmovsqb %ymm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %1 = icmp slt <8 x i64> %a0, + %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> + %3 = icmp sgt <8 x i64> %2, + %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> + %5 = trunc <8 x i64> %4 to <8 x i8> + ret <8 x i8> %5 +} + +define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_ssat_v8i64_v8i8_store: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsqb %ymm1, %xmm1 +; CHECK-NEXT: vpmovsqb %ymm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: vmovq %xmm0, (%rdi) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %1 = icmp slt <8 x i64> %a0, + %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> + %3 = icmp sgt <8 x i64> %2, + %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> + %5 = trunc <8 x i64> %4 to <8 x i8> + store <8 x i8> %5, <8 x i8> *%p1 + ret void +} + +define <8 x i8> @trunc_usat_v8i64_v8i8(<8 x i64> %a0) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_usat_v8i64_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovusqb %ymm1, %xmm1 +; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %1 = icmp ult <8 x i64> %a0, + %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> + %3 = trunc <8 x i64> %2 to <8 x i8> + ret <8 x i8> %3 +} + +define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) "min-legal-vector-width"="256" { +; CHECK-LABEL: trunc_usat_v8i64_v8i8_store: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovusqb %ymm1, %xmm1 +; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 +; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: vmovq %xmm0, (%rdi) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %1 = icmp ult <8 x i64> %a0, + %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> + 
%3 = trunc <8 x i64> %2 to <8 x i8> + store <8 x i8> %3, <8 x i8> *%p1 + ret void +} diff --git a/llvm/test/CodeGen/X86/pmaddubsw.ll b/llvm/test/CodeGen/X86/pmaddubsw.ll index 3a08e47092e1b..48da43c631122 100644 --- a/llvm/test/CodeGen/X86/pmaddubsw.ll +++ b/llvm/test/CodeGen/X86/pmaddubsw.ll @@ -349,53 +349,27 @@ define <8 x i16> @pmaddubsw_bad_extend(<16 x i8>* %Aptr, <16 x i8>* %Bptr) { ; AVX1-NEXT: vpackssdw %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: pmaddubsw_bad_extend: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa (%rdi), %xmm0 -; AVX2-NEXT: vmovdqa (%rsi), %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> -; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm3 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u> -; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0 -; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm2 -; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1 -; AVX2-NEXT: vpmovsxbd %xmm3, %ymm3 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero -; AVX2-NEXT: vpmulld %ymm2, %ymm3, %ymm2 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: vpmovsxbd %xmm1, %ymm1 -; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq -; -; AVX512-LABEL: pmaddubsw_bad_extend: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512-NEXT: vmovdqa (%rsi), %xmm1 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm3 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u> -; AVX512-NEXT: vpshufb %xmm4, %xmm0, %xmm0 -; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm2 -; AVX512-NEXT: vpshufb %xmm4, %xmm1, %xmm1 -; AVX512-NEXT: vpmovsxbd %xmm3, %ymm3 -; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero -; AVX512-NEXT: vpmulld %ymm2, %ymm3, %ymm2 -; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512-NEXT: vpmovsxbd %xmm1, %ymm1 -; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528] -; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX256-LABEL: pmaddubsw_bad_extend: +; AVX256: # %bb.0: +; AVX256-NEXT: vmovdqa (%rdi), %xmm0 +; AVX256-NEXT: vmovdqa (%rsi), %xmm1 +; AVX256-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; AVX256-NEXT: vpshufb %xmm2, %xmm0, %xmm3 +; AVX256-NEXT: vmovdqa 
{{.*#+}} xmm4 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u> +; AVX256-NEXT: vpshufb %xmm4, %xmm0, %xmm0 +; AVX256-NEXT: vpshufb %xmm2, %xmm1, %xmm2 +; AVX256-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX256-NEXT: vpmovsxbd %xmm3, %ymm3 +; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero +; AVX256-NEXT: vpmulld %ymm2, %ymm3, %ymm2 +; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX256-NEXT: vpmovsxbd %xmm1, %ymm1 +; AVX256-NEXT: vpmulld %ymm1, %ymm0, %ymm0 +; AVX256-NEXT: vpaddd %ymm0, %ymm2, %ymm0 +; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX256-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX256-NEXT: vzeroupper +; AVX256-NEXT: retq %A = load <16 x i8>, <16 x i8>* %Aptr %B = load <16 x i8>, <16 x i8>* %Bptr %A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> @@ -476,49 +450,25 @@ define <8 x i16> @pmaddubsw_bad_indices(<16 x i8>* %Aptr, <16 x i8>* %Bptr) { ; AVX1-NEXT: vpackssdw %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: pmaddubsw_bad_indices: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa (%rdi), %xmm0 -; AVX2-NEXT: vmovdqa (%rsi), %xmm1 -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,2,5,6,9,10,13,14,u,u,u,u,u,u,u,u] -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,8,11,12,15,u,u,u,u,u,u,u,u] -; AVX2-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] -; AVX2-NEXT: vpmovsxbd %xmm2, %ymm2 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero -; AVX2-NEXT: vpmulld %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero -; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq -; -; AVX512-LABEL: pmaddubsw_bad_indices: -; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512-NEXT: vmovdqa (%rsi), %xmm1 -; AVX512-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,2,5,6,9,10,13,14,u,u,u,u,u,u,u,u] -; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,8,11,12,15,u,u,u,u,u,u,u,u] -; AVX512-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] -; AVX512-NEXT: vpmovsxbd %xmm2, %ymm2 -; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero -; AVX512-NEXT: vpmulld %ymm3, %ymm2, %ymm2 -; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero -; 
AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528] -; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX256-LABEL: pmaddubsw_bad_indices: +; AVX256: # %bb.0: +; AVX256-NEXT: vmovdqa (%rdi), %xmm0 +; AVX256-NEXT: vmovdqa (%rsi), %xmm1 +; AVX256-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[1,2,5,6,9,10,13,14,u,u,u,u,u,u,u,u] +; AVX256-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,3,4,7,8,11,12,15,u,u,u,u,u,u,u,u] +; AVX256-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX256-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u] +; AVX256-NEXT: vpmovsxbd %xmm2, %ymm2 +; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero +; AVX256-NEXT: vpmulld %ymm3, %ymm2, %ymm2 +; AVX256-NEXT: vpmovsxbd %xmm0, %ymm0 +; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero +; AVX256-NEXT: vpmulld %ymm1, %ymm0, %ymm0 +; AVX256-NEXT: vpaddd %ymm0, %ymm2, %ymm0 +; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX256-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX256-NEXT: vzeroupper +; AVX256-NEXT: retq %A = load <16 x i8>, <16 x i8>* %Aptr %B = load <16 x i8>, <16 x i8>* %Bptr %A_even = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> ;indices aren't all even diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index c618c37e4fea7..5d05bd401e24c 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -60,15 +60,15 @@ define float @f32_one_step(float %x) #1 { ; FMA-RECIP-LABEL: f32_one_step: ; FMA-RECIP: # %bb.0: ; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem -; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; FMA-RECIP-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem +; FMA-RECIP-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; FMA-RECIP-NEXT: retq ; ; BDVER2-LABEL: f32_one_step: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 -; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: vfmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfnmaddss %xmm1, %xmm0, %xmm1, %xmm0 ; BDVER2-NEXT: retq ; ; BTVER2-LABEL: f32_one_step: @@ -94,8 +94,8 @@ define float @f32_one_step(float %x) #1 { ; HASWELL-LABEL: f32_one_step: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem +; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: f32_one_step: @@ -111,8 +111,8 @@ define float 
@f32_one_step(float %x) #1 { ; AVX512-LABEL: f32_one_step: ; AVX512: # %bb.0: ; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem -; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem +; AVX512-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; AVX512-NEXT: retq %div = fdiv fast float 1.0, %x ret float %div diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll index a2bd6c2081c1d..c5e364280b802 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath2.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll @@ -154,8 +154,8 @@ define float @f32_one_step_2_divs(float %x) #1 { ; FMA-RECIP-LABEL: f32_one_step_2_divs: ; FMA-RECIP: # %bb.0: ; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem -; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; FMA-RECIP-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem +; FMA-RECIP-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 ; FMA-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0 ; FMA-RECIP-NEXT: retq @@ -163,8 +163,8 @@ define float @f32_one_step_2_divs(float %x) #1 { ; BDVER2-LABEL: f32_one_step_2_divs: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 -; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: vfmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfnmaddss %xmm1, %xmm0, %xmm1, %xmm0 ; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 ; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 ; BDVER2-NEXT: retq @@ -196,8 +196,8 @@ define float @f32_one_step_2_divs(float %x) #1 { ; HASWELL-LABEL: f32_one_step_2_divs: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem +; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 ; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 ; HASWELL-NEXT: retq @@ -217,8 +217,8 @@ define float @f32_one_step_2_divs(float %x) #1 { ; AVX512-LABEL: f32_one_step_2_divs: ; AVX512: # %bb.0: ; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem -; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem +; AVX512-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1 ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq @@ -267,8 +267,8 @@ define float @f32_two_step_2(float %x) #2 { ; FMA-RECIP: # %bb.0: ; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1 ; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; FMA-RECIP-NEXT: vfnmadd231ss {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2 -; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1 +; FMA-RECIP-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2 +; FMA-RECIP-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1 ; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; FMA-RECIP-NEXT: vmulss %xmm1, %xmm2, %xmm3 ; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1 @@ -278,9 +278,9 @@ define float @f32_two_step_2(float %x) #2 { 
; BDVER2-LABEL: f32_two_step_2: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 -; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm2 +; BDVER2-NEXT: vfmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm2 ; BDVER2-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero -; BDVER2-NEXT: vfmaddss %xmm1, %xmm2, %xmm1, %xmm1 +; BDVER2-NEXT: vfnmaddss %xmm1, %xmm2, %xmm1, %xmm1 ; BDVER2-NEXT: vmulss %xmm4, %xmm1, %xmm3 ; BDVER2-NEXT: vfnmaddss %xmm4, %xmm3, %xmm0, %xmm0 ; BDVER2-NEXT: vfmaddss %xmm3, %xmm0, %xmm1, %xmm0 @@ -322,8 +322,8 @@ define float @f32_two_step_2(float %x) #2 { ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 ; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2 -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1 +; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2 +; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1 ; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; HASWELL-NEXT: vmulss %xmm1, %xmm2, %xmm3 ; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1 @@ -350,8 +350,8 @@ define float @f32_two_step_2(float %x) #2 { ; AVX512: # %bb.0: ; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX512-NEXT: vfnmadd231ss {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2 -; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1 +; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2 +; AVX512-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1 ; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm3 ; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1 @@ -610,9 +610,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; FMA-RECIP-LABEL: v4f32_two_step2: ; FMA-RECIP: # %bb.0: ; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1 -; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-RECIP-NEXT: vfnmadd231ps {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2 -; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1 +; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-RECIP-NEXT: vfmadd231ps {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2 +; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0] ; FMA-RECIP-NEXT: vmulps %xmm1, %xmm2, %xmm3 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1 @@ -622,9 +622,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; BDVER2-LABEL: v4f32_two_step2: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpps %xmm0, %xmm1 -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm2 +; BDVER2-NEXT: vfmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm2 ; BDVER2-NEXT: vmovaps {{.*#+}} xmm4 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0] -; BDVER2-NEXT: vfmaddps %xmm1, %xmm2, %xmm1, %xmm1 +; BDVER2-NEXT: vfnmaddps %xmm1, %xmm2, %xmm1, %xmm1 ; BDVER2-NEXT: vmulps %xmm4, %xmm1, %xmm3 ; BDVER2-NEXT: vfnmaddps %xmm4, %xmm3, %xmm0, %xmm0 ; BDVER2-NEXT: vfmaddps %xmm3, %xmm0, %xmm1, %xmm0 @@ -665,9 +665,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; HASWELL-LABEL: v4f32_two_step2: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 -; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2 -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1 +; HASWELL-NEXT: 
vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2 +; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1 ; HASWELL-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0] ; HASWELL-NEXT: vmulps %xmm1, %xmm2, %xmm3 ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1 @@ -693,9 +693,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; AVX512-LABEL: v4f32_two_step2: ; AVX512: # %bb.0: ; AVX512-NEXT: vrcpps %xmm0, %xmm1 -; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-NEXT: vfnmadd231ps {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2 -; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; AVX512-NEXT: vfmadd231ps {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2 +; AVX512-NEXT: vfnmadd132ps {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1 ; AVX512-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0] ; AVX512-NEXT: vmulps %xmm1, %xmm2, %xmm3 ; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1 @@ -987,9 +987,9 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; FMA-RECIP-LABEL: v8f32_two_step2: ; FMA-RECIP: # %bb.0: ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1 -; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; FMA-RECIP-NEXT: vfnmadd231ps {{.*#+}} ymm2 = -(ymm0 * ymm1) + ymm2 -; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm2 = (ymm2 * ymm1) + ymm1 +; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; FMA-RECIP-NEXT: vfmadd231ps {{.*#+}} ymm2 = (ymm0 * ymm1) + ymm2 +; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm2 = -(ymm2 * ymm1) + ymm1 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0] ; FMA-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm3 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm1 @@ -999,9 +999,9 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; BDVER2-LABEL: v8f32_two_step2: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpps %ymm0, %ymm1 -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm2 +; BDVER2-NEXT: vfmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm2 ; BDVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0] -; BDVER2-NEXT: vfmaddps %ymm1, %ymm2, %ymm1, %ymm1 +; BDVER2-NEXT: vfnmaddps %ymm1, %ymm2, %ymm1, %ymm1 ; BDVER2-NEXT: vmulps %ymm4, %ymm1, %ymm3 ; BDVER2-NEXT: vfnmaddps %ymm4, %ymm3, %ymm0, %ymm0 ; BDVER2-NEXT: vfmaddps %ymm3, %ymm0, %ymm1, %ymm0 @@ -1042,9 +1042,9 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; HASWELL-LABEL: v8f32_two_step2: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm2 = -(ymm0 * ymm1) + ymm2 -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm2 = (ymm2 * ymm1) + ymm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm2 = (ymm0 * ymm1) + ymm2 +; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm2 = -(ymm2 * ymm1) + ymm1 ; HASWELL-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0] ; HASWELL-NEXT: vmulps %ymm1, %ymm2, %ymm3 ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm1 @@ -1070,9 +1070,9 @@ define <8 x float> 
@v8f32_two_step2(<8 x float> %x) #2 { ; AVX512-LABEL: v8f32_two_step2: ; AVX512: # %bb.0: ; AVX512-NEXT: vrcpps %ymm0, %ymm1 -; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-NEXT: vfnmadd231ps {{.*#+}} ymm2 = -(ymm0 * ymm1) + ymm2 -; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm2 = (ymm2 * ymm1) + ymm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; AVX512-NEXT: vfmadd231ps {{.*#+}} ymm2 = (ymm0 * ymm1) + ymm2 +; AVX512-NEXT: vfnmadd132ps {{.*#+}} ymm2 = -(ymm2 * ymm1) + ymm1 ; AVX512-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0] ; AVX512-NEXT: vmulps %ymm1, %ymm2, %ymm3 ; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm1 @@ -1552,17 +1552,17 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 { ; FMA-RECIP-LABEL: v16f32_two_step2: ; FMA-RECIP: # %bb.0: ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2 -; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] ; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4 -; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 -; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 +; FMA-RECIP-NEXT: vfmadd213ps {{.*#+}} ymm4 = (ymm0 * ymm4) + ymm3 +; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm4 = -(ymm4 * ymm2) + ymm2 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0] ; FMA-RECIP-NEXT: vmulps %ymm2, %ymm4, %ymm5 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm5 * ymm0) + ymm2 ; FMA-RECIP-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm5 ; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2 -; FMA-RECIP-NEXT: vfnmadd231ps {{.*#+}} ymm3 = -(ymm1 * ymm2) + ymm3 -; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm2) + ymm2 +; FMA-RECIP-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm1 * ymm2) + ymm3 +; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm3 = -(ymm3 * ymm2) + ymm2 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1] ; FMA-RECIP-NEXT: vmulps %ymm2, %ymm3, %ymm4 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm2 @@ -1572,17 +1572,17 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 { ; BDVER2-LABEL: v16f32_two_step2: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpps %ymm0, %ymm2 -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 -; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; BDVER2-NEXT: vfmaddps %ymm3, %ymm2, %ymm0, %ymm4 +; BDVER2-NEXT: vfnmaddps %ymm2, %ymm4, %ymm2, %ymm2 ; BDVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0] ; BDVER2-NEXT: vmulps %ymm4, %ymm2, %ymm5 ; BDVER2-NEXT: vfnmaddps %ymm4, %ymm5, %ymm0, %ymm0 ; BDVER2-NEXT: vfmaddps %ymm5, %ymm0, %ymm2, %ymm0 ; BDVER2-NEXT: vrcpps %ymm1, %ymm2 ; BDVER2-NEXT: vmovaps {{.*#+}} ymm5 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm3 -; BDVER2-NEXT: vfmaddps %ymm2, %ymm3, %ymm2, %ymm2 +; BDVER2-NEXT: vfmaddps %ymm3, %ymm2, %ymm1, %ymm3 +; BDVER2-NEXT: vfnmaddps %ymm2, %ymm3, %ymm2, %ymm2 ; BDVER2-NEXT: vmulps %ymm5, %ymm2, %ymm4 ; BDVER2-NEXT: 
vfnmaddps %ymm5, %ymm4, %ymm1, %ymm1 ; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm2, %ymm1 @@ -1645,17 +1645,17 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 { ; HASWELL-LABEL: v16f32_two_step2: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpps %ymm0, %ymm2 -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] ; HASWELL-NEXT: vmovaps %ymm2, %ymm4 -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 +; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm4 = (ymm0 * ymm4) + ymm3 +; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm4 = -(ymm4 * ymm2) + ymm2 ; HASWELL-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0] ; HASWELL-NEXT: vmulps %ymm2, %ymm4, %ymm5 ; HASWELL-NEXT: vrcpps %ymm1, %ymm6 ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm5 * ymm0) + ymm2 ; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm5 -; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm3 = -(ymm1 * ymm6) + ymm3 -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm6) + ymm6 +; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm1 * ymm6) + ymm3 +; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm3 = -(ymm3 * ymm6) + ymm6 ; HASWELL-NEXT: vmovaps {{.*#+}} ymm2 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1] ; HASWELL-NEXT: vmulps %ymm2, %ymm3, %ymm4 ; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm2 @@ -1692,9 +1692,9 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 { ; AVX512-LABEL: v16f32_two_step2: ; AVX512: # %bb.0: ; AVX512-NEXT: vrcp14ps %zmm0, %zmm1 -; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] -; AVX512-NEXT: vfnmadd231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) + zmm2 -; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm2 = (zmm2 * zmm1) + zmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] +; AVX512-NEXT: vfmadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2 +; AVX512-NEXT: vfnmadd132ps {{.*#+}} zmm2 = -(zmm2 * zmm1) + zmm1 ; AVX512-NEXT: vmovaps {{.*#+}} zmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0,9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1] ; AVX512-NEXT: vmulps %zmm1, %zmm2, %zmm3 ; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm1 diff --git a/llvm/test/CodeGen/X86/sadd_sat.ll b/llvm/test/CodeGen/X86/sadd_sat.ll index 6d853d7b0d8bf..e462763d9ebbd 100644 --- a/llvm/test/CodeGen/X86/sadd_sat.ll +++ b/llvm/test/CodeGen/X86/sadd_sat.ll @@ -159,34 +159,27 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X86-LABEL: func3: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-NEXT: shlb $4, %dl -; X86-NEXT: shlb $4, %al -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: movb %al, %ah -; X86-NEXT: addb %dl, %ah -; X86-NEXT: setns %cl -; X86-NEXT: addl $127, %ecx -; X86-NEXT: addb %dl, %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: cmovol %ecx, %eax -; X86-NEXT: sarb $4, %al +; X86-NEXT: addb {{[0-9]+}}(%esp), %al +; X86-NEXT: movzbl %al, %ecx +; X86-NEXT: cmpb $7, %al +; X86-NEXT: movl $7, %edx +; X86-NEXT: cmovll %ecx, %edx +; X86-NEXT: cmpb $-8, %dl +; X86-NEXT: movl $248, %eax +; X86-NEXT: cmovgl %edx, %eax ; X86-NEXT: # kill: def $al 
killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: func3: ; X64: # %bb.0: -; X64-NEXT: shlb $4, %sil -; X64-NEXT: shlb $4, %dil -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: movl %edi, %eax -; X64-NEXT: addb %sil, %al -; X64-NEXT: setns %cl -; X64-NEXT: addl $127, %ecx ; X64-NEXT: addb %sil, %dil ; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: cmovol %ecx, %eax -; X64-NEXT: sarb $4, %al +; X64-NEXT: cmpb $7, %al +; X64-NEXT: movl $7, %ecx +; X64-NEXT: cmovll %eax, %ecx +; X64-NEXT: cmpb $-8, %cl +; X64-NEXT: movl $248, %eax +; X64-NEXT: cmovgl %ecx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y); diff --git a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll index b6bbce92c1396..6cc41fd0cf874 100644 --- a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll @@ -1,23 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefix=CHECK-NOBMI -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefix=CHECK-BMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefixes=ANY,CHECK-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefixes=ANY,CHECK-BMI ; Compare if negative and select of constants where one constant is zero. define i32 @neg_sel_constants(i32 %a) { -; CHECK-NOBMI-LABEL: neg_sel_constants: -; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %edi, %eax -; CHECK-NOBMI-NEXT: sarl $31, %eax -; CHECK-NOBMI-NEXT: andl $5, %eax -; CHECK-NOBMI-NEXT: retq -; -; CHECK-BMI-LABEL: neg_sel_constants: -; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %edi, %eax -; CHECK-BMI-NEXT: sarl $31, %eax -; CHECK-BMI-NEXT: andl $5, %eax -; CHECK-BMI-NEXT: retq +; ANY-LABEL: neg_sel_constants: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: sarl $31, %eax +; ANY-NEXT: andl $5, %eax +; ANY-NEXT: retq %tmp.1 = icmp slt i32 %a, 0 %retval = select i1 %tmp.1, i32 5, i32 0 ret i32 %retval @@ -26,19 +19,12 @@ define i32 @neg_sel_constants(i32 %a) { ; Compare if negative and select of constants where one constant is zero and the other is a single bit. define i32 @neg_sel_special_constant(i32 %a) { -; CHECK-NOBMI-LABEL: neg_sel_special_constant: -; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %edi, %eax -; CHECK-NOBMI-NEXT: shrl $22, %eax -; CHECK-NOBMI-NEXT: andl $512, %eax # imm = 0x200 -; CHECK-NOBMI-NEXT: retq -; -; CHECK-BMI-LABEL: neg_sel_special_constant: -; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %edi, %eax -; CHECK-BMI-NEXT: shrl $22, %eax -; CHECK-BMI-NEXT: andl $512, %eax # imm = 0x200 -; CHECK-BMI-NEXT: retq +; ANY-LABEL: neg_sel_special_constant: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: shrl $22, %eax +; ANY-NEXT: andl $512, %eax # imm = 0x200 +; ANY-NEXT: retq %tmp.1 = icmp slt i32 %a, 0 %retval = select i1 %tmp.1, i32 512, i32 0 ret i32 %retval @@ -47,19 +33,12 @@ define i32 @neg_sel_special_constant(i32 %a) { ; Compare if negative and select variable or zero. 
define i32 @neg_sel_variable_and_zero(i32 %a, i32 %b) { -; CHECK-NOBMI-LABEL: neg_sel_variable_and_zero: -; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %edi, %eax -; CHECK-NOBMI-NEXT: sarl $31, %eax -; CHECK-NOBMI-NEXT: andl %esi, %eax -; CHECK-NOBMI-NEXT: retq -; -; CHECK-BMI-LABEL: neg_sel_variable_and_zero: -; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %edi, %eax -; CHECK-BMI-NEXT: sarl $31, %eax -; CHECK-BMI-NEXT: andl %esi, %eax -; CHECK-BMI-NEXT: retq +; ANY-LABEL: neg_sel_variable_and_zero: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: sarl $31, %eax +; ANY-NEXT: andl %esi, %eax +; ANY-NEXT: retq %tmp.1 = icmp slt i32 %a, 0 %retval = select i1 %tmp.1, i32 %b, i32 0 ret i32 %retval @@ -68,19 +47,12 @@ define i32 @neg_sel_variable_and_zero(i32 %a, i32 %b) { ; Compare if not positive and select the same variable as being compared: smin(a, 0). define i32 @not_pos_sel_same_variable(i32 %a) { -; CHECK-NOBMI-LABEL: not_pos_sel_same_variable: -; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %edi, %eax -; CHECK-NOBMI-NEXT: sarl $31, %eax -; CHECK-NOBMI-NEXT: andl %edi, %eax -; CHECK-NOBMI-NEXT: retq -; -; CHECK-BMI-LABEL: not_pos_sel_same_variable: -; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %edi, %eax -; CHECK-BMI-NEXT: sarl $31, %eax -; CHECK-BMI-NEXT: andl %edi, %eax -; CHECK-BMI-NEXT: retq +; ANY-LABEL: not_pos_sel_same_variable: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: sarl $31, %eax +; ANY-NEXT: andl %edi, %eax +; ANY-NEXT: retq %tmp = icmp slt i32 %a, 1 %min = select i1 %tmp, i32 %a, i32 0 ret i32 %min @@ -91,21 +63,13 @@ define i32 @not_pos_sel_same_variable(i32 %a) { ; Compare if positive and select of constants where one constant is zero. define i32 @pos_sel_constants(i32 %a) { -; CHECK-NOBMI-LABEL: pos_sel_constants: -; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NOBMI-NEXT: notl %edi -; CHECK-NOBMI-NEXT: shrl $31, %edi -; CHECK-NOBMI-NEXT: leal (%rdi,%rdi,4), %eax -; CHECK-NOBMI-NEXT: retq -; -; CHECK-BMI-LABEL: pos_sel_constants: -; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-BMI-NEXT: notl %edi -; CHECK-BMI-NEXT: shrl $31, %edi -; CHECK-BMI-NEXT: leal (%rdi,%rdi,4), %eax -; CHECK-BMI-NEXT: retq +; ANY-LABEL: pos_sel_constants: +; ANY: # %bb.0: +; ANY-NEXT: # kill: def $edi killed $edi def $rdi +; ANY-NEXT: notl %edi +; ANY-NEXT: shrl $31, %edi +; ANY-NEXT: leal (%rdi,%rdi,4), %eax +; ANY-NEXT: retq %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 5, i32 0 ret i32 %retval @@ -114,21 +78,13 @@ define i32 @pos_sel_constants(i32 %a) { ; Compare if positive and select of constants where one constant is zero and the other is a single bit. 
define i32 @pos_sel_special_constant(i32 %a) { -; CHECK-NOBMI-LABEL: pos_sel_special_constant: -; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %edi, %eax -; CHECK-NOBMI-NEXT: notl %eax -; CHECK-NOBMI-NEXT: shrl $22, %eax -; CHECK-NOBMI-NEXT: andl $512, %eax # imm = 0x200 -; CHECK-NOBMI-NEXT: retq -; -; CHECK-BMI-LABEL: pos_sel_special_constant: -; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %edi, %eax -; CHECK-BMI-NEXT: notl %eax -; CHECK-BMI-NEXT: shrl $22, %eax -; CHECK-BMI-NEXT: andl $512, %eax # imm = 0x200 -; CHECK-BMI-NEXT: retq +; ANY-LABEL: pos_sel_special_constant: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: notl %eax +; ANY-NEXT: shrl $22, %eax +; ANY-NEXT: andl $512, %eax # imm = 0x200 +; ANY-NEXT: retq %tmp.1 = icmp sgt i32 %a, -1 %retval = select i1 %tmp.1, i32 512, i32 0 ret i32 %retval @@ -198,3 +154,92 @@ define i32 @PR31175(i32 %x, i32 %y) { %sel = select i1 %cmp, i32 %sub, i32 0 ret i32 %sel } + +define i8 @sel_shift_bool_i8(i1 %t) { +; ANY-LABEL: sel_shift_bool_i8: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: shlb $7, %al +; ANY-NEXT: # kill: def $al killed $al killed $eax +; ANY-NEXT: retq + %shl = select i1 %t, i8 128, i8 0 + ret i8 %shl +} + +define i16 @sel_shift_bool_i16(i1 %t) { +; ANY-LABEL: sel_shift_bool_i16: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: andl $1, %eax +; ANY-NEXT: shll $7, %eax +; ANY-NEXT: # kill: def $ax killed $ax killed $eax +; ANY-NEXT: retq + %shl = select i1 %t, i16 128, i16 0 + ret i16 %shl +} + +define i32 @sel_shift_bool_i32(i1 %t) { +; ANY-LABEL: sel_shift_bool_i32: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: andl $1, %eax +; ANY-NEXT: shll $6, %eax +; ANY-NEXT: retq + %shl = select i1 %t, i32 64, i32 0 + ret i32 %shl +} + +define i64 @sel_shift_bool_i64(i1 %t) { +; ANY-LABEL: sel_shift_bool_i64: +; ANY: # %bb.0: +; ANY-NEXT: movl %edi, %eax +; ANY-NEXT: andl $1, %eax +; ANY-NEXT: shlq $16, %rax +; ANY-NEXT: retq + %shl = select i1 %t, i64 65536, i64 0 + ret i64 %shl +} + +define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { +; ANY-LABEL: sel_shift_bool_v16i8: +; ANY: # %bb.0: +; ANY-NEXT: psllw $7, %xmm0 +; ANY-NEXT: pand {{.*}}(%rip), %xmm0 +; ANY-NEXT: retq + %shl = select <16 x i1> %t, <16 x i8> , <16 x i8> zeroinitializer + ret <16 x i8> %shl +} + +define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) { +; ANY-LABEL: sel_shift_bool_v8i16: +; ANY: # %bb.0: +; ANY-NEXT: psllw $15, %xmm0 +; ANY-NEXT: psraw $15, %xmm0 +; ANY-NEXT: pand {{.*}}(%rip), %xmm0 +; ANY-NEXT: retq + %shl= select <8 x i1> %t, <8 x i16> , <8 x i16> zeroinitializer + ret <8 x i16> %shl +} + +define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) { +; ANY-LABEL: sel_shift_bool_v4i32: +; ANY: # %bb.0: +; ANY-NEXT: pslld $31, %xmm0 +; ANY-NEXT: psrad $31, %xmm0 +; ANY-NEXT: pand {{.*}}(%rip), %xmm0 +; ANY-NEXT: retq + %shl = select <4 x i1> %t, <4 x i32> , <4 x i32> zeroinitializer + ret <4 x i32> %shl +} + +define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) { +; ANY-LABEL: sel_shift_bool_v2i64: +; ANY: # %bb.0: +; ANY-NEXT: psllq $63, %xmm0 +; ANY-NEXT: psrad $31, %xmm0 +; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; ANY-NEXT: pand {{.*}}(%rip), %xmm0 +; ANY-NEXT: retq + %shl = select <2 x i1> %t, <2 x i64> , <2 x i64> zeroinitializer + ret <2 x i64> %shl +} diff --git a/llvm/test/CodeGen/X86/ssub_sat.ll b/llvm/test/CodeGen/X86/ssub_sat.ll index 62724e981f7d7..91dc45ab7d9c6 100644 --- a/llvm/test/CodeGen/X86/ssub_sat.ll +++ b/llvm/test/CodeGen/X86/ssub_sat.ll @@ -159,34 +159,27 @@ define i4 @func3(i4 
%x, i4 %y) nounwind { ; X86-LABEL: func3: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-NEXT: shlb $4, %dl -; X86-NEXT: shlb $4, %al -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: movb %al, %ah -; X86-NEXT: subb %dl, %ah -; X86-NEXT: setns %cl -; X86-NEXT: addl $127, %ecx -; X86-NEXT: subb %dl, %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: cmovol %ecx, %eax -; X86-NEXT: sarb $4, %al +; X86-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-NEXT: movzbl %al, %ecx +; X86-NEXT: cmpb $7, %al +; X86-NEXT: movl $7, %edx +; X86-NEXT: cmovll %ecx, %edx +; X86-NEXT: cmpb $-8, %dl +; X86-NEXT: movl $248, %eax +; X86-NEXT: cmovgl %edx, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: func3: ; X64: # %bb.0: -; X64-NEXT: shlb $4, %sil -; X64-NEXT: shlb $4, %dil -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: movl %edi, %eax -; X64-NEXT: subb %sil, %al -; X64-NEXT: setns %cl -; X64-NEXT: addl $127, %ecx ; X64-NEXT: subb %sil, %dil ; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: cmovol %ecx, %eax -; X64-NEXT: sarb $4, %al +; X64-NEXT: cmpb $7, %al +; X64-NEXT: movl $7, %ecx +; X64-NEXT: cmovll %eax, %ecx +; X64-NEXT: cmpb $-8, %cl +; X64-NEXT: movl $248, %eax +; X64-NEXT: cmovgl %ecx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y) diff --git a/llvm/test/CodeGen/X86/uadd_sat.ll b/llvm/test/CodeGen/X86/uadd_sat.ll index 203d039a3a0bf..8b2c5f615efec 100644 --- a/llvm/test/CodeGen/X86/uadd_sat.ll +++ b/llvm/test/CodeGen/X86/uadd_sat.ll @@ -98,26 +98,21 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X86-LABEL: func3: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: shlb $4, %cl -; X86-NEXT: shlb $4, %al -; X86-NEXT: addb %cl, %al +; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: movzbl %al, %ecx -; X86-NEXT: movl $255, %eax -; X86-NEXT: cmovael %ecx, %eax -; X86-NEXT: shrb $4, %al +; X86-NEXT: cmpb $15, %al +; X86-NEXT: movl $15, %eax +; X86-NEXT: cmovbl %ecx, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: func3: ; X64: # %bb.0: -; X64-NEXT: shlb $4, %sil -; X64-NEXT: shlb $4, %dil ; X64-NEXT: addb %sil, %dil ; X64-NEXT: movzbl %dil, %ecx -; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovael %ecx, %eax -; X64-NEXT: shrb $4, %al +; X64-NEXT: cmpb $15, %cl +; X64-NEXT: movl $15, %eax +; X64-NEXT: cmovbl %ecx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y) diff --git a/llvm/test/CodeGen/X86/usub_sat.ll b/llvm/test/CodeGen/X86/usub_sat.ll index 55cb6e8fd7f22..6bc2aef9e44c7 100644 --- a/llvm/test/CodeGen/X86/usub_sat.ll +++ b/llvm/test/CodeGen/X86/usub_sat.ll @@ -97,27 +97,21 @@ define i8 @func8(i8 %x, i8 %y) nounwind { define i4 @func3(i4 %x, i4 %y) nounwind { ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: shlb $4, %cl -; X86-NEXT: shlb $4, %al -; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpb %cl, %dl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: cmoval %edx, %eax ; X86-NEXT: subb %cl, %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: shrb $4, %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: func3: ; X64: # %bb.0: -; X64-NEXT: shlb $4, %sil -; X64-NEXT: shlb $4, %dil -; X64-NEXT: xorl 
%ecx, %ecx -; X64-NEXT: subb %sil, %dil -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: cmovbl %ecx, %eax -; X64-NEXT: shrb $4, %al +; X64-NEXT: cmpb %sil, %dil +; X64-NEXT: movl %esi, %eax +; X64-NEXT: cmoval %edi, %eax +; X64-NEXT: subb %sil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y) diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll index b0d6a20bdf38b..10616cd9c857c 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -651,645 +651,1856 @@ define <8 x i32> @trunc_packus_v8i64_v8i32(<8 x i64> %a0) { ; PACKUS saturation truncation to vXi16 ; -define <8 x i16> @trunc_packus_v8i64_v8i16(<8 x i64> %a0) { -; SSE2-LABEL: trunc_packus_v8i64_v8i16: +define <4 x i16> @trunc_packus_v4i64_v4i16(<4 x i64> %a0) { +; SSE2-LABEL: trunc_packus_v4i64_v4i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535] -; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: pxor %xmm10, %xmm5 -; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183] -; SSE2-NEXT: movdqa %xmm9, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm5 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] ; SSE2-NEXT: pand %xmm7, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] -; SSE2-NEXT: por %xmm4, %xmm5 -; SSE2-NEXT: pand %xmm5, %xmm1 -; SSE2-NEXT: pandn %xmm8, %xmm5 -; SSE2-NEXT: por %xmm1, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm10, %xmm1 -; SSE2-NEXT: movdqa %xmm9, %xmm4 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm5, %xmm4 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] -; SSE2-NEXT: por %xmm7, %xmm1 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: pandn %xmm8, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm4 +; SSE2-NEXT: por %xmm0, %xmm4 +; SSE2-NEXT: movdqa %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm10, %xmm0 -; SSE2-NEXT: movdqa %xmm9, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] -; 
SSE2-NEXT: pcmpeqd %xmm9, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm6 -; SSE2-NEXT: pand %xmm6, %xmm3 -; SSE2-NEXT: pandn %xmm8, %xmm6 -; SSE2-NEXT: por %xmm3, %xmm6 -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm10, %xmm0 -; SSE2-NEXT: movdqa %xmm9, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm4, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm3 +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm2 ; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: pandn %xmm8, %xmm3 -; SSE2-NEXT: por %xmm2, %xmm3 -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm10, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm10, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm10, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm4, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSE2-NEXT: por %xmm7, %xmm0 -; SSE2-NEXT: pand %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm6, %xmm2 -; SSE2-NEXT: pxor %xmm10, %xmm2 -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm10, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm10, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] -; SSE2-NEXT: pand %xmm4, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] -; SSE2-NEXT: por %xmm7, %xmm2 -; SSE2-NEXT: pand %xmm6, %xmm2 -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: pxor %xmm10, %xmm3 -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm10, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm10, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: por %xmm3, %xmm4 -; SSE2-NEXT: pand %xmm1, %xmm4 -; SSE2-NEXT: movdqa %xmm5, %xmm1 -; SSE2-NEXT: pxor %xmm10, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm10, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm10, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSE2-NEXT: por %xmm1, %xmm3 -; SSE2-NEXT: pand %xmm5, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7] -; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: trunc_packus_v8i64_v8i16: +; SSSE3-LABEL: trunc_packus_v4i64_v4i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535] -; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648] -; SSSE3-NEXT: movdqa %xmm1, %xmm5 -; SSSE3-NEXT: pxor %xmm10, %xmm5 -; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183] -; SSSE3-NEXT: movdqa %xmm9, %xmm6 -; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 ; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5 -; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] ; SSSE3-NEXT: pand %xmm7, %xmm4 -; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] -; SSSE3-NEXT: por %xmm4, %xmm5 -; SSSE3-NEXT: pand %xmm5, %xmm1 -; SSSE3-NEXT: pandn %xmm8, %xmm5 -; SSSE3-NEXT: por %xmm1, %xmm5 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm1, %xmm3 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: pxor %xmm10, %xmm1 -; SSSE3-NEXT: movdqa %xmm9, %xmm4 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3] -; SSSE3-NEXT: pand %xmm6, %xmm7 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] -; SSSE3-NEXT: por %xmm7, %xmm1 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pandn %xmm8, %xmm1 +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm4 +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: movdqa %xmm4, %xmm0 +; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSSE3-NEXT: por %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm4, %xmm1 ; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm10, %xmm0 -; SSSE3-NEXT: movdqa %xmm9, %xmm4 -; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm6, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3] -; SSSE3-NEXT: por %xmm0, %xmm6 -; SSSE3-NEXT: pand %xmm6, %xmm3 -; SSSE3-NEXT: pandn %xmm8, %xmm6 -; SSSE3-NEXT: por %xmm3, %xmm6 -; SSSE3-NEXT: movdqa %xmm2, %xmm0 -; SSSE3-NEXT: pxor %xmm10, %xmm0 -; SSSE3-NEXT: movdqa %xmm9, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 +; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm2, 
%xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm4, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSSE3-NEXT: por %xmm0, %xmm3 +; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm2 ; SSSE3-NEXT: pand %xmm3, %xmm2 -; SSSE3-NEXT: pandn %xmm8, %xmm3 -; SSSE3-NEXT: por %xmm2, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm10, %xmm0 -; SSSE3-NEXT: movdqa %xmm0, %xmm2 -; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm4, %xmm7 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; SSSE3-NEXT: por %xmm7, %xmm0 -; SSSE3-NEXT: pand %xmm3, %xmm0 -; SSSE3-NEXT: movdqa %xmm6, %xmm2 -; SSSE3-NEXT: pxor %xmm10, %xmm2 -; SSSE3-NEXT: movdqa %xmm2, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] -; SSSE3-NEXT: pand %xmm4, %xmm7 -; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] -; SSSE3-NEXT: por %xmm7, %xmm2 -; SSSE3-NEXT: pand %xmm6, %xmm2 -; SSSE3-NEXT: movdqa %xmm1, %xmm3 -; SSSE3-NEXT: pxor %xmm10, %xmm3 -; SSSE3-NEXT: movdqa %xmm3, %xmm4 -; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm10, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSSE3-NEXT: pand %xmm6, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: por %xmm3, %xmm4 -; SSSE3-NEXT: pand %xmm1, %xmm4 -; SSSE3-NEXT: movdqa %xmm5, %xmm1 -; SSSE3-NEXT: pxor %xmm10, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_packus_v4i64_v4i16: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [65535,65535] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147549183,2147549183] +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: movapd %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm3, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: 
blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: movapd %xmm5, %xmm4 +; SSE41-NEXT: xorpd %xmm3, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_packus_v4i64_v4i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [65535,65535] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 +; AVX1-NEXT: vpand %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpand %xmm0, %xmm5, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc_packus_v4i64_v4i16: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [65535,65535,65535,65535] +; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 +; AVX2-SLOW-NEXT: vpand %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: trunc_packus_v4i64_v4i16: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [65535,65535,65535,65535] +; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 +; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-FAST-NEXT: vzeroupper +; AVX2-FAST-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v4i64_v4i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v4i64_v4i16: +; 
AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovusqw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v4i64_v4i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovusqw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = icmp sgt <4 x i64> %2, zeroinitializer + %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> zeroinitializer + %5 = trunc <4 x i64> %4 to <4 x i16> + ret <4 x i16> %5 +} + +define void @trunc_packus_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { +; SSE2-LABEL: trunc_packus_v4i64_v4i16_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535] +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm5, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm4 +; SSE2-NEXT: por %xmm0, %xmm4 +; SSE2-NEXT: movdqa %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm4, %xmm1 +; SSE2-NEXT: movdqa %xmm3, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movq %xmm1, (%rdi) +; SSE2-NEXT: 
retq +; +; SSSE3-LABEL: trunc_packus_v4i64_v4i16_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] ; SSSE3-NEXT: movdqa %xmm1, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSSE3-NEXT: pand %xmm6, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSSE3-NEXT: por %xmm1, %xmm3 -; SSSE3-NEXT: pand %xmm5, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm4 +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: movdqa %xmm4, %xmm0 +; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm4, %xmm1 +; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm3, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] -; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7] -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movq %xmm1, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_packus_v4i64_v4i16_store: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [65535,65535] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor 
%xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147549183,2147549183] +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: movapd %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm3, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: movapd %xmm5, %xmm4 +; SSE41-NEXT: xorpd %xmm3, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE41-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE41-NEXT: movq %xmm1, (%rdi) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_packus_v4i64_v4i16_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [65535,65535] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 +; AVX1-NEXT: vpand %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpand %xmm0, %xmm5, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vmovq %xmm0, (%rdi) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc_packus_v4i64_v4i16_store: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [65535,65535,65535,65535] +; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 +; AVX2-SLOW-NEXT: vpand %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; 
AVX2-SLOW-NEXT: vmovq %xmm0, (%rdi) +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: trunc_packus_v4i64_v4i16_store: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [65535,65535,65535,65535] +; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 +; AVX2-FAST-NEXT: vpand %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-FAST-NEXT: vmovq %xmm0, (%rdi) +; AVX2-FAST-NEXT: vzeroupper +; AVX2-FAST-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v4i64_v4i16_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v4i64_v4i16_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovusqw %ymm0, (%rdi) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v4i64_v4i16_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i16_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovusqw %ymm0, (%rdi) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = icmp sgt <4 x i64> %2, zeroinitializer + %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> zeroinitializer + %5 = trunc <4 x i64> %4 to <4 x i16> + store <4 x i16> %5, <4 x i16> *%p1 + ret void +} + +define <8 x i16> @trunc_packus_v8i64_v8i16(<8 x i64> %a0) { +; SSE2-LABEL: trunc_packus_v8i64_v8i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535] +; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm5 +; SSE2-NEXT: pxor %xmm10, %xmm5 +; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183] +; SSE2-NEXT: movdqa %xmm9, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm5 +; SSE2-NEXT: pand %xmm5, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm5 +; SSE2-NEXT: por %xmm1, %xmm5 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm10, %xmm1 +; SSE2-NEXT: movdqa %xmm9, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 
= xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm7, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm3, %xmm0 +; SSE2-NEXT: pxor %xmm10, %xmm0 +; SSE2-NEXT: movdqa %xmm9, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm3 +; SSE2-NEXT: pandn %xmm8, %xmm6 +; SSE2-NEXT: por %xmm3, %xmm6 +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm10, %xmm0 +; SSE2-NEXT: movdqa %xmm9, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm2, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm0 +; SSE2-NEXT: pxor %xmm10, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm10, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm10, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] +; SSE2-NEXT: por %xmm7, %xmm0 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm6, %xmm2 +; SSE2-NEXT: pxor %xmm10, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm10, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm10, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] +; SSE2-NEXT: por %xmm7, %xmm2 +; SSE2-NEXT: pand %xmm6, %xmm2 +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm10, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm10, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm10, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm3, %xmm4 +; SSE2-NEXT: pand %xmm1, %xmm4 +; SSE2-NEXT: movdqa %xmm5, %xmm1 +; SSE2-NEXT: pxor %xmm10, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm10, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm10, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: pand %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: 
trunc_packus_v8i64_v8i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535] +; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm1, %xmm5 +; SSSE3-NEXT: pxor %xmm10, %xmm5 +; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147549183,2147549183] +; SSSE3-NEXT: movdqa %xmm9, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm5 +; SSSE3-NEXT: pand %xmm5, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm5 +; SSSE3-NEXT: por %xmm1, %xmm5 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm10, %xmm1 +; SSSE3-NEXT: movdqa %xmm9, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm7 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm7, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm1 +; SSSE3-NEXT: por %xmm0, %xmm1 +; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: pxor %xmm10, %xmm0 +; SSSE3-NEXT: movdqa %xmm9, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm3 +; SSSE3-NEXT: pandn %xmm8, %xmm6 +; SSSE3-NEXT: por %xmm3, %xmm6 +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: pxor %xmm10, %xmm0 +; SSSE3-NEXT: movdqa %xmm9, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm2 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm2, %xmm3 +; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: pxor %xmm10, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm2 +; SSSE3-NEXT: pcmpgtd %xmm10, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm10, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm7 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] +; SSSE3-NEXT: por %xmm7, %xmm0 +; SSSE3-NEXT: pand %xmm3, %xmm0 +; SSSE3-NEXT: movdqa %xmm6, %xmm2 +; SSSE3-NEXT: pxor %xmm10, %xmm2 +; SSSE3-NEXT: movdqa %xmm2, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm10, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm7 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] +; SSSE3-NEXT: por %xmm7, %xmm2 +; SSSE3-NEXT: pand %xmm6, %xmm2 +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm10, %xmm3 +; SSSE3-NEXT: movdqa %xmm3, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm10, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm10, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm3, %xmm4 +; SSSE3-NEXT: pand %xmm1, %xmm4 +; SSSE3-NEXT: movdqa %xmm5, %xmm1 +; SSSE3-NEXT: 
pxor %xmm10, %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm10, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm3 +; SSSE3-NEXT: pand %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,0,2,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_packus_v8i64_v8i16: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm9 +; SSE41-NEXT: movapd {{.*#+}} xmm7 = [65535,65535] +; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm10, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183] +; SSE41-NEXT: movdqa %xmm4, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm4, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: movapd %xmm7, %xmm8 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm8 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pxor %xmm10, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm4, %xmm5 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: por %xmm5, %xmm0 +; SSE41-NEXT: movapd %xmm7, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm9, %xmm0 +; SSE41-NEXT: pxor %xmm10, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm4, %xmm5 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] +; SSE41-NEXT: pand %xmm3, %xmm0 +; SSE41-NEXT: por %xmm5, %xmm0 +; SSE41-NEXT: movapd %xmm7, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm6 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm10, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] +; SSE41-NEXT: pand %xmm3, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: movapd %xmm7, %xmm1 +; SSE41-NEXT: xorpd %xmm10, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm10, %xmm4 +; SSE41-NEXT: pcmpgtd %xmm10, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm4 +; SSE41-NEXT: movapd %xmm6, %xmm1 +; SSE41-NEXT: xorpd %xmm10, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm10, %xmm5 +; SSE41-NEXT: pcmpgtd %xmm10, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] +; SSE41-NEXT: pand %xmm5, 
%xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm1 +; SSE41-NEXT: packusdw %xmm4, %xmm1 +; SSE41-NEXT: movapd %xmm2, %xmm4 +; SSE41-NEXT: xorpd %xmm10, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm10, %xmm5 +; SSE41-NEXT: pcmpgtd %xmm10, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movapd %xmm8, %xmm2 +; SSE41-NEXT: xorpd %xmm10, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm10, %xmm5 +; SSE41-NEXT: pcmpgtd %xmm10, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3 +; SSE41-NEXT: packusdw %xmm4, %xmm3 +; SSE41-NEXT: packusdw %xmm3, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_packus_v8i64_v8i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm8 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; AVX1-NEXT: vpcmpgtq %xmm6, %xmm3, %xmm7 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm9 +; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm3, %xmm6 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm7 +; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm5 +; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm3 +; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpand %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm6, %xmm7, %xmm2 +; AVX1-NEXT: vpand %xmm0, %xmm9, %xmm0 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v8i64_v8i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [65535,65535,65535,65535] +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 +; AVX2-NEXT: vpand %ymm1, %ymm3, %ymm1 +; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2 +; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc_packus_v8i64_v8i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmovusqw %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = icmp slt <8 x i64> %a0, + %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> + %3 = icmp sgt <8 x i64> %2, zeroinitializer + %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer + %5 = trunc <8 x i64> %4 to <8 x i16> + ret <8 x i16> %5 +} + +define <4 x i16> @trunc_packus_v4i32_v4i16(<4 x i32> %a0) { +; SSE2-LABEL: trunc_packus_v4i32_v4i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] 
+; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_packus_v4i32_v4i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn %xmm1, %xmm2 +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_packus_v4i32_v4i16: +; SSE41: # %bb.0: +; SSE41-NEXT: packusdw %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc_packus_v4i32_v4i16: +; AVX: # %bb.0: +; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v4i32_v4i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v4i32_v4i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v4i32_v4i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = icmp sgt <4 x i32> %2, zeroinitializer + %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> zeroinitializer + %5 = trunc <4 x i32> %4 to <4 x i16> + ret <4 x i16> %5 +} + +define void @trunc_packus_v4i32_v4i16_store(<4 x i32> %a0, <4 x i16> *%p1) { +; SSE2-LABEL: trunc_packus_v4i32_v4i16_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: movq %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_packus_v4i32_v4i16_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn %xmm1, %xmm2 +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: pxor %xmm0, %xmm0 +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 +; SSSE3-NEXT: pand 
%xmm2, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: movq %xmm1, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_packus_v4i32_v4i16_store: +; SSE41: # %bb.0: +; SSE41-NEXT: packusdw %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rdi) +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc_packus_v4i32_v4i16_store: +; AVX: # %bb.0: +; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovq %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v4i32_v4i16_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v4i32_v4i16_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovusdw %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v4i32_v4i16_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i16_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmovusdw %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = icmp sgt <4 x i32> %2, zeroinitializer + %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> zeroinitializer + %5 = trunc <4 x i32> %4 to <4 x i16> + store <4 x i16> %5, <4 x i16> *%p1 + ret void +} + +define <8 x i16> @trunc_packus_v8i32_v8i16(<8 x i32> %a0) { +; SSE2-LABEL: trunc_packus_v8i32_v8i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: pandn %xmm2, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pandn %xmm2, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm3, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_packus_v8i32_v8i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] +; SSSE3-NEXT: movdqa %xmm2, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: pandn %xmm2, %xmm3 +; SSSE3-NEXT: por %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm0 +; SSSE3-NEXT: pandn %xmm2, %xmm1 +; SSSE3-NEXT: por %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm2 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0 +; SSSE3-NEXT: pand %xmm1, %xmm0 +; SSSE3-NEXT: movdqa %xmm3, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_packus_v8i32_v8i16: +; SSE41: # %bb.0: +; SSE41-NEXT: packusdw 
%xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_packus_v8i32_v8i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v8i32_v8i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v8i32_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v8i32_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovusdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v8i32_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovusdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp slt <8 x i32> %a0, + %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> + %3 = icmp sgt <8 x i32> %2, zeroinitializer + %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer + %5 = trunc <8 x i32> %4 to <8 x i16> + ret <8 x i16> %5 +} + +define <16 x i16> @trunc_packus_v16i32_v16i16(<16 x i32> %a0) { +; SSE2-LABEL: trunc_packus_v16i32_v16i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535] +; SSE2-NEXT: movdqa %xmm6, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm1 +; SSE2-NEXT: pandn %xmm6, %xmm4 +; SSE2-NEXT: por %xmm1, %xmm4 +; SSE2-NEXT: movdqa %xmm6, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: pandn %xmm6, %xmm5 +; SSE2-NEXT: por %xmm0, %xmm5 +; SSE2-NEXT: movdqa %xmm6, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm6, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm6, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: pandn %xmm6, %xmm3 +; SSE2-NEXT: por %xmm2, %xmm3 +; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: movdqa %xmm3, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 +; SSE2-NEXT: pand %xmm0, %xmm3 +; SSE2-NEXT: movdqa %xmm5, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: movdqa %xmm4, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 +; SSE2-NEXT: pand %xmm4, %xmm5 +; SSE2-NEXT: pslld $16, %xmm5 +; SSE2-NEXT: psrad $16, %xmm5 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm5, %xmm0 +; SSE2-NEXT: pslld $16, %xmm3 +; SSE2-NEXT: psrad $16, %xmm3 +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: packssdw %xmm3, %xmm1 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_packus_v16i32_v16i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535] +; SSSE3-NEXT: movdqa %xmm6, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm1 +; SSSE3-NEXT: pandn %xmm6, %xmm4 +; SSSE3-NEXT: por %xmm1, %xmm4 
+; SSSE3-NEXT: movdqa %xmm6, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5 +; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: pandn %xmm6, %xmm5 +; SSSE3-NEXT: por %xmm0, %xmm5 +; SSSE3-NEXT: movdqa %xmm6, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 +; SSSE3-NEXT: pand %xmm0, %xmm3 +; SSSE3-NEXT: pandn %xmm6, %xmm0 +; SSSE3-NEXT: por %xmm3, %xmm0 +; SSSE3-NEXT: movdqa %xmm6, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm2 +; SSSE3-NEXT: pandn %xmm6, %xmm3 +; SSSE3-NEXT: por %xmm2, %xmm3 +; SSSE3-NEXT: pxor %xmm2, %xmm2 +; SSSE3-NEXT: movdqa %xmm3, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: movdqa %xmm0, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 +; SSSE3-NEXT: pand %xmm0, %xmm3 +; SSSE3-NEXT: movdqa %xmm5, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0 +; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: movdqa %xmm4, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5 +; SSSE3-NEXT: pand %xmm4, %xmm5 +; SSSE3-NEXT: pslld $16, %xmm5 +; SSSE3-NEXT: psrad $16, %xmm5 +; SSSE3-NEXT: pslld $16, %xmm0 +; SSSE3-NEXT: psrad $16, %xmm0 +; SSSE3-NEXT: packssdw %xmm5, %xmm0 +; SSSE3-NEXT: pslld $16, %xmm3 +; SSSE3-NEXT: psrad $16, %xmm3 +; SSSE3-NEXT: pslld $16, %xmm1 +; SSSE3-NEXT: psrad $16, %xmm1 +; SSSE3-NEXT: packssdw %xmm3, %xmm1 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: trunc_packus_v8i64_v8i16: +; SSE41-LABEL: trunc_packus_v16i32_v16i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm9 -; SSE41-NEXT: movapd {{.*#+}} xmm7 = [65535,65535] -; SSE41-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648] -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183] -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 -; SSE41-NEXT: movdqa %xmm4, %xmm6 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] -; SSE41-NEXT: pand %xmm5, %xmm0 -; SSE41-NEXT: por %xmm6, %xmm0 -; SSE41-NEXT: movapd %xmm7, %xmm8 -; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm8 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] -; SSE41-NEXT: pand %xmm2, %xmm0 -; SSE41-NEXT: por %xmm5, %xmm0 -; SSE41-NEXT: movapd %xmm7, %xmm2 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: movdqa %xmm9, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 -; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] -; SSE41-NEXT: pand %xmm3, %xmm0 -; SSE41-NEXT: por %xmm5, %xmm0 -; SSE41-NEXT: movapd %xmm7, %xmm6 -; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm6 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm10, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] -; SSE41-NEXT: pand %xmm3, %xmm0 -; SSE41-NEXT: por %xmm4, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: movapd %xmm7, %xmm1 -; SSE41-NEXT: xorpd %xmm10, %xmm1 -; SSE41-NEXT: movapd %xmm1, %xmm4 -; SSE41-NEXT: pcmpeqd %xmm10, %xmm4 -; SSE41-NEXT: pcmpgtd %xmm10, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] -; SSE41-NEXT: pand %xmm4, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm4, %xmm4 -; 
SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm4 -; SSE41-NEXT: movapd %xmm6, %xmm1 -; SSE41-NEXT: xorpd %xmm10, %xmm1 -; SSE41-NEXT: movapd %xmm1, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm10, %xmm5 -; SSE41-NEXT: pcmpgtd %xmm10, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] -; SSE41-NEXT: pand %xmm5, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm1 -; SSE41-NEXT: packusdw %xmm4, %xmm1 -; SSE41-NEXT: movapd %xmm2, %xmm4 -; SSE41-NEXT: xorpd %xmm10, %xmm4 -; SSE41-NEXT: movapd %xmm4, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm10, %xmm5 -; SSE41-NEXT: pcmpgtd %xmm10, %xmm4 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] -; SSE41-NEXT: pand %xmm5, %xmm0 -; SSE41-NEXT: por %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm4, %xmm4 -; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 -; SSE41-NEXT: movapd %xmm8, %xmm2 -; SSE41-NEXT: xorpd %xmm10, %xmm2 -; SSE41-NEXT: movapd %xmm2, %xmm5 -; SSE41-NEXT: pcmpeqd %xmm10, %xmm5 -; SSE41-NEXT: pcmpgtd %xmm10, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] -; SSE41-NEXT: pand %xmm5, %xmm0 -; SSE41-NEXT: por %xmm2, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3 -; SSE41-NEXT: packusdw %xmm4, %xmm3 -; SSE41-NEXT: packusdw %xmm3, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm1 ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_packus_v8i64_v8i16: +; AVX1-LABEL: trunc_packus_v16i32_v16i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535] -; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm8 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 -; AVX1-NEXT: vpcmpgtq %xmm6, %xmm3, %xmm7 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 -; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm9 -; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm3, %xmm6 -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm7 -; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm5 -; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm3 -; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpand %xmm1, %xmm5, %xmm1 ; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm6, %xmm7, %xmm2 -; AVX1-NEXT: vpand %xmm0, %xmm9, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vzeroupper +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: trunc_packus_v8i64_v8i16: +; AVX2-LABEL: trunc_packus_v16i32_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [65535,65535,65535,65535] -; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 -; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 -; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 -; AVX2-NEXT: vpand %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm2 -; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_packus_v8i64_v8i16: +; AVX512-LABEL: trunc_packus_v16i32_v16i16: ; AVX512: # 
%bb.0: ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovusqw %zmm0, %xmm0 -; AVX512-NEXT: vzeroupper +; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512-NEXT: retq - %1 = icmp slt <8 x i64> %a0, - %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> - %3 = icmp sgt <8 x i64> %2, zeroinitializer - %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer - %5 = trunc <8 x i64> %4 to <8 x i16> - ret <8 x i16> %5 + %1 = icmp slt <16 x i32> %a0, + %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> + %3 = icmp sgt <16 x i32> %2, zeroinitializer + %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer + %5 = trunc <16 x i32> %4 to <16 x i16> + ret <16 x i16> %5 } -define <8 x i16> @trunc_packus_v8i32_v8i16(<8 x i32> %a0) { -; SSE2-LABEL: trunc_packus_v8i32_v8i16: +; +; PACKUS saturation truncation to vXi8 +; + +define <4 x i8> @trunc_packus_v4i64_v4i8(<4 x i64> %a0) { +; SSE2-LABEL: trunc_packus_v4i64_v4i8: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] -; SSE2-NEXT: movdqa %xmm2, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 -; SSE2-NEXT: pand %xmm3, %xmm1 -; SSE2-NEXT: pandn %xmm2, %xmm3 -; SSE2-NEXT: por %xmm1, %xmm3 -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm4 +; SSE2-NEXT: por %xmm1, %xmm4 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm3, %xmm1 +; SSE2-NEXT: movdqa %xmm5, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] +; SSE2-NEXT: por %xmm5, %xmm1 ; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: pandn %xmm2, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm1 ; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm5, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] +; SSE2-NEXT: por %xmm6, %xmm0 +; SSE2-NEXT: movdqa %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm3, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm3 +; SSE2-NEXT: pand %xmm8, %xmm3 +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm8, %xmm0 ; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm3, %xmm1 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 -; 
SSE2-NEXT: pand %xmm3, %xmm1 -; SSE2-NEXT: pslld $16, %xmm1 -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: pslld $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: packuswb %xmm3, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 ; SSE2-NEXT: retq ; -; SSSE3-LABEL: trunc_packus_v8i32_v8i16: +; SSSE3-LABEL: trunc_packus_v4i64_v4i8: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] -; SSSE3-NEXT: movdqa %xmm2, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [255,255] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm3 ; SSSE3-NEXT: pand %xmm3, %xmm1 -; SSSE3-NEXT: pandn %xmm2, %xmm3 +; SSSE3-NEXT: pandn %xmm8, %xmm3 ; SSSE3-NEXT: por %xmm1, %xmm3 -; SSSE3-NEXT: movdqa %xmm2, %xmm1 -; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 -; SSSE3-NEXT: pand %xmm1, %xmm0 -; SSSE3-NEXT: pandn %xmm2, %xmm1 -; SSSE3-NEXT: por %xmm0, %xmm1 -; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0 -; SSSE3-NEXT: pand %xmm1, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm4 +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: movdqa %xmm4, %xmm0 +; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm5, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] +; SSSE3-NEXT: por %xmm6, %xmm0 +; SSSE3-NEXT: pand %xmm4, %xmm0 ; SSSE3-NEXT: movdqa %xmm3, %xmm1 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1 -; SSSE3-NEXT: pand %xmm3, %xmm1 -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -; SSSE3-NEXT: pshufb %xmm2, %xmm1 -; SSSE3-NEXT: pshufb %xmm2, %xmm0 -; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm2 +; SSSE3-NEXT: pand %xmm3, %xmm2 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSSE3-NEXT: pshufb %xmm1, %xmm2 +; SSSE3-NEXT: pshufb %xmm1, %xmm0 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: trunc_packus_v8i32_v8i16: +; SSE41-LABEL: trunc_packus_v4i64_v4i8: ; SSE41: # 
%bb.0: -; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483903,2147483903] +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: xorpd %xmm2, %xmm2 +; SSE41-NEXT: movapd %xmm4, %xmm1 +; SSE41-NEXT: xorpd %xmm3, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: movapd %xmm5, %xmm4 +; SSE41-NEXT: xorpd %xmm3, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm0, %xmm2 +; SSE41-NEXT: pshufb %xmm0, %xmm1 +; SSE41-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_packus_v8i32_v8i16: +; AVX1-LABEL: trunc_packus_v4i64_v4i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 +; AVX1-NEXT: vpand %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm5, %xmm0 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX2-LABEL: trunc_packus_v8i32_v8i16: +; AVX2-LABEL: trunc_packus_v4i64_v4i8: ; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [255,255,255,255] +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; 
AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512F-LABEL: trunc_packus_v8i32_v8i16: +; AVX512F-LABEL: trunc_packus_v4i64_v4i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535] -; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; -; AVX512VL-LABEL: trunc_packus_v8i32_v8i16: +; AVX512VL-LABEL: trunc_packus_v4i64_v4i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmovusdw %ymm0, %xmm0 +; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovusqb %ymm0, %xmm0 ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; -; AVX512BW-LABEL: trunc_packus_v8i32_v8i16: +; AVX512BW-LABEL: trunc_packus_v4i64_v4i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535] -; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; -; AVX512BWVL-LABEL: trunc_packus_v8i32_v8i16: +; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i8: ; AVX512BWVL: # %bb.0: ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BWVL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BWVL-NEXT: vpmovusdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovusqb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq - %1 = icmp slt <8 x i32> %a0, - %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> - %3 = icmp sgt <8 x i32> %2, zeroinitializer - %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer - %5 = trunc <8 x i32> %4 to <8 x i16> - ret <8 x i16> %5 + %1 = icmp slt <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = icmp sgt <4 x i64> %2, zeroinitializer + %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> zeroinitializer + %5 = trunc <4 x i64> %4 to <4 x i8> + ret <4 x i8> %5 } -define <16 x i16> @trunc_packus_v16i32_v16i16(<16 x i32> %a0) { -; SSE2-LABEL: trunc_packus_v16i32_v16i16: +define void @trunc_packus_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { +; SSE2-LABEL: trunc_packus_v4i64_v4i8_store: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535] -; SSE2-NEXT: movdqa %xmm6, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: 
pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm4 ; SSE2-NEXT: pand %xmm4, %xmm1 -; SSE2-NEXT: pandn %xmm6, %xmm4 +; SSE2-NEXT: pandn %xmm8, %xmm4 ; SSE2-NEXT: por %xmm1, %xmm4 -; SSE2-NEXT: movdqa %xmm6, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 -; SSE2-NEXT: pand %xmm5, %xmm0 -; SSE2-NEXT: pandn %xmm6, %xmm5 -; SSE2-NEXT: por %xmm0, %xmm5 -; SSE2-NEXT: movdqa %xmm6, %xmm0 -; SSE2-NEXT: pcmpgtd %xmm3, %xmm0 -; SSE2-NEXT: pand %xmm0, %xmm3 -; SSE2-NEXT: pandn %xmm6, %xmm0 -; SSE2-NEXT: por %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm6, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 -; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: pandn %xmm6, %xmm3 -; SSE2-NEXT: por %xmm2, %xmm3 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: movdqa %xmm3, %xmm1 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 -; SSE2-NEXT: pand %xmm3, %xmm1 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 -; SSE2-NEXT: pand %xmm0, %xmm3 -; SSE2-NEXT: movdqa %xmm5, %xmm0 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm3, %xmm1 +; SSE2-NEXT: movdqa %xmm5, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] +; SSE2-NEXT: por %xmm5, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: pand %xmm5, %xmm0 -; SSE2-NEXT: movdqa %xmm4, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 -; SSE2-NEXT: pand %xmm4, %xmm5 -; SSE2-NEXT: pslld $16, %xmm5 -; SSE2-NEXT: psrad $16, %xmm5 -; SSE2-NEXT: pslld $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: packssdw %xmm5, %xmm0 -; SSE2-NEXT: pslld $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: pslld $16, %xmm1 -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: packssdw %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: movdqa %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm3 +; SSE2-NEXT: pand %xmm8, %xmm3 +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm8, %xmm2 +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: packuswb %xmm3, %xmm2 +; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: movd %xmm2, (%rdi) ; SSE2-NEXT: retq ; -; SSSE3-LABEL: trunc_packus_v16i32_v16i16: +; SSSE3-LABEL: trunc_packus_v4i64_v4i8_store: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [65535,65535,65535,65535] -; SSSE3-NEXT: movdqa %xmm6, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [255,255] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = 
[2147483903,2147483903] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 -; SSSE3-NEXT: pand %xmm4, %xmm1 -; SSSE3-NEXT: pandn %xmm6, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] ; SSSE3-NEXT: por %xmm1, %xmm4 -; SSSE3-NEXT: movdqa %xmm6, %xmm5 -; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5 -; SSSE3-NEXT: pand %xmm5, %xmm0 -; SSSE3-NEXT: pandn %xmm6, %xmm5 -; SSSE3-NEXT: por %xmm0, %xmm5 -; SSSE3-NEXT: movdqa %xmm6, %xmm0 -; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 -; SSSE3-NEXT: pand %xmm0, %xmm3 -; SSSE3-NEXT: pandn %xmm6, %xmm0 -; SSSE3-NEXT: por %xmm3, %xmm0 -; SSSE3-NEXT: movdqa %xmm6, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 -; SSSE3-NEXT: pand %xmm3, %xmm2 -; SSSE3-NEXT: pandn %xmm6, %xmm3 -; SSSE3-NEXT: por %xmm2, %xmm3 -; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: movdqa %xmm3, %xmm1 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm4 +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: movdqa %xmm4, %xmm0 +; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1 -; SSSE3-NEXT: pand %xmm3, %xmm1 -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 -; SSSE3-NEXT: pand %xmm0, %xmm3 -; SSSE3-NEXT: movdqa %xmm5, %xmm0 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSSE3-NEXT: pand %xmm5, %xmm0 -; SSSE3-NEXT: movdqa %xmm4, %xmm5 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5 -; SSSE3-NEXT: pand %xmm4, %xmm5 -; SSSE3-NEXT: pslld $16, %xmm5 -; SSSE3-NEXT: psrad $16, %xmm5 -; SSSE3-NEXT: pslld $16, %xmm0 -; SSSE3-NEXT: psrad $16, %xmm0 -; SSSE3-NEXT: packssdw %xmm5, %xmm0 -; SSSE3-NEXT: pslld $16, %xmm3 -; SSSE3-NEXT: psrad $16, %xmm3 -; SSSE3-NEXT: pslld $16, %xmm1 -; SSSE3-NEXT: psrad $16, %xmm1 -; SSSE3-NEXT: packssdw %xmm3, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm4, %xmm1 +; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm3, %xmm2 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSSE3-NEXT: pshufb %xmm0, %xmm2 +; SSSE3-NEXT: pshufb %xmm0, %xmm1 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSSE3-NEXT: movd %xmm1, (%rdi) ; SSSE3-NEXT: retq ; -; SSE41-LABEL: trunc_packus_v16i32_v16i16: +; SSE41-LABEL: trunc_packus_v4i64_v4i8_store: ; SSE41: # %bb.0: -; SSE41-NEXT: packusdw %xmm1, %xmm0 -; SSE41-NEXT: 
packusdw %xmm3, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483903,2147483903] +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: movapd %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm3, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: movapd %xmm5, %xmm4 +; SSE41-NEXT: xorpd %xmm3, %xmm4 +; SSE41-NEXT: movapd %xmm4, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm0, %xmm1 +; SSE41-NEXT: pshufb %xmm0, %xmm2 +; SSE41-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE41-NEXT: movd %xmm2, (%rdi) ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_packus_v16i32_v16i16: +; AVX1-LABEL: trunc_packus_v4i64_v4i8_store: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 +; AVX1-NEXT: vpand %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm5, %xmm0 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vmovd %xmm0, (%rdi) +; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX2-LABEL: trunc_packus_v16i32_v16i16: +; AVX2-LABEL: trunc_packus_v4i64_v4i8_store: ; AVX2: # %bb.0: -; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [255,255,255,255] +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, 
%ymm1, %ymm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vmovd %xmm0, (%rdi) +; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_packus_v16i32_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmovusdw %zmm0, %ymm0 -; AVX512-NEXT: retq - %1 = icmp slt <16 x i32> %a0, - %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> - %3 = icmp sgt <16 x i32> %2, zeroinitializer - %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer - %5 = trunc <16 x i32> %4 to <16 x i16> - ret <16 x i16> %5 -} - +; AVX512F-LABEL: trunc_packus_v4i64_v4i8_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v4i64_v4i8_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovusqb %ymm0, (%rdi) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq ; -; PACKUS saturation truncation to v16i8 +; AVX512BW-LABEL: trunc_packus_v4i64_v4i8_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq ; +; AVX512BWVL-LABEL: trunc_packus_v4i64_v4i8_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovusqb %ymm0, (%rdi) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = icmp sgt <4 x i64> %2, zeroinitializer + %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> zeroinitializer + %5 = trunc <4 x i64> %4 to <4 x i8> + store <4 x i8> %5, <4 x i8> *%p1 + ret void +} define <8 x i8> @trunc_packus_v8i64_v8i8(<8 x i64> %a0) { ; SSE2-LABEL: trunc_packus_v8i64_v8i8: @@ -2791,6 +4002,210 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) { ret <16 x i8> %5 } +define <4 x i8> @trunc_packus_v4i32_v4i8(<4 x i32> %a0) { +; SSE2-LABEL: trunc_packus_v4i32_v4i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255] +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_packus_v4i32_v4i8: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa 
{{.*#+}} xmm1 = [255,255,255,255] +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn %xmm1, %xmm2 +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_packus_v4i32_v4i8: +; SSE41: # %bb.0: +; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pmaxsd %xmm1, %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_packus_v4i32_v4i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v4i32_v4i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v4i32_v4i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v4i32_v4i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v4i32_v4i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = icmp sgt <4 x i32> %2, zeroinitializer + %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> zeroinitializer + %5 = trunc <4 x i32> %4 to <4 x i8> + ret <4 x i8> %5 +} + +define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { +; SSE2-LABEL: trunc_packus_v4i32_v4i8_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255] +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: packuswb %xmm0, 
%xmm1 +; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_packus_v4i32_v4i8_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255] +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn %xmm1, %xmm2 +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: pxor %xmm0, %xmm0 +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm2, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movd %xmm1, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_packus_v4i32_v4i8_store: +; SSE41: # %bb.0: +; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm1 +; SSE41-NEXT: pmaxsd %xmm0, %xmm1 +; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movd %xmm1, (%rdi) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_packus_v4i32_v4i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vmovd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v4i32_v4i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vmovd %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v4i32_v4i8_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v4i32_v4i8_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovusdb %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v4i32_v4i8_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i8_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmovusdb %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = icmp sgt <4 x i32> %2, zeroinitializer + %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> zeroinitializer + %5 = trunc <4 x i32> %4 to <4 x i8> + store <4 x i8> %5, <4 x i8> *%p1 + ret void +} + define <8 x i8> @trunc_packus_v8i32_v8i8(<8 x i32> %a0) { ; SSE-LABEL: trunc_packus_v8i32_v8i8: ; SSE: # %bb.0: @@ -2816,11 +4231,9 @@ define <8 x i8> @trunc_packus_v8i32_v8i8(<8 x i32> %a0) { ; ; AVX512F-LABEL: trunc_packus_v8i32_v8i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd 
{{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -2834,11 +4247,9 @@ define <8 x i8> @trunc_packus_v8i32_v8i8(<8 x i32> %a0) { ; ; AVX512BW-LABEL: trunc_packus_v8i32_v8i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -2885,11 +4296,9 @@ define void @trunc_packus_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512F-LABEL: trunc_packus_v8i32_v8i8_store: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2904,11 +4313,9 @@ define void @trunc_packus_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512BW-LABEL: trunc_packus_v8i32_v8i8_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2971,6 +4378,93 @@ define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32> %a0) { ret <16 x i8> %5 } +define <8 x i8> @trunc_packus_v8i16_v8i8(<8 x i16> %a0) { +; SSE-LABEL: trunc_packus_v8i16_v8i8: +; SSE: # %bb.0: +; SSE-NEXT: packuswb %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: trunc_packus_v8i16_v8i8: +; AVX: # %bb.0: +; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v8i16_v8i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v8i16_v8i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v8i16_v8i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v8i16_v8i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512BWVL-NEXT: retq + %1 = icmp slt <8 x i16> %a0, + %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> + %3 = icmp sgt <8 x i16> %2, zeroinitializer + %4 = 
select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + %5 = trunc <8 x i16> %4 to <8 x i8> + ret <8 x i8> %5 +} + +define void @trunc_packus_v8i16_v8i8_store(<8 x i16> %a0, <8 x i8> *%p1) { +; SSE-LABEL: trunc_packus_v8i16_v8i8_store: +; SSE: # %bb.0: +; SSE-NEXT: packuswb %xmm0, %xmm0 +; SSE-NEXT: movq %xmm0, (%rdi) +; SSE-NEXT: retq +; +; AVX-LABEL: trunc_packus_v8i16_v8i8_store: +; AVX: # %bb.0: +; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovq %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_packus_v8i16_v8i8_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_packus_v8i16_v8i8_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovq %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_packus_v8i16_v8i8_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_packus_v8i16_v8i8_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmovuswb %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp slt <8 x i16> %a0, + %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> + %3 = icmp sgt <8 x i16> %2, zeroinitializer + %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer + %5 = trunc <8 x i16> %4 to <8 x i8> + store <8 x i8> %5, <8 x i8> *%p1 + ret void +} + define <16 x i8> @trunc_packus_v16i16_v16i8(<16 x i16> %a0) { ; SSE-LABEL: trunc_packus_v16i16_v16i8: ; SSE: # %bb.0: @@ -3007,11 +4501,8 @@ define <16 x i8> @trunc_packus_v16i16_v16i8(<16 x i16> %a0) { ; ; AVX512BW-LABEL: trunc_packus_v16i16_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -3087,3 +4578,57 @@ define <32 x i8> @trunc_packus_v32i16_v32i8(<32 x i16> %a0) { %5 = trunc <32 x i16> %4 to <32 x i8> ret <32 x i8> %5 } + +define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32> %a0) { +; SSE-LABEL: trunc_packus_v32i32_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packuswb %xmm2, %xmm0 +; SSE-NEXT: packssdw %xmm7, %xmm6 +; SSE-NEXT: packssdw %xmm5, %xmm4 +; SSE-NEXT: packuswb %xmm6, %xmm4 +; SSE-NEXT: movdqa %xmm4, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc_packus_v32i32_v32i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 +; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 +; AVX1-NEXT: vpackssdw %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_packus_v32i32_v32i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: 
vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc_packus_v32i32_v32i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0 +; AVX512-NEXT: vpmovusdb %zmm0, %xmm0 +; AVX512-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1 +; AVX512-NEXT: vpmovusdb %zmm1, %xmm1 +; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: retq + %1 = icmp slt <32 x i32> %a0, + %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> + %3 = icmp sgt <32 x i32> %2, zeroinitializer + %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> zeroinitializer + %5 = trunc <32 x i32> %4 to <32 x i8> + ret <32 x i8> %5 +} diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll index 774a478a5d72d..f2e7d0aa330a7 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll @@ -671,491 +671,1648 @@ define <8 x i32> @trunc_ssat_v8i64_v8i32(<8 x i64> %a0) { ; Signed saturation truncation to vXi16 ; -define <8 x i16> @trunc_ssat_v8i64_v8i16(<8 x i64> %a0) { -; SSE2-LABEL: trunc_ssat_v8i64_v8i16: +define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) { +; SSE2-LABEL: trunc_ssat_v4i64_v4i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm2, %xmm5 -; SSE2-NEXT: pxor %xmm4, %xmm5 -; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147516415,2147516415] -; SSE2-NEXT: movdqa %xmm9, %xmm7 -; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm5 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3] -; SSE2-NEXT: pand %xmm10, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] -; SSE2-NEXT: por %xmm6, %xmm5 -; SSE2-NEXT: pand %xmm5, %xmm2 -; SSE2-NEXT: pandn %xmm8, %xmm5 -; SSE2-NEXT: por %xmm2, %xmm5 -; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: pxor %xmm4, %xmm2 -; SSE2-NEXT: movdqa %xmm9, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm2, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] -; SSE2-NEXT: pand %xmm10, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] -; SSE2-NEXT: por %xmm7, %xmm2 -; SSE2-NEXT: pand %xmm2, %xmm3 -; SSE2-NEXT: pandn %xmm8, %xmm2 -; SSE2-NEXT: por %xmm3, %xmm2 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm3 -; SSE2-NEXT: movdqa %xmm9, %xmm6 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147516415,2147516415] +; SSE2-NEXT: movdqa %xmm5, %xmm6 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3] -; SSE2-NEXT: pand %xmm10, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] -; SSE2-NEXT: por %xmm7, %xmm3 -; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm1 ; SSE2-NEXT: pandn %xmm8, %xmm3 -; SSE2-NEXT: por %xmm0, %xmm3 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm4, %xmm0 -; SSE2-NEXT: movdqa %xmm9, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm6 
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm7, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm7 -; SSE2-NEXT: pand %xmm7, %xmm1 -; SSE2-NEXT: pandn %xmm8, %xmm7 -; SSE2-NEXT: por %xmm1, %xmm7 -; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848] -; SSE2-NEXT: movdqa %xmm7, %xmm0 -; SSE2-NEXT: pxor %xmm4, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200] +; SSE2-NEXT: por %xmm1, %xmm3 ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: pcmpgtd %xmm9, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pand %xmm1, %xmm7 -; SSE2-NEXT: pandn %xmm8, %xmm1 -; SSE2-NEXT: por %xmm7, %xmm1 -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm4, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm9, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm10, %xmm7 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] -; SSE2-NEXT: por %xmm7, %xmm0 -; SSE2-NEXT: pand %xmm0, %xmm3 -; SSE2-NEXT: pandn %xmm8, %xmm0 -; SSE2-NEXT: por %xmm3, %xmm0 -; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm2, %xmm1 -; SSE2-NEXT: pxor %xmm4, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm5, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSE2-NEXT: pand %xmm6, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSE2-NEXT: por %xmm1, %xmm3 -; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: pandn %xmm8, %xmm3 -; SSE2-NEXT: por %xmm2, %xmm3 -; SSE2-NEXT: pxor %xmm5, %xmm4 -; SSE2-NEXT: movdqa %xmm4, %xmm1 -; SSE2-NEXT: pcmpgtd %xmm9, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm9, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: pand %xmm2, %xmm4 +; SSE2-NEXT: por %xmm1, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm4 +; SSE2-NEXT: por %xmm0, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848] +; SSE2-NEXT: movdqa %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200] +; SSE2-NEXT: movdqa %xmm1, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: por %xmm4, %xmm1 -; SSE2-NEXT: pand %xmm1, %xmm5 -; SSE2-NEXT: pandn %xmm8, %xmm1 -; SSE2-NEXT: por %xmm5, %xmm1 -; SSE2-NEXT: packssdw %xmm3, %xmm1 -; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm7, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm4 +; SSE2-NEXT: pandn %xmm0, %xmm6 +; SSE2-NEXT: por %xmm4, %xmm6 +; SSE2-NEXT: pxor %xmm3, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 +; 
SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; -; SSSE3-LABEL: trunc_ssat_v8i64_v8i16: +; SSSE3-LABEL: trunc_ssat_v4i64_v4i16: ; SSSE3: # %bb.0: ; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] -; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] -; SSSE3-NEXT: movdqa %xmm2, %xmm5 -; SSSE3-NEXT: pxor %xmm4, %xmm5 -; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147516415,2147516415] -; SSSE3-NEXT: movdqa %xmm9, %xmm7 -; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7 -; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3] -; SSSE3-NEXT: pand %xmm10, %xmm6 -; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] -; SSSE3-NEXT: por %xmm6, %xmm5 -; SSSE3-NEXT: pand %xmm5, %xmm2 -; SSSE3-NEXT: pandn %xmm8, %xmm5 -; SSSE3-NEXT: por %xmm2, %xmm5 -; SSSE3-NEXT: movdqa %xmm3, %xmm2 -; SSSE3-NEXT: pxor %xmm4, %xmm2 -; SSSE3-NEXT: movdqa %xmm9, %xmm6 -; SSSE3-NEXT: pcmpgtd %xmm2, %xmm6 -; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] -; SSSE3-NEXT: pand %xmm10, %xmm7 -; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] -; SSSE3-NEXT: por %xmm7, %xmm2 -; SSSE3-NEXT: pand %xmm2, %xmm3 -; SSSE3-NEXT: pandn %xmm8, %xmm2 -; SSSE3-NEXT: por %xmm3, %xmm2 -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pxor %xmm4, %xmm3 -; SSSE3-NEXT: movdqa %xmm9, %xmm6 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147516415,2147516415] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 -; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3] -; SSSE3-NEXT: pand %xmm10, %xmm7 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] -; SSSE3-NEXT: por %xmm7, %xmm3 -; SSSE3-NEXT: pand %xmm3, %xmm0 +; SSSE3-NEXT: por %xmm4, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm1 ; SSSE3-NEXT: pandn %xmm8, %xmm3 -; SSSE3-NEXT: por %xmm0, %xmm3 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: pxor %xmm4, %xmm0 -; SSSE3-NEXT: movdqa %xmm9, %xmm6 -; SSSE3-NEXT: pcmpgtd %xmm0, %xmm6 -; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm7, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3] -; SSSE3-NEXT: por %xmm0, %xmm7 -; SSSE3-NEXT: pand %xmm7, %xmm1 -; SSSE3-NEXT: pandn %xmm8, %xmm7 -; SSSE3-NEXT: por %xmm1, %xmm7 -; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848] -; SSSE3-NEXT: movdqa %xmm7, %xmm0 -; SSSE3-NEXT: pxor %xmm4, %xmm0 -; SSSE3-NEXT: movdqa 
{{.*#+}} xmm9 = [18446744071562035200,18446744071562035200] +; SSSE3-NEXT: por %xmm1, %xmm3 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: pcmpgtd %xmm9, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm6, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSSE3-NEXT: por %xmm0, %xmm1 -; SSSE3-NEXT: pand %xmm1, %xmm7 -; SSSE3-NEXT: pandn %xmm8, %xmm1 -; SSSE3-NEXT: por %xmm7, %xmm1 -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm4, %xmm0 -; SSSE3-NEXT: movdqa %xmm0, %xmm6 -; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6 -; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm10, %xmm7 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] -; SSSE3-NEXT: por %xmm7, %xmm0 -; SSSE3-NEXT: pand %xmm0, %xmm3 -; SSSE3-NEXT: pandn %xmm8, %xmm0 -; SSSE3-NEXT: por %xmm3, %xmm0 -; SSSE3-NEXT: packssdw %xmm1, %xmm0 -; SSSE3-NEXT: movdqa %xmm2, %xmm1 -; SSSE3-NEXT: pxor %xmm4, %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSSE3-NEXT: pand %xmm6, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSSE3-NEXT: por %xmm1, %xmm3 -; SSSE3-NEXT: pand %xmm3, %xmm2 -; SSSE3-NEXT: pandn %xmm8, %xmm3 -; SSSE3-NEXT: por %xmm2, %xmm3 -; SSSE3-NEXT: pxor %xmm5, %xmm4 -; SSSE3-NEXT: movdqa %xmm4, %xmm1 -; SSSE3-NEXT: pcmpgtd %xmm9, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm9, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: pand %xmm2, %xmm4 +; SSSE3-NEXT: por %xmm1, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm4 +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848] +; SSSE3-NEXT: movdqa %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200] +; SSSE3-NEXT: movdqa %xmm1, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSSE3-NEXT: por %xmm4, %xmm1 -; SSSE3-NEXT: pand %xmm1, %xmm5 -; SSSE3-NEXT: pandn %xmm8, %xmm1 -; SSSE3-NEXT: por %xmm5, %xmm1 -; SSSE3-NEXT: packssdw %xmm3, %xmm1 -; SSSE3-NEXT: packssdw %xmm1, %xmm0 +; SSSE3-NEXT: pand %xmm7, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm4 +; SSSE3-NEXT: pandn %xmm0, %xmm6 +; SSSE3-NEXT: por %xmm4, %xmm6 +; SSSE3-NEXT: pxor %xmm3, %xmm2 +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: por %xmm2, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm3 +; SSSE3-NEXT: pandn %xmm0, %xmm1 +; SSSE3-NEXT: por %xmm3, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} 
xmm1 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: retq ; -; SSE41-LABEL: trunc_ssat_v8i64_v8i16: +; SSE41-LABEL: trunc_ssat_v4i64_v4i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm10 -; SSE41-NEXT: movapd {{.*#+}} xmm11 = [32767,32767] -; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147516415,2147516415] +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 ; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147516415,2147516415] -; SSE41-NEXT: movdqa %xmm4, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 -; SSE41-NEXT: movdqa %xmm4, %xmm6 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] -; SSE41-NEXT: pand %xmm7, %xmm0 -; SSE41-NEXT: por %xmm6, %xmm0 -; SSE41-NEXT: movapd %xmm11, %xmm8 -; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm8 -; SSE41-NEXT: movdqa %xmm3, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm4, %xmm6 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] -; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: pand %xmm1, %xmm0 ; SSE41-NEXT: por %xmm6, %xmm0 -; SSE41-NEXT: movapd %xmm11, %xmm9 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm9 -; SSE41-NEXT: movdqa %xmm10, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE41-NEXT: movdqa %xmm4, %xmm3 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] -; SSE41-NEXT: pand %xmm2, %xmm0 -; SSE41-NEXT: por %xmm3, %xmm0 -; SSE41-NEXT: movapd %xmm11, %xmm2 -; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm2 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm5, %xmm0 -; SSE41-NEXT: movdqa %xmm4, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 -; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] -; SSE41-NEXT: pand %xmm3, %xmm0 -; SSE41-NEXT: por %xmm4, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm11 -; SSE41-NEXT: movapd {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848] -; SSE41-NEXT: movapd %xmm11, %xmm1 -; SSE41-NEXT: xorpd %xmm5, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744071562035200,18446744071562035200] -; SSE41-NEXT: movapd %xmm1, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 -; SSE41-NEXT: pcmpgtd %xmm4, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] -; SSE41-NEXT: pand %xmm6, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: movapd %xmm3, %xmm6 -; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm6 -; SSE41-NEXT: movapd %xmm2, %xmm1 -; SSE41-NEXT: xorpd %xmm5, %xmm1 -; SSE41-NEXT: movapd %xmm1, %xmm7 -; SSE41-NEXT: pcmpeqd %xmm4, %xmm7 -; SSE41-NEXT: 
pcmpgtd %xmm4, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] -; SSE41-NEXT: pand %xmm7, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: movapd %xmm3, %xmm1 -; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 -; SSE41-NEXT: packssdw %xmm6, %xmm1 -; SSE41-NEXT: movapd %xmm9, %xmm2 -; SSE41-NEXT: xorpd %xmm5, %xmm2 -; SSE41-NEXT: movapd %xmm2, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 -; SSE41-NEXT: pcmpgtd %xmm4, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; SSE41-NEXT: movapd %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm3, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200] +; SSE41-NEXT: movapd %xmm2, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] -; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: pand %xmm7, %xmm0 ; SSE41-NEXT: por %xmm2, %xmm0 -; SSE41-NEXT: movapd %xmm3, %xmm2 -; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm2 -; SSE41-NEXT: xorpd %xmm8, %xmm5 -; SSE41-NEXT: movapd %xmm5, %xmm6 -; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 -; SSE41-NEXT: pcmpgtd %xmm4, %xmm5 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] -; SSE41-NEXT: pand %xmm6, %xmm0 -; SSE41-NEXT: por %xmm5, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3 -; SSE41-NEXT: packssdw %xmm2, %xmm3 -; SSE41-NEXT: packssdw %xmm3, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm5, %xmm3 +; SSE41-NEXT: movapd %xmm3, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_ssat_v8i64_v8i16: +; AVX1-LABEL: trunc_ssat_v4i64_v4i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767] -; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm8 -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 -; AVX1-NEXT: vpcmpgtq %xmm6, %xmm3, %xmm7 -; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 -; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,32767] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848] -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm9 -; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm3, %xmm6 -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm7 -; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm5 -; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm3 -; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm4, %xmm2 -; AVX1-NEXT: vblendvpd %xmm9, %xmm0, %xmm4, %xmm0 -; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; 
AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX2-LABEL: trunc_ssat_v8i64_v8i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [32767,32767,32767,32767] -; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 -; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 -; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] -; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 -; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 -; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX2-SLOW-LABEL: trunc_ssat_v4i64_v4i16: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767] +; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] +; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 +; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq ; -; AVX512-LABEL: trunc_ssat_v8i64_v8i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovsqw %zmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq - %1 = icmp slt <8 x i64> %a0, - %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> - %3 = icmp sgt <8 x i64> %2, - %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> - %5 = trunc <8 x i64> %4 to <8 x i16> - ret <8 x i16> %5 +; AVX2-FAST-LABEL: trunc_ssat_v4i64_v4i16: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767] +; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] +; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 +; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-FAST-NEXT: vzeroupper +; AVX2-FAST-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v4i64_v4i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovsqw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovsqw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = icmp sgt <4 x i64> %2, + %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> + %5 = trunc <4 x i64> %4 to <4 x i16> + ret <4 x i16> %5 } -define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) { -; SSE-LABEL: trunc_ssat_v8i32_v8i16: -; SSE: # %bb.0: -; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: retq +define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { +; SSE2-LABEL: trunc_ssat_v4i64_v4i16_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147516415,2147516415] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm5, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm4 +; SSE2-NEXT: por %xmm0, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848] +; SSE2-NEXT: movdqa %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200] +; SSE2-NEXT: movdqa %xmm1, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm4 +; SSE2-NEXT: pandn %xmm0, %xmm6 +; SSE2-NEXT: por %xmm4, %xmm6 +; SSE2-NEXT: pxor %xmm3, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] +; 
SSE2-NEXT: pcmpeqd %xmm5, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movq %xmm1, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_ssat_v4i64_v4i16_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147516415,2147516415] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm4 +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848] +; SSSE3-NEXT: movdqa %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562035200,18446744071562035200] +; SSSE3-NEXT: movdqa %xmm1, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm4 +; SSSE3-NEXT: pandn %xmm0, %xmm6 +; SSSE3-NEXT: por %xmm4, %xmm6 +; SSSE3-NEXT: pxor %xmm3, %xmm2 +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: por %xmm2, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm3 +; SSSE3-NEXT: pandn %xmm0, %xmm1 +; SSSE3-NEXT: por %xmm3, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movq %xmm1, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_ssat_v4i64_v4i16_store: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = 
[2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147516415,2147516415] +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] +; SSE41-NEXT: movapd %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm3, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200] +; SSE41-NEXT: movapd %xmm2, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE41-NEXT: pand %xmm7, %xmm0 +; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm5, %xmm3 +; SSE41-NEXT: movapd %xmm3, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE41-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE41-NEXT: movq %xmm1, (%rdi) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_ssat_v4i64_v4i16_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,32767] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848] +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vmovq %xmm0, (%rdi) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc_ssat_v4i64_v4i16_store: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767] +; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] +; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 +; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, 
%xmm1 +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-SLOW-NEXT: vmovq %xmm0, (%rdi) +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: trunc_ssat_v4i64_v4i16_store: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767] +; AVX2-FAST-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] +; AVX2-FAST-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 +; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-FAST-NEXT: vmovq %xmm0, (%rdi) +; AVX2-FAST-NEXT: vzeroupper +; AVX2-FAST-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v4i64_v4i16_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovsqw %ymm0, (%rdi) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovsqw %ymm0, (%rdi) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = icmp sgt <4 x i64> %2, + %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> + %5 = trunc <4 x i64> %4 to <4 x i16> + store <4 x i16> %5, <4 x i16> *%p1 + ret void +} + +define <8 x i16> @trunc_ssat_v8i64_v8i16(<8 x i64> %a0) { +; SSE2-LABEL: trunc_ssat_v8i64_v8i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm2, %xmm5 +; SSE2-NEXT: pxor %xmm4, %xmm5 +; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147516415,2147516415] +; SSE2-NEXT: movdqa %xmm9, %xmm7 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3] +; SSE2-NEXT: pand %xmm10, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] +; SSE2-NEXT: por %xmm6, %xmm5 +; SSE2-NEXT: pand %xmm5, %xmm2 +; SSE2-NEXT: pandn %xmm8, %xmm5 +; SSE2-NEXT: por %xmm2, %xmm5 +; SSE2-NEXT: movdqa %xmm3, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: movdqa %xmm9, %xmm6 +; 
SSE2-NEXT: pcmpgtd %xmm2, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] +; SSE2-NEXT: pand %xmm10, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm7, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm3 +; SSE2-NEXT: pandn %xmm8, %xmm2 +; SSE2-NEXT: por %xmm3, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: movdqa %xmm9, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm10, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm7, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm0, %xmm3 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm9, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm7 +; SSE2-NEXT: pand %xmm7, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm7 +; SSE2-NEXT: por %xmm1, %xmm7 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848] +; SSE2-NEXT: movdqa %xmm7, %xmm0 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200] +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm9, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm7 +; SSE2-NEXT: pandn %xmm8, %xmm1 +; SSE2-NEXT: por %xmm7, %xmm1 +; SSE2-NEXT: movdqa %xmm3, %xmm0 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm9, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm10, %xmm7 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm7, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm8, %xmm0 +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm9, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm2, %xmm3 +; SSE2-NEXT: pxor %xmm5, %xmm4 +; SSE2-NEXT: movdqa %xmm4, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm9, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm9, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: pand %xmm2, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm5 +; SSE2-NEXT: pandn %xmm8, %xmm1 +; SSE2-NEXT: por %xmm5, %xmm1 +; SSE2-NEXT: packssdw %xmm3, %xmm1 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: 
trunc_ssat_v8i64_v8i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767] +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm2, %xmm5 +; SSSE3-NEXT: pxor %xmm4, %xmm5 +; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147516415,2147516415] +; SSSE3-NEXT: movdqa %xmm9, %xmm7 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7 +; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm5 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3] +; SSSE3-NEXT: pand %xmm10, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] +; SSSE3-NEXT: por %xmm6, %xmm5 +; SSSE3-NEXT: pand %xmm5, %xmm2 +; SSSE3-NEXT: pandn %xmm8, %xmm5 +; SSSE3-NEXT: por %xmm2, %xmm5 +; SSSE3-NEXT: movdqa %xmm3, %xmm2 +; SSSE3-NEXT: pxor %xmm4, %xmm2 +; SSSE3-NEXT: movdqa %xmm9, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm2, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3] +; SSSE3-NEXT: pand %xmm10, %xmm7 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm7, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm3 +; SSSE3-NEXT: pandn %xmm8, %xmm2 +; SSSE3-NEXT: por %xmm3, %xmm2 +; SSSE3-NEXT: movdqa %xmm0, %xmm3 +; SSSE3-NEXT: pxor %xmm4, %xmm3 +; SSSE3-NEXT: movdqa %xmm9, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm10, %xmm7 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm7, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm0, %xmm3 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: pxor %xmm4, %xmm0 +; SSSE3-NEXT: movdqa %xmm9, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm7 +; SSSE3-NEXT: pand %xmm7, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm7 +; SSSE3-NEXT: por %xmm1, %xmm7 +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709518848,18446744073709518848] +; SSSE3-NEXT: movdqa %xmm7, %xmm0 +; SSSE3-NEXT: pxor %xmm4, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562035200,18446744071562035200] +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm9, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm7 +; SSSE3-NEXT: pandn %xmm8, %xmm1 +; SSSE3-NEXT: por %xmm7, %xmm1 +; SSSE3-NEXT: movdqa %xmm3, %xmm0 +; SSSE3-NEXT: pxor %xmm4, %xmm0 +; SSSE3-NEXT: movdqa %xmm0, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm10, %xmm7 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm7, %xmm0 +; SSSE3-NEXT: pand %xmm0, %xmm3 +; SSSE3-NEXT: pandn %xmm8, %xmm0 +; SSSE3-NEXT: por %xmm3, %xmm0 +; SSSE3-NEXT: packssdw %xmm1, %xmm0 +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pxor %xmm4, %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pcmpgtd %xmm9, %xmm3 +; SSSE3-NEXT: 
pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm2 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm2, %xmm3 +; SSSE3-NEXT: pxor %xmm5, %xmm4 +; SSSE3-NEXT: movdqa %xmm4, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm9, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm9, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: pand %xmm2, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm5 +; SSSE3-NEXT: pandn %xmm8, %xmm1 +; SSSE3-NEXT: por %xmm5, %xmm1 +; SSSE3-NEXT: packssdw %xmm3, %xmm1 +; SSSE3-NEXT: packssdw %xmm1, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_ssat_v8i64_v8i16: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm10 +; SSE41-NEXT: movapd {{.*#+}} xmm11 = [32767,32767] +; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm5, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147516415,2147516415] +; SSE41-NEXT: movdqa %xmm4, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 +; SSE41-NEXT: movdqa %xmm4, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm7, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: movapd %xmm11, %xmm8 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm8 +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: pxor %xmm5, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm4, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: movapd %xmm11, %xmm9 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm9 +; SSE41-NEXT: movdqa %xmm10, %xmm0 +; SSE41-NEXT: pxor %xmm5, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: movapd %xmm11, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm2 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm5, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] +; SSE41-NEXT: pand %xmm3, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm11 +; SSE41-NEXT: movapd {{.*#+}} xmm3 = [18446744073709518848,18446744073709518848] +; SSE41-NEXT: movapd %xmm11, %xmm1 +; SSE41-NEXT: xorpd %xmm5, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744071562035200,18446744071562035200] +; SSE41-NEXT: movapd %xmm1, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm3, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm6 +; SSE41-NEXT: movapd %xmm2, %xmm1 +; SSE41-NEXT: xorpd %xmm5, %xmm1 +; SSE41-NEXT: movapd %xmm1, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] +; SSE41-NEXT: pand %xmm7, %xmm0 +; SSE41-NEXT: por %xmm1, 
%xmm0 +; SSE41-NEXT: movapd %xmm3, %xmm1 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 +; SSE41-NEXT: packssdw %xmm6, %xmm1 +; SSE41-NEXT: movapd %xmm9, %xmm2 +; SSE41-NEXT: xorpd %xmm5, %xmm2 +; SSE41-NEXT: movapd %xmm2, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: movapd %xmm3, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm2 +; SSE41-NEXT: xorpd %xmm8, %xmm5 +; SSE41-NEXT: movapd %xmm5, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm6 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm5 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm5, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3 +; SSE41-NEXT: packssdw %xmm2, %xmm3 +; SSE41-NEXT: packssdw %xmm3, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_ssat_v8i64_v8i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32767,32767] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm8 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; AVX1-NEXT: vpcmpgtq %xmm6, %xmm3, %xmm7 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848] +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm9 +; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm3, %xmm6 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm7 +; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm5 +; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm3 +; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm4, %xmm2 +; AVX1-NEXT: vblendvpd %xmm9, %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v8i64_v8i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [32767,32767,32767,32767] +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] +; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc_ssat_v8i64_v8i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsqw %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %1 = icmp slt <8 x i64> %a0, + %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> + %3 = icmp sgt <8 x i64> %2, + %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> + %5 = trunc <8 x i64> %4 to <8 x i16> + ret <8 x i16> %5 +} + +define <4 x i16> @trunc_ssat_v4i32_v4i16(<4 x i32> %a0) { +; SSE-LABEL: trunc_ssat_v4i32_v4i16: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: retq +; +; 
AVX-LABEL: trunc_ssat_v4i32_v4i16: +; AVX: # %bb.0: +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v4i32_v4i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v4i32_v4i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v4i32_v4i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = icmp sgt <4 x i32> %2, + %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> + %5 = trunc <4 x i32> %4 to <4 x i16> + ret <4 x i16> %5 +} + +define void @trunc_ssat_v4i32_v4i16_store(<4 x i32> %a0, <4 x i16> *%p1) { +; SSE-LABEL: trunc_ssat_v4i32_v4i16_store: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm0, %xmm0 +; SSE-NEXT: movq %xmm0, (%rdi) +; SSE-NEXT: retq +; +; AVX-LABEL: trunc_ssat_v4i32_v4i16_store: +; AVX: # %bb.0: +; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovq %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v4i32_v4i16_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v4i32_v4i16_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovsdw %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v4i32_v4i16_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i16_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovsdw %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = icmp sgt <4 x i32> %2, + %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> + %5 = trunc <4 x i32> %4 to <4 x i16> + store <4 x i16> %5, <4 x i16> *%p1 + ret void +} + +define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) { +; SSE-LABEL: trunc_ssat_v8i32_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc_ssat_v8i32_v8i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v8i32_v8i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v8i32_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovsdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v8i32_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: 
retq +; +; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovsdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp slt <8 x i32> %a0, + %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> + %3 = icmp sgt <8 x i32> %2, + %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> + %5 = trunc <8 x i32> %4 to <8 x i16> + ret <8 x i16> %5 +} + +define <16 x i16> @trunc_ssat_v16i32_v16i16(<16 x i32> %a0) { +; SSE-LABEL: trunc_ssat_v16i32_v16i16: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc_ssat_v16i32_v16i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v16i32_v16i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc_ssat_v16i32_v16i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsdw %zmm0, %ymm0 +; AVX512-NEXT: retq + %1 = icmp slt <16 x i32> %a0, + %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> + %3 = icmp sgt <16 x i32> %2, + %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> + %5 = trunc <16 x i32> %4 to <16 x i16> + ret <16 x i16> %5 +} + +; +; Signed saturation truncation to vXi8 +; + +define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) { +; SSE2-LABEL: trunc_ssat_v4i64_v4i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [127,127] +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483775,2147483775] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm5, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm4 +; SSE2-NEXT: por %xmm0, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] +; SSE2-NEXT: movdqa %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840] +; SSE2-NEXT: movdqa %xmm0, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm4 +; SSE2-NEXT: pandn %xmm8, %xmm0 +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 +; SSE2-NEXT: pshufd 
{{.*#+}} xmm4 = xmm1[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pandn %xmm8, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_ssat_v4i64_v4i8: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [127,127] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483775,2147483775] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm4 +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] +; SSSE3-NEXT: movdqa %xmm4, %xmm0 +; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840] +; SSSE3-NEXT: movdqa %xmm0, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: pand %xmm0, %xmm4 +; SSSE3-NEXT: pandn %xmm8, %xmm0 +; SSSE3-NEXT: por %xmm4, %xmm0 +; SSSE3-NEXT: pxor %xmm3, %xmm2 +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: por %xmm2, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm3 +; SSSE3-NEXT: pandn %xmm8, %xmm1 +; SSSE3-NEXT: por %xmm3, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_ssat_v4i64_v4i8: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483775,2147483775] +; SSE41-NEXT: 
movdqa %xmm6, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movapd {{.*#+}} xmm2 = [18446744073709551488,18446744073709551488] +; SSE41-NEXT: movapd %xmm4, %xmm1 +; SSE41-NEXT: xorpd %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] +; SSE41-NEXT: movapd %xmm1, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] +; SSE41-NEXT: pand %xmm7, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: movapd %xmm2, %xmm1 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: xorpd %xmm5, %xmm3 +; SSE41-NEXT: movapd %xmm3, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm0, %xmm2 +; SSE41-NEXT: pshufb %xmm0, %xmm1 +; SSE41-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_ssat_v4i64_v4i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488] +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v4i64_v4i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127] +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v4i64_v4i8: +; AVX512F: 
# %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovsqb %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovsqb %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = icmp sgt <4 x i64> %2, + %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> + %5 = trunc <4 x i64> %4 to <4 x i8> + ret <4 x i8> %5 +} + +define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { +; SSE2-LABEL: trunc_ssat_v4i64_v4i8_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [127,127] +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483775,2147483775] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm5, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm4 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm4 +; SSE2-NEXT: por %xmm0, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] +; SSE2-NEXT: movdqa %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840] +; SSE2-NEXT: movdqa %xmm0, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm4 +; SSE2-NEXT: pandn %xmm8, %xmm0 +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pandn %xmm8, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 
= [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movd %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_ssat_v4i64_v4i8_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [127,127] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm1, %xmm3 +; SSSE3-NEXT: pxor %xmm2, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483775,2147483775] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm1, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm4 +; SSSE3-NEXT: por %xmm0, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [18446744073709551488,18446744073709551488] +; SSSE3-NEXT: movdqa %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [18446744071562067840,18446744071562067840] +; SSSE3-NEXT: movdqa %xmm1, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm4 +; SSSE3-NEXT: pandn %xmm8, %xmm1 +; SSSE3-NEXT: por %xmm4, %xmm1 +; SSSE3-NEXT: pxor %xmm3, %xmm2 +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: por %xmm2, %xmm0 +; SSSE3-NEXT: pand %xmm0, %xmm3 +; SSSE3-NEXT: pandn %xmm8, %xmm0 +; SSSE3-NEXT: por %xmm3, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: movd %xmm1, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_ssat_v4i64_v4i8_store: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [2147483775,2147483775] +; SSE41-NEXT: movdqa %xmm6, %xmm5 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm5, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm5 +; 
SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm6, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 +; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] +; SSE41-NEXT: movapd %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm3, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] +; SSE41-NEXT: movapd %xmm2, %xmm7 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE41-NEXT: pand %xmm7, %xmm0 +; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: xorpd %xmm5, %xmm3 +; SSE41-NEXT: movapd %xmm3, %xmm4 +; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 +; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm0, %xmm1 +; SSE41-NEXT: pshufb %xmm0, %xmm2 +; SSE41-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE41-NEXT: movd %xmm2, (%rdi) +; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_ssat_v8i32_v8i16: +; AVX1-LABEL: trunc_ssat_v4i64_v4i8_store: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127] +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488] +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm2 +; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vmovd %xmm0, (%rdi) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX2-LABEL: trunc_ssat_v8i32_v8i16: +; AVX2-LABEL: trunc_ssat_v4i64_v4i8_store: ; AVX2: # %bb.0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127] +; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vmovd %xmm0, (%rdi) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512F-LABEL: trunc_ssat_v8i32_v8i16: +; 
AVX512F-LABEL: trunc_ssat_v4i64_v4i8_store: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528] -; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; -; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16: +; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8_store: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmovsdw %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovsqb %ymm0, (%rdi) ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; -; AVX512BW-LABEL: trunc_ssat_v8i32_v8i16: +; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528,4294934528] -; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; -; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16: +; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8_store: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpmovsdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmovsqb %ymm0, (%rdi) ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq - %1 = icmp slt <8 x i32> %a0, - %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> - %3 = icmp sgt <8 x i32> %2, - %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> - %5 = trunc <8 x i32> %4 to <8 x i16> - ret <8 x i16> %5 -} - -define <16 x i16> @trunc_ssat_v16i32_v16i16(<16 x i32> %a0) { -; SSE-LABEL: trunc_ssat_v16i32_v16i16: -; SSE: # %bb.0: -; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: movdqa %xmm2, %xmm1 -; SSE-NEXT: retq -; -; AVX1-LABEL: trunc_ssat_v16i32_v16i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: trunc_ssat_v16i32_v16i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX2-NEXT: retq -; -; AVX512-LABEL: trunc_ssat_v16i32_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovsdw %zmm0, %ymm0 -; AVX512-NEXT: retq - %1 = icmp slt <16 x i32> %a0, - %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> - %3 = icmp sgt <16 x i32> %2, - %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> - %5 = trunc <16 x i32> %4 to <16 x i16> - ret <16 x i16> %5 + %1 = icmp slt <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = icmp sgt <4 x i64> %2, + 
%4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> + %5 = trunc <4 x i64> %4 to <4 x i8> + store <4 x i8> %5, <4 x i8> *%p1 + ret void } -; -; Signed saturation truncation to v16i8 -; - define <8 x i8> @trunc_ssat_v8i64_v8i8(<8 x i64> %a0) { ; SSE2-LABEL: trunc_ssat_v8i64_v8i8: ; SSE2: # %bb.0: @@ -2770,6 +3927,208 @@ define <16 x i8> @trunc_ssat_v16i64_v16i8(<16 x i64> %a0) { ret <16 x i8> %5 } +define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) { +; SSE2-LABEL: trunc_ssat_v4i32_v4i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127] +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm2 +; SSE2-NEXT: pandn %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_ssat_v4i32_v4i8: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127] +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn %xmm1, %xmm2 +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0 +; SSSE3-NEXT: pand %xmm0, %xmm2 +; SSSE3-NEXT: pandn %xmm1, %xmm0 +; SSSE3-NEXT: por %xmm2, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_ssat_v4i32_v4i8: +; SSE41: # %bb.0: +; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_ssat_v4i32_v4i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v4i32_v4i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] +; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] +; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v4i32_v4i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] +; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] +; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] +; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 
= [4294967168,4294967168,4294967168,4294967168] +; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = icmp sgt <4 x i32> %2, + %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> + %5 = trunc <4 x i32> %4 to <4 x i8> + ret <4 x i8> %5 +} + +define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { +; SSE2-LABEL: trunc_ssat_v4i32_v4i8_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127] +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967168,4294967168,4294967168,4294967168] +; SSE2-NEXT: movdqa %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm0, %xmm1 +; SSE2-NEXT: movd %xmm1, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_ssat_v4i32_v4i8_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127] +; SSSE3-NEXT: movdqa %xmm1, %xmm2 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn %xmm1, %xmm2 +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4294967168,4294967168,4294967168,4294967168] +; SSSE3-NEXT: movdqa %xmm2, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm2 +; SSSE3-NEXT: pandn %xmm0, %xmm1 +; SSSE3-NEXT: por %xmm2, %xmm1 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movd %xmm1, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_ssat_v4i32_v4i8_store: +; SSE41: # %bb.0: +; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movd %xmm0, (%rdi) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_ssat_v4i32_v4i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vmovd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v4i32_v4i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] +; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] +; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vmovd %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v4i32_v4i8_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] +; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] +; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = 
xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovsdb %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] +; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] +; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovsdb %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp slt <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = icmp sgt <4 x i32> %2, + %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> + %5 = trunc <4 x i32> %4 to <4 x i8> + store <4 x i8> %5, <4 x i8> *%p1 + ret void +} + define <8 x i8> @trunc_ssat_v8i32_v8i8(<8 x i32> %a0) { ; SSE-LABEL: trunc_ssat_v8i32_v8i8: ; SSE: # %bb.0: @@ -2795,11 +4154,9 @@ define <8 x i8> @trunc_ssat_v8i32_v8i8(<8 x i32> %a0) { ; ; AVX512F-LABEL: trunc_ssat_v8i32_v8i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127] -; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168] -; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -2811,11 +4168,9 @@ define <8 x i8> @trunc_ssat_v8i32_v8i8(<8 x i32> %a0) { ; ; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127] -; AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168] -; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -2860,11 +4215,9 @@ define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512F-LABEL: trunc_ssat_v8i32_v8i8_store: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127] -; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168] -; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2877,11 +4230,9 @@ define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [127,127,127,127,127,127,127,127] -; 
AVX512BW-NEXT: vpminsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168,4294967168] -; AVX512BW-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2940,6 +4291,90 @@ define <16 x i8> @trunc_ssat_v16i32_v16i8(<16 x i32> %a0) { ret <16 x i8> %5 } +define <8 x i8> @trunc_ssat_v8i16_v8i8(<8 x i16> %a0) { +; SSE-LABEL: trunc_ssat_v8i16_v8i8: +; SSE: # %bb.0: +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: trunc_ssat_v8i16_v8i8: +; AVX: # %bb.0: +; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v8i16_v8i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v8i16_v8i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v8i16_v8i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v8i16_v8i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512BWVL-NEXT: retq + %1 = icmp slt <8 x i16> %a0, + %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> + %3 = icmp sgt <8 x i16> %2, + %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> + %5 = trunc <8 x i16> %4 to <8 x i8> + ret <8 x i8> %5 +} + +define void @trunc_ssat_v8i16_v8i8_store(<8 x i16> %a0, <8 x i8> *%p1) { +; SSE-LABEL: trunc_ssat_v8i16_v8i8_store: +; SSE: # %bb.0: +; SSE-NEXT: packsswb %xmm0, %xmm0 +; SSE-NEXT: movq %xmm0, (%rdi) +; SSE-NEXT: retq +; +; AVX-LABEL: trunc_ssat_v8i16_v8i8_store: +; AVX: # %bb.0: +; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovq %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_ssat_v8i16_v8i8_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_ssat_v8i16_v8i8_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovq %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_ssat_v8i16_v8i8_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_ssat_v8i16_v8i8_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovswb %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp slt <8 x i16> %a0, + %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> + %3 = icmp sgt <8 x i16> %2, + %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> + %5 = trunc <8 x i16> %4 to <8 x i8> + store <8 x i8> %5, <8 x i8> *%p1 + ret void +} + define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) { ; SSE-LABEL: trunc_ssat_v16i16_v16i8: ; SSE: # %bb.0: @@ -2976,10 +4411,8 @@ define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) { ; ; AVX512BW-LABEL: trunc_ssat_v16i16_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed 
$ymm0 +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -3049,3 +4482,54 @@ define <32 x i8> @trunc_ssat_v32i16_v32i8(<32 x i16> %a0) { %5 = trunc <32 x i16> %4 to <32 x i8> ret <32 x i8> %5 } + +define <32 x i8> @trunc_ssat_v32i32_v32i8(<32 x i32> %a0) { +; SSE-LABEL: trunc_ssat_v32i32_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: packssdw %xmm3, %xmm2 +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: packsswb %xmm2, %xmm0 +; SSE-NEXT: packssdw %xmm7, %xmm6 +; SSE-NEXT: packssdw %xmm5, %xmm4 +; SSE-NEXT: packsswb %xmm6, %xmm4 +; SSE-NEXT: movdqa %xmm4, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc_ssat_v32i32_v32i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 +; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 +; AVX1-NEXT: vpackssdw %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_ssat_v32i32_v32i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3] +; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc_ssat_v32i32_v32i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsdb %zmm0, %xmm0 +; AVX512-NEXT: vpmovsdb %zmm1, %xmm1 +; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: retq + %1 = icmp slt <32 x i32> %a0, + %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> + %3 = icmp sgt <32 x i32> %2, + %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> + %5 = trunc <32 x i32> %4 to <32 x i8> + ret <32 x i8> %5 +} diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll index 7489d393585ff..cfed54fe040f1 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -447,6 +447,399 @@ define <8 x i32> @trunc_usat_v8i64_v8i32(<8 x i64> %a0) { ; Unsigned saturation truncation to vXi16 ; +define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) { +; SSE2-LABEL: trunc_usat_v4i64_v4i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002324991,9223372039002324991] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: pandn %xmm2, %xmm6 +; SSE2-NEXT: por %xmm0, %xmm6 +; SSE2-NEXT: pxor %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm5, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; 
SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v4i64_v4i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: pxor %xmm3, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002324991,9223372039002324991] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm0 +; SSSE3-NEXT: pandn %xmm2, %xmm6 +; SSSE3-NEXT: por %xmm0, %xmm6 +; SSSE3-NEXT: pxor %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm5, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: por %xmm3, %xmm0 +; SSSE3-NEXT: pand %xmm0, %xmm1 +; SSSE3-NEXT: pandn %xmm2, %xmm0 +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v4i64_v4i16: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm3 = [65535,65535] +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] +; SSE41-NEXT: pxor %xmm4, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002324991,9223372039002324991] +; SSE41-NEXT: movdqa %xmm5, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 +; SSE41-NEXT: movdqa %xmm5, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm3, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE41-NEXT: pxor %xmm1, %xmm4 +; SSE41-NEXT: movdqa %xmm5, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm2 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm5 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: por %xmm5, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v4i64_v4i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; 
AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [65535,65535] +; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1 +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc_usat_v4i64_v4i16: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm1 = [65535,65535,65535,65535] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-SLOW-NEXT: vpxor %ymm2, %ymm0, %ymm2 +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854841343,9223372036854841343,9223372036854841343,9223372036854841343] +; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: trunc_usat_v4i64_v4i16: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm1 = [65535,65535,65535,65535] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-FAST-NEXT: vpxor %ymm2, %ymm0, %ymm2 +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854841343,9223372036854841343,9223372036854841343,9223372036854841343] +; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-FAST-NEXT: vzeroupper +; AVX2-FAST-NEXT: retq +; +; AVX512F-LABEL: trunc_usat_v4i64_v4i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v4i64_v4i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovusqw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v4i64_v4i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovusqw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp ult <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = trunc <4 x i64> %2 
to <4 x i16> + ret <4 x i16> %3 +} + +define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { +; SSE2-LABEL: trunc_usat_v4i64_v4i16_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: pxor %xmm3, %xmm4 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002324991,9223372039002324991] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: pandn %xmm2, %xmm6 +; SSE2-NEXT: por %xmm0, %xmm6 +; SSE2-NEXT: pxor %xmm1, %xmm3 +; SSE2-NEXT: movdqa %xmm5, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movq %xmm1, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v4i64_v4i16_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535] +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: pxor %xmm3, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002324991,9223372039002324991] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm0 +; SSSE3-NEXT: pandn %xmm2, %xmm6 +; SSSE3-NEXT: por %xmm0, %xmm6 +; SSSE3-NEXT: pxor %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm5, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: por %xmm3, %xmm0 +; SSSE3-NEXT: pand %xmm0, %xmm1 +; SSSE3-NEXT: pandn %xmm2, %xmm0 +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,2,2,3] +; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movq %xmm1, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v4i64_v4i16_store: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm3 = [65535,65535] +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] +; SSE41-NEXT: pxor %xmm4, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm5 = 
[9223372039002324991,9223372039002324991] +; SSE41-NEXT: movdqa %xmm5, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 +; SSE41-NEXT: movdqa %xmm5, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm3, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE41-NEXT: pxor %xmm1, %xmm4 +; SSE41-NEXT: movdqa %xmm5, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm2 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm5 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: por %xmm5, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,2,2,3] +; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; SSE41-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE41-NEXT: movq %xmm1, (%rdi) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v4i64_v4i16_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [65535,65535] +; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1 +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vmovq %xmm0, (%rdi) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc_usat_v4i64_v4i16_store: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm1 = [65535,65535,65535,65535] +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-SLOW-NEXT: vpxor %ymm2, %ymm0, %ymm2 +; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854841343,9223372036854841343,9223372036854841343,9223372036854841343] +; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-SLOW-NEXT: vmovq %xmm0, (%rdi) +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: trunc_usat_v4i64_v4i16_store: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm1 = [65535,65535,65535,65535] +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-FAST-NEXT: vpxor %ymm2, %ymm0, %ymm2 +; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854841343,9223372036854841343,9223372036854841343,9223372036854841343] +; AVX2-FAST-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; 
AVX2-FAST-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-FAST-NEXT: vmovq %xmm0, (%rdi) +; AVX2-FAST-NEXT: vzeroupper +; AVX2-FAST-NEXT: retq +; +; AVX512F-LABEL: trunc_usat_v4i64_v4i16_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v4i64_v4i16_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovusqw %ymm0, (%rdi) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v4i64_v4i16_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i16_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovusqw %ymm0, (%rdi) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp ult <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = trunc <4 x i64> %2 to <4 x i16> + store <4 x i16> %3, <4 x i16> *%p1 + ret void +} + define <8 x i16> @trunc_usat_v8i64_v8i16(<8 x i64> %a0) { ; SSE2-LABEL: trunc_usat_v8i64_v8i16: ; SSE2: # %bb.0: @@ -693,6 +1086,166 @@ define <8 x i16> @trunc_usat_v8i64_v8i16(<8 x i64> %a0) { ret <8 x i16> %3 } +define <4 x i16> @trunc_usat_v4i32_v4i16(<4 x i32> %a0) { +; SSE2-LABEL: trunc_usat_v4i32_v4i16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v4i32_v4i16: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: pxor %xmm0, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183] +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2 +; SSSE3-NEXT: por %xmm2, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v4i32_v4i16: +; SSE41: # %bb.0: +; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0 +; SSE41-NEXT: packusdw %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v4i32_v4i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v4i32_v4i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; 
AVX512F-LABEL: trunc_usat_v4i32_v4i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] +; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v4i32_v4i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v4i32_v4i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] +; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BWVL-NEXT: retq + %1 = icmp ult <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = trunc <4 x i32> %2 to <4 x i16> + ret <4 x i16> %3 +} + +define void @trunc_usat_v4i32_v4i16_store(<4 x i32> %a0, <4 x i16> *%p1) { +; SSE2-LABEL: trunc_usat_v4i32_v4i16_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: movq %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v4i32_v4i16_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: pxor %xmm0, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183] +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2 +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: movq %xmm2, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v4i32_v4i16_store: +; SSE41: # %bb.0: +; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0 +; SSE41-NEXT: packusdw %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rdi) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v4i32_v4i16_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovq %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v4i32_v4i16_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_usat_v4i32_v4i16_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] +; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v4i32_v4i16_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovusdw %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v4i32_v4i16_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = 
[65535,65535,65535,65535] +; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovusdw %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp ult <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = trunc <4 x i32> %2 to <4 x i16> + store <4 x i16> %3, <4 x i16> *%p1 + ret void +} + define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) { ; SSE2-LABEL: trunc_usat_v8i32_v8i16: ; SSE2: # %bb.0: @@ -892,54 +1445,407 @@ define <16 x i16> @trunc_usat_v16i32_v16i16(<16 x i32> %a0) { ; SSSE3-NEXT: packssdw %xmm2, %xmm1 ; SSSE3-NEXT: retq ; -; SSE41-LABEL: trunc_usat_v16i32_v16i16: +; SSE41-LABEL: trunc_usat_v16i32_v16i16: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535] +; SSE41-NEXT: pminud %xmm4, %xmm3 +; SSE41-NEXT: pminud %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pminud %xmm4, %xmm1 +; SSE41-NEXT: pminud %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v16i32_v16i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [65535,65535,65535,65535] +; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpminud %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v16i32_v16i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX2-NEXT: vpminud %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc_usat_v16i32_v16i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovusdw %zmm0, %ymm0 +; AVX512-NEXT: retq + %1 = icmp ult <16 x i32> %a0, + %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> + %3 = trunc <16 x i32> %2 to <16 x i16> + ret <16 x i16> %3 +} + +; +; Unsigned saturation truncation to vXi8 +; + +define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) { +; SSE2-LABEL: trunc_usat_v4i64_v4i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255] +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259711,9223372039002259711] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm2 +; SSE2-NEXT: pandn %xmm8, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm4 +; SSE2-NEXT: movdqa %xmm5, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: pand %xmm3, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: por 
%xmm4, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm2 +; SSE2-NEXT: por %xmm1, %xmm2 +; SSE2-NEXT: pand %xmm8, %xmm2 +; SSE2-NEXT: pand %xmm8, %xmm0 +; SSE2-NEXT: packuswb %xmm2, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v4i64_v4i8: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255] +; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: pxor %xmm3, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259711,9223372039002259711] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm0 +; SSSE3-NEXT: pandn %xmm2, %xmm6 +; SSSE3-NEXT: por %xmm6, %xmm0 +; SSSE3-NEXT: pxor %xmm1, %xmm3 +; SSSE3-NEXT: movdqa %xmm5, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm6, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm3, %xmm4 +; SSSE3-NEXT: pand %xmm4, %xmm1 +; SSSE3-NEXT: pandn %xmm2, %xmm4 +; SSSE3-NEXT: por %xmm1, %xmm4 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSSE3-NEXT: pshufb %xmm1, %xmm4 +; SSSE3-NEXT: pshufb %xmm1, %xmm0 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v4i64_v4i8: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] +; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] +; SSE41-NEXT: pxor %xmm5, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259711,9223372039002259711] +; SSE41-NEXT: movdqa %xmm6, %xmm3 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm6, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm3, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm4, %xmm3 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3 +; SSE41-NEXT: pxor %xmm1, %xmm5 +; SSE41-NEXT: movdqa %xmm6, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm2 +; SSE41-NEXT: pcmpgtd %xmm5, %xmm6 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: por %xmm6, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm0, %xmm4 +; SSE41-NEXT: pshufb %xmm0, %xmm3 +; SSE41-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v4i64_v4i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [255,255] +; 
AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v4i64_v4i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,255,255] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854776063,9223372036854776063,9223372036854776063,9223372036854776063] +; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_usat_v4i64_v4i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v4i64_v4i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovusqb %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v4i64_v4i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovusqb %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp ult <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = trunc <4 x i64> %2 to <4 x i8> + ret <4 x i8> %3 +} + +define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { +; SSE2-LABEL: trunc_usat_v4i64_v4i8_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255] +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259711,9223372039002259711] +; SSE2-NEXT: movdqa %xmm5, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm6 +; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm7, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSE2-NEXT: por %xmm2, %xmm3 +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: pandn %xmm8, %xmm3 +; SSE2-NEXT: por %xmm0, %xmm3 +; SSE2-NEXT: pxor %xmm1, %xmm4 +; SSE2-NEXT: movdqa %xmm5, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSE2-NEXT: pand %xmm2, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: pandn %xmm8, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; 
SSE2-NEXT: pand %xmm8, %xmm0 +; SSE2-NEXT: pand %xmm8, %xmm3 +; SSE2-NEXT: packuswb %xmm0, %xmm3 +; SSE2-NEXT: packuswb %xmm0, %xmm3 +; SSE2-NEXT: packuswb %xmm0, %xmm3 +; SSE2-NEXT: movd %xmm3, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v4i64_v4i8_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [255,255] +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] +; SSSE3-NEXT: movdqa %xmm0, %xmm3 +; SSSE3-NEXT: pxor %xmm4, %xmm3 +; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259711,9223372039002259711] +; SSSE3-NEXT: movdqa %xmm5, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6 +; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm7, %xmm2 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] +; SSSE3-NEXT: por %xmm2, %xmm3 +; SSSE3-NEXT: pand %xmm3, %xmm0 +; SSSE3-NEXT: pandn %xmm8, %xmm3 +; SSSE3-NEXT: por %xmm0, %xmm3 +; SSSE3-NEXT: pxor %xmm1, %xmm4 +; SSSE3-NEXT: movdqa %xmm5, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm0 +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] +; SSSE3-NEXT: pcmpeqd %xmm5, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] +; SSSE3-NEXT: pand %xmm2, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm0 +; SSSE3-NEXT: pand %xmm0, %xmm1 +; SSSE3-NEXT: pandn %xmm8, %xmm0 +; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSSE3-NEXT: pshufb %xmm1, %xmm0 +; SSSE3-NEXT: pshufb %xmm1, %xmm3 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; SSSE3-NEXT: movd %xmm3, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v4i64_v4i8_store: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535] -; SSE41-NEXT: pminud %xmm4, %xmm3 -; SSE41-NEXT: pminud %xmm4, %xmm2 -; SSE41-NEXT: packusdw %xmm3, %xmm2 -; SSE41-NEXT: pminud %xmm4, %xmm1 -; SSE41-NEXT: pminud %xmm4, %xmm0 -; SSE41-NEXT: packusdw %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm2, %xmm1 +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movapd {{.*#+}} xmm3 = [255,255] +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] +; SSE41-NEXT: pxor %xmm4, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259711,9223372039002259711] +; SSE41-NEXT: movdqa %xmm5, %xmm6 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 +; SSE41-NEXT: movdqa %xmm5, %xmm7 +; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] +; SSE41-NEXT: pand %xmm6, %xmm0 +; SSE41-NEXT: por %xmm7, %xmm0 +; SSE41-NEXT: movapd %xmm3, %xmm6 +; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm6 +; SSE41-NEXT: pxor %xmm1, %xmm4 +; SSE41-NEXT: movdqa %xmm5, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm4, %xmm2 +; SSE41-NEXT: pcmpgtd %xmm4, %xmm5 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: por %xmm5, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm0, %xmm3 +; SSE41-NEXT: pshufb %xmm0, %xmm6 +; SSE41-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3] +; SSE41-NEXT: movd %xmm6, (%rdi) ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_usat_v16i32_v16i16: +; AVX1-LABEL: trunc_usat_v4i64_v4i8_store: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = 
[65535,65535,65535,65535] -; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpminud %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] +; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [255,255] +; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vmovd %xmm0, (%rdi) +; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX2-LABEL: trunc_usat_v16i32_v16i16: +; AVX2-LABEL: trunc_usat_v4i64_v4i8_store: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535] -; AVX2-NEXT: vpminud %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,255,255] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854776063,9223372036854776063,9223372036854776063,9223372036854776063] +; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX2-NEXT: vmovd %xmm0, (%rdi) +; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: trunc_usat_v16i32_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovusdw %zmm0, %ymm0 -; AVX512-NEXT: retq - %1 = icmp ult <16 x i32> %a0, - %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> - %3 = trunc <16 x i32> %2 to <16 x i16> - ret <16 x i16> %3 -} - +; AVX512F-LABEL: trunc_usat_v4i64_v4i8_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v4i64_v4i8_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovusqb %ymm0, (%rdi) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq ; -; Unsigned saturation truncation to v16i8 +; AVX512BW-LABEL: trunc_usat_v4i64_v4i8_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq ; 
+; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i8_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovusqb %ymm0, (%rdi) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq + %1 = icmp ult <4 x i64> %a0, + %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> + %3 = trunc <4 x i64> %2 to <4 x i8> + store <4 x i8> %3, <4 x i8> *%p1 + ret void +} define <8 x i8> @trunc_usat_v8i64_v8i8(<8 x i64> %a0) { ; SSE2-LABEL: trunc_usat_v8i64_v8i8: @@ -1887,6 +2793,167 @@ define <16 x i8> @trunc_usat_v16i64_v16i8(<16 x i64> %a0) { ret <16 x i8> %3 } +define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) { +; SSE2-LABEL: trunc_usat_v4i32_v4i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm0, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483903,2147483903,2147483903,2147483903] +; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pandn {{.*}}(%rip), %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v4i32_v4i8: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: pxor %xmm0, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483903,2147483903,2147483903,2147483903] +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2 +; SSSE3-NEXT: por %xmm2, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v4i32_v4i8: +; SSE41: # %bb.0: +; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v4i32_v4i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v4i32_v4i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_usat_v4i32_v4i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v4i32_v4i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v4i32_v4i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: retq + %1 = icmp ult <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = trunc <4 x i32> %2 to <4 x i8> + ret <4 x i8> %3 +} + +define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> 
*%p1) { +; SSE2-LABEL: trunc_usat_v4i32_v4i8_store: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm0, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483903,2147483903,2147483903,2147483903] +; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pandn {{.*}}(%rip), %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 +; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: packuswb %xmm0, %xmm2 +; SSE2-NEXT: movd %xmm2, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v4i32_v4i8_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: pxor %xmm0, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483903,2147483903,2147483903,2147483903] +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 +; SSSE3-NEXT: pand %xmm2, %xmm0 +; SSSE3-NEXT: pandn {{.*}}(%rip), %xmm2 +; SSSE3-NEXT: por %xmm0, %xmm2 +; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movd %xmm2, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v4i32_v4i8_store: +; SSE41: # %bb.0: +; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movd %xmm0, (%rdi) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v4i32_v4i8_store: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vmovd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v4i32_v4i8_store: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vmovd %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc_usat_v4i32_v4i8_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v4i32_v4i8_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovusdb %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v4i32_v4i8_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] +; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i8_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovusdb %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp ult <4 x i32> %a0, + %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> + %3 = trunc <4 x i32> %2 to <4 x i8> + store <4 x i8> %3, <4 x i8> *%p1 + ret void +} + define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) { ; SSE2-LABEL: trunc_usat_v8i32_v8i8: ; SSE2: # %bb.0: @@ -2247,6 +3314,109 @@ define <16 x i8> @trunc_usat_v16i32_v16i8(<16 x i32> %a0) { ret <16 x i8> %3 } +define <8 x i8> @trunc_usat_v8i16_v8i8(<8 x i16> %a0) { +; SSE2-LABEL: trunc_usat_v8i16_v8i8: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pminsw {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v8i16_v8i8: +; SSSE3: # %bb.0: 
+; SSSE3-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: pminsw {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v8i16_v8i8: +; SSE41: # %bb.0: +; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc_usat_v8i16_v8i8: +; AVX: # %bb.0: +; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: trunc_usat_v8i16_v8i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: retq + %1 = icmp ult <8 x i16> %a0, + %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> + %3 = trunc <8 x i16> %2 to <8 x i8> + ret <8 x i8> %3 +} + +define void @trunc_usat_v8i16_v8i8_store(<8 x i16> %a0, <8 x i8> *%p1) { +; SSE2-LABEL: trunc_usat_v8i16_v8i8_store: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pminsw {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movq %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v8i16_v8i8_store: +; SSSE3: # %bb.0: +; SSSE3-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: pminsw {{.*}}(%rip), %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movq %xmm0, (%rdi) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v8i16_v8i8_store: +; SSE41: # %bb.0: +; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rdi) +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc_usat_v8i16_v8i8_store: +; AVX: # %bb.0: +; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovq %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc_usat_v8i16_v8i8_store: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v8i16_v8i8_store: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovq %xmm0, (%rdi) +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v8i16_v8i8_store: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v8i16_v8i8_store: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovuswb %xmm0, (%rdi) +; AVX512BWVL-NEXT: retq + %1 = icmp ult <8 x i16> %a0, + %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> + %3 = trunc <8 x i16> %2 to <8 x i8> + store <8 x i8> %3, <8 x i8> *%p1 + ret void +} + define <16 x i8> @trunc_usat_v16i16_v16i8(<16 x i16> %a0) { ; SSE2-LABEL: trunc_usat_v16i16_v16i8: ; SSE2: # %bb.0: @@ -2453,3 +3623,211 @@ define <32 x i8> @trunc_usat_v32i16_v32i8(<32 x i16> %a0) { %3 = trunc <32 x i16> %2 to <32 x i8> ret <32 x i8> %3 } + +define <32 x i8> @trunc_usat_v32i32_v32i8(<32 x i32> %a0) { +; SSE2-LABEL: trunc_usat_v32i32_v32i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm1, %xmm8 +; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [255,255,255,255] +; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm5, %xmm1 +; SSE2-NEXT: pxor %xmm11, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm9 = 
[2147483903,2147483903,2147483903,2147483903] +; SSE2-NEXT: movdqa %xmm9, %xmm12 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm12 +; SSE2-NEXT: pand %xmm12, %xmm5 +; SSE2-NEXT: pandn %xmm10, %xmm12 +; SSE2-NEXT: por %xmm5, %xmm12 +; SSE2-NEXT: movdqa %xmm4, %xmm5 +; SSE2-NEXT: pxor %xmm11, %xmm5 +; SSE2-NEXT: movdqa %xmm9, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm4 +; SSE2-NEXT: pandn %xmm10, %xmm1 +; SSE2-NEXT: por %xmm4, %xmm1 +; SSE2-NEXT: packuswb %xmm12, %xmm1 +; SSE2-NEXT: movdqa %xmm7, %xmm4 +; SSE2-NEXT: pxor %xmm11, %xmm4 +; SSE2-NEXT: movdqa %xmm9, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm5 +; SSE2-NEXT: pand %xmm5, %xmm7 +; SSE2-NEXT: pandn %xmm10, %xmm5 +; SSE2-NEXT: por %xmm7, %xmm5 +; SSE2-NEXT: movdqa %xmm6, %xmm4 +; SSE2-NEXT: pxor %xmm11, %xmm4 +; SSE2-NEXT: movdqa %xmm9, %xmm7 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm7 +; SSE2-NEXT: pand %xmm7, %xmm6 +; SSE2-NEXT: pandn %xmm10, %xmm7 +; SSE2-NEXT: por %xmm6, %xmm7 +; SSE2-NEXT: packuswb %xmm5, %xmm7 +; SSE2-NEXT: packuswb %xmm7, %xmm1 +; SSE2-NEXT: movdqa %xmm8, %xmm4 +; SSE2-NEXT: pxor %xmm11, %xmm4 +; SSE2-NEXT: movdqa %xmm9, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm5 +; SSE2-NEXT: pand %xmm5, %xmm8 +; SSE2-NEXT: pandn %xmm10, %xmm5 +; SSE2-NEXT: por %xmm8, %xmm5 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: pxor %xmm11, %xmm4 +; SSE2-NEXT: movdqa %xmm9, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: pandn %xmm10, %xmm6 +; SSE2-NEXT: por %xmm6, %xmm0 +; SSE2-NEXT: packuswb %xmm5, %xmm0 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pxor %xmm11, %xmm4 +; SSE2-NEXT: movdqa %xmm9, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm5 +; SSE2-NEXT: pand %xmm5, %xmm3 +; SSE2-NEXT: pandn %xmm10, %xmm5 +; SSE2-NEXT: por %xmm3, %xmm5 +; SSE2-NEXT: pxor %xmm2, %xmm11 +; SSE2-NEXT: pcmpgtd %xmm11, %xmm9 +; SSE2-NEXT: pand %xmm9, %xmm2 +; SSE2-NEXT: pandn %xmm10, %xmm9 +; SSE2-NEXT: por %xmm2, %xmm9 +; SSE2-NEXT: packuswb %xmm5, %xmm9 +; SSE2-NEXT: packuswb %xmm9, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc_usat_v32i32_v32i8: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movdqa %xmm1, %xmm8 +; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [255,255,255,255] +; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648] +; SSSE3-NEXT: movdqa %xmm5, %xmm1 +; SSSE3-NEXT: pxor %xmm11, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903] +; SSSE3-NEXT: movdqa %xmm9, %xmm12 +; SSSE3-NEXT: pcmpgtd %xmm1, %xmm12 +; SSSE3-NEXT: pand %xmm12, %xmm5 +; SSSE3-NEXT: pandn %xmm10, %xmm12 +; SSSE3-NEXT: por %xmm5, %xmm12 +; SSSE3-NEXT: movdqa %xmm4, %xmm5 +; SSSE3-NEXT: pxor %xmm11, %xmm5 +; SSSE3-NEXT: movdqa %xmm9, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1 +; SSSE3-NEXT: pand %xmm1, %xmm4 +; SSSE3-NEXT: pandn %xmm10, %xmm1 +; SSSE3-NEXT: por %xmm4, %xmm1 +; SSSE3-NEXT: packuswb %xmm12, %xmm1 +; SSSE3-NEXT: movdqa %xmm7, %xmm4 +; SSSE3-NEXT: pxor %xmm11, %xmm4 +; SSSE3-NEXT: movdqa %xmm9, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5 +; SSSE3-NEXT: pand %xmm5, %xmm7 +; SSSE3-NEXT: pandn %xmm10, %xmm5 +; SSSE3-NEXT: por %xmm7, %xmm5 +; SSSE3-NEXT: movdqa %xmm6, %xmm4 +; SSSE3-NEXT: pxor %xmm11, %xmm4 +; SSSE3-NEXT: movdqa %xmm9, %xmm7 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm7 +; SSSE3-NEXT: pand %xmm7, %xmm6 +; SSSE3-NEXT: pandn %xmm10, %xmm7 +; SSSE3-NEXT: por %xmm6, %xmm7 +; SSSE3-NEXT: packuswb %xmm5, %xmm7 +; SSSE3-NEXT: packuswb %xmm7, %xmm1 +; SSSE3-NEXT: movdqa %xmm8, %xmm4 +; SSSE3-NEXT: pxor %xmm11, %xmm4 +; SSSE3-NEXT: movdqa %xmm9, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm4, 
%xmm5 +; SSSE3-NEXT: pand %xmm5, %xmm8 +; SSSE3-NEXT: pandn %xmm10, %xmm5 +; SSSE3-NEXT: por %xmm8, %xmm5 +; SSSE3-NEXT: movdqa %xmm0, %xmm4 +; SSSE3-NEXT: pxor %xmm11, %xmm4 +; SSSE3-NEXT: movdqa %xmm9, %xmm6 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6 +; SSSE3-NEXT: pand %xmm6, %xmm0 +; SSSE3-NEXT: pandn %xmm10, %xmm6 +; SSSE3-NEXT: por %xmm6, %xmm0 +; SSSE3-NEXT: packuswb %xmm5, %xmm0 +; SSSE3-NEXT: movdqa %xmm3, %xmm4 +; SSSE3-NEXT: pxor %xmm11, %xmm4 +; SSSE3-NEXT: movdqa %xmm9, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5 +; SSSE3-NEXT: pand %xmm5, %xmm3 +; SSSE3-NEXT: pandn %xmm10, %xmm5 +; SSSE3-NEXT: por %xmm3, %xmm5 +; SSSE3-NEXT: pxor %xmm2, %xmm11 +; SSSE3-NEXT: pcmpgtd %xmm11, %xmm9 +; SSSE3-NEXT: pand %xmm9, %xmm2 +; SSSE3-NEXT: pandn %xmm10, %xmm9 +; SSSE3-NEXT: por %xmm2, %xmm9 +; SSSE3-NEXT: packuswb %xmm5, %xmm9 +; SSSE3-NEXT: packuswb %xmm9, %xmm0 +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc_usat_v32i32_v32i8: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [255,255,255,255] +; SSE41-NEXT: pminud %xmm8, %xmm5 +; SSE41-NEXT: pminud %xmm8, %xmm4 +; SSE41-NEXT: packusdw %xmm5, %xmm4 +; SSE41-NEXT: pminud %xmm8, %xmm7 +; SSE41-NEXT: pminud %xmm8, %xmm6 +; SSE41-NEXT: packusdw %xmm7, %xmm6 +; SSE41-NEXT: packuswb %xmm6, %xmm4 +; SSE41-NEXT: pminud %xmm8, %xmm1 +; SSE41-NEXT: pminud %xmm8, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: pminud %xmm8, %xmm3 +; SSE41-NEXT: pminud %xmm8, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: movdqa %xmm4, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_usat_v32i32_v32i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255] +; AVX1-NEXT: vpminud %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpminud %xmm5, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpminud %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpminud %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vpackusdw %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vpminud %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vpminud %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2 +; AVX1-NEXT: vpminud %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpminud %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc_usat_v32i32_v32i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpminud %ymm4, %ymm1, %ymm1 +; AVX2-NEXT: vpminud %ymm4, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpminud %ymm4, %ymm3, %ymm1 +; AVX2-NEXT: vpminud %ymm4, %ymm2, %ymm2 +; AVX2-NEXT: vpackusdw %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc_usat_v32i32_v32i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovusdb %zmm0, %xmm0 +; AVX512-NEXT: vpmovusdb %zmm1, %xmm1 +; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: retq + %1 = icmp ult <32 x i32> %a0, + %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> + %3 = trunc <32 x i32> %2 to <32 x i8> + ret <32 x i8> %3 +} diff --git a/llvm/test/DebugInfo/COFF/line-zero.ll 
b/llvm/test/DebugInfo/COFF/line-zero.ll new file mode 100644 index 0000000000000..3d34345645186 --- /dev/null +++ b/llvm/test/DebugInfo/COFF/line-zero.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s | FileCheck %s + +; C++ source to regenerate: +; int main() { +; volatile int x; +; x = 1; +; #line 0 +; x = 2; +; #line 7 +; x = 3; +; } + + +; CHECK-LABEL: main: # @main +; CHECK: .cv_loc 0 1 1 0 # t.cpp:1:0 +; CHECK: .cv_loc 0 1 3 0 # t.cpp:3:0 +; CHECK: movl $1, 4(%rsp) +; CHECK-NOT: .cv_loc {{.*}} t.cpp:0:0 +; CHECK: movl $2, 4(%rsp) +; CHECK: .cv_loc 0 1 7 0 # t.cpp:7:0 +; CHECK: movl $3, 4(%rsp) +; CHECK: .cv_loc 0 1 8 0 # t.cpp:8:0 +; CHECK: xorl %eax, %eax +; CHECK: retq + +; ModuleID = 't.cpp' +source_filename = "t.cpp" +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.22.27905" + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 !dbg !8 { +entry: + %x = alloca i32, align 4 + %x.0.x.0..sroa_cast = bitcast i32* %x to i8*, !dbg !15 + call void @llvm.dbg.declare(metadata i32* %x, metadata !13, metadata !DIExpression()), !dbg !15 + store volatile i32 1, i32* %x, align 4, !dbg !16, !tbaa !17 + store volatile i32 2, i32* %x, align 4, !dbg !21, !tbaa !17 + store volatile i32 3, i32* %x, align 4, !dbg !22, !tbaa !17 + ret i32 0, !dbg !23 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 + +attributes #0 = { norecurse nounwind uwtable } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!7} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "t.cpp", directory: "C:\5Csrc\5Cllvm-project\5Cbuild", checksumkind: CSK_MD5, checksum: "8b6d53b166e6fa660f115eff7beedf3b") +!2 = !{} +!3 = !{i32 2, !"CodeView", i32 1} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 2} +!6 = !{i32 7, !"PIC Level", i32 2} +!7 = !{!"clang version 10.0.0"} +!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!9 = !DISubroutineType(types: !10) +!10 = !{!11} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13} +!13 = !DILocalVariable(name: "x", scope: !8, file: !1, line: 2, type: !14) +!14 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !11) +!15 = !DILocation(line: 2, scope: !8) +!16 = !DILocation(line: 3, scope: !8) +!17 = !{!18, !18, i64 0} +!18 = !{!"int", !19, i64 0} +!19 = !{!"omnipotent char", !20, i64 0} +!20 = !{!"Simple C++ TBAA"} +!21 = !DILocation(line: 0, scope: !8) +!22 = !DILocation(line: 7, scope: !8) +!23 = !DILocation(line: 8, scope: !8) diff --git a/llvm/test/DebugInfo/COFF/local-variables.ll b/llvm/test/DebugInfo/COFF/local-variables.ll index e15152050d811..5e96160f189bc 100644 --- a/llvm/test/DebugInfo/COFF/local-variables.ll +++ b/llvm/test/DebugInfo/COFF/local-variables.ll @@ -60,7 +60,7 @@ ; ASM: leaq 36(%rsp), %rcx ; ASM: [[else_end:\.Ltmp.*]]: ; ASM: .LBB0_3: # %if.end -; ASM: .cv_loc 0 1 0 0 # t.cpp:0:0 +; ASM: .cv_loc 0 1 17 1 # t.cpp:17:1 ; ASM: callq capture ; ASM: nop ; ASM: addq $56, %rsp diff --git 
a/llvm/test/DebugInfo/WebAssembly/dbg-value-move-clone.mir b/llvm/test/DebugInfo/WebAssembly/dbg-value-move-clone.mir index 1e10c136873c8..a8ca1a97029ff 100644 --- a/llvm/test/DebugInfo/WebAssembly/dbg-value-move-clone.mir +++ b/llvm/test/DebugInfo/WebAssembly/dbg-value-move-clone.mir @@ -60,6 +60,6 @@ body: | bb.1: CALL_VOID @foo, %1:i32, implicit-def dead $arguments, implicit $sp32, implicit $sp64 CALL_VOID @foo, %1:i32, implicit-def dead $arguments, implicit $sp32, implicit $sp64 - RETURN_VOID implicit-def dead $arguments + RETURN implicit-def dead $arguments ... diff --git a/llvm/test/DebugInfo/WebAssembly/dbg-value-move-reg-stackify.mir b/llvm/test/DebugInfo/WebAssembly/dbg-value-move-reg-stackify.mir index 627c99d5ddb77..af36dc12d2e2e 100644 --- a/llvm/test/DebugInfo/WebAssembly/dbg-value-move-reg-stackify.mir +++ b/llvm/test/DebugInfo/WebAssembly/dbg-value-move-reg-stackify.mir @@ -55,6 +55,6 @@ body: | %1:i32 = CALL_i32 @bar, implicit-def dead $arguments, implicit $sp32, implicit $sp64 DBG_VALUE %1:i32, $noreg, !12, !DIExpression(), debug-location !15; :357:12 line no:357 CALL_VOID @foo, %1:i32, implicit-def dead $arguments, implicit $sp32, implicit $sp64 - RETURN_VOID implicit-def dead $arguments + RETURN implicit-def dead $arguments ... diff --git a/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test b/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test index 32f2482b5117c..41893d32690a0 100644 --- a/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test +++ b/llvm/test/DebugInfo/X86/dwarfdump-debug-loclists.test @@ -10,7 +10,7 @@ # CHECK: .debug_loclists contents: # CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002c, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 -# CHECK-NEXT: 0x00000000: +# CHECK-NEXT: 0x0000000c: # CHECK-NEXT: [0x0000000000000000, 0x0000000000000010): DW_OP_breg5 RDI+0 # CHECK-NEXT: [0x0000000000000530, 0x0000000000000540): DW_OP_breg6 RBP-8, DW_OP_deref # CHECK-NEXT: [0x0000000000000700, 0x0000000000000710): DW_OP_breg5 RDI+0 diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_Arm64_relocations.s b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_Arm64_relocations.s new file mode 100644 index 0000000000000..b72e3b9d3a1dd --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch64/MachO_Arm64_relocations.s @@ -0,0 +1,339 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=arm64-apple-darwin19 -filetype=obj -o %t/macho_reloc.o %s +# RUN: llvm-jitlink -noexec -define-abs external_data=0xdeadbeef -define-abs external_func=0xcafef00d -check=%s %t/macho_reloc.o + + .section __TEXT,__text,regular,pure_instructions + + .p2align 2 +Lanon_func: + ret + + .globl named_func + .p2align 2 +named_func: + ret + +# Check ARM64_RELOC_BRANCH26 handling with a call to a local function. +# The branch instruction only encodes 26 bits of the 28-bit possible branch +# range, since the low 2 bits will always be zero. +# +# jitlink-check: decode_operand(test_local_call, 0)[25:0] = (named_func - test_local_call)[27:2] + .globl test_local_call + .p2align 2 +test_local_call: + bl named_func + + .globl _main + .p2align 2 +_main: + ret + +# Check ARM64_RELOC_GOTPAGE21 / ARM64_RELOC_GOTPAGEOFF12 handling with a +# reference to an external symbol. Validate both the reference to the GOT entry, +# and also the content of the GOT entry. +# +# For the GOTPAGE21/ADRP instruction we have the 21-bit delta to the 4k page +# containing the GOT entry for external_data. 
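+# +# As a purely hypothetical illustration (addresses invented for this comment, +# not checked by the test): if the ADRP at test_gotpage21 sat at 0x100001000 +# and the GOT entry for external_data at 0x100005010, the ADRP immediate would +# be the page delta (0x100005000 - 0x100001000) >> 12 = 4, and the 8-byte LDR +# below would encode the in-page offset 0x010 >> 3 = 2.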
+# +# For the GOTPAGEOFF/LDR instruction we have the 12-bit offset of the entry +# within the page. +# +# jitlink-check: *{8}(got_addr(macho_reloc.o, external_data)) = external_data +# jitlink-check: decode_operand(test_gotpage21, 1) = (got_addr(macho_reloc.o, external_data)[32:12] - test_gotpage21[32:12]) +# jitlink-check: decode_operand(test_gotpageoff12, 2) = got_addr(macho_reloc.o, external_data)[11:3] + .globl test_gotpage21 + .p2align 2 +test_gotpage21: + adrp x0, external_data@GOTPAGE + .globl test_gotpageoff12 +test_gotpageoff12: + ldr x0, [x0, external_data@GOTPAGEOFF] + +# Check ARM64_RELOC_PAGE21 / ARM64_RELOC_PAGEOFF12 handling with a reference to +# a local symbol. +# +# For the PAGE21/ADRP instruction we have the 21-bit delta to the 4k page +# containing the global. +# +# For the GOTPAGEOFF12 relocation we test the ADD instruction, all LDR/GPR +# variants and all LDR/Neon variants. +# +# jitlink-check: decode_operand(test_page21, 1) = (named_data[32:12] - test_page21[32:12]) +# jitlink-check: decode_operand(test_pageoff12add, 2) = named_data[11:0] +# jitlink-check: decode_operand(test_pageoff12gpr8, 2) = named_data[11:0] +# jitlink-check: decode_operand(test_pageoff12gpr16, 2) = named_data[11:1] +# jitlink-check: decode_operand(test_pageoff12gpr32, 2) = named_data[11:2] +# jitlink-check: decode_operand(test_pageoff12gpr64, 2) = named_data[11:3] +# jitlink-check: decode_operand(test_pageoff12neon8, 2) = named_data[11:0] +# jitlink-check: decode_operand(test_pageoff12neon16, 2) = named_data[11:1] +# jitlink-check: decode_operand(test_pageoff12neon32, 2) = named_data[11:2] +# jitlink-check: decode_operand(test_pageoff12neon64, 2) = named_data[11:3] +# jitlink-check: decode_operand(test_pageoff12neon128, 2) = named_data[11:4] + .globl test_page21 + .p2align 2 +test_page21: + adrp x0, named_data@PAGE + + .globl test_pageoff12add +test_pageoff12add: + add x0, x0, named_data@PAGEOFF + + .globl test_pageoff12gpr8 +test_pageoff12gpr8: + ldrb w0, [x0, named_data@PAGEOFF] + + .globl test_pageoff12gpr16 +test_pageoff12gpr16: + ldrh w0, [x0, named_data@PAGEOFF] + + .globl test_pageoff12gpr32 +test_pageoff12gpr32: + ldr w0, [x0, named_data@PAGEOFF] + + .globl test_pageoff12gpr64 +test_pageoff12gpr64: + ldr x0, [x0, named_data@PAGEOFF] + + .globl test_pageoff12neon8 +test_pageoff12neon8: + ldr b0, [x0, named_data@PAGEOFF] + + .globl test_pageoff12neon16 +test_pageoff12neon16: + ldr h0, [x0, named_data@PAGEOFF] + + .globl test_pageoff12neon32 +test_pageoff12neon32: + ldr s0, [x0, named_data@PAGEOFF] + + .globl test_pageoff12neon64 +test_pageoff12neon64: + ldr d0, [x0, named_data@PAGEOFF] + + .globl test_pageoff12neon128 +test_pageoff12neon128: + ldr q0, [x0, named_data@PAGEOFF] + +# Check that calls to external functions trigger the generation of stubs and GOT +# entries. +# +# jitlink-check: decode_operand(test_external_call, 0) = (stub_addr(macho_reloc.o, external_func) - test_external_call)[27:2] +# jitlink-check: *{8}(got_addr(macho_reloc.o, external_func)) = external_func + .globl test_external_call + .p2align 2 +test_external_call: + bl external_func + + .section __DATA,__data + +# Storage target for non-extern ARM64_RELOC_SUBTRACTOR relocs. + .p2align 3 +Lanon_data: + .quad 0x1111111111111111 + +# Check ARM64_RELOC_SUBTRACTOR Quad/Long in anonymous storage with anonymous +# minuend: "LA: .quad LA - B + C". The anonymous subtrahend form +# "LA: .quad B - LA + C" is not tested as subtrahends are not permitted to be +# anonymous. 
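+# +# As a hypothetical worked example (addresses made up for illustration only): +# if Lanon_minuend_quad resolved to 0x200008 and named_data to 0x200020, the +# fixed-up quad would hold 0x200008 - 0x200020 + 2 = -0x16, stored as the +# 64-bit two's complement value 0xffffffffffffffea.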
+# +# Note: +8 offset in expression below to account for sizeof(Lanon_data). +# jitlink-check: *{8}(section_addr(macho_reloc.o, __data) + 8) = (section_addr(macho_reloc.o, __data) + 8) - named_data + 2 + .p2align 3 +Lanon_minuend_quad: + .quad Lanon_minuend_quad - named_data + 2 + +# Note: +16 offset in expression below to account for sizeof(Lanon_data) + sizeof(Lanon_minuend_quad). +# jitlink-check: *{4}(section_addr(macho_reloc.o, __data) + 16) = ((section_addr(macho_reloc.o, __data) + 16) - named_data + 2)[31:0] + .p2align 2 +Lanon_minuend_long: + .long Lanon_minuend_long - named_data + 2 + +# Named quad storage target (first named atom in __data). +# Align to 16 for use as 128-bit load target. + .globl named_data + .p2align 4 +named_data: + .quad 0x2222222222222222 + .quad 0x3333333333333333 + +# An alt-entry point for named_data. + .globl named_data_alt_entry + .p2align 3 + .alt_entry named_data_alt_entry +named_data_alt_entry: + .quad 0 + +# Check ARM64_RELOC_UNSIGNED / quad / extern handling by putting the address of +# a local named function into a quad symbol. +# +# jitlink-check: *{8}named_func_addr_quad = named_func + .globl named_func_addr_quad + .p2align 3 +named_func_addr_quad: + .quad named_func + +# Check ARM64_RELOC_UNSIGNED / quad / non-extern handling by putting the +# address of a local anonymous function into a quad symbol. +# +# jitlink-check: *{8}anon_func_addr_quad = section_addr(macho_reloc.o, __text) + .globl anon_func_addr_quad + .p2align 3 +anon_func_addr_quad: + .quad Lanon_func + +# ARM64_RELOC_SUBTRACTOR Quad/Long in named storage with anonymous minuend +# +# jitlink-check: *{8}anon_minuend_quad1 = section_addr(macho_reloc.o, __data) - anon_minuend_quad1 + 2 +# Only the form "B: .quad LA - B + C" is tested. The form "B: .quad B - LA + C" is +# invalid because the subtrahend cannot be local. + .globl anon_minuend_quad1 + .p2align 3 +anon_minuend_quad1: + .quad Lanon_data - anon_minuend_quad1 + 2 + +# jitlink-check: *{4}anon_minuend_long1 = (section_addr(macho_reloc.o, __data) - anon_minuend_long1 + 2)[31:0] + .globl anon_minuend_long1 + .p2align 2 +anon_minuend_long1: + .long Lanon_data - anon_minuend_long1 + 2 + +# Check ARM64_RELOC_SUBTRACTOR Quad/Long in named storage with minuend and subtrahend. +# Both forms "A: .quad A - B + C" and "A: .quad B - A + C" are tested. +# +# Check "A: .quad B - A + C". +# jitlink-check: *{8}subtrahend_quad2 = (named_data - subtrahend_quad2 - 2) + .globl subtrahend_quad2 + .p2align 3 +subtrahend_quad2: + .quad named_data - subtrahend_quad2 - 2 + +# Check "A: .long B - A + C". +# jitlink-check: *{4}subtrahend_long2 = (named_data - subtrahend_long2 - 2)[31:0] + .globl subtrahend_long2 + .p2align 2 +subtrahend_long2: + .long named_data - subtrahend_long2 - 2 + +# Check "A: .quad A - B + C". +# jitlink-check: *{8}minuend_quad3 = (minuend_quad3 - named_data - 2) + .globl minuend_quad3 + .p2align 3 +minuend_quad3: + .quad minuend_quad3 - named_data - 2 + +# Check "A: .long A - B + C". +# jitlink-check: *{4}minuend_long3 = (minuend_long3 - named_data - 2)[31:0] + .globl minuend_long3 + .p2align 2 +minuend_long3: + .long minuend_long3 - named_data - 2 + +# Check ARM64_RELOC_SUBTRACTOR handling for exprs of the form +# "A: .quad/long B - C + D", where 'B' or 'C' is at a fixed offset from 'A' +# (i.e. is part of an alt_entry chain that includes 'A'). +# +# Check "A: .long B - C + D" where 'B' is an alt_entry for 'A'. 
+# jitlink-check: *{4}subtractor_with_alt_entry_minuend_long = (subtractor_with_alt_entry_minuend_long_B - named_data + 2)[31:0] + .globl subtractor_with_alt_entry_minuend_long + .p2align 2 +subtractor_with_alt_entry_minuend_long: + .long subtractor_with_alt_entry_minuend_long_B - named_data + 2 + + .globl subtractor_with_alt_entry_minuend_long_B + .p2align 2 + .alt_entry subtractor_with_alt_entry_minuend_long_B +subtractor_with_alt_entry_minuend_long_B: + .long 0 + +# Check "A: .quad B - C + D" where 'B' is an alt_entry for 'A'. +# jitlink-check: *{8}subtractor_with_alt_entry_minuend_quad = (subtractor_with_alt_entry_minuend_quad_B - named_data + 2) + .globl subtractor_with_alt_entry_minuend_quad + .p2align 3 +subtractor_with_alt_entry_minuend_quad: + .quad subtractor_with_alt_entry_minuend_quad_B - named_data + 2 + + .globl subtractor_with_alt_entry_minuend_quad_B + .p2align 3 + .alt_entry subtractor_with_alt_entry_minuend_quad_B +subtractor_with_alt_entry_minuend_quad_B: + .quad 0 + +# Check "A: .long B - C + D" where 'C' is an alt_entry for 'A'. +# jitlink-check: *{4}subtractor_with_alt_entry_subtrahend_long = (named_data - subtractor_with_alt_entry_subtrahend_long_B + 2)[31:0] + .globl subtractor_with_alt_entry_subtrahend_long + .p2align 2 +subtractor_with_alt_entry_subtrahend_long: + .long named_data - subtractor_with_alt_entry_subtrahend_long_B + 2 + + .globl subtractor_with_alt_entry_subtrahend_long_B + .p2align 2 + .alt_entry subtractor_with_alt_entry_subtrahend_long_B +subtractor_with_alt_entry_subtrahend_long_B: + .long 0 + +# Check "A: .quad B - C + D" where 'C' is an alt_entry for 'A'. +# jitlink-check: *{8}subtractor_with_alt_entry_subtrahend_quad = (named_data - subtractor_with_alt_entry_subtrahend_quad_B + 2) + .globl subtractor_with_alt_entry_subtrahend_quad + .p2align 3 +subtractor_with_alt_entry_subtrahend_quad: + .quad named_data - subtractor_with_alt_entry_subtrahend_quad_B + 2 + + .globl subtractor_with_alt_entry_subtrahend_quad_B + .p2align 3 + .alt_entry subtractor_with_alt_entry_subtrahend_quad_B +subtractor_with_alt_entry_subtrahend_quad_B: + .quad 0 + +# Check ARM64_POINTER_TO_GOT handling. +# ARM64_POINTER_TO_GOT is a delta-32 to a GOT entry. +# +# jitlink-check: *{4}test_got = (got_addr(macho_reloc.o, external_data) - test_got)[31:0] + .globl test_got + .p2align 2 +test_got: + .long external_data@got - . + +# Check that unreferenced atoms in no-dead-strip sections are not dead stripped. +# We need to use a local symbol for this as any named symbol will end up in the +# ORC responsibility set, which is automatically marked live and would cause +# spurious passes. +# +# jitlink-check: *{8}section_addr(macho_reloc.o, __nds_test_sect) = 0 + .section __DATA,__nds_test_sect,regular,no_dead_strip + .quad 0 + +# Check that unreferenced local symbols that have been marked no-dead-strip are +# not dead-stripped. +# +# jitlink-check: *{8}section_addr(macho_reloc.o, __nds_test_nlst) = 0 + .section __DATA,__nds_test_nlst,regular + .no_dead_strip no_dead_strip_test_symbol +no_dead_strip_test_symbol: + .quad 0 + +# Check that explicit zero-fill symbols are supported. +# jitlink-check: *{8}zero_fill_test = 0 + .globl zero_fill_test +.zerofill __DATA,__zero_fill_test,zero_fill_test,8,3 + +# Check that section alignments are respected. +# We test this by introducing two sections with alignment 8, each containing one +# byte of data. We require both symbols to have an aligned address. 
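+# +# The [2:0] = 0 checks below require the low three bits of each symbol's +# address to be zero, i.e. the address must be a multiple of 8, matching +# .p2align 3. For instance, a hypothetical address of 0x200018 would satisfy +# the check (0x18 & 0x7 = 0), while 0x20001c would not ([2:0] = 4).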
+# +# jitlink-check: section_alignment_check1[2:0] = 0 +# jitlink-check: section_alignment_check2[2:0] = 0 + .section __DATA,__sec_align_chk1 + .p2align 3 + + .globl section_alignment_check1 +section_alignment_check1: + .byte 0 + + .section __DATA,__sec_align_chk2 + .p2align 3 + + .globl section_alignment_check2 +section_alignment_check2: + .byte 0 + +.subsections_via_symbols diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/AArch64/lit.local.cfg new file mode 100644 index 0000000000000..7cfadc4db8619 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: +  config.unsupported = True diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s index f4498bfdba128..c97b1ecce6d6d 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s @@ -129,18 +129,6 @@ Lanon_minuend_quad: Lanon_minuend_long: .long Lanon_minuend_long - named_data + 2 -# Check X86_64_RELOC_GOT handling. -# X86_64_RELOC_GOT is the data-section counterpart to X86_64_RELOC_GOTLD. It is -# handled exactly the same way, including having an implicit PC-rel offset of -4 -# (despite this not making sense in a data section, and requiring an explicit -# +4 addend to cancel it out and get the correct result). -# -# jitlink-check: *{4}test_got = (got_addr(macho_reloc.o, external_data) - test_got)[31:0] - .globl test_got - .p2align 2 -test_got: - .long external_data@GOTPCREL + 4 - # Named quad storage target (first named atom in __data). .globl named_data .p2align 3 @@ -284,6 +272,18 @@ subtractor_with_alt_entry_subtrahend_quad: subtractor_with_alt_entry_subtrahend_quad_B: .quad 0 +# Check X86_64_RELOC_GOT handling. +# X86_64_RELOC_GOT is the data-section counterpart to X86_64_RELOC_GOTLD. It is +# handled exactly the same way, including having an implicit PC-rel offset of -4 +# (despite this not making sense in a data section, and requiring an explicit +# +4 addend to cancel it out and get the correct result). +# +# jitlink-check: *{4}test_got = (got_addr(macho_reloc.o, external_data) - test_got)[31:0] + .globl test_got + .p2align 2 +test_got: + .long external_data@GOTPCREL + 4 + # Check that unreferenced atoms in no-dead-strip sections are not dead stripped. # We need to use a local symbol for this as any named symbol will end up in the # ORC responsibility set, which is automatically marked live and would couse diff --git a/llvm/test/FileCheck/check-ignore-case.txt b/llvm/test/FileCheck/check-ignore-case.txt new file mode 100644 index 0000000000000..6a42a52fc4490 --- /dev/null +++ b/llvm/test/FileCheck/check-ignore-case.txt @@ -0,0 +1,45 @@ +## Check that a full line is matched case insensitively. +# RUN: FileCheck --ignore-case --match-full-lines --check-prefix=FULL --input-file=%s %s + +## Check that a regular expression matches case insensitively. +# RUN: FileCheck --ignore-case --check-prefix=REGEX --input-file=%s %s + +## Check that a pattern from the command line matches case insensitively. +# RUN: FileCheck --ignore-case --check-prefix=PAT --DPATTERN="THIS is the" --input-file=%s %s + +## Check that COUNT and NEXT work case insensitively. +# RUN: FileCheck --ignore-case --check-prefix=CNT --input-file=%s %s + +## Check that match on same line works case insensitively.
+# RUN: FileCheck --ignore-case --check-prefix=LINE --input-file=%s %s + +## Check that option --implicit-check-not works case insensitively. +# RUN: sed '/^#/d' %s | FileCheck --implicit-check-not=sTrInG %s +# RUN: sed '/^#/d' %s | not FileCheck --ignore-case --implicit-check-not=sTrInG %s 2>&1 | FileCheck --check-prefix=ERROR %s + +this is the STRING to be matched + +# FULL: tHis iS The String TO be matched +# REGEX: s{{TRing}} +# PAT: [[PATTERN]] string + +Loop 1 +lOop 2 +loOp 3 +looP 4 +loop 5 +LOOP 6 +BREAK + +# CNT-COUNT-6: LOop {{[0-9]}} +# CNT-NOT: loop +# CNT-NEXT: break + +One Line To Match + +# LINE: {{o}}ne line +# LINE-SAME: {{t}}o match + +# ERROR: command line:1:{{[0-9]+}}: error: CHECK-NOT: excluded string found in input +# ERROR-NEXT: -implicit-check-not='sTrInG' +# ERROR: note: found here diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll index 499de14aea39a..f56233d13420c 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll @@ -1,10 +1,9 @@ -; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck \ -; RUN: -allow-deprecated-dag-overlap %s -; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck -allow-deprecated-dag-overlap %s -; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -S \ -; RUN: -passes=msan 2>&1 | FileCheck -allow-deprecated-dag-overlap \ -; RUN: -check-prefix=CHECK -check-prefix=CHECK-ORIGINS %s -; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -allow-deprecated-dag-overlap -check-prefix=CHECK -check-prefix=CHECK-ORIGINS %s +; RUN: opt < %s -msan-check-access-address=0 -S -passes='module(msan-module),function(msan)' 2>&1 | FileCheck -allow-deprecated-dag-overlap %s +; RUN: opt < %s --passes='module(msan-module),function(msan)' -msan-check-access-address=0 -S | FileCheck -allow-deprecated-dag-overlap %s +; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -S -passes='module(msan-module),function(msan)' 2>&1 | \ +; RUN: FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK,CHECK-ORIGINS %s +; RUN: opt < %s -passes='module(msan-module),function(msan)' -msan-check-access-address=0 -msan-track-origins=1 -S | \ +; RUN: FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK,CHECK-ORIGINS %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan_basic.ll index 8b85d7b8bddc7..953ab8ed8dc50 100644 --- a/llvm/test/Instrumentation/ThreadSanitizer/tsan_basic.ll +++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan_basic.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -tsan -S | FileCheck %s -; RUN: opt < %s -passes=tsan -S | FileCheck %s +; RUN: opt < %s -passes='function(tsan),module(tsan-module)' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/LTO/ARM/lto-linking-metadata.ll b/llvm/test/LTO/ARM/lto-linking-metadata.ll new file mode 100644 index 0000000000000..ae6f42ff9be82 --- /dev/null +++ b/llvm/test/LTO/ARM/lto-linking-metadata.ll @@ -0,0 +1,19 @@ +; RUN: 
opt %s -o %t1.bc + +; RUN: llvm-lto %t1.bc -o %t1.save.opt -save-merged-module -O1 --exported-symbol=foo +; RUN: llvm-dis < %t1.save.opt.merged.bc | FileCheck %s + +; RUN: llvm-lto2 run %t1.bc -o %t.out.o -save-temps \ +; RUN: -r=%t1.bc,foo,pxl +; RUN: llvm-dis < %t.out.o.0.2.internalize.bc | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7a-unknown-linux" + +define void @foo() { +entry: + ret void +} + +; CHECK: !llvm.module.flags = !{[[MD_NUM:![0-9]+]]} +; CHECK: [[MD_NUM]] = !{i32 1, !"LTOPostLink", i32 1} diff --git a/llvm/test/TableGen/InvalidMCSchedClassDesc.td b/llvm/test/TableGen/InvalidMCSchedClassDesc.td new file mode 100644 index 0000000000000..f1b4ed00a6a8a --- /dev/null +++ b/llvm/test/TableGen/InvalidMCSchedClassDesc.td @@ -0,0 +1,47 @@ +// RUN: llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s +// Check that the MCSchedClassDesc is marked invalid when an instruction has no scheduling resources. + +include "llvm/Target/Target.td" + +def MyTarget : Target; + +let OutOperandList = (outs), InOperandList = (ins) in { + def Inst_A : Instruction; + def Inst_B : Instruction; +} + +let CompleteModel = 0 in { + def SchedModel_A: SchedMachineModel; + def SchedModel_B: SchedMachineModel; + def SchedModel_C: SchedMachineModel; +} + +// Inst_B does not have the resources, so it is invalid. +// CHECK: SchedModel_ASchedClasses[] = { +// CHECK: {DBGFIELD("Inst_A") 1 +// CHECK-NEXT: {DBGFIELD("Inst_B") 16383 +let SchedModel = SchedModel_A in { + def Write_A : SchedWriteRes<[]>; + def : InstRW<[Write_A], (instrs Inst_A)>; +} + +// Inst_A does not have the resources, so it is invalid. +// CHECK: SchedModel_BSchedClasses[] = { +// CHECK: {DBGFIELD("Inst_A") 16383 +// CHECK-NEXT: {DBGFIELD("Inst_B") 1 +let SchedModel = SchedModel_B in { + def Write_B: SchedWriteRes<[]>; + def : InstRW<[Write_B], (instrs Inst_B)>; +} + +// CHECK: SchedModel_CSchedClasses[] = { +// CHECK: {DBGFIELD("Inst_A") 1 +// CHECK-NEXT: {DBGFIELD("Inst_B") 1 +let SchedModel = SchedModel_C in { + def Write_C: SchedWriteRes<[]>; + def : InstRW<[Write_C], (instrs Inst_A, Inst_B)>; +} + +def ProcessorA: ProcessorModel<"ProcessorA", SchedModel_A, []>; +def ProcessorB: ProcessorModel<"ProcessorB", SchedModel_B, []>; +def ProcessorC: ProcessorModel<"ProcessorC", SchedModel_C, []>; diff --git a/llvm/test/ThinLTO/X86/lazyload_metadata.ll b/llvm/test/ThinLTO/X86/lazyload_metadata.ll index 3b34795b7a145..79c377724efad 100644 --- a/llvm/test/ThinLTO/X86/lazyload_metadata.ll +++ b/llvm/test/ThinLTO/X86/lazyload_metadata.ll @@ -10,13 +10,13 @@ ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=LAZY -; LAZY: 63 bitcode-reader - Number of Metadata records loaded +; LAZY: 65 bitcode-reader - Number of Metadata records loaded ; LAZY: 2 bitcode-reader - Number of MDStrings loaded ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY -; NOTLAZY: 72 bitcode-reader - Number of Metadata records loaded +; NOTLAZY: 74 bitcode-reader - Number of Metadata records loaded ; NOTLAZY: 7 bitcode-reader - Number of MDStrings loaded diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll new file mode 100644 index 0000000000000..2b4c03cd88c67 --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll @@ -0,0 +1,193 @@ 
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s + +;int popcount8(unsigned char i) { +; i = i - ((i >> 1) & 0x55); +; i = (i & 0x33) + ((i >> 2) & 0x33); +; i = ((i + (i >> 4)) & 0x0F); +; return (i * 0x01010101); +;} +define signext i32 @popcount8(i8 zeroext %0) { +; CHECK-LABEL: @popcount8( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP0:%.*]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], 85 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[TMP0]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[TMP4]], 51 +; CHECK-NEXT: [[TMP6:%.*]] = lshr i8 [[TMP4]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], 51 +; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i8 [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = lshr i8 [[TMP8]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i8 [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 15 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: ret i32 [[TMP12]] +; + %2 = lshr i8 %0, 1 + %3 = and i8 %2, 85 + %4 = sub i8 %0, %3 + %5 = and i8 %4, 51 + %6 = lshr i8 %4, 2 + %7 = and i8 %6, 51 + %8 = add nuw nsw i8 %7, %5 + %9 = lshr i8 %8, 4 + %10 = add nuw nsw i8 %9, %8 + %11 = and i8 %10, 15 + %12 = zext i8 %11 to i32 + ret i32 %12 +} + +;int popcount32(unsigned i) { +; i = i - ((i >> 1) & 0x55555555); +; i = (i & 0x33333333) + ((i >> 2) & 0x33333333); +; i = ((i + (i >> 4)) & 0x0F0F0F0F); +; return (i * 0x01010101) >> 24; +;} +define signext i32 @popcount32(i32 zeroext %0) { +; CHECK-LABEL: @popcount32( +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0:%.*]]) +; CHECK-NEXT: ret i32 [[TMP2]] +; + %2 = lshr i32 %0, 1 + %3 = and i32 %2, 1431655765 + %4 = sub i32 %0, %3 + %5 = and i32 %4, 858993459 + %6 = lshr i32 %4, 2 + %7 = and i32 %6, 858993459 + %8 = add nuw nsw i32 %7, %5 + %9 = lshr i32 %8, 4 + %10 = add nuw nsw i32 %9, %8 + %11 = and i32 %10, 252645135 + %12 = mul i32 %11, 16843009 + %13 = lshr i32 %12, 24 + ret i32 %13 +} + +;int popcount64(unsigned long long i) { +; i = i - ((i >> 1) & 0x5555555555555555); +; i = (i & 0x3333333333333333) + ((i >> 2) & 0x3333333333333333); +; i = ((i + (i >> 4)) & 0x0F0F0F0F0F0F0F0F); +; return (i * 0x0101010101010101) >> 56; +;} +define signext i32 @popcount64(i64 %0) { +; CHECK-LABEL: @popcount64( +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %2 = lshr i64 %0, 1 + %3 = and i64 %2, 6148914691236517205 + %4 = sub i64 %0, %3 + %5 = and i64 %4, 3689348814741910323 + %6 = lshr i64 %4, 2 + %7 = and i64 %6, 3689348814741910323 + %8 = add nuw nsw i64 %7, %5 + %9 = lshr i64 %8, 4 + %10 = add nuw nsw i64 %9, %8 + %11 = and i64 %10, 1085102592571150095 + %12 = mul i64 %11, 72340172838076673 + %13 = lshr i64 %12, 56 + %14 = trunc i64 %13 to i32 + ret i32 %14 +} + +;int popcount128(__uint128_t i) { +; __uint128_t x = 0x5555555555555555; +; x <<= 64; +; x |= 0x5555555555555555; +; __uint128_t y = 0x3333333333333333; +; y <<= 64; +; y |= 0x3333333333333333; +; __uint128_t z = 0x0f0f0f0f0f0f0f0f; +; z <<= 64; +; z |= 0x0f0f0f0f0f0f0f0f; +; __uint128_t a = 0x0101010101010101; +; a <<= 64; +; a |= 0x0101010101010101; +; unsigned mask = 120; +; i = i - ((i >> 1) & x); +; i = (i & y) + ((i >> 2) & y); +; i = ((i + (i >> 4)) & z); +; return (i * a) >> mask; +;} +define signext i32 @popcount128(i128 %0) { +; CHECK-LABEL: @popcount128( +; CHECK-NEXT: [[TMP2:%.*]] = call i128 @llvm.ctpop.i128(i128 
[[TMP0:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %2 = lshr i128 %0, 1 + %3 = and i128 %2, 113427455640312821154458202477256070485 + %4 = sub i128 %0, %3 + %5 = and i128 %4, 68056473384187692692674921486353642291 + %6 = lshr i128 %4, 2 + %7 = and i128 %6, 68056473384187692692674921486353642291 + %8 = add nuw nsw i128 %7, %5 + %9 = lshr i128 %8, 4 + %10 = add nuw nsw i128 %9, %8 + %11 = and i128 %10, 20016609818878733144904388672456953615 + %12 = mul i128 %11, 1334440654591915542993625911497130241 + %13 = lshr i128 %12, 120 + %14 = trunc i128 %13 to i32 + ret i32 %14 +} + +;vector unsigned char popcount8vec(vector unsigned char i) +;{ +; i = i - ((i>> 1) & 0x55); +; i = (i & 0x33) + ((i >> 2) & 0x33); +; i = ((i + (i >> 4)) & 0x0F); +; return (i * 0x01); +;} +define <16 x i8> @popcount8vec(<16 x i8> %0) { +; CHECK-LABEL: @popcount8vec( +; CHECK-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> +; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], <i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85> +; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i8> [[TMP0]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i8> [[TMP4]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51> +; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i8> [[TMP6]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51> +; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i8> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> +; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw <16 x i8> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i8> [[TMP10]], <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> +; CHECK-NEXT: ret <16 x i8> [[TMP11]] +; + %2 = lshr <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %3 = and <16 x i8> %2, <i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85> + %4 = sub <16 x i8> %0, %3 + %5 = and <16 x i8> %4, <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51> + %6 = lshr <16 x i8> %4, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> + %7 = and <16 x i8> %6, <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51> + %8 = add nuw nsw <16 x i8> %7, %5 + %9 = lshr <16 x i8> %8, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> + %10 = add nuw nsw <16 x i8> %9, %8 + %11 = and <16 x i8> %10, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> + ret <16 x i8> %11 +} + +;vector unsigned int popcount32vec(vector unsigned int i) +;{ +; i = i - ((i>> 1) & 0x55555555); +; i = (i & 0x33333333) + ((i >> 2) & 0x33333333); +; i = ((i + (i >> 4)) & 0x0F0F0F0F); +; return (i * 0x01010101) >> 24; +;} +define <4 x i32> @popcount32vec(<4 x i32> %0) { +; CHECK-LABEL: @popcount32vec( +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP0:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; + %2 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1> + %3 = and <4 x i32> %2, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765> + %4 = sub <4 x i32> %0, %3 + %5 = and <4 x i32> %4, <i32 858993459, i32 858993459, i32 858993459, i32 858993459> + %6 = lshr <4 x i32> %4, <i32 2, i32 2, i32 2, i32 2> + %7 = and <4 x i32> %6, <i32 858993459, i32 858993459, i32 858993459, i32 858993459> + %8 = add nuw nsw <4 x i32> %7, %5 + %9 = lshr <4 x i32> %8, <i32 4, i32 4, i32 4, i32 4> + %10 = add nuw nsw <4 x i32> %9, %8 + %11 = and <4 x i32> %10, <i32 252645135, i32 252645135, i32 252645135, i32 252645135> + %12 = mul <4 x i32> %11, <i32 16843009, i32 16843009, i32 16843009, i32 16843009> + %13 = lshr <4 x i32> %12, <i32 24, i32 24, i32 24, i32 24> + ret <4 x i32> %13 +} diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/and.ll b/llvm/test/Transforms/CorrelatedValuePropagation/and.ll new file mode 100644 index 0000000000000..475824420ab74 --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/and.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -correlated-propagation -S | FileCheck %s + +define i32 @test(i32 %a) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 128 +; CHECK-NEXT: br i1 [[CMP]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] +; CHECK: continue: +; CHECK-NEXT: ret i32 [[A]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; +entry: + %cmp = icmp ult i32 %a, 128 + br i1 %cmp, label %continue, label %exit +continue: + %and = and i32 %a, 255 + ret i32 %and +exit: + ret i32 -1 +} + +define i32 @test2(i32 %a) { +; CHECK-LABEL: 
@test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 256 +; CHECK-NEXT: br i1 [[CMP]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] +; CHECK: continue: +; CHECK-NEXT: ret i32 [[A]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; +entry: + %cmp = icmp ult i32 %a, 256 + br i1 %cmp, label %continue, label %exit +continue: + %and = and i32 %a, 255 + ret i32 %and +exit: + ret i32 -1 +} + +define i32 @test3(i32 %a) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 256 +; CHECK-NEXT: br i1 [[CMP]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] +; CHECK: continue: +; CHECK-NEXT: ret i32 [[A]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; +entry: + %cmp = icmp ult i32 %a, 256 + br i1 %cmp, label %continue, label %exit +continue: + %and = and i32 %a, 1023 + ret i32 %and +exit: + ret i32 -1 +} + + +define i32 @neg1(i32 %a) { +; CHECK-LABEL: @neg1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[A:%.*]], 256 +; CHECK-NEXT: br i1 [[CMP]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] +; CHECK: continue: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A]], 255 +; CHECK-NEXT: ret i32 [[AND]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; +entry: + %cmp = icmp ule i32 %a, 256 + br i1 %cmp, label %continue, label %exit +continue: + %and = and i32 %a, 255 + ret i32 %and +exit: + ret i32 -1 +} + +define i32 @neg2(i32 %a) { +; CHECK-LABEL: @neg2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 513 +; CHECK-NEXT: br i1 [[CMP]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] +; CHECK: continue: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A]], 255 +; CHECK-NEXT: ret i32 [[AND]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; +entry: + %cmp = icmp ult i32 %a, 513 + br i1 %cmp, label %continue, label %exit +continue: + %and = and i32 %a, 255 + ret i32 %and +exit: + ret i32 -1 +} + +define i32 @neg3(i32 %a) { +; CHECK-LABEL: @neg3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 256 +; CHECK-NEXT: br i1 [[CMP]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] +; CHECK: continue: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A]], 254 +; CHECK-NEXT: ret i32 [[AND]] +; CHECK: exit: +; CHECK-NEXT: ret i32 -1 +; +entry: + %cmp = icmp ult i32 %a, 256 + br i1 %cmp, label %continue, label %exit +continue: + %and = and i32 %a, 254 + ret i32 %and +exit: + ret i32 -1 +} + diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll index 39964682bf246..9e2035b7bb9fa 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll @@ -1023,7 +1023,6 @@ define i1 @smul_and_cmp(i32 %x, i32 %y) #0 { ; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0 ; CHECK-NEXT: br label [[CONT3:%.*]] ; CHECK: cont3: -; CHECK-NEXT: [[CMP5:%.*]] = and i1 true, true ; CHECK-NEXT: br label [[OUT]] ; CHECK: out: ; CHECK-NEXT: ret i1 true diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/range.ll b/llvm/test/Transforms/CorrelatedValuePropagation/range.ll index 49d4e1f48a968..6315e3bd74da1 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/range.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/range.ll @@ -745,10 +745,9 @@ target93: define i1 @test17_i1(i1 %a) { ; CHECK-LABEL: @test17_i1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = and i1 [[A:%.*]], true ; CHECK-NEXT: br label [[DISPATCH:%.*]] ; CHECK: dispatch: -; CHECK-NEXT: br i1 [[A]], label 
[[TRUE:%.*]], label [[DISPATCH]] +; CHECK-NEXT: br i1 [[A:%.*]], label [[TRUE:%.*]], label [[DISPATCH]] ; CHECK: true: ; CHECK-NEXT: ret i1 true ; diff --git a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll index e9cf2d81f1169..99b6762a5c818 100644 --- a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll +++ b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll @@ -830,6 +830,11 @@ define i32* @use_const() #0 { ; CHECK: ret i32* bitcast (i8* @G to i32*) ret i32* %c } +define i32* @dont_use_const() #0 { + %c = musttail call i32* @ret_const() + ; CHECK: ret i32* %c + ret i32* %c +} attributes #0 = { noinline nounwind uwtable } diff --git a/llvm/test/Transforms/FunctionAttrs/callbacks.ll b/llvm/test/Transforms/FunctionAttrs/callbacks.ll index 41d58a19f1075..4bd491a0e3ec6 100644 --- a/llvm/test/Transforms/FunctionAttrs/callbacks.ll +++ b/llvm/test/Transforms/FunctionAttrs/callbacks.ll @@ -24,7 +24,7 @@ define void @t0_caller(i32* %a) { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: store i32 42, i32* [[B]], align 32 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 -; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index d572e94aa3d77..fafb4178c1b81 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -320,5 +320,14 @@ define i1 @captureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) ret i1 %2 } +declare void @unknown(i8*) +define void @test_callsite() { +entry: +; We know that 'null' in AS 0 does not alias anything and cannot be captured +; CHECK: call void @unknown(i8* noalias nocapture null) + call void @unknown(i8* null) + ret void +} + declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll new file mode 100644 index 0000000000000..84d95f607d602 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-base-call.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct A { +; A(); +; virtual int foo(); +; }; +; +; struct B : A { +; B(); +; virtual int foo(); +; }; +; +; A::A() {} +; B::B() {} +; int A::foo() { return 42; } +; int B::foo() { return 1337; } +; +; extern "C" int test(A *p) { return p->foo(); } + +; The virtual call in test could be dispatched to either A::foo or B::foo, so +; both must be retained. 
+ +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +; CHECK: @_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] } +@_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +; CHECK: @_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] } +@_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !type !3, !type !4, !vcall_visibility !2 + +; CHECK: define internal i32 @_ZN1A3fooEv( +define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) { +entry: + ret i32 42 +} + +; CHECK: define internal i32 @_ZN1B3fooEv( +define internal i32 @_ZN1B3fooEv(%struct.B* nocapture readnone %this) { +entry: + ret i32 1337 +} + +define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden i32 @test(%struct.A* %p) { +entry: + %0 = bitcast %struct.A* %p to i8** + %vtable1 = load i8*, i8** %0, align 8 + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable1, i32 0, metadata !"_ZTS1A"), !nosanitize !10 + %2 = extractvalue { i8*, i1 } %1, 0, !nosanitize !10 + %3 = bitcast i8* %2 to i32 (%struct.A*)*, !nosanitize !10 + %call = tail call i32 %3(%struct.A* %p) + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) #2 + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFivE.virtual"} +!2 = !{i64 2} +!3 = !{i64 16, !"_ZTS1B"} +!4 = !{i64 16, !"_ZTSM1BFivE.virtual"} +!10 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll new file mode 100644 index 0000000000000..d498a336a50f0 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-base-pointer-call.ll @@ -0,0 +1,118 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct A { +; A(); +; virtual int foo(int); +; virtual int bar(float); +; }; +; +; struct B : A { +; B(); +; virtual int foo(int); +; virtual int bar(float); +; }; +; +; A::A() {} +; B::B() {} +; int A::foo(int) { return 1; } +; int A::bar(float) { return 2; } +; int B::foo(int) { return 3; } +; int B::bar(float) { return 4; } +; +; extern "C" int test(A *p, int (A::*q)(int)) { return (p->*q)(42); } + +; Member function pointers are tracked by the combination of their object type +; and function type, which must both be compatible. Here, the call is through a +; pointer of type "int (A::*q)(int)", so the call could be dispatched to A::foo +; or B::foo. 
It can't be dispatched to A::bar or B::bar as the function pointer +; does not match, so those can be removed. + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* null] } +@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* bitcast (i32 (%struct.A*, float)* @_ZN1A3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3 +; CHECK: @_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* null] } +@_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* bitcast (i32 (%struct.B*, float)* @_ZN1B3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !type !4, !type !5, !type !6, !vcall_visibility !3 + + +; CHECK: define internal i32 @_ZN1A3fooEi( +define internal i32 @_ZN1A3fooEi(%struct.A* nocapture readnone %this, i32) unnamed_addr #1 align 2 { +entry: + ret i32 1 +} + +; CHECK-NOT: define internal i32 @_ZN1A3barEf( +define internal i32 @_ZN1A3barEf(%struct.A* nocapture readnone %this, float) unnamed_addr #1 align 2 { +entry: + ret i32 2 +} + +; CHECK: define internal i32 @_ZN1B3fooEi( +define internal i32 @_ZN1B3fooEi(%struct.B* nocapture readnone %this, i32) unnamed_addr #1 align 2 { +entry: + ret i32 3 +} + +; CHECK-NOT: define internal i32 @_ZN1B3barEf( +define internal i32 @_ZN1B3barEf(%struct.B* nocapture readnone %this, float) unnamed_addr #1 align 2 { +entry: + ret i32 4 +} + + +define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden i32 @test(%struct.A* %p, i64 %q.coerce0, i64 %q.coerce1) { +entry: + %0 = bitcast %struct.A* %p to i8* + %1 = getelementptr inbounds i8, i8* %0, i64 %q.coerce1 + %this.adjusted = bitcast i8* %1 to %struct.A* + %2 = and i64 %q.coerce0, 1 + %memptr.isvirtual = icmp eq i64 %2, 0 + br i1 %memptr.isvirtual, label %memptr.nonvirtual, label %memptr.virtual + +memptr.virtual: ; preds = %entry + %3 = bitcast i8* %1 to i8** + %vtable = load i8*, i8** %3, align 8 + %4 = add i64 %q.coerce0, -1 + %5 = getelementptr i8, i8* %vtable, i64 %4, !nosanitize !12 + %6 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %5, i32 0, metadata !"_ZTSM1AFiiE.virtual"), !nosanitize !12 + %7 = extractvalue { i8*, i1 } %6, 0, !nosanitize !12 + %memptr.virtualfn = bitcast i8* %7 to i32 (%struct.A*, i32)*, !nosanitize !12 + br label %memptr.end + +memptr.nonvirtual: ; preds = %entry + %memptr.nonvirtualfn = inttoptr i64 %q.coerce0 to i32 (%struct.A*, i32)* + br label %memptr.end + +memptr.end: ; preds = %memptr.nonvirtual, %memptr.virtual + %8 = phi i32 (%struct.A*, i32)* [ %memptr.virtualfn, %memptr.virtual 
], [ %memptr.nonvirtualfn, %memptr.nonvirtual ] + %call = tail call i32 %8(%struct.A* %this.adjusted, i32 42) + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFiiE.virtual"} +!2 = !{i64 24, !"_ZTSM1AFifE.virtual"} +!3 = !{i64 2} +!4 = !{i64 16, !"_ZTS1B"} +!5 = !{i64 16, !"_ZTSM1BFiiE.virtual"} +!6 = !{i64 24, !"_ZTSM1BFifE.virtual"} +!12 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll new file mode 100644 index 0000000000000..fb39f649badfd --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-call.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct A { +; A(); +; virtual int foo(); +; }; +; +; struct B : A { +; B(); +; virtual int foo(); +; }; +; +; A::A() {} +; B::B() {} +; int A::foo() { return 42; } +; int B::foo() { return 1337; } +; +; extern "C" int test(B *p) { return p->foo(); } + +; The virtual call in test can only be dispatched to B::foo (or a more-derived +; class, if there was one), so A::foo can be removed. + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +; CHECK: @_ZTV1A = internal unnamed_addr constant { [3 x i8*] } zeroinitializer +@_ZTV1A = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +; CHECK: @_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] } +@_ZTV1B = internal unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !type !3, !type !4, !vcall_visibility !2 + +; CHECK-NOT: define internal i32 @_ZN1A3fooEv( +define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) { +entry: + ret i32 42 +} + +; CHECK: define internal i32 @_ZN1B3fooEv( +define internal i32 @_ZN1B3fooEv(%struct.B* nocapture readnone %this) { +entry: + ret i32 1337 +} + +define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden i32 @test(%struct.B* %p) { +entry: + %0 = bitcast %struct.B* %p to i8** + %vtable1 = load i8*, i8** %0, align 8 + %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable1, i32 0, metadata !"_ZTS1B"), !nosanitize !10 + %2 = extractvalue { i8*, i1 } %1, 0, !nosanitize !10 + %3 = bitcast i8* %2 to i32 (%struct.B*)*, !nosanitize !10 + %call = tail call i32 %3(%struct.B* %p) + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) #2 + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFivE.virtual"} +!2 = !{i64 2} +!3 = !{i64 16, !"_ZTS1B"} +!4 = !{i64 16, !"_ZTSM1BFivE.virtual"} +!10 = !{} 
diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll new file mode 100644 index 0000000000000..62b5b8d37302d --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-derived-pointer-call.ll @@ -0,0 +1,120 @@ + +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct A { +; A(); +; virtual int foo(int); +; virtual int bar(float); +; }; +; +; struct B : A { +; B(); +; virtual int foo(int); +; virtual int bar(float); +; }; +; +; A::A() {} +; B::B() {} +; int A::foo(int) { return 1; } +; int A::bar(float) { return 2; } +; int B::foo(int) { return 3; } +; int B::bar(float) { return 4; } +; +; extern "C" int test(B *p, int (B::*q)(int)) { return (p->*q)(42); } + +; Member function pointers are tracked by the combination of their object type +; and function type, which must both be compatible. Here, the call is through a +; pointer of type "int (B::*q)(int)", so the call could only be dispatched to +; B::foo. It can't be dispatched to A::bar or B::bar as the function pointer +; does not match, and it can't be dispatched to A::foo as the object type +; doesn't match, so those can be removed. + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } zeroinitializer +@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A3fooEi to i8*), i8* bitcast (i32 (%struct.A*, float)* @_ZN1A3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3 +; CHECK: @_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* null] } +@_ZTV1B = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B3fooEi to i8*), i8* bitcast (i32 (%struct.B*, float)* @_ZN1B3barEf to i8*)] }, align 8, !type !0, !type !1, !type !2, !type !4, !type !5, !type !6, !vcall_visibility !3 + + +; CHECK-NOT: define internal i32 @_ZN1A3fooEi( +define internal i32 @_ZN1A3fooEi(%struct.A* nocapture readnone %this, i32) unnamed_addr #1 align 2 { +entry: + ret i32 1 +} + +; CHECK-NOT: define internal i32 @_ZN1A3barEf( +define internal i32 @_ZN1A3barEf(%struct.A* nocapture readnone %this, float) unnamed_addr #1 align 2 { +entry: + ret i32 2 +} + +; CHECK: define internal i32 @_ZN1B3fooEi( +define internal i32 @_ZN1B3fooEi(%struct.B* nocapture readnone %this, i32) unnamed_addr #1 align 2 { +entry: + ret i32 3 +} + +; CHECK-NOT: define internal i32 @_ZN1B3barEf( +define internal i32 @_ZN1B3barEf(%struct.B* nocapture readnone %this, float) unnamed_addr #1 align 2 { +entry: + ret i32 4 +} + + +define hidden void @_ZN1AC2Ev(%struct.A* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden void @_ZN1BC2Ev(%struct.B* nocapture %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define hidden 
i32 @test(%struct.B* %p, i64 %q.coerce0, i64 %q.coerce1) { +entry: + %0 = bitcast %struct.B* %p to i8* + %1 = getelementptr inbounds i8, i8* %0, i64 %q.coerce1 + %this.adjusted = bitcast i8* %1 to %struct.B* + %2 = and i64 %q.coerce0, 1 + %memptr.isvirtual = icmp eq i64 %2, 0 + br i1 %memptr.isvirtual, label %memptr.nonvirtual, label %memptr.virtual + +memptr.virtual: ; preds = %entry + %3 = bitcast i8* %1 to i8** + %vtable = load i8*, i8** %3, align 8 + %4 = add i64 %q.coerce0, -1 + %5 = getelementptr i8, i8* %vtable, i64 %4, !nosanitize !12 + %6 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %5, i32 0, metadata !"_ZTSM1BFiiE.virtual"), !nosanitize !12 + %7 = extractvalue { i8*, i1 } %6, 0, !nosanitize !12 + %memptr.virtualfn = bitcast i8* %7 to i32 (%struct.B*, i32)*, !nosanitize !12 + br label %memptr.end + +memptr.nonvirtual: ; preds = %entry + %memptr.nonvirtualfn = inttoptr i64 %q.coerce0 to i32 (%struct.B*, i32)* + br label %memptr.end + +memptr.end: ; preds = %memptr.nonvirtual, %memptr.virtual + %8 = phi i32 (%struct.B*, i32)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ] + %call = tail call i32 %8(%struct.B* %this.adjusted, i32 42) + ret i32 %call +} + +declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata) + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFiiE.virtual"} +!2 = !{i64 24, !"_ZTSM1AFifE.virtual"} +!3 = !{i64 2} +!4 = !{i64 16, !"_ZTS1B"} +!5 = !{i64 16, !"_ZTSM1BFiiE.virtual"} +!6 = !{i64 24, !"_ZTSM1BFifE.virtual"} +!12 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll new file mode 100644 index 0000000000000..d636b5a3df880 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-post-lto.ll @@ -0,0 +1,95 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +; structs A, B and C have vcall_visibility of public, linkage-unit and +; translation-unit respectively. This test is run after LTO linking (the +; LTOPostLink metadata is present), so B and C can be VFE'd. 
+ +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +%struct.A = type { i32 (...)** } + +@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +define internal void @_ZN1AC2Ev(%struct.A* %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK: define {{.*}} @_ZN1A3fooEv( +define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Av() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC2Ev(%struct.A* %0) + ret i8* %call +} + + +%struct.B = type { i32 (...)** } + +@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !3 + +define internal void @_ZN1BC2Ev(%struct.B* %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK-NOT: define {{.*}} @_ZN1B3fooEv( +define internal void @_ZN1B3fooEv(%struct.B* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Bv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.B* + tail call void @_ZN1BC2Ev(%struct.B* %0) + ret i8* %call +} + + +%struct.C = type { i32 (...)** } + +@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !4 + +define internal void @_ZN1CC2Ev(%struct.C* %this) { +entry: + %0 = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1C, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK-NOT: define {{.*}} @_ZN1C3fooEv( +define internal void @_ZN1C3fooEv(%struct.C* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Cv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.C* + tail call void @_ZN1CC2Ev(%struct.C* %0) + ret i8* %call +} + +declare dso_local noalias nonnull i8* @_Znwm(i64) + +!llvm.module.flags = !{!5} + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} +!2 = !{i64 0} ; public vcall visibility +!3 = !{i64 1} ; linkage-unit vcall visibility +!4 = !{i64 2} ; translation-unit vcall visibility +!5 = !{i32 1, !"LTOPostLink", i32 1} diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll new file mode 100644 index 0000000000000..b0b34c0bbc3a8 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-visibility-pre-lto.ll @@ -0,0 +1,94 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +; structs A, B and C have vcall_visibility of public, linkage-unit and +; translation-unit respectively. 
This test is run before LTO linking occurs +; (the LTOPostLink metadata is not present), so only C can be VFE'd. + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +%struct.A = type { i32 (...)** } + +@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +define internal void @_ZN1AC2Ev(%struct.A* %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK: define {{.*}} @_ZN1A3fooEv( +define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Av() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC2Ev(%struct.A* %0) + ret i8* %call +} + + +%struct.B = type { i32 (...)** } + +@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !3 + +define internal void @_ZN1BC2Ev(%struct.B* %this) { +entry: + %0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK: define {{.*}} @_ZN1B3fooEv( +define internal void @_ZN1B3fooEv(%struct.B* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Bv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.B* + tail call void @_ZN1BC2Ev(%struct.B* %0) + ret i8* %call +} + + +%struct.C = type { i32 (...)** } + +@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !4 + +define internal void @_ZN1CC2Ev(%struct.C* %this) { +entry: + %0 = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1C, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK-NOT: define {{.*}} @_ZN1C3fooEv( +define internal void @_ZN1C3fooEv(%struct.C* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Cv() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.C* + tail call void @_ZN1CC2Ev(%struct.C* %0) + ret i8* %call +} + +declare dso_local noalias nonnull i8* @_Znwm(i64) + +!llvm.module.flags = !{} + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} +!2 = !{i64 0} ; public vcall visibility +!3 = !{i64 1} ; linkage-unit vcall visibility +!4 = !{i64 2} ; translation-unit vcall visibility diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions.ll new file mode 100644 index 0000000000000..614907197a857 --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare dso_local noalias nonnull i8* @_Znwm(i64) +declare { i8*, i1 } @llvm.type.checked.load(i8*, 
i32, metadata) + +; %struct.A is a C++ struct with two virtual functions, A::foo and A::bar. The +; !vcall_visibility metadata is set on the vtable, so we know that all virtual +; calls through this vtable are visible and use the @llvm.type.checked.load +; intrinsic. Function test_A makes a call to A::foo, but there is no call to +; A::bar anywhere, so A::bar can be deleted, and its vtable slot replaced with +; null. + +%struct.A = type { i32 (...)** } + +; The pointer to A::bar in the vtable can be removed, because it will never be +; loaded. We replace it with null to keep the layout the same. Because it is at +; the end of the vtable we could potentially shrink the vtable, but don't +; currently do that. +; CHECK: @_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* null] } +@_ZTV1A = internal unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)] }, align 8, !type !0, !type !1, !type !2, !vcall_visibility !3 + +; A::foo is called, so must be retained. +; CHECK: define internal i32 @_ZN1A3fooEv( +define internal i32 @_ZN1A3fooEv(%struct.A* nocapture readnone %this) { +entry: + ret i32 42 +} + +; A::bar is not used, so can be deleted. +; CHECK-NOT: define internal i32 @_ZN1A3barEv( +define internal i32 @_ZN1A3barEv(%struct.A* nocapture readnone %this) { +entry: + ret i32 1337 +} + +define dso_local i32 @test_A() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + %1 = bitcast i8* %call to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8 + %2 = tail call { i8*, i1 } @llvm.type.checked.load(i8* bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i8*), i32 0, metadata !"_ZTS1A"), !nosanitize !9 + %3 = extractvalue { i8*, i1 } %2, 0, !nosanitize !9 + %4 = bitcast i8* %3 to i32 (%struct.A*)*, !nosanitize !9 + %call1 = tail call i32 %4(%struct.A* nonnull %0) + ret i32 %call1 +} + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFivE.virtual"} +!2 = !{i64 24, !"_ZTSM1AFivE.virtual"} +!3 = !{i64 2} +!9 = !{} diff --git a/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll b/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll new file mode 100644 index 0000000000000..dd6119340557c --- /dev/null +++ b/llvm/test/Transforms/GlobalDCE/vtable-rtti.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -globaldce -S | FileCheck %s + +; We currently only use llvm.type.checked.load for virtual function pointers, +; not any other part of the vtable, so we can't remove the RTTI pointer even if +; it's never going to be loaded from. 
+ +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +%struct.A = type { i32 (...)** } + +; CHECK: @_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* null] }, align 8, !type !0, !type !1, !vcall_visibility !2 + +@_ZTV1A = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1, !vcall_visibility !2 +@_ZTS1A = hidden constant [3 x i8] c"1A\00", align 1 +@_ZTI1A = hidden constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }, align 8 + +define internal void @_ZN1AC2Ev(%struct.A* %this) { +entry: + %0 = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +; CHECK-NOT: define {{.*}} @_ZN1A3fooEv( +define internal void @_ZN1A3fooEv(%struct.A* nocapture %this) { +entry: + ret void +} + +define dso_local i8* @_Z6make_Av() { +entry: + %call = tail call i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC2Ev(%struct.A* %0) + ret i8* %call +} + + +declare dso_local noalias nonnull i8* @_Znwm(i64) +@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8* + +!llvm.module.flags = !{!3} + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} +!2 = !{i64 2} ; translation-unit vcall visibility +!3 = !{i32 1, !"LTOPostLink", i32 1} diff --git a/llvm/test/Transforms/InstCombine/log-pow.ll b/llvm/test/Transforms/InstCombine/log-pow.ll index b7085e955ef41..227fcf0c3fa70 100644 --- a/llvm/test/Transforms/InstCombine/log-pow.ll +++ b/llvm/test/Transforms/InstCombine/log-pow.ll @@ -97,8 +97,18 @@ define double @log_exp2_not_fast(double %x) { ret double %log } +define double @pr43617(double %d, i32 %i, double (i32)* %f) { +entry: + %sub = fsub double -0.000000e+00, %d + %icall = tail call fast double %f(i32 %i) + %log = tail call fast double @llvm.log.f64(double %icall) + %mul = fmul double %log, %sub + ret double %mul +} + declare double @log(double) #0 declare float @logf(float) #0 +declare double @llvm.log.f64(double) #0 declare <2 x float> @llvm.log.v2f32(<2 x float>) declare float @log2f(float) #0 declare <2 x double> @llvm.log2.v2f64(<2 x double>) diff --git a/llvm/test/Transforms/Internalize/vcall-visibility.ll b/llvm/test/Transforms/Internalize/vcall-visibility.ll new file mode 100644 index 0000000000000..dd4419502a4c4 --- /dev/null +++ b/llvm/test/Transforms/Internalize/vcall-visibility.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -internalize -S | FileCheck %s + +%struct.A = type { i32 (...)** } +%struct.B = type { i32 (...)** } +%struct.C = type { i32 (...)** } + +; Class A has default visibility, so has no !vcall_visibility metadata before +; or after LTO. +; CHECK-NOT: @_ZTV1A = {{.*}}!vcall_visibility +@_ZTV1A = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)] }, align 8, !type !0, !type !1 + +; Class B has hidden visibility but public LTO visibility, so has no +; !vcall_visibility metadata before or after LTO. 
+; CHECK-NOT: @_ZTV1B = {{.*}}!vcall_visibility +@_ZTV1B = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)] }, align 8, !type !2, !type !3 + +; Class C has hidden visibility, so the !vcall_visibility metadata is set to 1 +; (linkage unit) before LTO, and 2 (translation unit) after LTO. +; CHECK: @_ZTV1C ={{.*}}!vcall_visibility [[MD_TU_VIS:![0-9]+]] +@_ZTV1C = hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)] }, align 8, !type !4, !type !5, !vcall_visibility !6 + +; Class D has translation unit visibility before LTO, and this is not changed +; by LTO. +; CHECK: @_ZTVN12_GLOBAL__N_11DE = {{.*}}!vcall_visibility [[MD_TU_VIS:![0-9]+]] +@_ZTVN12_GLOBAL__N_11DE = internal unnamed_addr constant { [3 x i8*] } zeroinitializer, align 8, !type !7, !type !9, !vcall_visibility !11 + +define dso_local void @_ZN1A3fooEv(%struct.A* nocapture %this) { +entry: + ret void +} + +define hidden void @_ZN1B3fooEv(%struct.B* nocapture %this) { +entry: + ret void +} + +define hidden void @_ZN1C3fooEv(%struct.C* nocapture %this) { +entry: + ret void +} + +define hidden noalias nonnull i8* @_Z6make_dv() { +entry: + %call = tail call i8* @_Znwm(i64 8) #3 + %0 = bitcast i8* %call to i32 (...)*** + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTVN12_GLOBAL__N_11DE, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret i8* %call +} + +declare dso_local noalias nonnull i8* @_Znwm(i64) + +; CHECK: [[MD_TU_VIS]] = !{i64 2} +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AFvvE.virtual"} +!2 = !{i64 16, !"_ZTS1B"} +!3 = !{i64 16, !"_ZTSM1BFvvE.virtual"} +!4 = !{i64 16, !"_ZTS1C"} +!5 = !{i64 16, !"_ZTSM1CFvvE.virtual"} +!6 = !{i64 1} +!7 = !{i64 16, !8} +!8 = distinct !{} +!9 = !{i64 16, !10} +!10 = distinct !{} +!11 = !{i64 2} diff --git a/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll new file mode 100644 index 0000000000000..59ed892b60ee0 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll @@ -0,0 +1,77 @@ +; RUN: opt < %s -memcpyopt -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +define i8* @foo(i8* returned %0, i32 %1, i64 %2) { +entry: + %3 = getelementptr inbounds i8, i8* %0, i64 %2 + %4 = getelementptr inbounds i8, i8* %3, i64 -32 + %vv = trunc i32 %1 to i8 + store i8 %vv, i8* %4, align 1 + %5 = getelementptr inbounds i8, i8* %4, i64 1 + store i8 %vv, i8* %5, align 1 + %6= getelementptr inbounds i8, i8* %4, i64 2 + store i8 %vv, i8* %6, align 1 + %7= getelementptr inbounds i8, i8* %4, i64 3 + store i8 %vv, i8* %7, align 1 + %8= getelementptr inbounds i8, i8* %4, i64 4 + store i8 %vv, i8* %8, align 1 + %9= getelementptr inbounds i8, i8* %4, i64 5 + store i8 %vv, i8* %9, align 1 + %10= getelementptr inbounds i8, i8* %4, i64 6 + store i8 %vv, i8* %10, align 1 + %11= getelementptr inbounds i8, i8* %4, i64 7 + store i8 %vv, i8* %11, align 1 + %12= getelementptr inbounds i8, i8* %4, i64 8 + store i8 %vv, i8* %12, align 1 + %13= getelementptr inbounds i8, i8* %4, i64 9 + store i8 %vv, i8* %13, align 1 + %14= getelementptr inbounds i8, i8* %4, i64 10 + store i8 %vv, i8* %14, align 1 + %15= getelementptr inbounds i8, i8* %4, i64 11 + store i8 %vv, i8* %15, align 1 + %16= getelementptr inbounds i8, i8* %4, i64 12 + store i8 %vv, i8* %16, align 1 + %17= 
getelementptr inbounds i8, i8* %4, i64 13 + store i8 %vv, i8* %17, align 1 + %18= getelementptr inbounds i8, i8* %4, i64 14 + store i8 %vv, i8* %18, align 1 + %19= getelementptr inbounds i8, i8* %4, i64 15 + store i8 %vv, i8* %19, align 1 + %20= getelementptr inbounds i8, i8* %4, i64 16 + store i8 %vv, i8* %20, align 1 + %21= getelementptr inbounds i8, i8* %20, i64 1 + store i8 %vv, i8* %21, align 1 + %22= getelementptr inbounds i8, i8* %20, i64 2 + store i8 %vv, i8* %22, align 1 + %23= getelementptr inbounds i8, i8* %20, i64 3 + store i8 %vv, i8* %23, align 1 + %24= getelementptr inbounds i8, i8* %20, i64 4 + store i8 %vv, i8* %24, align 1 + %25= getelementptr inbounds i8, i8* %20, i64 5 + store i8 %vv, i8* %25, align 1 + %26= getelementptr inbounds i8, i8* %20, i64 6 + store i8 %vv, i8* %26, align 1 + %27= getelementptr inbounds i8, i8* %20, i64 7 + store i8 %vv, i8* %27, align 1 + %28= getelementptr inbounds i8, i8* %20, i64 8 + store i8 %vv, i8* %28, align 1 + %29= getelementptr inbounds i8, i8* %20, i64 9 + store i8 %vv, i8* %29, align 1 + %30= getelementptr inbounds i8, i8* %20, i64 10 + store i8 %vv, i8* %30, align 1 + %31 = getelementptr inbounds i8, i8* %20, i64 11 + store i8 %vv, i8* %31, align 1 + %32 = getelementptr inbounds i8, i8* %20, i64 12 + store i8 %vv, i8* %32, align 1 + %33 = getelementptr inbounds i8, i8* %20, i64 13 + store i8 %vv, i8* %33, align 1 + %34 = getelementptr inbounds i8, i8* %20, i64 14 + store i8 %vv, i8* %34, align 1 + %35 = getelementptr inbounds i8, i8* %20, i64 15 + store i8 %vv, i8* %35, align 1 + ret i8* %0 +; CHECK-LABEL: @foo +; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %4, i8 %vv, i64 32, i1 false) +} + diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline.extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/inline.extbinary.afdo index f1a1f87820f61..4a5e0cc527878 100644 Binary files a/llvm/test/Transforms/SampleProfile/Inputs/inline.extbinary.afdo and b/llvm/test/Transforms/SampleProfile/Inputs/inline.extbinary.afdo differ diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profsampleacc.extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/profsampleacc.extbinary.afdo index c05af6fb4a6dc..137f4c92f04c7 100644 Binary files a/llvm/test/Transforms/SampleProfile/Inputs/profsampleacc.extbinary.afdo and b/llvm/test/Transforms/SampleProfile/Inputs/profsampleacc.extbinary.afdo differ diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_multiple.s b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_multiple.s new file mode 100644 index 0000000000000..4e2999dd9c671 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_multiple.s @@ -0,0 +1,44 @@ +# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux -o %t.o +# RUN: llvm-dwarfdump -v %t.o | FileCheck %s + +# Test dumping of multiple separate debug_loclist contributions +# CHECK: .debug_loclists contents: +# CHECK: 0x00000000: locations list header: +# CHECK: 0x0000000c: +# CHECK: [0x0000000000000001, 0x0000000000000002): DW_OP_consts +7, DW_OP_stack_value +# CHECK: 0x00000014: locations list header: +# CHECK: [0x0000000000000005, 0x0000000000000007): DW_OP_consts +12, DW_OP_stack_value + + .section .debug_loclists,"",@progbits + .long .Ldebug_loclist_table_end0-.Ldebug_loclist_table_start0 # Length +.Ldebug_loclist_table_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 0 # Offset entry count + + .byte 4 # DW_LLE_offset_pair + .uleb128 1 # starting offset + .uleb128 2 # ending offset + .byte 3 # Loc 
expr size + .byte 17 # DW_OP_consts + .byte 7 # 7 + .byte 159 # DW_OP_stack_value + .byte 0 # DW_LLE_end_of_list +.Ldebug_loclist_table_end0: + .long .Ldebug_loclist_table_end1-.Ldebug_loclist_table_start1 # Length +.Ldebug_loclist_table_start1: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 0 # Offset entry count + + .byte 4 # DW_LLE_offset_pair + .uleb128 5 # starting offset + .uleb128 7 # ending offset + .byte 3 # Loc expr size + .byte 17 # DW_OP_consts + .byte 12 # 12 + .byte 159 # DW_OP_stack_value + .byte 0 # DW_LLE_end_of_list +.Ldebug_loclist_table_end1: diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s index 0b2ae5f8e7a4f..508d956654631 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s @@ -7,7 +7,7 @@ # CHECK: .debug_loclists contents: # CHECK-NEXT: 0x00000000: locations list header: length = 0x0000000e, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000 -# CHECK-NEXT: 0x00000000: +# CHECK-NEXT: 0x0000000c: # CHECK-NEXT: Addr idx 1 (w/ length 16): DW_OP_reg5 RDI .section .debug_loclists,"",@progbits diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-noise.test b/llvm/test/tools/llvm-exegesis/X86/analysis-noise.test new file mode 100644 index 0000000000000..6f4ecfcc0ad6d --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/analysis-noise.test @@ -0,0 +1,23 @@ +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clusters-output-file="" -analysis-numpoints=3 | FileCheck %s + +# CHECK: DOCTYPE +# CHECK: [noise] Cluster (1 points) + +--- +mode: latency +key: + instructions: + - 'ADD64rr RAX RAX RDI' + config: '' + register_initial_values: + - 'RAX=0x0' + - 'RDI=0x0' +cpu_name: haswell +llvm_triple: x86_64-unknown-linux-gnu +num_repetitions: 10000 +measurements: + - { key: latency, value: 1.0049, per_snippet_value: 1.0049 } +error: '' +info: Repeating a single implicitly serial instruction +assembled_snippet: 48B8000000000000000048BF00000000000000004801F84801F84801F84801F84801F84801F84801F84801F84801F84801F84801F84801F84801F84801F84801F84801F8C3 +... diff --git a/llvm/test/tools/llvm-mca/ARM/memcpy-ldm-stm.s b/llvm/test/tools/llvm-mca/ARM/memcpy-ldm-stm.s index 634a6aa966ab9..865aa7455125b 100644 --- a/llvm/test/tools/llvm-mca/ARM/memcpy-ldm-stm.s +++ b/llvm/test/tools/llvm-mca/ARM/memcpy-ldm-stm.s @@ -63,3 +63,4 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 1.0 1.0 0.0 ldm r2!, {r3, r4, r5, r6, r12, lr} # CHECK-NEXT: 1. 3 18.3 0.3 0.0 stm r0!, {r3, r4, r5, r6, r12, lr} +# CHECK-NEXT: 3 9.7 0.7 0.0 diff --git a/llvm/test/tools/llvm-mca/ARM/vld1-index-update.s b/llvm/test/tools/llvm-mca/ARM/vld1-index-update.s index 3bb02fcbcb4d1..776466cd60077 100644 --- a/llvm/test/tools/llvm-mca/ARM/vld1-index-update.s +++ b/llvm/test/tools/llvm-mca/ARM/vld1-index-update.s @@ -70,3 +70,4 @@ vld1.32 {d16, d17}, [r1]! # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 5 3.0 0.2 1.6 add r1, r1, r12 # CHECK-NEXT: 1. 5 4.0 0.0 0.0 vld1.32 {d16, d17}, [r1]! 
+# CHECK-NEXT: 5 3.5 0.1 0.8 diff --git a/llvm/test/tools/llvm-mca/SystemZ/stm-lm.s b/llvm/test/tools/llvm-mca/SystemZ/stm-lm.s index db2d79663d4ae..a4b5190c32ec7 100644 --- a/llvm/test/tools/llvm-mca/SystemZ/stm-lm.s +++ b/llvm/test/tools/llvm-mca/SystemZ/stm-lm.s @@ -70,3 +70,4 @@ lmg %r6, %r15, 48(%r15) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 9.7 0.3 0.0 stmg %r6, %r15, 48(%r15) # CHECK-NEXT: 1. 3 9.0 0.3 0.0 lmg %r6, %r15, 48(%r15) +# CHECK-NEXT: 3 9.3 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/Barcelona/clear-super-register-1.s index c8e18731a3e18..99b9e0ce6e46a 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/clear-super-register-1.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/clear-super-register-1.s @@ -61,3 +61,4 @@ bsf %rax, %rcx # CHECK-NEXT: 1. 2 5.5 1.5 0.0 lzcntl %ecx, %eax # CHECK-NEXT: 2. 2 8.5 0.0 0.0 andq %rcx, %rax # CHECK-NEXT: 3. 2 9.5 0.0 0.0 bsfq %rax, %rcx +# CHECK-NEXT: 2 7.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/clear-super-register-2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/clear-super-register-2.s index c2e79baea6760..da943dc3f1bba 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/clear-super-register-2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/clear-super-register-2.s @@ -66,6 +66,7 @@ addps %xmm0, %xmm0 # CHECK-NEXT: 0. 3 14.7 8.0 0.0 sqrtss %xmm0, %xmm0 # CHECK-NEXT: 1. 3 1.0 1.0 21.3 movss (%eax), %xmm0 # CHECK-NEXT: 2. 3 7.0 0.3 18.0 addps %xmm0, %xmm0 +# CHECK-NEXT: 3 7.6 3.1 13.1 # CHECK: [1] Code Region @@ -116,3 +117,4 @@ addps %xmm0, %xmm0 # CHECK-NEXT: 0. 3 21.7 15.0 0.0 sqrtsd %xmm0, %xmm0 # CHECK-NEXT: 1. 3 1.0 1.0 35.3 movsd (%eax), %xmm0 # CHECK-NEXT: 2. 3 7.0 0.3 32.0 addps %xmm0, %xmm0 +# CHECK-NEXT: 3 9.9 5.4 22.4 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-cmp.s b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-cmp.s index 9c02249ba6bf3..d42a49b4b862d 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-cmp.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-cmp.s @@ -68,3 +68,4 @@ cmovae %ebx, %eax # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 3.7 0.3 0.0 cmpl %eax, %eax # CHECK-NEXT: 1. 3 4.0 0.0 0.0 cmovael %ebx, %eax +# CHECK-NEXT: 3 3.8 0.2 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpeq.s b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpeq.s index 6f10c76d77abc..94ed93a94e7e3 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpeq.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpeq.s @@ -105,3 +105,4 @@ pcmpeqw %xmm0, %xmm0 # CHECK-NEXT: 4. 3 3.7 0.0 12.0 pcmpeqd %xmm0, %xmm0 # CHECK-NEXT: 5. 3 4.3 0.0 11.0 pcmpeqq %xmm0, %xmm0 # CHECK-NEXT: 6. 3 5.0 0.0 10.0 pcmpeqw %xmm0, %xmm0 +# CHECK-NEXT: 3 7.1 0.1 6.6 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpgt.s b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpgt.s index ceb8c52e3ad80..1e387719a10ab 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpgt.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpgt.s @@ -106,3 +106,4 @@ pcmpgtw %xmm0, %xmm0 # CHECK-NEXT: 4. 3 0.0 0.0 16.7 pcmpgtd %xmm0, %xmm0 # CHECK-NEXT: 5. 3 0.0 0.0 16.3 pcmpgtq %xmm0, %xmm0 # CHECK-NEXT: 6. 
3 0.0 0.0 16.0 pcmpgtw %xmm0, %xmm0 +# CHECK-NEXT: 3 4.9 0.0 9.4 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-1.s b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-1.s index 38aeb813b1129..b685814b88159 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-1.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-1.s @@ -69,3 +69,4 @@ sbb %eax, %eax # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 4.0 0.3 0.0 sbbl %edx, %edx # CHECK-NEXT: 1. 3 6.0 0.0 0.0 sbbl %eax, %eax +# CHECK-NEXT: 3 5.0 0.2 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-2.s index bae6f56342108..6df5e68f0bd21 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-2.s @@ -76,3 +76,4 @@ sbb %eax, %eax # CHECK-NEXT: 0. 3 5.0 0.3 0.0 imull %edx, %eax # CHECK-NEXT: 1. 3 1.0 0.3 6.0 addl %edx, %edx # CHECK-NEXT: 2. 3 8.0 0.0 0.0 sbbl %eax, %eax +# CHECK-NEXT: 3 4.7 0.2 2.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/int-to-fpu-forwarding-3.s b/llvm/test/tools/llvm-mca/X86/Barcelona/int-to-fpu-forwarding-3.s index 5cf5ca1cc8715..ccdf431ddc7a2 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/int-to-fpu-forwarding-3.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/int-to-fpu-forwarding-3.s @@ -72,3 +72,4 @@ pinsrw $1, %eax, %xmm0 # CHECK-NEXT: 0. 3 1.0 0.7 2.7 addl %eax, %eax # CHECK-NEXT: 1. 3 4.3 0.0 0.0 pinsrw $0, %eax, %xmm0 # CHECK-NEXT: 2. 3 5.7 0.0 0.0 pinsrw $1, %eax, %xmm0 +# CHECK-NEXT: 3 3.7 0.2 0.9 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s b/llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s index 27d207990297e..adf6c10d74936 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s @@ -138,6 +138,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [1] Code Region @@ -234,6 +235,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movw %di, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [2] Code Region @@ -330,6 +332,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [3] Code Region @@ -426,6 +429,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [4] Code Region @@ -522,6 +526,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2 # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [5] Code Region @@ -619,3 +624,4 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2 # CHECK-NEXT: 3. 
1 8.0 0.0 0.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 3.0 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/load-throughput.s b/llvm/test/tools/llvm-mca/X86/Barcelona/load-throughput.s index bd9ff550b7a73..558e91d439738 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/load-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/load-throughput.s @@ -137,6 +137,7 @@ movaps (%rbx), %xmm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil # CHECK-NEXT: 3. 1 2.0 2.0 0.0 movb (%rbx), %dil +# CHECK-NEXT: 1 1.5 1.5 0.0 # CHECK: [1] Code Region @@ -232,6 +233,7 @@ movaps (%rbx), %xmm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si # CHECK-NEXT: 3. 1 2.0 2.0 0.0 movw (%rbx), %di +# CHECK-NEXT: 1 1.5 1.5 0.0 # CHECK: [2] Code Region @@ -327,6 +329,7 @@ movaps (%rbx), %xmm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi # CHECK-NEXT: 3. 1 2.0 2.0 0.0 movl (%rbx), %edi +# CHECK-NEXT: 1 1.5 1.5 0.0 # CHECK: [3] Code Region @@ -422,6 +425,7 @@ movaps (%rbx), %xmm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi # CHECK-NEXT: 3. 1 2.0 2.0 0.0 movq (%rbx), %rdi +# CHECK-NEXT: 1 1.5 1.5 0.0 # CHECK: [4] Code Region @@ -517,6 +521,7 @@ movaps (%rbx), %xmm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2 # CHECK-NEXT: 3. 1 2.0 2.0 0.0 movd (%rbx), %mm3 +# CHECK-NEXT: 1 1.5 1.5 0.0 # CHECK: [5] Code Region @@ -612,3 +617,4 @@ movaps (%rbx), %xmm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2 # CHECK-NEXT: 3. 1 2.0 2.0 0.0 movaps (%rbx), %xmm3 +# CHECK-NEXT: 1 1.5 1.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/one-idioms.s b/llvm/test/tools/llvm-mca/X86/Barcelona/one-idioms.s index 5fc423132fab0..2021863416260 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/one-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/one-idioms.s @@ -94,3 +94,4 @@ pcmpeqw %xmm2, %xmm2 # CHECK-NEXT: 4. 1 1.0 0.0 7.0 pcmpeqd %xmm2, %xmm2 # CHECK-NEXT: 5. 1 2.0 0.0 6.0 pcmpeqq %xmm2, %xmm2 # CHECK-NEXT: 6. 1 3.0 0.0 5.0 pcmpeqw %xmm2, %xmm2 +# CHECK-NEXT: 1 2.7 0.3 3.7 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-2.s index 91ecc93c88091..5412821894296 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-2.s @@ -45,3 +45,4 @@ add %ecx, %ebx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx # CHECK-NEXT: 1. 1 2.0 2.0 0.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 2.7 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-3.s index 32e143e920d47..6cf7d93cb748e 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-3.s @@ -74,3 +74,4 @@ xor %bx, %dx # CHECK-NEXT: 0. 3 2.3 0.3 0.0 addw %cx, %dx # CHECK-NEXT: 1. 3 1.0 1.0 1.0 movw %ax, %dx # CHECK-NEXT: 2. 
3 1.7 0.0 0.3 xorw %bx, %dx +# CHECK-NEXT: 3 1.7 0.4 0.4 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-4.s index 3a9bd92d023f1..f349c3e0e61ee 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-4.s @@ -75,3 +75,4 @@ add %cx, %bx # CHECK-NEXT: 0. 3 4.3 0.3 0.0 imulw %ax, %bx # CHECK-NEXT: 1. 3 2.3 2.3 2.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 3 5.0 0.0 1.3 addw %cx, %bx +# CHECK-NEXT: 3 3.9 0.9 1.1 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-6.s index 18d1f5a215e8f..cb7cbda3d82f2 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-6.s @@ -77,3 +77,4 @@ lzcnt 2(%rsp), %cx # CHECK-NEXT: 0. 3 7.3 0.3 0.0 imull %edx, %ecx # CHECK-NEXT: 1. 3 2.3 2.3 1.7 lzcntw (%rsp), %cx # CHECK-NEXT: 2. 3 2.7 2.7 1.0 lzcntw 2(%rsp), %cx +# CHECK-NEXT: 3 4.1 1.8 0.9 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-7.s index b7634f890cd22..58981f9a3a914 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-7.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update-7.s @@ -96,3 +96,4 @@ cmpl $1025, %eax # CHECK-NEXT: 2. 5 9.4 0.0 0.0 shll $2, %eax # CHECK-NEXT: 3. 5 10.2 0.0 0.0 imull %ecx, %eax # CHECK-NEXT: 4. 5 12.8 0.0 0.0 cmpl $1025, %eax +# CHECK-NEXT: 5 10.1 0.1 0.2 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update.s index 995bb35d3ac32..e5cc4bc0f2c40 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/partial-reg-update.s @@ -45,3 +45,4 @@ add %ecx, %ebx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx # CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl # CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 3.3 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-1.s index df66a44224a34..deb69273d7c31 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-1.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-1.s @@ -46,3 +46,4 @@ mulps (%rdi), %xmm1 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 addps %xmm0, %xmm1 # CHECK-NEXT: 1. 1 1.0 0.0 0.0 mulps (%rdi), %xmm1 +# CHECK-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-2.s index 6f04df6e13b55..d57e9fb4a7987 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-2.s @@ -45,3 +45,4 @@ imull (%rdi) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi # CHECK-NEXT: 1. 1 1.0 1.0 0.0 imull (%rdi) +# CHECK-NEXT: 1 1.0 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-3.s index 1b39f8bcdd6dd..071207df27e77 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-3.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/read-advance-3.s @@ -45,3 +45,4 @@ add %rdx, %r8 # CHECK-NEXT: 0. 1 1.0 1.0 0.0 addq %rdi, %rsi # CHECK-NEXT: 1. 1 1.0 0.0 0.0 addq (%rsp), %rsi # CHECK-NEXT: 2. 
1 1.0 1.0 4.0 addq %rdx, %r8 +# CHECK-NEXT: 1 1.0 0.7 1.3 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-1.s b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-1.s index 1e4d26ae0208c..9e8de6266cc39 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-1.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-1.s @@ -78,3 +78,4 @@ addps %xmm1, %xmm1 # CHECK-NEXT: 0. 3 0.0 0.0 3.3 xorps %xmm0, %xmm0 # CHECK-NEXT: 1. 3 1.3 1.3 1.3 movaps %xmm0, %xmm1 # CHECK-NEXT: 2. 3 2.0 0.0 0.0 addps %xmm1, %xmm1 +# CHECK-NEXT: 3 1.1 0.4 1.6 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-2.s index 6f22cdc0b7e5c..a20fb0ce4adc0 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-2.s @@ -119,3 +119,4 @@ movdqu %xmm5, %xmm0 # CHECK-NEXT: 6. 3 7.7 0.0 0.0 movupd %xmm3, %xmm4 # CHECK-NEXT: 7. 3 8.3 0.0 0.0 movdqa %xmm4, %xmm5 # CHECK-NEXT: 8. 3 9.0 0.0 0.0 movdqu %xmm5, %xmm0 +# CHECK-NEXT: 3 5.7 0.2 0.9 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-3.s b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-3.s index e85ce5254bcea..30f2e7d8a20b6 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-3.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-3.s @@ -104,3 +104,4 @@ movdqu %xmm5, %xmm0 # CHECK-NEXT: 4. 3 7.7 0.0 0.0 movupd %xmm3, %xmm4 # CHECK-NEXT: 5. 3 8.3 0.0 0.0 movdqa %xmm4, %xmm5 # CHECK-NEXT: 6. 3 9.0 0.0 0.0 movdqu %xmm5, %xmm0 +# CHECK-NEXT: 3 7.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-4.s b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-4.s index 339ec06bcc8d1..390dee10ed10c 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-4.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-4.s @@ -90,3 +90,4 @@ mov %edx, %eax # CHECK-NEXT: 2. 3 4.7 0.0 0.0 movl %ebx, %ecx # CHECK-NEXT: 3. 3 5.3 0.0 0.0 movl %ecx, %edx # CHECK-NEXT: 4. 3 6.0 0.0 0.0 movl %edx, %eax +# CHECK-NEXT: 3 4.7 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-5.s b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-5.s index 66ce02cb0fcc8..2408e794fcdf3 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-5.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-5.s @@ -90,3 +90,4 @@ mov %rdx, %rax # CHECK-NEXT: 2. 3 4.7 0.0 0.0 movq %rbx, %rcx # CHECK-NEXT: 3. 3 5.3 0.0 0.0 movq %rcx, %rdx # CHECK-NEXT: 4. 3 6.0 0.0 0.0 movq %rdx, %rax +# CHECK-NEXT: 3 4.7 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-6.s b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-6.s index 70bd65681feb2..7380eb2300660 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-6.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-6.s @@ -96,3 +96,4 @@ mov %esi, %ecx # CHECK-NEXT: 3. 3 3.0 0.0 0.0 addq %rcx, %rcx # CHECK-NEXT: 4. 3 3.3 0.0 0.0 addq %rcx, %rcx # CHECK-NEXT: 5. 
3 1.0 1.0 2.3 movl %esi, %ecx +# CHECK-NEXT: 3 1.8 0.2 1.1 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s b/llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s index d9db803083234..08a9c47302267 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/store-throughput.s @@ -138,6 +138,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [1] Code Region @@ -234,6 +235,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [2] Code Region @@ -330,6 +332,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [3] Code Region @@ -426,6 +429,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [4] Code Region @@ -522,6 +526,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movd %mm1, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movd %mm2, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movd %mm3, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [5] Code Region @@ -618,3 +623,4 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movaps %xmm2, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Barcelona/zero-idioms.s index 3a14fbc66bee0..62ff7b54cb6c8 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/zero-idioms.s @@ -240,3 +240,4 @@ pxor %xmm2, %xmm2 # CHECK-NEXT: 32. 1 3.0 0.0 25.0 xorpd %xmm1, %xmm1 # CHECK-NEXT: 33. 1 28.0 0.0 0.0 pxor %mm2, %mm2 # CHECK-NEXT: 34. 1 3.0 0.0 26.0 pxor %xmm2, %xmm2 +# CHECK-NEXT: 1 6.7 0.2 10.3 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/add-sequence.s b/llvm/test/tools/llvm-mca/X86/BdVer2/add-sequence.s index c377640a02d56..918fb1d1d3a41 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/add-sequence.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/add-sequence.s @@ -108,3 +108,4 @@ add %eax, %edx # CHECK-NEXT: 0. 10 12.0 2.0 0.0 addl %eax, %ecx # CHECK-NEXT: 1. 10 10.7 1.8 1.0 addl %esi, %eax # CHECK-NEXT: 2. 10 12.5 1.0 0.0 addl %eax, %edx +# CHECK-NEXT: 10 11.7 1.6 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s index 647ce9830922d..bc0aa4e6e3344 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s @@ -61,3 +61,4 @@ bsf %rax, %rcx # CHECK-NEXT: 1. 2 4.0 2.0 2.5 lzcntl %ecx, %eax # CHECK-NEXT: 2. 2 6.0 0.0 1.5 andq %rcx, %rax # CHECK-NEXT: 3. 
2 6.0 0.0 0.0 bsfq %rax, %rcx +# CHECK-NEXT: 2 4.8 0.6 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s index e4ff3efe90cf3..7587291e0a1de 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s @@ -135,3 +135,4 @@ vandps %xmm4, %xmm1, %xmm0 # CHECK-NEXT: 15. 2 29.5 18.5 0.0 vaddps %ymm3, %ymm1, %ymm4 # CHECK-NEXT: 16. 2 29.5 19.0 0.0 vaddps %ymm3, %ymm1, %ymm4 # CHECK-NEXT: 17. 2 34.5 0.0 0.0 vandps %xmm4, %xmm1, %xmm0 +# CHECK-NEXT: 2 24.0 9.6 0.2 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s index 1b6731298a17d..32a440966f876 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s @@ -63,6 +63,7 @@ addps %xmm0, %xmm0 # CHECK-NEXT: 0. 2 7.0 1.0 0.0 sqrtss %xmm0, %xmm0 # CHECK-NEXT: 1. 2 2.0 2.0 8.5 movss (%eax), %xmm0 # CHECK-NEXT: 2. 2 8.5 1.5 2.5 addps %xmm0, %xmm0 +# CHECK-NEXT: 2 5.8 1.5 3.7 # CHECK: [1] Code Region @@ -110,3 +111,4 @@ addps %xmm0, %xmm0 # CHECK-NEXT: 0. 2 7.0 1.0 0.0 sqrtsd %xmm0, %xmm0 # CHECK-NEXT: 1. 2 2.0 2.0 8.5 movsd (%eax), %xmm0 # CHECK-NEXT: 2. 2 8.5 1.5 2.5 addps %xmm0, %xmm0 +# CHECK-NEXT: 2 5.8 1.5 3.7 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s index ebed6faf9454e..fbf47e4d83ef8 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s @@ -84,3 +84,4 @@ cmovae %ebx, %eax # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 1.3 1.3 1.0 cmpl %eax, %eax # CHECK-NEXT: 1. 3 3.7 0.3 0.0 cmovael %ebx, %eax +# CHECK-NEXT: 3 2.5 0.8 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s index d39aa7c776f66..fa82b29a39755 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s @@ -100,3 +100,4 @@ vpcmpeqq %xmm3, %xmm3, %xmm0 # CHECK-NEXT: 1. 3 6.0 6.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2 # CHECK-NEXT: 2. 3 4.0 4.0 2.0 vpcmpeqd %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 3. 3 6.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: 3 5.0 3.5 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s index 85135db464227..47c4de4ee4ed6 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s @@ -100,3 +100,4 @@ vpcmpgtq %xmm3, %xmm3, %xmm0 # CHECK-NEXT: 1. 3 0.0 0.0 1.3 vpcmpgtw %xmm1, %xmm1, %xmm2 # CHECK-NEXT: 2. 3 0.0 0.0 1.3 vpcmpgtd %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 3. 3 1.0 1.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: 3 0.3 0.3 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s index be0bf9d7dd4be..233586223ad9a 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s @@ -85,3 +85,4 @@ sbb %eax, %eax # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 2.7 0.3 0.0 sbbl %edx, %edx # CHECK-NEXT: 1. 
3 3.7 0.0 0.0 sbbl %eax, %eax +# CHECK-NEXT: 3 3.2 0.2 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s index 35fd91b2602b3..0b48eb597fc14 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s @@ -93,3 +93,4 @@ sbb %eax, %eax # CHECK-NEXT: 0. 3 5.7 2.0 0.0 imull %edx, %eax # CHECK-NEXT: 1. 3 1.7 0.7 6.7 addl %edx, %edx # CHECK-NEXT: 2. 3 5.0 2.7 3.0 sbbl %eax, %eax +# CHECK-NEXT: 3 4.1 1.8 3.2 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s index 01c5760bedb04..bf49f18c3a81c 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s @@ -108,3 +108,4 @@ vpaddd %xmm0, %xmm0, %xmm3 # CHECK-NEXT: 0. 10 25.0 0.1 0.0 vpmuldq %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1. 10 28.7 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0 # CHECK-NEXT: 2. 10 30.5 0.0 0.0 vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: 10 28.1 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dot-product.s index 98cda18047050..15c12903c94b6 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dot-product.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dot-product.s @@ -87,3 +87,4 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK-NEXT: 0. 3 1.0 1.0 13.7 vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1. 3 6.0 0.7 5.7 vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 2. 3 16.0 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: 3 7.7 0.6 6.4 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s index 90969c10d88ef..3a94cf970c7d6 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s @@ -42,3 +42,4 @@ vhaddps (%rdi), %xmm1, %xmm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %xmm1, %xmm2 +# CHECK-NEXT: 1 1.0 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s index 792ecc6d75fa6..cd05a43407a87 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s @@ -42,3 +42,4 @@ vhaddps (%rdi), %ymm1, %ymm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %ymm1, %ymm2 +# CHECK-NEXT: 1 1.0 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s index 632452f9b599a..1df029fdcb24e 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s @@ -87,3 +87,4 @@ vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK-NEXT: 0. 3 1.0 0.7 9.3 addl %eax, %eax # CHECK-NEXT: 1. 3 14.3 0.0 0.0 vpinsrb $0, %eax, %xmm0, %xmm0 # CHECK-NEXT: 2. 
3 15.7 0.0 0.0 vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 3 10.3 0.2 3.1 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s index 13e7e7ad07b57..54c07d70e2a74 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s @@ -106,3 +106,4 @@ vmovaps %xmm0, 48(%rdi) # CHECK-NEXT: 5. 1 17.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 18.0 0.0 0.0 vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 7. 1 23.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 1 12.0 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s index 4f53cce27e235..f326028e12ab5 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s @@ -157,6 +157,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movb (%rdx), %sil # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [1] Code Region @@ -272,6 +273,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movw (%rdx), %si # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movw %di, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [2] Code Region @@ -387,6 +389,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movl (%rdx), %esi # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [3] Code Region @@ -502,6 +505,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movq (%rdx), %rsi # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [4] Code Region @@ -619,6 +623,7 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movd (%rdx), %mm2 # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 # CHECK: [5] Code Region @@ -734,3 +739,4 @@ movaps %xmm3, (%rbx) # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1 # CHECK-NEXT: 2. 1 2.0 2.0 0.0 movaps (%rdx), %xmm2 # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 2.8 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s index dfb45af19f3bc..49560697379b9 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s @@ -162,6 +162,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movb (%rcx), %bpl # CHECK-NEXT: 2. 1 3.0 3.0 0.0 movb (%rdx), %sil # CHECK-NEXT: 3. 1 3.0 3.0 0.0 movb (%rbx), %dil +# CHECK-NEXT: 1 2.0 2.0 0.0 # CHECK: [1] Code Region @@ -275,6 +276,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movw (%rcx), %bp # CHECK-NEXT: 2. 1 3.0 3.0 0.0 movw (%rdx), %si # CHECK-NEXT: 3. 1 3.0 3.0 0.0 movw (%rbx), %di +# CHECK-NEXT: 1 2.0 2.0 0.0 # CHECK: [2] Code Region @@ -388,6 +390,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movl (%rcx), %ebp # CHECK-NEXT: 2. 1 3.0 3.0 0.0 movl (%rdx), %esi # CHECK-NEXT: 3. 1 3.0 3.0 0.0 movl (%rbx), %edi +# CHECK-NEXT: 1 2.0 2.0 0.0 # CHECK: [3] Code Region @@ -501,6 +504,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movq (%rcx), %rbp # CHECK-NEXT: 2. 1 3.0 3.0 0.0 movq (%rdx), %rsi # CHECK-NEXT: 3. 
1 3.0 3.0 0.0 movq (%rbx), %rdi +# CHECK-NEXT: 1 2.0 2.0 0.0 # CHECK: [4] Code Region @@ -615,6 +619,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movd (%rcx), %mm1 # CHECK-NEXT: 2. 1 4.0 4.0 0.0 movd (%rdx), %mm2 # CHECK-NEXT: 3. 1 4.0 4.0 0.0 movd (%rbx), %mm3 +# CHECK-NEXT: 1 2.5 2.5 0.0 # CHECK: [5] Code Region @@ -729,6 +734,7 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 movaps (%rcx), %xmm1 # CHECK-NEXT: 2. 1 4.0 4.0 0.0 movaps (%rdx), %xmm2 # CHECK-NEXT: 3. 1 4.0 4.0 0.0 movaps (%rbx), %xmm3 +# CHECK-NEXT: 1 2.5 2.5 0.0 # CHECK: [6] Code Region @@ -842,3 +848,4 @@ vmovaps (%rbx), %ymm3 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 vmovaps (%rcx), %ymm1 # CHECK-NEXT: 2. 1 3.0 3.0 0.0 vmovaps (%rdx), %ymm2 # CHECK-NEXT: 3. 1 3.0 3.0 0.0 vmovaps (%rbx), %ymm3 +# CHECK-NEXT: 1 2.0 2.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s index 6e215f2420f23..fb96ce5d75610 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s @@ -106,3 +106,4 @@ vmovaps %xmm0, 48(%rdi) # CHECK-NEXT: 5. 1 9.0 1.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 3.0 3.0 2.0 vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 7. 1 10.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 1 5.3 1.3 0.5 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/one-idioms.s b/llvm/test/tools/llvm-mca/X86/BdVer2/one-idioms.s index 7c2966796a1dd..d3a81e4da48ac 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/one-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/one-idioms.s @@ -165,3 +165,4 @@ vpcmpeqw %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 12. 1 8.0 8.0 0.0 vpcmpeqd %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 13. 1 9.0 2.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 14. 1 10.0 10.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 4.9 3.8 0.2 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s index fe8b159edcca7..fed8508572a12 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s @@ -46,3 +46,4 @@ add %ecx, %ebx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx # CHECK-NEXT: 1. 1 6.0 0.0 0.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 1 8.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 5.0 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s index 20da1ba38291c..14e191f564119 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s @@ -92,3 +92,4 @@ xor %bx, %dx # CHECK-NEXT: 0. 3 3.7 0.3 0.0 addw %cx, %dx # CHECK-NEXT: 1. 3 4.3 0.0 0.0 movw %ax, %dx # CHECK-NEXT: 2. 3 5.0 0.0 0.0 xorw %bx, %dx +# CHECK-NEXT: 3 4.3 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s index a8a3958fafd31..5fa4e5ea96393 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s @@ -92,3 +92,4 @@ add %cx, %bx # CHECK-NEXT: 0. 3 6.7 0.7 0.0 imulw %ax, %bx # CHECK-NEXT: 1. 3 9.7 0.0 0.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 
3 11.7 0.0 0.0 addw %cx, %bx +# CHECK-NEXT: 3 9.3 0.2 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s index 82afe0a80bbc2..03ad95df54925 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s @@ -93,3 +93,4 @@ lzcnt 2(%rsp), %cx # CHECK-NEXT: 0. 3 7.7 0.3 0.0 imull %edx, %ecx # CHECK-NEXT: 1. 3 7.3 0.0 0.0 lzcntw (%rsp), %cx # CHECK-NEXT: 2. 3 8.7 1.0 0.0 lzcntw 2(%rsp), %cx +# CHECK-NEXT: 3 7.9 0.4 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s index bde293115a642..3c0c38d7cc05f 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s @@ -45,3 +45,4 @@ add %ecx, %ebx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx # CHECK-NEXT: 1. 1 5.0 0.0 0.0 addb %al, %cl # CHECK-NEXT: 2. 1 6.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 4.0 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s index e6b70c751d650..b1a82c6ad0b4f 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s @@ -121,3 +121,4 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: 5. 2 3.5 3.5 12.0 vsqrtps %xmm0, %xmm2 # CHECK-NEXT: 6. 2 19.5 19.5 0.0 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 7. 2 7.5 7.5 8.0 vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: 2 7.9 7.9 6.1 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s b/llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s index c773cc96e0f05..7490f71e5fa5d 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s @@ -42,3 +42,4 @@ stmxcsr (%rsp) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 51.5 0.5 0.0 int3 # CHECK-NEXT: 1. 2 151.0 0.0 0.0 stmxcsr (%rsp) +# CHECK-NEXT: 2 101.3 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/rank.s b/llvm/test/tools/llvm-mca/X86/BdVer2/rank.s index 66b842c870015..c929b8bae3b14 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/rank.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/rank.s @@ -122,3 +122,4 @@ add %ebx, %eax # CHECK-NEXT: 5. 3 10.7 1.0 0.0 addl %edx, %esi # CHECK-NEXT: 6. 3 12.0 1.0 0.0 addl %ebx, %eax # CHECK-NEXT: 7. 3 13.0 0.0 0.0 addl %ebx, %eax +# CHECK-NEXT: 3 9.9 1.1 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s index b5be13873da59..e62f816acb074 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s @@ -46,3 +46,4 @@ vmulps (%rdi), %xmm1, %xmm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2 +# CHECK-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s index ce33d9d50b9a8..be28b7af9dafd 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s @@ -45,3 +45,4 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi # CHECK-NEXT: 1. 
1 5.0 4.0 0.0 imull (%rdi) +# CHECK-NEXT: 1 3.0 2.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s index 838ac4d7880fe..e5e02b264e508 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s @@ -45,3 +45,4 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 addq %rdi, %rsi # CHECK-NEXT: 1. 1 1.0 0.0 0.0 addq (%rsp), %rsi # CHECK-NEXT: 2. 1 3.0 3.0 2.0 addq %rdx, %r8 +# CHECK-NEXT: 1 1.7 1.3 0.7 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s index fc9e5c0f0cc62..7bfb210abf9ce 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s @@ -104,3 +104,4 @@ vaddps %xmm1, %xmm1, %xmm2 # CHECK-NEXT: 0. 3 0.0 0.0 5.3 vxorps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: 1. 3 1.7 1.7 3.0 vmovaps %xmm0, %xmm1 # CHECK-NEXT: 2. 3 3.3 1.0 0.0 vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 3 1.7 0.9 2.8 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s index c701967dd448c..a6e2fa94ace24 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s @@ -144,3 +144,4 @@ movdqu %xmm5, %xmm0 # CHECK-NEXT: 6. 3 4.7 0.0 0.0 movupd %xmm3, %xmm4 # CHECK-NEXT: 7. 3 5.3 0.0 0.0 movdqa %xmm4, %xmm5 # CHECK-NEXT: 8. 3 6.0 0.0 0.0 movdqu %xmm5, %xmm0 +# CHECK-NEXT: 3 3.0 0.4 1.4 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s index 4b53663690d6f..0a8fda9fe2eb7 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s @@ -129,3 +129,4 @@ vmovdqu %xmm5, %xmm0 # CHECK-NEXT: 4. 3 5.7 0.0 0.0 vmovupd %xmm3, %xmm4 # CHECK-NEXT: 5. 3 6.3 0.0 0.0 vmovdqa %xmm4, %xmm5 # CHECK-NEXT: 6. 3 7.0 0.0 0.0 vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: 3 4.5 0.5 0.8 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s index 0b4afc3f25e21..94163b1162b4d 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s @@ -115,3 +115,4 @@ mov %edx, %eax # CHECK-NEXT: 2. 3 4.7 0.0 0.0 movl %ebx, %ecx # CHECK-NEXT: 3. 3 5.3 0.0 0.0 movl %ecx, %edx # CHECK-NEXT: 4. 3 6.0 0.0 0.0 movl %edx, %eax +# CHECK-NEXT: 3 4.0 0.8 0.7 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s index c0cd38a031b11..8ccbbeab3cc2d 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s @@ -115,3 +115,4 @@ mov %rdx, %rax # CHECK-NEXT: 2. 3 4.7 0.0 0.0 movq %rbx, %rcx # CHECK-NEXT: 3. 3 5.3 0.0 0.0 movq %rcx, %rdx # CHECK-NEXT: 4. 
3 6.0 0.0 0.0 movq %rdx, %rax +# CHECK-NEXT: 3 4.0 0.8 0.7 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-1.s index acbe8e8548198..b93ee5c09d7fc 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-1.s @@ -100,3 +100,4 @@ vmulps %xmm0, %xmm0, %xmm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 5 20.2 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: 1. 5 25.2 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 5 22.7 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-2.s index 316b9c7a07891..c68b610049fd3 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-2.s @@ -100,3 +100,4 @@ vmulps %xmm0, %xmm0, %xmm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 5 14.0 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: 1. 5 15.8 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 5 14.9 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-5.s index 61bcde757f144..6c86b6ebe2238 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-5.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-5.s @@ -151,3 +151,4 @@ # CHECK-NEXT: 30. 1 24.0 11.0 0.0 vaddps %ymm3, %ymm0, %ymm4 # CHECK-NEXT: 31. 1 25.0 12.0 0.0 vaddps %ymm3, %ymm0, %ymm5 # CHECK-NEXT: 32. 1 25.0 13.0 0.0 vaddps %ymm3, %ymm0, %ymm6 +# CHECK-NEXT: 1 15.6 11.2 0.6 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s index b24272c4166ad..067301b06a513 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s @@ -162,6 +162,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movb %bpl, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movb %sil, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movb %dil, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [1] Code Region @@ -275,6 +276,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movw %bp, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movw %si, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movw %di, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [2] Code Region @@ -388,6 +390,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movl %ebp, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movl %esi, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movl %edi, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [3] Code Region @@ -501,6 +504,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movq %rbp, (%rcx) # CHECK-NEXT: 2. 1 3.0 0.0 0.0 movq %rsi, (%rdx) # CHECK-NEXT: 3. 1 4.0 0.0 0.0 movq %rdi, (%rbx) +# CHECK-NEXT: 1 2.5 0.3 0.0 # CHECK: [4] Code Region @@ -616,6 +620,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 1. 1 3.0 0.0 0.0 movd %mm1, (%rcx) # CHECK-NEXT: 2. 1 5.0 0.0 0.0 movd %mm2, (%rdx) # CHECK-NEXT: 3. 1 7.0 0.0 0.0 movd %mm3, (%rbx) +# CHECK-NEXT: 1 4.0 0.3 0.0 # CHECK: [5] Code Region @@ -730,6 +735,7 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 0.0 0.0 movaps %xmm1, (%rcx) # CHECK-NEXT: 2. 1 4.0 1.0 0.0 movaps %xmm2, (%rdx) # CHECK-NEXT: 3. 1 5.0 0.0 0.0 movaps %xmm3, (%rbx) +# CHECK-NEXT: 1 3.0 0.5 0.0 # CHECK: [6] Code Region @@ -843,3 +849,4 @@ vmovaps %ymm3, (%rbx) # CHECK-NEXT: 1. 1 2.0 1.0 0.0 vmovaps %ymm1, (%rcx) # CHECK-NEXT: 2. 1 35.0 33.0 0.0 vmovaps %ymm2, (%rdx) # CHECK-NEXT: 3. 
1 36.0 1.0 0.0 vmovaps %ymm3, (%rbx) +# CHECK-NEXT: 1 18.5 9.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s b/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s index 0246216b73496..ab7a53438afc3 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s @@ -80,3 +80,4 @@ vbroadcastss (%rax), %ymm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 1.0 1.0 2.7 leaq 8(%rsp,%rdi,2), %rax # CHECK-NEXT: 1. 3 1.7 0.7 0.0 vbroadcastss (%rax), %ymm0 +# CHECK-NEXT: 3 1.3 0.8 1.3 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s index c977e843c6cdb..96e19db3b3a3b 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s @@ -41,3 +41,4 @@ vandps (%rdi), %xmm1, %xmm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1. 1 1.0 0.0 0.0 vandps (%rdi), %xmm1, %xmm2 +# CHECK-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s index 55932ecd24526..ff74bc352ca67 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s @@ -41,3 +41,4 @@ vandps (%rdi), %ymm1, %ymm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 1. 1 1.0 0.0 0.0 vandps (%rdi), %ymm1, %ymm2 +# CHECK-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s index 2c00cc7ffefa4..f694a0429e2a1 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s @@ -102,3 +102,4 @@ # CHECK-NEXT: 3. 2 16.0 0.0 6.0 vaddps %ymm4, %ymm5, %ymm6 # CHECK-NEXT: 4. 2 20.0 0.0 4.0 vmulps %ymm6, %ymm3, %ymm4 # CHECK-NEXT: 5. 2 25.0 0.0 1.5 vaddps %ymm4, %ymm5, %ymm0 +# CHECK-NEXT: 2 14.8 0.4 5.3 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s index af3b709bb4e62..33ea990b57b5e 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s @@ -102,3 +102,4 @@ # CHECK-NEXT: 3. 2 8.0 0.0 6.0 vaddps %ymm4, %ymm5, %ymm6 # CHECK-NEXT: 4. 2 12.0 0.0 4.0 vmulps %ymm6, %ymm3, %ymm4 # CHECK-NEXT: 5. 2 17.0 0.0 1.5 vaddps %ymm4, %ymm5, %ymm0 +# CHECK-NEXT: 2 8.7 0.3 5.3 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s b/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s index ea623c1b2e626..5338c3c560448 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s @@ -120,6 +120,7 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK-NEXT: 0. 3 2.0 2.0 0.0 vaddps %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 1. 3 3.0 3.0 1.7 vxorps %ymm1, %ymm1, %ymm1 # CHECK-NEXT: 2. 3 4.3 0.0 0.3 vblendps $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: 3 3.1 1.7 0.7 # CHECK: [1] Code Region - ZERO-IDIOM-2 @@ -205,6 +206,7 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK-NEXT: 0. 3 2.0 2.0 0.0 vaddpd %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 1. 3 3.0 3.0 1.7 vxorpd %ymm1, %ymm1, %ymm1 # CHECK-NEXT: 2. 
3 4.3 0.0 0.3 vblendpd $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: 3 3.1 1.7 0.7 # CHECK: [2] Code Region - ZERO-IDIOM-3 @@ -284,6 +286,7 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 2.0 2.0 0.0 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1. 3 2.0 2.0 3.0 vandnps %ymm2, %ymm2, %ymm3 +# CHECK-NEXT: 3 2.0 2.0 1.5 # CHECK: [3] Code Region - ZERO-IDIOM-4 @@ -363,6 +366,7 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 2.0 2.0 0.0 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1. 3 2.0 2.0 3.0 vandnps %ymm2, %ymm2, %ymm3 +# CHECK-NEXT: 3 2.0 2.0 1.5 # CHECK: [4] Code Region - ZERO-IDIOM-5 @@ -442,3 +446,4 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 7.0 0.3 0.0 vperm2f128 $136, %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 1. 3 9.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0 +# CHECK-NEXT: 3 8.0 0.2 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s index 7db6e09c77d8b..c0509d8f451c8 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s @@ -450,3 +450,4 @@ vpxor %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 68. 1 0.0 0.0 11.0 vxorps %xmm4, %xmm4, %xmm5 # CHECK-NEXT: 69. 1 0.0 0.0 11.0 vxorpd %xmm1, %xmm1, %xmm3 # CHECK-NEXT: 70. 1 0.0 0.0 12.0 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 1.2 1.2 4.1 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s index 16a9ca4b51beb..4fdb7c11d6fd8 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s @@ -448,3 +448,4 @@ vpxor %ymm3, %ymm3, %ymm5 # CHECK-NEXT: 72. 1 0.0 0.0 3.0 vxorpd %ymm1, %ymm1, %ymm3 # CHECK-NEXT: 73. 1 0.0 0.0 3.0 vpxor %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 74. 1 0.0 0.0 3.0 vpxor %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0.9 0.2 1.8 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s b/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s index 59b20c755ae0a..80f42c9e8f6bf 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/add-sequence.s @@ -99,3 +99,4 @@ add %eax, %edx # CHECK-NEXT: 0. 10 2.5 0.4 0.0 addl %eax, %ecx # CHECK-NEXT: 1. 10 2.1 0.7 0.5 addl %esi, %eax # CHECK-NEXT: 2. 10 2.6 0.0 0.3 addl %eax, %edx +# CHECK-NEXT: 10 2.4 0.4 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s index 4091ad8d7157f..30b05eb290efd 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s @@ -99,3 +99,4 @@ add %edx, %eax # CHECK-NEXT: 1. 1 2.0 0.0 0.0 addl %ebx, %ecx # CHECK-NEXT: 2. 1 2.0 0.0 0.0 addl %ecx, %edx # CHECK-NEXT: 3. 1 3.0 0.0 0.0 addl %edx, %eax +# CHECK-NEXT: 1 2.0 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s index 3b0639a0c5a38..d78e887f03b80 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s @@ -124,3 +124,4 @@ vmovaps %xmm0, 48(%rdi) # CHECK-NEXT: 5. 1 16.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 16.0 0.0 0.0 vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 7. 
1 21.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 1 11.0 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s index 0681f7ddc06fe..6483809deda3a 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s @@ -61,3 +61,4 @@ bsf %rax, %rcx # CHECK-NEXT: 1. 2 1.5 1.0 4.5 lzcntl %ecx, %eax # CHECK-NEXT: 2. 2 2.0 0.0 4.5 andq %rcx, %rax # CHECK-NEXT: 3. 2 2.0 0.0 0.5 bsfq %rax, %rcx +# CHECK-NEXT: 2 1.8 0.4 2.4 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s index 649c8f982d5d9..987c54a07567d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s @@ -121,3 +121,4 @@ vandps %xmm4, %xmm1, %xmm0 # CHECK-NEXT: 15. 2 21.0 21.0 13.5 vaddps %ymm3, %ymm1, %ymm4 # CHECK-NEXT: 16. 2 22.0 22.0 12.5 vaddps %ymm3, %ymm1, %ymm4 # CHECK-NEXT: 17. 2 24.0 0.0 11.5 vandps %xmm4, %xmm1, %xmm0 +# CHECK-NEXT: 2 17.5 9.9 21.6 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s b/llvm/test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s index 865dc56a7b66f..c0b9c7ed37d95 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s @@ -110,6 +110,7 @@ lock cmpxchg16b (%rsp) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rax # CHECK-NEXT: 1. 1 3.0 0.0 0.0 cmpxchgq %rcx, (%rdx) +# CHECK-NEXT: 1 2.0 0.5 0.0 # CHECK: [1] Code Region @@ -176,6 +177,7 @@ lock cmpxchg16b (%rsp) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rcx, %rcx # CHECK-NEXT: 1. 1 3.0 0.0 0.0 cmpxchgq %rcx, (%rdx) +# CHECK-NEXT: 1 2.0 0.5 0.0 # CHECK: [2] Code Region @@ -242,6 +244,7 @@ lock cmpxchg16b (%rsp) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rax # CHECK-NEXT: 1. 1 3.0 0.0 0.0 lock cmpxchgq %rcx, (%rdx) +# CHECK-NEXT: 1 2.0 0.5 0.0 # CHECK: [3] Code Region @@ -308,6 +311,7 @@ lock cmpxchg16b (%rsp) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rcx, %rcx # CHECK-NEXT: 1. 1 3.0 0.0 0.0 lock cmpxchgq %rcx, (%rdx) +# CHECK-NEXT: 1 2.0 0.5 0.0 # CHECK: [4] Code Region @@ -378,6 +382,7 @@ lock cmpxchg16b (%rsp) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %eax, %eax # CHECK-NEXT: 1. 1 2.0 2.0 0.0 imull %edx, %edx # CHECK-NEXT: 2. 1 1.0 0.0 0.0 cmpxchg8b (%rsp) +# CHECK-NEXT: 1 1.3 1.0 0.0 # CHECK: [5] Code Region @@ -448,6 +453,7 @@ lock cmpxchg16b (%rsp) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %eax, %eax # CHECK-NEXT: 1. 1 2.0 2.0 0.0 imull %edx, %edx # CHECK-NEXT: 2. 1 1.0 0.0 0.0 cmpxchg16b (%rsp) +# CHECK-NEXT: 1 1.3 1.0 0.0 # CHECK: [6] Code Region @@ -518,6 +524,7 @@ lock cmpxchg16b (%rsp) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %ebx, %ebx # CHECK-NEXT: 1. 1 2.0 2.0 0.0 imull %ecx, %ecx # CHECK-NEXT: 2. 1 1.0 0.0 0.0 lock cmpxchg8b (%rsp) +# CHECK-NEXT: 1 1.3 1.0 0.0 # CHECK: [7] Code Region @@ -588,3 +595,4 @@ lock cmpxchg16b (%rsp) # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %ebx, %ebx # CHECK-NEXT: 1. 1 2.0 2.0 0.0 imull %ecx, %ecx # CHECK-NEXT: 2. 
1 1.0 0.0 0.0 lock cmpxchg16b (%rsp) +# CHECK-NEXT: 1 1.3 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s index 7fd97d32d0fb8..25762ec14ad3b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s @@ -75,3 +75,4 @@ cmovae %ebx, %eax # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 1.0 1.0 0.0 cmpl %eax, %eax # CHECK-NEXT: 1. 3 2.0 0.0 0.0 cmovael %ebx, %eax +# CHECK-NEXT: 3 1.5 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s index aed7d75ffdc55..4f7187fa1e14b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s @@ -90,3 +90,4 @@ vpcmpeqq %xmm3, %xmm3, %xmm0 # CHECK-NEXT: 1. 3 1.0 1.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2 # CHECK-NEXT: 2. 3 1.0 1.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 3. 3 1.0 1.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: 3 1.0 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s index ef6faa58eba43..014062abed383 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s @@ -91,3 +91,4 @@ vpcmpgtq %xmm3, %xmm3, %xmm0 # CHECK-NEXT: 1. 3 0.0 0.0 0.0 vpcmpgtw %xmm1, %xmm1, %xmm2 # CHECK-NEXT: 2. 3 0.0 0.0 0.0 vpcmpgtd %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 3. 3 0.0 0.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: 3 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s index b2bd7169c5180..6466998763c25 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s @@ -76,3 +76,4 @@ sbb %eax, %eax # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 2.0 0.3 0.0 sbbl %edx, %edx # CHECK-NEXT: 1. 3 3.0 0.0 0.0 sbbl %eax, %eax +# CHECK-NEXT: 3 2.5 0.2 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s index 6adf58fccd3ea..69179506e4741 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s @@ -84,3 +84,4 @@ sbb %eax, %eax # CHECK-NEXT: 0. 3 2.3 1.0 0.0 imull %edx, %eax # CHECK-NEXT: 1. 3 1.3 1.0 2.7 addl %edx, %edx # CHECK-NEXT: 2. 3 1.7 0.0 2.7 sbbl %eax, %eax +# CHECK-NEXT: 3 1.8 0.7 1.8 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s index ca69339467d8c..586aa73b15ab1 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s @@ -99,3 +99,4 @@ vpaddd %xmm0, %xmm0, %xmm3 # CHECK-NEXT: 0. 10 8.0 0.1 0.0 vpmuldq %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1. 10 9.5 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0 # CHECK-NEXT: 2. 
10 10.0 0.0 0.0 vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: 10 9.2 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s index a43b8285a5186..fdfbf311b1a22 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/dot-product.s @@ -78,3 +78,4 @@ vhaddps %xmm3, %xmm3, %xmm4 # CHECK-NEXT: 0. 3 1.0 1.0 4.7 vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1. 3 2.7 0.0 2.3 vhaddps %xmm2, %xmm2, %xmm3 # CHECK-NEXT: 2. 3 6.0 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: 3 3.2 0.3 2.3 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s index 197c1dce58151..ebf2739665620 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s @@ -42,3 +42,4 @@ vhaddps (%rdi), %xmm1, %xmm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1 # CHECK-NEXT: 1. 1 1.0 0.0 0.0 vhaddps (%rdi), %xmm1, %xmm2 +# CHECK-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s index e64ee28103f31..7b9a1e601b704 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s @@ -42,3 +42,4 @@ vhaddps (%rdi), %ymm1, %ymm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %ymm1, %ymm2 +# CHECK-NEXT: 1 1.0 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s index 00c13f9ef59a9..432b262c12198 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s @@ -80,3 +80,4 @@ vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK-NEXT: 0. 3 1.0 1.0 3.3 addl %eax, %eax # CHECK-NEXT: 1. 3 7.0 0.0 0.0 vpinsrb $0, %eax, %xmm0, %xmm0 # CHECK-NEXT: 2. 3 7.0 0.0 0.0 vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: 3 5.0 0.3 1.1 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s index 2eee80e917eb3..b5f3204955308 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/load-store-alias.s @@ -97,3 +97,4 @@ vmovaps %xmm0, 48(%rdi) # CHECK-NEXT: 5. 1 16.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 16.0 0.0 0.0 vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 7. 1 21.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 1 11.0 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s index 8cdba9acc6cb4..6d17586840607 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s @@ -97,3 +97,4 @@ vmovaps %xmm0, 48(%rdi) # CHECK-NEXT: 5. 1 6.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 1.0 1.0 0.0 vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 7. 
1 6.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 1 3.5 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s b/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s index c45e86ab1eb08..3377e92253b16 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/one-idioms.s @@ -156,3 +156,4 @@ vpcmpeqw %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 12. 1 1.0 1.0 0.0 vpcmpeqd %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 13. 1 1.0 1.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 14. 1 1.0 1.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 1.0 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s index 66e452665c399..722e37a70831c 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s @@ -46,3 +46,4 @@ add %ecx, %ebx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx # CHECK-NEXT: 1. 1 7.0 0.0 0.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 1 7.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 5.0 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s index f5ad0be561053..9e824acab7142 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s @@ -83,3 +83,4 @@ xor %bx, %dx # CHECK-NEXT: 0. 3 2.7 0.3 0.0 addw %cx, %dx # CHECK-NEXT: 1. 3 3.3 0.0 0.0 movw %ax, %dx # CHECK-NEXT: 2. 3 3.7 0.0 0.0 xorw %bx, %dx +# CHECK-NEXT: 3 3.2 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s index 4c839d52e7c1d..bdd6d341166fd 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-4.s @@ -83,3 +83,4 @@ add %cx, %bx # CHECK-NEXT: 0. 3 4.7 0.3 0.0 imulw %ax, %bx # CHECK-NEXT: 1. 3 7.3 0.0 0.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 3 7.7 0.0 0.0 addw %cx, %bx +# CHECK-NEXT: 3 6.6 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s index 1714dc7725b3f..7ca046be68e38 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-6.s @@ -84,3 +84,4 @@ lzcnt 2(%rsp), %cx # CHECK-NEXT: 0. 3 4.7 0.3 0.0 imull %edx, %ecx # CHECK-NEXT: 1. 3 4.3 0.0 0.0 lzcntw (%rsp), %cx # CHECK-NEXT: 2. 3 4.7 0.0 0.0 lzcntw 2(%rsp), %cx +# CHECK-NEXT: 3 4.6 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s index f3991dccaa6f0..0cd064bcb872e 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update-7.s @@ -102,3 +102,4 @@ cmpl $1025, %eax # CHECK-NEXT: 2. 5 6.2 0.0 0.8 shll $2, %eax # CHECK-NEXT: 3. 5 6.8 0.0 0.0 imull %ecx, %eax # CHECK-NEXT: 4. 5 9.2 0.0 0.0 cmpl $1025, %eax +# CHECK-NEXT: 5 7.0 0.1 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s index c7ec67d94ccb7..c44ca83efb7fe 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/partial-reg-update.s @@ -45,3 +45,4 @@ add %ecx, %ebx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx # CHECK-NEXT: 1. 
1 4.0 0.0 0.0 addb %al, %cl # CHECK-NEXT: 2. 1 4.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 3.0 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s index 6bf375be02b61..41f014fa09e47 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s @@ -109,3 +109,4 @@ vsqrtps %ymm0, %ymm2 # CHECK-NEXT: 5. 2 29.5 29.5 0.0 vsqrtps %xmm0, %xmm2 # CHECK-NEXT: 6. 2 1.0 1.0 45.5 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 7. 2 48.5 48.5 0.0 vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: 2 10.5 10.5 23.7 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s index 4cce5c85e95e0..ee31bf4b8b18d 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/pr37790.s @@ -42,3 +42,4 @@ stmxcsr (%rsp) # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 2 51.0 0.5 0.0 int3 # CHECK-NEXT: 1. 2 151.0 0.0 0.0 stmxcsr (%rsp) +# CHECK-NEXT: 2 101.0 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s b/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s index 380f8ccecb9ba..9962ca41021d8 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/rank.s @@ -113,3 +113,4 @@ add %ebx, %eax # CHECK-NEXT: 5. 3 2.0 0.0 0.0 addl %edx, %esi # CHECK-NEXT: 6. 3 2.0 0.0 0.0 addl %ebx, %eax # CHECK-NEXT: 7. 3 3.0 0.0 0.0 addl %ebx, %eax +# CHECK-NEXT: 3 2.1 0.2 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s index 3896967c5858f..f61bce125f588 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-1.s @@ -45,3 +45,4 @@ vmulps (%rdi), %xmm1, %xmm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2 +# CHECK-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s index 5e199a1018e0a..8b922644b7326 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-2.s @@ -45,3 +45,4 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi # CHECK-NEXT: 1. 1 2.0 2.0 0.0 imull (%rdi) +# CHECK-NEXT: 1 1.5 1.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s index ef8d50aab1ed7..6a4bad2aa7f47 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/read-advance-3.s @@ -45,3 +45,4 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 addq %rdi, %rsi # CHECK-NEXT: 1. 1 1.0 0.0 0.0 addq (%rsp), %rsi # CHECK-NEXT: 2. 1 2.0 2.0 2.0 addq %rdx, %r8 +# CHECK-NEXT: 1 1.3 1.0 0.7 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s index 0c27d2cdac3d7..4788f8c46aa78 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s @@ -98,3 +98,4 @@ vaddps %xmm1, %xmm1, %xmm2 # CHECK-NEXT: 0. 3 0.0 0.0 2.7 vxorps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: 1. 3 0.0 0.0 2.7 vmovaps %xmm0, %xmm1 # CHECK-NEXT: 2. 
3 1.0 1.0 0.0 vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 3 0.3 0.3 1.8 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s index 08465f907eec0..2182118a712ac 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s @@ -139,3 +139,4 @@ movdqu %xmm5, %xmm0 # CHECK-NEXT: 6. 3 0.0 0.0 0.0 movupd %xmm3, %xmm4 # CHECK-NEXT: 7. 3 0.0 0.0 0.0 movdqa %xmm4, %xmm5 # CHECK-NEXT: 8. 3 0.0 0.0 0.0 movdqu %xmm5, %xmm0 +# CHECK-NEXT: 3 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s index f3d850fc90aa3..0d2f2160d3ecc 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s @@ -124,3 +124,4 @@ vmovdqu %xmm5, %xmm0 # CHECK-NEXT: 4. 3 0.0 0.0 0.0 vmovupd %xmm3, %xmm4 # CHECK-NEXT: 5. 3 0.0 0.0 0.0 vmovdqa %xmm4, %xmm5 # CHECK-NEXT: 6. 3 0.0 0.0 0.0 vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: 3 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s index c2df1baf5c03c..d21b9815129da 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s @@ -109,3 +109,4 @@ mov %edx, %eax # CHECK-NEXT: 2. 3 0.0 0.0 0.0 movl %ebx, %ecx # CHECK-NEXT: 3. 3 0.0 0.0 0.0 movl %ecx, %edx # CHECK-NEXT: 4. 3 0.0 0.0 0.0 movl %edx, %eax +# CHECK-NEXT: 3 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s index 277293e429ba3..f121087250cc8 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s @@ -109,3 +109,4 @@ mov %rdx, %rax # CHECK-NEXT: 2. 3 0.0 0.0 0.0 movq %rbx, %rcx # CHECK-NEXT: 3. 3 0.0 0.0 0.0 movq %rcx, %rdx # CHECK-NEXT: 4. 3 0.0 0.0 0.0 movq %rdx, %rax +# CHECK-NEXT: 3 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-6.s b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-6.s index d4800943b8bf1..0b6c023187090 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-6.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-6.s @@ -117,3 +117,4 @@ mov %esi, %ecx # CHECK-NEXT: 3. 3 2.0 0.0 0.0 addq %rcx, %rcx # CHECK-NEXT: 4. 3 2.0 0.0 0.0 addq %rcx, %rcx # CHECK-NEXT: 5. 3 0.0 0.0 3.0 movl %esi, %ecx +# CHECK-NEXT: 3 1.0 0.2 1.1 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s index 0319bd622e28c..1492de0c7ff4a 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-1.s @@ -90,3 +90,4 @@ vmulps %xmm0, %xmm0, %xmm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 5 9.0 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: 1. 
5 12.0 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 5 10.5 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s index 5f3fe1e6ccb3d..ba33ba065ff06 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-2.s @@ -91,3 +91,4 @@ vmulps %xmm0, %xmm0, %xmm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 5 6.6 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0 # CHECK-NEXT: 1. 5 7.8 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: 5 7.2 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s index f676e77bf1ed3..32101c718d538 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/register-files-5.s @@ -150,3 +150,4 @@ # CHECK-NEXT: 30. 1 30.0 25.0 0.0 vaddps %ymm3, %ymm0, %ymm4 # CHECK-NEXT: 31. 1 31.0 27.0 0.0 vaddps %ymm3, %ymm0, %ymm5 # CHECK-NEXT: 32. 1 24.0 24.0 0.0 vaddps %ymm3, %ymm0, %ymm6 +# CHECK-NEXT: 1 15.8 14.0 12.4 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s index f9fd2c3732a4a..f0a40ce9af011 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vbroadcast-operand-latency.s @@ -71,3 +71,4 @@ vbroadcastss (%rax), %ymm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 1.0 1.0 2.7 leaq 8(%rsp,%rdi,2), %rax # CHECK-NEXT: 1. 3 2.0 0.0 0.0 vbroadcastss (%rax), %ymm0 +# CHECK-NEXT: 3 1.5 0.5 1.3 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s index 57f07e1e8a815..1c5c8cf2eb79e 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-1.s @@ -41,3 +41,4 @@ vandps (%rdi), %xmm1, %xmm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1. 1 1.0 0.0 0.0 vandps (%rdi), %xmm1, %xmm2 +# CHECK-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s index 5650a8ba15c24..7e76fcc1df8c7 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/vec-logic-read-after-ld-2.s @@ -41,3 +41,4 @@ vandps (%rdi), %ymm1, %ymm2 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 1. 1 1.0 1.0 0.0 vandps (%rdi), %ymm1, %ymm2 +# CHECK-NEXT: 1 1.0 1.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s b/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s index a3bb2cd52edcc..64b6490861c2a 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/xadd.s @@ -99,6 +99,7 @@ imul %ecx, %ecx # CHECK-NEXT: 2. 2 6.5 0.0 7.0 addl %ecx, %ecx # CHECK-NEXT: 3. 2 6.5 0.0 4.0 imull %ecx, %ecx # CHECK-NEXT: 4. 2 9.5 0.0 2.0 imull %ecx, %ecx +# CHECK-NEXT: 2 6.5 0.1 4.0 # CHECK: [1] Code Region @@ -182,3 +183,4 @@ imul %ecx, %ecx # CHECK-NEXT: 2. 2 17.0 0.0 4.0 addl %ecx, %ecx # CHECK-NEXT: 3. 2 17.0 0.0 1.0 imull %ecx, %ecx # CHECK-NEXT: 4. 
2 20.0 0.0 0.0 imull %ecx, %ecx +# CHECK-NEXT: 2 15.4 0.1 1.8 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s b/llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s index 22edddaeef7bc..2620fa871be53 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/xchg.s @@ -87,3 +87,4 @@ imul %ecx, %ecx # CHECK-NEXT: 2. 2 17.0 0.0 4.0 addl %ecx, %ecx # CHECK-NEXT: 3. 2 18.0 0.0 1.0 imull %ecx, %ecx # CHECK-NEXT: 4. 2 20.0 0.0 0.0 imull %ecx, %ecx +# CHECK-NEXT: 2 15.8 0.1 1.8 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s index b1669f3340596..b5a8bb0521b37 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s @@ -111,6 +111,7 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddps %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 1. 3 1.0 1.0 1.3 vxorps %ymm1, %ymm1, %ymm1 # CHECK-NEXT: 2. 3 1.0 0.0 1.3 vblendps $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: 3 1.1 0.8 0.9 # CHECK: [1] Code Region - ZERO-IDIOM-2 @@ -187,6 +188,7 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddpd %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 1. 3 1.0 1.0 1.3 vxorpd %ymm1, %ymm1, %ymm1 # CHECK-NEXT: 2. 3 1.0 0.0 1.3 vblendpd $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: 3 1.1 0.8 0.9 # CHECK: [2] Code Region - ZERO-IDIOM-3 @@ -256,6 +258,7 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1. 3 1.0 1.0 1.0 vandnps %ymm2, %ymm2, %ymm3 +# CHECK-NEXT: 3 1.0 1.0 0.5 # CHECK: [3] Code Region - ZERO-IDIOM-4 @@ -325,6 +328,7 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1. 3 1.0 1.0 1.0 vandnps %ymm2, %ymm2, %ymm3 +# CHECK-NEXT: 3 1.0 1.0 0.5 # CHECK: [4] Code Region - ZERO-IDIOM-5 @@ -395,3 +399,4 @@ vaddps %ymm1, %ymm1, %ymm0 # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 3 1.0 1.0 0.7 vperm2f128 $136, %ymm0, %ymm0, %ymm1 # CHECK-NEXT: 1. 3 1.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0 +# CHECK-NEXT: 3 1.0 0.5 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s index d9d6c90951a22..eb41f7acc9589 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/zero-idioms.s @@ -441,3 +441,4 @@ vpxor %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 68. 1 0.0 0.0 0.0 vxorps %xmm4, %xmm4, %xmm5 # CHECK-NEXT: 69. 1 0.0 0.0 0.0 vxorpd %xmm1, %xmm1, %xmm3 # CHECK-NEXT: 70. 1 0.0 0.0 0.0 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0.0 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s index 75dfe1f1753e1..9930d08cf2e1f 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-1.s @@ -87,3 +87,4 @@ # CHECK-NEXT: 3. 2 8.5 0.0 6.0 vaddps %xmm4, %xmm5, %xmm6 # CHECK-NEXT: 4. 2 11.0 0.0 3.5 vmulps %xmm6, %xmm3, %xmm4 # CHECK-NEXT: 5. 
2 16.0 0.0 2.0 vaddps %xmm4, %xmm5, %xmm0 +# CHECK-NEXT: 2 8.3 0.3 5.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s index ce578c3ae7b93..de72dafda2fde 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-2.s @@ -87,3 +87,4 @@ # CHECK-NEXT: 3. 2 8.5 0.0 6.0 vaddps %xmm4, %xmm5, %xmm6 # CHECK-NEXT: 4. 2 11.0 0.0 3.5 vmulps %xmm6, %xmm3, %xmm4 # CHECK-NEXT: 5. 2 16.0 0.0 2.0 vaddps %xmm4, %xmm5, %xmm0 +# CHECK-NEXT: 2 8.3 0.3 5.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s index d0cf359379d47..5495b97b3367f 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/avx512-super-registers-3.s @@ -87,3 +87,4 @@ # CHECK-NEXT: 3. 2 8.5 0.0 6.0 vaddps %xmm4, %xmm18, %xmm6 # CHECK-NEXT: 4. 2 11.0 0.0 3.5 vmulps %xmm6, %xmm19, %xmm4 # CHECK-NEXT: 5. 2 16.0 0.0 2.0 vaddps %xmm4, %xmm20, %xmm0 +# CHECK-NEXT: 2 8.3 0.3 5.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s index d7d99861cfb02..2a9ef74569f2b 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-1.s @@ -87,3 +87,4 @@ # CHECK-NEXT: 3. 2 8.5 0.0 6.0 vaddps %ymm4, %ymm5, %ymm6 # CHECK-NEXT: 4. 2 11.0 0.0 3.5 vmulps %ymm6, %ymm3, %ymm4 # CHECK-NEXT: 5. 2 16.0 0.0 2.0 vaddps %ymm4, %ymm5, %ymm0 +# CHECK-NEXT: 2 8.3 0.3 5.0 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s index a32bbc6fb0e89..915499f418488 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/xop-super-registers-2.s @@ -87,3 +87,4 @@ # CHECK-NEXT: 3. 2 6.5 0.0 6.0 vaddps %ymm4, %ymm5, %ymm6 # CHECK-NEXT: 4. 2 9.0 0.0 3.5 vmulps %ymm6, %ymm3, %ymm4 # CHECK-NEXT: 5. 2 14.0 0.0 2.0 vaddps %ymm4, %ymm5, %ymm0 +# CHECK-NEXT: 2 6.8 0.3 5.2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s index 90592655067f8..b7e47894fd9cf 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s @@ -490,3 +490,4 @@ vpxor %ymm3, %ymm3, %ymm5 # CHECK-NEXT: 80. 1 0.0 0.0 3.0 vxorpd %ymm1, %ymm1, %ymm3 # CHECK-NEXT: 81. 1 0.0 0.0 3.0 vpxor %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 82. 1 0.0 0.0 3.0 vpxor %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0.8 0.2 1.6 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s index d100946031d12..1a1e83070def3 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s @@ -386,3 +386,4 @@ vpxor %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 60. 1 0.0 0.0 10.0 vxorps %ymm4, %ymm4, %ymm5 # CHECK-NEXT: 61. 1 0.0 0.0 10.0 vxorpd %ymm1, %ymm1, %ymm3 # CHECK-NEXT: 62. 
1 0.0 0.0 10.0 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 2.1 0.3 8.7 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s index 093d418b21dbf..a8d608cdd392f 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s @@ -490,3 +490,4 @@ vpxor %ymm3, %ymm3, %ymm5 # CHECK-NEXT: 80. 1 0.0 0.0 2.0 vxorpd %ymm1, %ymm1, %ymm3 # CHECK-NEXT: 81. 1 0.0 0.0 2.0 vpxor %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 82. 1 0.0 0.0 2.0 vpxor %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0.6 0.2 1.5 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s index d4f5445e1fb65..13c9293d4260a 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s @@ -776,3 +776,4 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK-NEXT: 136. 1 1.0 0.0 2.0 vpxorq %ymm19, %ymm19, %ymm21 # CHECK-NEXT: 137. 1 1.0 0.0 2.0 vpxord %zmm19, %zmm19, %zmm21 # CHECK-NEXT: 138. 1 0.0 0.0 2.0 vpxorq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 1.1 0.2 1.8 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s index 019f84fe542ca..b921b17dea7e4 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-2.s @@ -45,3 +45,4 @@ add %ecx, %ebx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx # CHECK-NEXT: 1. 1 4.0 0.0 0.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 1 6.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 3.7 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s index abe923b5906b0..d8afbacdee79a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-3.s @@ -88,3 +88,4 @@ xor %bx, %dx # CHECK-NEXT: 0. 6 7.0 0.2 0.0 addw %cx, %dx # CHECK-NEXT: 1. 6 7.7 0.0 0.0 movw %ax, %dx # CHECK-NEXT: 2. 6 8.5 0.0 0.0 xorw %bx, %dx +# CHECK-NEXT: 6 7.7 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s index 8202a6057c8c8..8eb2f503fdb04 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-4.s @@ -91,3 +91,4 @@ add %cx, %bx # CHECK-NEXT: 0. 7 14.1 0.1 0.0 imulw %ax, %bx # CHECK-NEXT: 1. 7 15.9 0.0 0.0 lzcntw %ax, %bx # CHECK-NEXT: 2. 7 17.6 0.0 0.0 addw %cx, %bx +# CHECK-NEXT: 7 15.9 0.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s index 69637932c31f4..20fee19b54790 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-6.s @@ -84,3 +84,4 @@ lzcnt 2(%rsp), %cx # CHECK-NEXT: 0. 4 9.5 0.3 0.0 imull %edx, %ecx # CHECK-NEXT: 1. 4 9.0 0.0 0.0 lzcntw (%rsp), %cx # CHECK-NEXT: 2. 
4 9.5 0.0 0.0 lzcntw 2(%rsp), %cx +# CHECK-NEXT: 4 9.3 0.1 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s index a7e3860e14537..f3d0321b45414 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update-7.s @@ -49,3 +49,4 @@ addq %rcx, %rdx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rcx # CHECK-NEXT: 1. 1 5.0 0.0 0.0 addl %edx, %ecx # CHECK-NEXT: 2. 1 6.0 0.0 0.0 addq %rcx, %rdx +# CHECK-NEXT: 1 4.0 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s index f7a85399e5e8c..e2da41b06967c 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/partial-reg-update.s @@ -45,3 +45,4 @@ add %ecx, %ebx # CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx # CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl # CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx +# CHECK-NEXT: 1 3.3 0.3 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s index d9f1c9522db48..819d67d8464bf 100644 --- a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s @@ -124,3 +124,4 @@ bextrl %esi, (%rdi), %eax # ALL: [0] [1] [2] [3] # ALL-NEXT: 0. 1 1.0 1.0 0.0 addl %edi, %esi # ALL-NEXT: 1. 1 1.0 0.0 0.0 bextrl %esi, (%rdi), %eax +# ALL-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s index fe3ba3a6ec561..5ec7c47fed3e9 100644 --- a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s @@ -81,3 +81,4 @@ bzhil %esi, (%rdi), %eax # ALL: [0] [1] [2] [3] # ALL-NEXT: 0. 1 1.0 1.0 0.0 addl %edi, %esi # ALL-NEXT: 1. 1 1.0 0.0 0.0 bzhil %esi, (%rdi), %eax +# ALL-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s index 75c77f8f8c4b4..d0efd71f6c35b 100644 --- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s @@ -75,3 +75,4 @@ vfmadd213ps (%rdi), %xmm1, %xmm2 # ALL: [0] [1] [2] [3] # ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 # ALL-NEXT: 1. 1 1.0 0.0 0.0 vfmadd213ps (%rdi), %xmm1, %xmm2 +# ALL-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s index 96d3ba613c5fc..b9ff4a47e8c98 100644 --- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s @@ -75,3 +75,4 @@ vfmadd213ps (%rdi), %xmm1, %xmm2 # ALL: [0] [1] [2] [3] # ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 # ALL-NEXT: 1. 1 1.0 0.0 0.0 vfmadd213ps (%rdi), %xmm1, %xmm2 +# ALL-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s index f3a88431aa009..63504dd91995d 100644 --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s @@ -138,10 +138,25 @@ vaddps (%rax), %xmm1, %xmm1 # ALL-NEXT: 0. 1 1.0 1.0 0.0 vdivps %xmm0, %xmm1, %xmm1 # BARCELONA-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# BARCELONA-NEXT: 1 5.0 0.5 0.0 + # BDVER2-NEXT: 1. 
1 5.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# BDVER2-NEXT: 1 3.0 0.5 0.0 + # BDWELL-NEXT: 1. 1 7.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# BDWELL-NEXT: 1 4.0 0.5 0.0 + # BTVER2-NEXT: 1. 1 15.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# BTVER2-NEXT: 1 8.0 0.5 0.0 + # HASWELL-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# HASWELL-NEXT: 1 4.5 0.5 0.0 + # SANDY-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# SANDY-NEXT: 1 5.0 0.5 0.0 + # SKYLAKE-NEXT: 1. 1 6.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# SKYLAKE-NEXT: 1 3.5 0.5 0.0 + # ZNVER1-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# ZNVER1-NEXT: 1 4.5 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s index 7d549b3959572..63680b86170e1 100644 --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s @@ -244,12 +244,16 @@ cmp %edi, %edx # BDWELL-NEXT: 2. 10 1.0 0.4 5.7 addq $32, %r8 # BDWELL-NEXT: 3. 10 1.0 0.0 5.3 cmpl %edi, %edx +# BDWELL-NEXT: 10 1.0 0.2 3.9 # HASWELL-NEXT: 2. 10 1.0 0.4 6.7 addq $32, %r8 # HASWELL-NEXT: 3. 10 1.0 0.0 6.3 cmpl %edi, %edx +# HASWELL-NEXT: 10 1.0 0.2 4.6 # SKYLAKE-NEXT: 2. 10 1.0 0.1 7.0 addq $32, %r8 # SKYLAKE-NEXT: 3. 10 2.0 0.0 6.0 cmpl %edi, %edx +# SKYLAKE-NEXT: 10 1.5 0.1 4.6 # ZNVER1-NEXT: 2. 10 1.0 0.1 7.0 addq $32, %r8 # ZNVER1-NEXT: 3. 10 2.0 0.0 6.0 cmpl %edi, %edx +# ZNVER1-NEXT: 10 1.3 0.1 4.6 diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-3.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-3.s index 315156df62cd4..f32b7d2734dd1 100644 --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-3.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-3.s @@ -48,3 +48,4 @@ addl (%rdi), %esi # ALL: [0] [1] [2] [3] # ALL-NEXT: 0. 1 1.0 1.0 0.0 addl %edi, %esi # ALL-NEXT: 1. 1 1.0 0.0 0.0 addl (%rdi), %esi +# ALL-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s index a4b3f561e5d27..293440fce8753 100644 --- a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s +++ b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s @@ -83,12 +83,25 @@ rcpss (%rax), %xmm1 # ALL-NEXT: 0. 1 1.0 1.0 0.0 leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 +# BARCELONA-NEXT: 1 1.5 0.5 0.0 + # BDVER2-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 +# BDVER2-NEXT: 1 1.5 0.5 0.0 + # BROADWELL-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 +# BROADWELL-NEXT: 1 1.5 0.5 0.0 + # BTVER2-NEXT: 1. 1 3.0 0.0 0.0 sqrtss (%rax), %xmm1 +# BTVER2-NEXT: 1 2.0 0.5 0.0 + # HASWELL-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 +# HASWELL-NEXT: 1 1.5 0.5 0.0 + # SKYLAKE-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 +# SKYLAKE-NEXT: 1 1.5 0.5 0.0 + # ZNVER1-NEXT: 1. 1 2.0 0.0 0.0 sqrtss (%rax), %xmm1 +# ZNVER1-NEXT: 1 1.5 0.5 0.0 # ALL: [1] Code Region - test_sqrtsd @@ -146,12 +159,25 @@ rcpss (%rax), %xmm1 # ALL-NEXT: 0. 1 1.0 1.0 0.0 leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# BARCELONA-NEXT: 1 1.5 0.5 0.0 + # BDVER2-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# BDVER2-NEXT: 1 1.5 0.5 0.0 + # BROADWELL-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# BROADWELL-NEXT: 1 1.5 0.5 0.0 + # BTVER2-NEXT: 1. 1 3.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# BTVER2-NEXT: 1 2.0 0.5 0.0 + # HASWELL-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# HASWELL-NEXT: 1 1.5 0.5 0.0 + # SKYLAKE-NEXT: 1. 1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# SKYLAKE-NEXT: 1 1.5 0.5 0.0 + # ZNVER1-NEXT: 1. 
1 2.0 0.0 0.0 sqrtsd (%rax), %xmm1 +# ZNVER1-NEXT: 1 1.5 0.5 0.0 # ALL: [2] Code Region - test_rsqrtss @@ -198,12 +224,25 @@ rcpss (%rax), %xmm1 # ALL-NEXT: 0. 1 1.0 1.0 0.0 leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# BARCELONA-NEXT: 1 1.5 0.5 0.0 + # BDVER2-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# BDVER2-NEXT: 1 1.5 0.5 0.0 + # BROADWELL-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# BROADWELL-NEXT: 1 1.5 0.5 0.0 + # BTVER2-NEXT: 1. 1 3.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# BTVER2-NEXT: 1 2.0 0.5 0.0 + # HASWELL-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# HASWELL-NEXT: 1 1.5 0.5 0.0 + # SKYLAKE-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# SKYLAKE-NEXT: 1 1.5 0.5 0.0 + # ZNVER1-NEXT: 1. 1 2.0 0.0 0.0 rsqrtss (%rax), %xmm1 +# ZNVER1-NEXT: 1 1.5 0.5 0.0 # ALL: [3] Code Region - test_rcp @@ -250,9 +289,22 @@ rcpss (%rax), %xmm1 # ALL-NEXT: 0. 1 1.0 1.0 0.0 leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 +# BARCELONA-NEXT: 1 1.5 0.5 0.0 + # BDVER2-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 +# BDVER2-NEXT: 1 1.5 0.5 0.0 + # BROADWELL-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 +# BROADWELL-NEXT: 1 1.5 0.5 0.0 + # BTVER2-NEXT: 1. 1 3.0 0.0 0.0 rcpss (%rax), %xmm1 +# BTVER2-NEXT: 1 2.0 0.5 0.0 + # HASWELL-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 +# HASWELL-NEXT: 1 1.5 0.5 0.0 + # SKYLAKE-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 +# SKYLAKE-NEXT: 1 1.5 0.5 0.0 + # ZNVER1-NEXT: 1. 1 2.0 0.0 0.0 rcpss (%rax), %xmm1 +# ZNVER1-NEXT: 1 1.5 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s index 2acd810393120..1069e71d038ec 100644 --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s @@ -149,10 +149,25 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1 # BDVER2-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BDVER2-NEXT: 1 1.0 0.5 0.0 + # BDWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BDWELL-NEXT: 1 1.0 0.5 0.0 + # BTVER2-NEXT: 1. 1 1.0 1.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BTVER2-NEXT: 1 1.0 1.0 0.0 + # HASWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# HASWELL-NEXT: 1 1.0 0.5 0.0 + # IVY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# IVY-NEXT: 1 1.0 0.5 0.0 + # SANDY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# SANDY-NEXT: 1 1.0 0.5 0.0 + # SKYLAKE-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# SKYLAKE-NEXT: 1 1.0 0.5 0.0 + # ZNVER1-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER1-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s index 657deba83daa4..f1c57476f181f 100644 --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s @@ -149,10 +149,25 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ALL-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm2 # BDVER2-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BDVER2-NEXT: 1 1.0 0.5 0.0 + # BDWELL-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BDWELL-NEXT: 1 1.0 0.5 0.0 + # BTVER2-NEXT: 1. 1 1.0 1.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BTVER2-NEXT: 1 1.0 1.0 0.0 + # HASWELL-NEXT: 1. 
1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# HASWELL-NEXT: 1 1.0 0.5 0.0 + # IVY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# IVY-NEXT: 1 1.0 0.5 0.0 + # SANDY-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# SANDY-NEXT: 1 1.0 0.5 0.0 + # SKYLAKE-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# SKYLAKE-NEXT: 1 1.0 0.5 0.0 + # ZNVER1-NEXT: 1. 1 1.0 0.0 0.0 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER1-NEXT: 1 1.0 0.5 0.0 diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-exec.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-exec.test index 548ee4b182dd1..eb402f405e60e 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-exec.test +++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-exec.test @@ -6,6 +6,9 @@ # RUN: --implicit-check-not=absolute \ # RUN: --implicit-check-not=other +# Match this line so the implicit check-nots don't match the path. +# CHECK: {{^.*}}file format ELF64-x86-64 + # CHECK: 0000000000004000 first: # CHECK: 0000000000004001 second: # CHECK: 0000000000004002 third: diff --git a/llvm/test/tools/llvm-readobj/elf-hash-histogram.test b/llvm/test/tools/llvm-readobj/elf-hash-histogram.test index 13aa5137301b1..e0c29d15abca8 100644 --- a/llvm/test/tools/llvm-readobj/elf-hash-histogram.test +++ b/llvm/test/tools/llvm-readobj/elf-hash-histogram.test @@ -1,27 +1,70 @@ -RUN: llvm-readelf --elf-hash-histogram %p/Inputs/gnuhash.so.elf-ppc64 \ -RUN: | FileCheck %s -check-prefix PPC64GNU -RUN: llvm-readelf --elf-hash-histogram %p/Inputs/gnuhash.so.elf-x86_64 \ -RUN: | FileCheck %s -check-prefix X86GNU -RUN: llvm-readelf --elf-hash-histogram %p/Inputs/got-plt.exe.elf-mipsel \ -RUN: | FileCheck %s -check-prefix SYSV +# RUN: llvm-readelf --elf-hash-histogram %p/Inputs/gnuhash.so.elf-ppc64 \ +# RUN: | FileCheck %s -check-prefix PPC64GNU +# RUN: llvm-readelf --elf-hash-histogram %p/Inputs/gnuhash.so.elf-x86_64 \ +# RUN: | FileCheck %s -check-prefix X86GNU +# RUN: llvm-readelf --elf-hash-histogram %p/Inputs/got-plt.exe.elf-mipsel \ +# RUN: | FileCheck %s -check-prefix SYSV -PPC64GNU: Histogram for `.gnu.hash' bucket list length (total of 3 buckets) -PPC64GNU-NEXT: Length Number % of total Coverage -PPC64GNU-NEXT: 0 1 ( 33.3%) 0.0% -PPC64GNU-NEXT: 1 1 ( 33.3%) 25.0% -PPC64GNU-NEXT: 2 0 ( 0.0%) 25.0% -PPC64GNU-NEXT: 3 1 ( 33.3%) 100.0% +# PPC64GNU: Histogram for `.gnu.hash' bucket list length (total of 3 buckets) +# PPC64GNU-NEXT: Length Number % of total Coverage +# PPC64GNU-NEXT: 0 1 ( 33.3%) 0.0% +# PPC64GNU-NEXT: 1 1 ( 33.3%) 25.0% +# PPC64GNU-NEXT: 2 0 ( 0.0%) 25.0% +# PPC64GNU-NEXT: 3 1 ( 33.3%) 100.0% -X86GNU: Histogram for `.gnu.hash' bucket list length (total of 3 buckets) -X86GNU-NEXT: Length Number % of total Coverage -X86GNU-NEXT: 0 1 ( 33.3%) 0.0% -X86GNU-NEXT: 1 1 ( 33.3%) 25.0% -X86GNU-NEXT: 2 0 ( 0.0%) 25.0% -X86GNU-NEXT: 3 1 ( 33.3%) 100.0% +# X86GNU: Histogram for `.gnu.hash' bucket list length (total of 3 buckets) +# X86GNU-NEXT: Length Number % of total Coverage +# X86GNU-NEXT: 0 1 ( 33.3%) 0.0% +# X86GNU-NEXT: 1 1 ( 33.3%) 25.0% +# X86GNU-NEXT: 2 0 ( 0.0%) 25.0% +# X86GNU-NEXT: 3 1 ( 33.3%) 100.0% -SYSV: Histogram for bucket list length (total of 3 buckets) -SYSV-NEXT: Length Number % of total Coverage -SYSV-NEXT: 0 0 ( 0.0%) 0.0% -SYSV-NEXT: 1 0 ( 0.0%) 0.0% -SYSV-NEXT: 2 2 ( 66.7%) 57.1% -SYSV-NEXT: 3 1 ( 33.3%) 100.0% +# SYSV: Histogram for bucket list length (total of 3 buckets) +# SYSV-NEXT: Length Number % of total 
Coverage +# SYSV-NEXT: 0 0 ( 0.0%) 0.0% +# SYSV-NEXT: 1 0 ( 0.0%) 0.0% +# SYSV-NEXT: 2 2 ( 66.7%) 57.1% +# SYSV-NEXT: 3 1 ( 33.3%) 100.0% + +## Show that we report a warning for a hash table which contains an entry of +## the bucket array pointing to a cycle. + +# RUN: yaml2obj %s -o %t.o +# RUN: llvm-readelf --elf-hash-histogram 2>&1 %t.o | FileCheck -DFILE=%t.o %s --check-prefix BROKEN + +# BROKEN: warning: '[[FILE]]': .hash section is invalid: bucket 1: a cycle was detected in the linked chain +# BROKEN: Histogram for bucket list length (total of 1 buckets) +# BROKEN-NEXT: Length Number % of total Coverage +# BROKEN-NEXT: 0 0 ( 0.0%) 0.0% +# BROKEN-NEXT: 1 1 (100.0%) 100.0% + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_386 +Sections: + - Name: .hash + Type: SHT_HASH + Link: .dynsym + Bucket: [ 1 ] + Chain: [ 0, 1 ] + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_ALLOC ] + Entries: +## llvm-readelf will read the hash table from the file offset +## p_offset + (p_vaddr - DT_HASH) = p_offset + (0 - 0) = p_offset, +## which is the start of PT_LOAD, i.e. the file offset of .hash. + - Tag: DT_HASH + Value: 0x0 + - Tag: DT_NULL + Value: 0 +DynamicSymbols: + - Name: foo +ProgramHeaders: + - Type: PT_LOAD + Sections: + - Section: .hash + - Section: .dynamic diff --git a/llvm/test/tools/obj2yaml/basic-minidump.yaml b/llvm/test/tools/obj2yaml/basic-minidump.yaml index af08fe19bb7f8..e4955dbbf35fa 100644 --- a/llvm/test/tools/obj2yaml/basic-minidump.yaml +++ b/llvm/test/tools/obj2yaml/basic-minidump.yaml @@ -55,6 +55,27 @@ Streams: Memory Ranges: - Start of Memory Range: 0x7C7D7E7F80818283 Content: '8485868788' + - Type: MemoryInfoList + Memory Ranges: + - Base Address: 0x0000000000000000 + Allocation Protect: [ ] + Region Size: 0x0000000000010000 + State: [ MEM_FREE ] + Protect: [ PAGE_NO_ACCESS ] + Type: [ ] + - Base Address: 0x0000000000010000 + Allocation Protect: [ PAGE_READ_WRITE ] + Region Size: 0x0000000000010000 + State: [ MEM_COMMIT ] + Type: [ MEM_MAPPED ] + - Base Address: 0x0000000000020000 + Allocation Base: 0x0000000000000000 + Allocation Protect: [ PAGE_READ_WRITE, PAGE_WRITECOMBINE ] + Reserved0: 0xDEADBEEF + Region Size: 0x0000000000010000 + State: [ MEM_COMMIT, MEM_FREE ] + Type: [ MEM_PRIVATE, MEM_MAPPED ] + Reserved1: 0xBAADF00D ... # CHECK: --- !minidump @@ -112,4 +133,25 @@ Streams: # CHECK-NEXT: Memory Ranges: # CHECK-NEXT: - Start of Memory Range: 0x7C7D7E7F80818283 # CHECK-NEXT: Content: '8485868788' +# CHECK-NEXT: - Type: MemoryInfoList +# CHECK-NEXT: Memory Ranges: +# CHECK-NEXT: - Base Address: 0x0000000000000000 +# CHECK-NEXT: Allocation Protect: [ ] +# CHECK-NEXT: Region Size: 0x0000000000010000 +# CHECK-NEXT: State: [ MEM_FREE ] +# CHECK-NEXT: Protect: [ PAGE_NO_ACCESS ] +# CHECK-NEXT: Type: [ ] +# CHECK-NEXT: - Base Address: 0x0000000000010000 +# CHECK-NEXT: Allocation Protect: [ PAGE_READ_WRITE ] +# CHECK-NEXT: Region Size: 0x0000000000010000 +# CHECK-NEXT: State: [ MEM_COMMIT ] +# CHECK-NEXT: Type: [ MEM_MAPPED ] +# CHECK-NEXT: - Base Address: 0x0000000000020000 +# CHECK-NEXT: Allocation Base: 0x0000000000000000 +# CHECK-NEXT: Allocation Protect: [ PAGE_READ_WRITE, PAGE_WRITECOMBINE ] +# CHECK-NEXT: Reserved0: 0xDEADBEEF +# CHECK-NEXT: Region Size: 0x0000000000010000 +# CHECK-NEXT: State: [ MEM_COMMIT, MEM_FREE ] +# CHECK-NEXT: Type: [ MEM_PRIVATE, MEM_MAPPED ] +# CHECK-NEXT: Reserved1: 0xBAADF00D # CHECK-NEXT: ... 
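The llvm-readelf histogram test added above feeds the tool a SHT_HASH section whose chain array loops back on itself (Bucket: [ 1 ], Chain: [ 0, 1 ]), so a naive bucket walk would never terminate. Below is a minimal, self-contained sketch of the bounded chain walk that the ELFDumper.cpp change later in this patch performs; the function and message names here are illustrative stand-ins, not the real llvm-readobj API.

#include <cstdint>
#include <cstdio>
#include <vector>

// Length of the chain starting at `Bucket`, stopping at STN_UNDEF (0), an
// out-of-range index, or an entry that was already visited (a cycle).
static size_t chainLength(uint32_t Bucket, const std::vector<uint32_t> &Chain) {
  std::vector<bool> Visited(Chain.size());
  size_t Len = 0;
  for (uint32_t C = Bucket; C != 0 && C < Chain.size(); C = Chain[C]) {
    if (Visited[C]) {
      std::fprintf(stderr, "warning: cycle detected in the linked chain\n");
      break;
    }
    Visited[C] = true;
    ++Len;
  }
  return Len;
}

int main() {
  // Mirrors the YAML input above: Bucket: [ 1 ], Chain: [ 0, 1 ].
  const std::vector<uint32_t> Chain = {0, 1};
  std::printf("bucket 1 chain length = %zu\n", chainLength(1, Chain));
  return 0;
}

Walking bucket 1 of this table visits entry 1 once, sees it again on the next step, emits the warning, and reports a chain of length 1 — the behaviour the BROKEN check lines above expect.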
diff --git a/llvm/tools/dsymutil/MachODebugMapParser.cpp b/llvm/tools/dsymutil/MachODebugMapParser.cpp index 487fbfff50cfa..3292e94857250 100644 --- a/llvm/tools/dsymutil/MachODebugMapParser.cpp +++ b/llvm/tools/dsymutil/MachODebugMapParser.cpp @@ -123,6 +123,7 @@ class MachODebugMapParser { /// file. This is to be called after an object file is finished /// processing. void MachODebugMapParser::resetParserState() { + CommonSymbols.clear(); CurrentObjectAddresses.clear(); CurrentDebugMapObject = nullptr; } @@ -144,7 +145,6 @@ void MachODebugMapParser::addCommonSymbols() { continue; } } - CommonSymbols.clear(); } /// Create a new DebugMapObject. This function resets the state of the diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp index 342b2e5228592..0fa0767b71d80 100644 --- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp +++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp @@ -268,6 +268,27 @@ static void writeLatencySnippetHtml(raw_ostream &OS, } } +void Analysis::printPointHtml(const InstructionBenchmark &Point, + llvm::raw_ostream &OS) const { + OS << "
  • (OS, Point.AssembledSnippet, "\n"); + OS << "\">"; + switch (Point.Mode) { + case InstructionBenchmark::Latency: + writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + break; + case InstructionBenchmark::Uops: + case InstructionBenchmark::InverseThroughput: + writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); + break; + default: + llvm_unreachable("invalid mode"); + } + OS << " "; + writeEscaped(OS, Point.Key.Config); + OS << "
  • "; +} + void Analysis::printSchedClassClustersHtml( const std::vector &Clusters, const ResolvedSchedClass &RSC, raw_ostream &OS) const { @@ -292,25 +313,7 @@ void Analysis::printSchedClassClustersHtml( writeClusterId(OS, Cluster.id()); OS << "
      "; for (const size_t PointId : Cluster.getPointIds()) { - const auto &Point = Points[PointId]; - OS << "
    • (OS, Point.AssembledSnippet, - "\n"); - OS << "\">"; - switch (Point.Mode) { - case InstructionBenchmark::Latency: - writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); - break; - case InstructionBenchmark::Uops: - case InstructionBenchmark::InverseThroughput: - writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); - break; - default: - llvm_unreachable("invalid mode"); - } - OS << " "; - writeEscaped(OS, Point.Key.Config); - OS << "
    • "; + printPointHtml(Points[PointId], OS); } OS << "
    "; for (const auto &Stats : Cluster.getCentroid().getStats()) { @@ -422,6 +425,43 @@ void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, OS << ""; } +void Analysis::printClusterRawHtml( + const InstructionBenchmarkClustering::ClusterId &Id, StringRef display_name, + llvm::raw_ostream &OS) const { + const auto &Points = Clustering_.getPoints(); + const auto &Cluster = Clustering_.getCluster(Id); + if (Cluster.PointIndices.empty()) + return; + + OS << "

    " << display_name << " Cluster (" + << Cluster.PointIndices.size() << " points)

    "; + OS << ""; + // Table Header. + OS << ""; + for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) { + OS << ""; + } + OS << ""; + + // Point data. + for (const auto &PointId : Cluster.PointIndices) { + OS << ""; + for (const auto &Measurement : Points[PointId].Measurements) { + OS << ""; + } + OS << "
    ClusterIdOpcode/Config"; + writeEscaped(OS, Measurement.Key); + OS << "
    " << display_name << "
      "; + printPointHtml(Points[PointId], OS); + OS << "
    "; + writeMeasurementValue(OS, Measurement.PerInstructionValue); + } + OS << "
    "; + + OS << "
    "; + +} // namespace exegesis + static constexpr const char kHtmlHead[] = R"( llvm-exegesis Analysis Results @@ -549,6 +589,9 @@ Error Analysis::run( OS << ""; } + printClusterRawHtml(InstructionBenchmarkClustering::ClusterId::noise(), + "[noise]", OS); + OS << ""; return Error::success(); } diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.h b/llvm/tools/llvm-exegesis/lib/Analysis.h index eac5fef8c96a1..4c1c864e6e73d 100644 --- a/llvm/tools/llvm-exegesis/lib/Analysis.h +++ b/llvm/tools/llvm-exegesis/lib/Analysis.h @@ -81,6 +81,12 @@ class Analysis { void printInstructionRowCsv(size_t PointId, raw_ostream &OS) const; + void printClusterRawHtml(const InstructionBenchmarkClustering::ClusterId &Id, + StringRef display_name, llvm::raw_ostream &OS) const; + + void printPointHtml(const InstructionBenchmark &Point, + llvm::raw_ostream &OS) const; + void printSchedClassClustersHtml(const std::vector &Clusters, const ResolvedSchedClass &SC, diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp index fe3f16ba344cb..1e7caa297ac68 100644 --- a/llvm/tools/llvm-mca/Views/TimelineView.cpp +++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/TimelineView.h" +#include namespace llvm { namespace mca { @@ -132,25 +133,38 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, const WaitTimeEntry &Entry, unsigned SourceIndex, unsigned Executions) const { - OS << SourceIndex << '.'; + bool PrintingTotals = SourceIndex == Source.size(); + unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; + + if (!PrintingTotals) + OS << SourceIndex << '.'; + OS.PadToColumn(7); double AverageTime1, AverageTime2, AverageTime3; - AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions; - AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions; - AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions; + AverageTime1 = + (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions; + AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions; + AverageTime3 = + (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions; OS << Executions; OS.PadToColumn(13); - int BufferSize = UsedBuffer[SourceIndex].second; - tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize); + + int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second; + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, + BufferSize); OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); OS.PadToColumn(20); - tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, + BufferSize); OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); OS.PadToColumn(27); - tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions, - STI.getSchedModel().MicroOpBufferSize); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, + CumulativeExecutions, STI.getSchedModel().MicroOpBufferSize); OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); if (OS.has_colors()) @@ -190,6 +204,24 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { ++IID; } + + // If the timeline contains more than one instruction, + // let's also print global averages. 
+ if (Source.size() != 1) { + WaitTimeEntry TotalWaitTime = std::accumulate( + WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0}, + [](const WaitTimeEntry &A, const WaitTimeEntry &B) { + return WaitTimeEntry{ + A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, + A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, + A.CyclesSpentAfterWBAndBeforeRetire + + B.CyclesSpentAfterWBAndBeforeRetire}; + }); + printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions); + FOS << " " + << "<total>" << '\n'; + InstrStream.flush(); + } } void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, diff --git a/llvm/tools/llvm-mca/Views/TimelineView.h b/llvm/tools/llvm-mca/Views/TimelineView.h index b63b234293cda..9bec3b87db45d 100644 --- a/llvm/tools/llvm-mca/Views/TimelineView.h +++ b/llvm/tools/llvm-mca/Views/TimelineView.h @@ -84,6 +84,7 @@ /// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3 /// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4 /// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6 +/// 2 2.4 0.6 1.6 <total> /// /// By comparing column [2] with column [1], we get an idea about how many /// cycles were spent in the scheduler's queue due to data dependencies. diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 4e9cf213174f4..135624539aed5 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3968,9 +3968,21 @@ void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) { // Go over all buckets and note chain lengths of each bucket (total // unique chain lengths). for (size_t B = 0; B < NBucket; B++) { - for (size_t C = Buckets[B]; C > 0 && C < NChain; C = Chains[C]) + std::vector<bool> Visited(NChain); + for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) { + if (C == ELF::STN_UNDEF) + break; + if (Visited[C]) { + reportWarning( + createError(".hash section is invalid: bucket " + Twine(C) + + ": a cycle was detected in the linked chain"), + this->FileName); + break; + } + Visited[C] = true; if (MaxChain <= ++ChainLen[B]) MaxChain++; + } TotalSyms += ChainLen[B]; } diff --git a/llvm/unittests/ADT/ArrayRefTest.cpp b/llvm/unittests/ADT/ArrayRefTest.cpp index 6a3a120449b37..4690319ae52ea 100644 --- a/llvm/unittests/ADT/ArrayRefTest.cpp +++ b/llvm/unittests/ADT/ArrayRefTest.cpp @@ -75,7 +75,9 @@ TEST(ArrayRefTest, AllocatorCopy) { EXPECT_NE(makeArrayRef(Array3Src).data(), Array3Copy.data()); } -TEST(ArrayRefTest, SizeTSizedOperations) { +// This test is pure UB given the ArrayRef<> implementation. +// You are not allowed to produce non-null pointers given a null base pointer. +TEST(ArrayRefTest, DISABLED_SizeTSizedOperations) { ArrayRef<char> AR(nullptr, std::numeric_limits<size_t>::max()); // Check that drop_back accepts size_t-sized numbers.
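A brief note on the ArrayRefTest hunk above (this sketch is not part of the patch): the new comment says a null base pointer must never yield a non-null pointer. Assuming drop_front(N) is implemented roughly as ArrayRef(data() + N, size() - N), an ArrayRef built over nullptr evaluates nullptr + N as soon as it is sliced, which is the undefined pointer arithmetic the comment describes. Minimal illustration, with the hypothetical helper name sketchNullBaseSlice:

#include "llvm/ADT/ArrayRef.h"
#include <cstddef>
#include <limits>

// Hypothetical sketch of the UB called out in the disabled test above.
void sketchNullBaseSlice() {
  llvm::ArrayRef<char> AR(nullptr, std::numeric_limits<std::size_t>::max());
  // Slicing computes data() + 1 on a null data(); nullptr + 1 is undefined.
  llvm::ArrayRef<char> Tail = AR.drop_front(1);
  (void)Tail; // Silence unused-variable warnings in this sketch.
}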
diff --git a/llvm/unittests/ADT/StatisticTest.cpp b/llvm/unittests/ADT/StatisticTest.cpp index 1b5530fb7296f..e5a0cad26d685 100644 --- a/llvm/unittests/ADT/StatisticTest.cpp +++ b/llvm/unittests/ADT/StatisticTest.cpp @@ -17,6 +17,7 @@ namespace { #define DEBUG_TYPE "unittest" STATISTIC(Counter, "Counts things"); STATISTIC(Counter2, "Counts other things"); +ALWAYS_ENABLED_STATISTIC(AlwaysCounter, "Counts things always"); #if LLVM_ENABLE_STATS static void @@ -43,6 +44,12 @@ TEST(StatisticTest, Count) { #else EXPECT_EQ(Counter, 0u); #endif + + AlwaysCounter = 0; + EXPECT_EQ(AlwaysCounter, 0u); + AlwaysCounter++; + ++AlwaysCounter; + EXPECT_EQ(AlwaysCounter, 2u); } TEST(StatisticTest, Assign) { @@ -54,10 +61,15 @@ TEST(StatisticTest, Assign) { #else EXPECT_EQ(Counter, 0u); #endif + + AlwaysCounter = 2; + EXPECT_EQ(AlwaysCounter, 2u); } TEST(StatisticTest, API) { EnableStatistics(); + // Reset beforehand to make sure previous tests don't effect this one. + ResetStatistics(); Counter = 0; EXPECT_EQ(Counter, 0u); diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp index 97c91de1d3faf..921fc7d349ac5 100644 --- a/llvm/unittests/ADT/StringExtrasTest.cpp +++ b/llvm/unittests/ADT/StringExtrasTest.cpp @@ -109,7 +109,7 @@ TEST(StringExtrasTest, printEscapedString) { std::string str; raw_string_ostream OS(str); printEscapedString("ABCdef123&<>\\\"'\t", OS); - EXPECT_EQ("ABCdef123&<>\\5C\\22'\\09", OS.str()); + EXPECT_EQ("ABCdef123&<>\\\\\\22'\\09", OS.str()); } TEST(StringExtrasTest, printHTMLEscaped) { diff --git a/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt b/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt index 34a413c96b0ca..01c8f4ecdec77 100644 --- a/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt +++ b/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt @@ -10,6 +10,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_unittest(GlobalISelTests + ConstantFoldingTest.cpp CSETest.cpp LegalizerHelperTest.cpp LegalizerInfoTest.cpp diff --git a/llvm/unittests/CodeGen/GlobalISel/ConstantFoldingTest.cpp b/llvm/unittests/CodeGen/GlobalISel/ConstantFoldingTest.cpp new file mode 100644 index 0000000000000..e36b4bd2e8ae8 --- /dev/null +++ b/llvm/unittests/CodeGen/GlobalISel/ConstantFoldingTest.cpp @@ -0,0 +1,239 @@ +//===- ConstantFoldingTest.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "GISelMITest.h" +#include "llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST_F(GISelMITest, FoldWithBuilder) { + setUp(); + if (!TM) + return; + // Try to use the FoldableInstructionsBuilder to build binary ops. + ConstantFoldingMIRBuilder CFB(B.getState()); + LLT s32 = LLT::scalar(32); + int64_t Cst; + auto MIBCAdd = + CFB.buildAdd(s32, CFB.buildConstant(s32, 0), CFB.buildConstant(s32, 1)); + // This should be a constant now. 
+ bool match = mi_match(MIBCAdd->getOperand(0).getReg(), *MRI, m_ICst(Cst)); + EXPECT_TRUE(match); + EXPECT_EQ(Cst, 1); + auto MIBCAdd1 = + CFB.buildInstr(TargetOpcode::G_ADD, {s32}, + {CFB.buildConstant(s32, 0), CFB.buildConstant(s32, 1)}); + // This should be a constant now. + match = mi_match(MIBCAdd1->getOperand(0).getReg(), *MRI, m_ICst(Cst)); + EXPECT_TRUE(match); + EXPECT_EQ(Cst, 1); + + // Try one of the other constructors of MachineIRBuilder to make sure it's + // compatible. + ConstantFoldingMIRBuilder CFB1(*MF); + CFB1.setInsertPt(*EntryMBB, EntryMBB->end()); + auto MIBCSub = + CFB1.buildInstr(TargetOpcode::G_SUB, {s32}, + {CFB1.buildConstant(s32, 1), CFB1.buildConstant(s32, 1)}); + // This should be a constant now. + match = mi_match(MIBCSub->getOperand(0).getReg(), *MRI, m_ICst(Cst)); + EXPECT_TRUE(match); + EXPECT_EQ(Cst, 0); + + auto MIBCSext1 = + CFB1.buildInstr(TargetOpcode::G_SEXT_INREG, {s32}, + {CFB1.buildConstant(s32, 0x01), uint64_t(8)}); + // This should be a constant now. + match = mi_match(MIBCSext1->getOperand(0).getReg(), *MRI, m_ICst(Cst)); + EXPECT_TRUE(match); + EXPECT_EQ(1, Cst); + + auto MIBCSext2 = + CFB1.buildInstr(TargetOpcode::G_SEXT_INREG, {s32}, + {CFB1.buildConstant(s32, 0x80), uint64_t(8)}); + // This should be a constant now. + match = mi_match(MIBCSext2->getOperand(0).getReg(), *MRI, m_ICst(Cst)); + EXPECT_TRUE(match); + EXPECT_EQ(-0x80, Cst); +} + +TEST_F(GISelMITest, FoldBinOp) { + setUp(); + if (!TM) + return; + + LLT s32{LLT::scalar(32)}; + auto MIBCst1 = B.buildConstant(s32, 16); + auto MIBCst2 = B.buildConstant(s32, 9); + auto MIBFCst1 = B.buildFConstant(s32, 1.0000001); + auto MIBFCst2 = B.buildFConstant(s32, 2.0); + + // Test G_ADD folding Integer + Mixed Int-Float cases + Optional FoldGAddInt = + ConstantFoldBinOp(TargetOpcode::G_ADD, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGAddInt.hasValue()); + EXPECT_EQ(25ULL, FoldGAddInt.getValue().getLimitedValue()); + Optional FoldGAddMix = + ConstantFoldBinOp(TargetOpcode::G_ADD, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGAddMix.hasValue()); + EXPECT_EQ(1073741840ULL, FoldGAddMix.getValue().getLimitedValue()); + + // Test G_AND folding Integer + Mixed Int-Float cases + Optional FoldGAndInt = + ConstantFoldBinOp(TargetOpcode::G_AND, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGAndInt.hasValue()); + EXPECT_EQ(0ULL, FoldGAndInt.getValue().getLimitedValue()); + Optional FoldGAndMix = + ConstantFoldBinOp(TargetOpcode::G_AND, MIBCst2->getOperand(0).getReg(), + MIBFCst1->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGAndMix.hasValue()); + EXPECT_EQ(1ULL, FoldGAndMix.getValue().getLimitedValue()); + + // Test G_ASHR folding Integer + Mixed cases + Optional FoldGAShrInt = + ConstantFoldBinOp(TargetOpcode::G_ASHR, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGAShrInt.hasValue()); + EXPECT_EQ(0ULL, FoldGAShrInt.getValue().getLimitedValue()); + Optional FoldGAShrMix = + ConstantFoldBinOp(TargetOpcode::G_ASHR, MIBFCst2->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGAShrMix.hasValue()); + EXPECT_EQ(2097152ULL, FoldGAShrMix.getValue().getLimitedValue()); + + // Test G_LSHR folding Integer + Mixed Int-Float cases + Optional FoldGLShrInt = + ConstantFoldBinOp(TargetOpcode::G_LSHR, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + 
EXPECT_TRUE(FoldGLShrInt.hasValue()); + EXPECT_EQ(0ULL, FoldGLShrInt.getValue().getLimitedValue()); + Optional FoldGLShrMix = + ConstantFoldBinOp(TargetOpcode::G_LSHR, MIBFCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGLShrMix.hasValue()); + EXPECT_EQ(2080768ULL, FoldGLShrMix.getValue().getLimitedValue()); + + // Test G_MUL folding Integer + Mixed Int-Float cases + Optional FoldGMulInt = + ConstantFoldBinOp(TargetOpcode::G_MUL, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGMulInt.hasValue()); + EXPECT_EQ(144ULL, FoldGMulInt.getValue().getLimitedValue()); + Optional FoldGMulMix = + ConstantFoldBinOp(TargetOpcode::G_MUL, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGMulMix.hasValue()); + EXPECT_EQ(0ULL, FoldGMulMix.getValue().getLimitedValue()); + + // Test G_OR folding Integer + Mixed Int-Float cases + Optional FoldGOrInt = + ConstantFoldBinOp(TargetOpcode::G_OR, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGOrInt.hasValue()); + EXPECT_EQ(25ULL, FoldGOrInt.getValue().getLimitedValue()); + Optional FoldGOrMix = + ConstantFoldBinOp(TargetOpcode::G_OR, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGOrMix.hasValue()); + EXPECT_EQ(1073741840ULL, FoldGOrMix.getValue().getLimitedValue()); + + // Test G_SHL folding Integer + Mixed Int-Float cases + Optional FoldGShlInt = + ConstantFoldBinOp(TargetOpcode::G_SHL, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGShlInt.hasValue()); + EXPECT_EQ(8192ULL, FoldGShlInt.getValue().getLimitedValue()); + Optional FoldGShlMix = + ConstantFoldBinOp(TargetOpcode::G_SHL, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGShlMix.hasValue()); + EXPECT_EQ(0ULL, FoldGShlMix.getValue().getLimitedValue()); + + // Test G_SUB folding Integer + Mixed Int-Float cases + Optional FoldGSubInt = + ConstantFoldBinOp(TargetOpcode::G_SUB, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGSubInt.hasValue()); + EXPECT_EQ(7ULL, FoldGSubInt.getValue().getLimitedValue()); + Optional FoldGSubMix = + ConstantFoldBinOp(TargetOpcode::G_SUB, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGSubMix.hasValue()); + EXPECT_EQ(3221225488ULL, FoldGSubMix.getValue().getLimitedValue()); + + // Test G_XOR folding Integer + Mixed Int-Float cases + Optional FoldGXorInt = + ConstantFoldBinOp(TargetOpcode::G_XOR, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGXorInt.hasValue()); + EXPECT_EQ(25ULL, FoldGXorInt.getValue().getLimitedValue()); + Optional FoldGXorMix = + ConstantFoldBinOp(TargetOpcode::G_XOR, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGXorMix.hasValue()); + EXPECT_EQ(1073741840ULL, FoldGXorMix.getValue().getLimitedValue()); + + // Test G_UDIV folding Integer + Mixed Int-Float cases + Optional FoldGUdivInt = + ConstantFoldBinOp(TargetOpcode::G_UDIV, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGUdivInt.hasValue()); + EXPECT_EQ(1ULL, FoldGUdivInt.getValue().getLimitedValue()); + Optional FoldGUdivMix = + ConstantFoldBinOp(TargetOpcode::G_UDIV, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + 
EXPECT_TRUE(FoldGUdivMix.hasValue()); + EXPECT_EQ(0ULL, FoldGUdivMix.getValue().getLimitedValue()); + + // Test G_SDIV folding Integer + Mixed Int-Float cases + Optional<APInt> FoldGSdivInt = + ConstantFoldBinOp(TargetOpcode::G_SDIV, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGSdivInt.hasValue()); + EXPECT_EQ(1ULL, FoldGSdivInt.getValue().getLimitedValue()); + Optional<APInt> FoldGSdivMix = + ConstantFoldBinOp(TargetOpcode::G_SDIV, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGSdivMix.hasValue()); + EXPECT_EQ(0ULL, FoldGSdivMix.getValue().getLimitedValue()); + + // Test G_UREM folding Integer + Mixed Int-Float cases + Optional<APInt> FoldGUremInt = + ConstantFoldBinOp(TargetOpcode::G_UREM, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGUremInt.hasValue()); + EXPECT_EQ(7ULL, FoldGUremInt.getValue().getLimitedValue()); + Optional<APInt> FoldGUremMix = + ConstantFoldBinOp(TargetOpcode::G_UREM, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGUremMix.hasValue()); + EXPECT_EQ(16ULL, FoldGUremMix.getValue().getLimitedValue()); + + // Test G_SREM folding Integer + Mixed Int-Float cases + Optional<APInt> FoldGSremInt = + ConstantFoldBinOp(TargetOpcode::G_SREM, MIBCst1->getOperand(0).getReg(), + MIBCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGSremInt.hasValue()); + EXPECT_EQ(7ULL, FoldGSremInt.getValue().getLimitedValue()); + Optional<APInt> FoldGSremMix = + ConstantFoldBinOp(TargetOpcode::G_SREM, MIBCst1->getOperand(0).getReg(), + MIBFCst2->getOperand(0).getReg(), *MRI); + EXPECT_TRUE(FoldGSremMix.hasValue()); + EXPECT_EQ(16ULL, FoldGSremMix.getValue().getLimitedValue()); +} + +} // namespace \ No newline at end of file diff --git a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp index 5e7b750f194cb..b6e3e1166a0e9 100644 --- a/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "GISelMITest.h" #include "llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -29,140 +30,29 @@ using namespace MIPatternMatch; namespace { -void initLLVM() { - InitializeAllTargets(); - InitializeAllTargetMCs(); - InitializeAllAsmPrinters(); - InitializeAllAsmParsers(); - - PassRegistry *Registry = PassRegistry::getPassRegistry(); - initializeCore(*Registry); - initializeCodeGen(*Registry); -} - -/// Create a TargetMachine. As we lack a dedicated always available target for -/// unittests, we go for "AArch64".
-std::unique_ptr createTargetMachine() { - Triple TargetTriple("aarch64--"); - std::string Error; - const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error); - if (!T) - return nullptr; - - TargetOptions Options; - return std::unique_ptr(static_cast( - T->createTargetMachine("AArch64", "", "", Options, None, None, - CodeGenOpt::Aggressive))); -} - -std::unique_ptr parseMIR(LLVMContext &Context, - std::unique_ptr &MIR, - const TargetMachine &TM, StringRef MIRCode, - const char *FuncName, MachineModuleInfo &MMI) { - SMDiagnostic Diagnostic; - std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); - MIR = createMIRParser(std::move(MBuffer), Context); - if (!MIR) - return nullptr; - - std::unique_ptr M = MIR->parseIRModule(); - if (!M) - return nullptr; - - M->setDataLayout(TM.createDataLayout()); - - if (MIR->parseMachineFunctions(*M, MMI)) - return nullptr; - - return M; -} - -std::pair, std::unique_ptr> -createDummyModule(LLVMContext &Context, const LLVMTargetMachine &TM, - StringRef MIRFunc) { - SmallString<512> S; - StringRef MIRString = (Twine(R"MIR( ---- -... -name: func -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } -body: | - bb.1: - %0(s64) = COPY $x0 - %1(s64) = COPY $x1 - %2(s64) = COPY $x2 -)MIR") + Twine(MIRFunc) + Twine("...\n")) - .toNullTerminatedStringRef(S); - std::unique_ptr MIR; - auto MMI = std::make_unique(&TM); - std::unique_ptr M = - parseMIR(Context, MIR, TM, MIRString, "func", *MMI); - return make_pair(std::move(M), std::move(MMI)); -} - -static MachineFunction *getMFFromMMI(const Module *M, - const MachineModuleInfo *MMI) { - Function *F = M->getFunction("func"); - auto *MF = MMI->getMachineFunction(*F); - return MF; -} - -static void collectCopies(SmallVectorImpl &Copies, - MachineFunction *MF) { - for (auto &MBB : *MF) - for (MachineInstr &MI : MBB) { - if (MI.getOpcode() == TargetOpcode::COPY) - Copies.push_back(MI.getOperand(0).getReg()); - } -} - -TEST(PatternMatchInstr, MatchIntConstant) { - LLVMContext Context; - std::unique_ptr TM = createTargetMachine(); +TEST_F(GISelMITest, MatchIntConstant) { + setUp(); if (!TM) return; - auto ModuleMMIPair = createDummyModule(Context, *TM, ""); - MachineFunction *MF = - getMFFromMMI(ModuleMMIPair.first.get(), ModuleMMIPair.second.get()); - SmallVector Copies; - collectCopies(Copies, MF); - MachineBasicBlock *EntryMBB = &*MF->begin(); - MachineIRBuilder B(*MF); - MachineRegisterInfo &MRI = MF->getRegInfo(); - B.setInsertPt(*EntryMBB, EntryMBB->end()); auto MIBCst = B.buildConstant(LLT::scalar(64), 42); int64_t Cst; - bool match = mi_match(MIBCst->getOperand(0).getReg(), MRI, m_ICst(Cst)); + bool match = mi_match(MIBCst->getOperand(0).getReg(), *MRI, m_ICst(Cst)); EXPECT_TRUE(match); EXPECT_EQ(Cst, 42); } -TEST(PatternMatchInstr, MatchBinaryOp) { - LLVMContext Context; - std::unique_ptr TM = createTargetMachine(); +TEST_F(GISelMITest, MatchBinaryOp) { + setUp(); if (!TM) return; - auto ModuleMMIPair = createDummyModule(Context, *TM, ""); - MachineFunction *MF = - getMFFromMMI(ModuleMMIPair.first.get(), ModuleMMIPair.second.get()); - SmallVector Copies; - collectCopies(Copies, MF); - MachineBasicBlock *EntryMBB = &*MF->begin(); - MachineIRBuilder B(*MF); - MachineRegisterInfo &MRI = MF->getRegInfo(); - B.setInsertPt(*EntryMBB, EntryMBB->end()); LLT s64 = LLT::scalar(64); auto MIBAdd = B.buildAdd(s64, Copies[0], Copies[1]); // Test case for no bind. 
bool match = - mi_match(MIBAdd->getOperand(0).getReg(), MRI, m_GAdd(m_Reg(), m_Reg())); + mi_match(MIBAdd->getOperand(0).getReg(), *MRI, m_GAdd(m_Reg(), m_Reg())); EXPECT_TRUE(match); Register Src0, Src1, Src2; - match = mi_match(MIBAdd->getOperand(0).getReg(), MRI, + match = mi_match(MIBAdd->getOperand(0).getReg(), *MRI, m_GAdd(m_Reg(Src0), m_Reg(Src1))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); @@ -172,14 +62,14 @@ TEST(PatternMatchInstr, MatchBinaryOp) { auto MIBMul = B.buildMul(s64, MIBAdd, Copies[2]); // Try to match MUL. - match = mi_match(MIBMul->getOperand(0).getReg(), MRI, + match = mi_match(MIBMul->getOperand(0).getReg(), *MRI, m_GMul(m_Reg(Src0), m_Reg(Src1))); EXPECT_TRUE(match); EXPECT_EQ(Src0, MIBAdd->getOperand(0).getReg()); EXPECT_EQ(Src1, Copies[2]); // Try to match MUL(ADD) - match = mi_match(MIBMul->getOperand(0).getReg(), MRI, + match = mi_match(MIBMul->getOperand(0).getReg(), *MRI, m_GMul(m_GAdd(m_Reg(Src0), m_Reg(Src1)), m_Reg(Src2))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); @@ -191,7 +81,7 @@ TEST(PatternMatchInstr, MatchBinaryOp) { // Try to match MUL(Cst, Reg) on src of MUL(Reg, Cst) to validate // commutativity. int64_t Cst; - match = mi_match(MIBMul2->getOperand(0).getReg(), MRI, + match = mi_match(MIBMul2->getOperand(0).getReg(), *MRI, m_GMul(m_ICst(Cst), m_Reg(Src0))); EXPECT_TRUE(match); EXPECT_EQ(Cst, 42); @@ -199,14 +89,14 @@ TEST(PatternMatchInstr, MatchBinaryOp) { // Make sure commutative doesn't work with something like SUB. auto MIBSub = B.buildSub(s64, Copies[0], B.buildConstant(s64, 42)); - match = mi_match(MIBSub->getOperand(0).getReg(), MRI, + match = mi_match(MIBSub->getOperand(0).getReg(), *MRI, m_GSub(m_ICst(Cst), m_Reg(Src0))); EXPECT_FALSE(match); auto MIBFMul = B.buildInstr(TargetOpcode::G_FMUL, {s64}, {Copies[0], B.buildConstant(s64, 42)}); // Match and test commutativity for FMUL. - match = mi_match(MIBFMul->getOperand(0).getReg(), MRI, + match = mi_match(MIBFMul->getOperand(0).getReg(), *MRI, m_GFMul(m_ICst(Cst), m_Reg(Src0))); EXPECT_TRUE(match); EXPECT_EQ(Cst, 42); @@ -215,7 +105,7 @@ TEST(PatternMatchInstr, MatchBinaryOp) { // FSUB auto MIBFSub = B.buildInstr(TargetOpcode::G_FSUB, {s64}, {Copies[0], B.buildConstant(s64, 42)}); - match = mi_match(MIBFSub->getOperand(0).getReg(), MRI, + match = mi_match(MIBFSub->getOperand(0).getReg(), *MRI, m_GFSub(m_Reg(Src0), m_Reg())); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); @@ -223,7 +113,7 @@ TEST(PatternMatchInstr, MatchBinaryOp) { // Build AND %0, %1 auto MIBAnd = B.buildAnd(s64, Copies[0], Copies[1]); // Try to match AND. - match = mi_match(MIBAnd->getOperand(0).getReg(), MRI, + match = mi_match(MIBAnd->getOperand(0).getReg(), *MRI, m_GAnd(m_Reg(Src0), m_Reg(Src1))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); @@ -232,72 +122,17 @@ TEST(PatternMatchInstr, MatchBinaryOp) { // Build OR %0, %1 auto MIBOr = B.buildOr(s64, Copies[0], Copies[1]); // Try to match OR. - match = mi_match(MIBOr->getOperand(0).getReg(), MRI, + match = mi_match(MIBOr->getOperand(0).getReg(), *MRI, m_GOr(m_Reg(Src0), m_Reg(Src1))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); EXPECT_EQ(Src1, Copies[1]); - - // Try to use the FoldableInstructionsBuilder to build binary ops. - ConstantFoldingMIRBuilder CFB(B.getState()); - LLT s32 = LLT::scalar(32); - auto MIBCAdd = - CFB.buildAdd(s32, CFB.buildConstant(s32, 0), CFB.buildConstant(s32, 1)); - // This should be a constant now. 
- match = mi_match(MIBCAdd->getOperand(0).getReg(), MRI, m_ICst(Cst)); - EXPECT_TRUE(match); - EXPECT_EQ(Cst, 1); - auto MIBCAdd1 = - CFB.buildInstr(TargetOpcode::G_ADD, {s32}, - {CFB.buildConstant(s32, 0), CFB.buildConstant(s32, 1)}); - // This should be a constant now. - match = mi_match(MIBCAdd1->getOperand(0).getReg(), MRI, m_ICst(Cst)); - EXPECT_TRUE(match); - EXPECT_EQ(Cst, 1); - - // Try one of the other constructors of MachineIRBuilder to make sure it's - // compatible. - ConstantFoldingMIRBuilder CFB1(*MF); - CFB1.setInsertPt(*EntryMBB, EntryMBB->end()); - auto MIBCSub = - CFB1.buildInstr(TargetOpcode::G_SUB, {s32}, - {CFB1.buildConstant(s32, 1), CFB1.buildConstant(s32, 1)}); - // This should be a constant now. - match = mi_match(MIBCSub->getOperand(0).getReg(), MRI, m_ICst(Cst)); - EXPECT_TRUE(match); - EXPECT_EQ(Cst, 0); - - auto MIBCSext1 = - CFB1.buildInstr(TargetOpcode::G_SEXT_INREG, {s32}, - {CFB1.buildConstant(s32, 0x01), uint64_t(8)}); - // This should be a constant now. - match = mi_match(MIBCSext1->getOperand(0).getReg(), MRI, m_ICst(Cst)); - EXPECT_TRUE(match); - EXPECT_EQ(1, Cst); - - auto MIBCSext2 = - CFB1.buildInstr(TargetOpcode::G_SEXT_INREG, {s32}, - {CFB1.buildConstant(s32, 0x80), uint64_t(8)}); - // This should be a constant now. - match = mi_match(MIBCSext2->getOperand(0).getReg(), MRI, m_ICst(Cst)); - EXPECT_TRUE(match); - EXPECT_EQ(-0x80, Cst); } -TEST(PatternMatchInstr, MatchFPUnaryOp) { - LLVMContext Context; - std::unique_ptr TM = createTargetMachine(); +TEST_F(GISelMITest, MatchFPUnaryOp) { + setUp(); if (!TM) return; - auto ModuleMMIPair = createDummyModule(Context, *TM, ""); - MachineFunction *MF = - getMFFromMMI(ModuleMMIPair.first.get(), ModuleMMIPair.second.get()); - SmallVector Copies; - collectCopies(Copies, MF); - MachineBasicBlock *EntryMBB = &*MF->begin(); - MachineIRBuilder B(*MF); - MachineRegisterInfo &MRI = MF->getRegInfo(); - B.setInsertPt(*EntryMBB, EntryMBB->end()); // Truncate s64 to s32. LLT s32 = LLT::scalar(32); @@ -305,23 +140,24 @@ TEST(PatternMatchInstr, MatchFPUnaryOp) { // Match G_FABS. auto MIBFabs = B.buildInstr(TargetOpcode::G_FABS, {s32}, {Copy0s32}); - bool match = mi_match(MIBFabs->getOperand(0).getReg(), MRI, m_GFabs(m_Reg())); + bool match = + mi_match(MIBFabs->getOperand(0).getReg(), *MRI, m_GFabs(m_Reg())); EXPECT_TRUE(match); Register Src; auto MIBFNeg = B.buildInstr(TargetOpcode::G_FNEG, {s32}, {Copy0s32}); - match = mi_match(MIBFNeg->getOperand(0).getReg(), MRI, m_GFNeg(m_Reg(Src))); + match = mi_match(MIBFNeg->getOperand(0).getReg(), *MRI, m_GFNeg(m_Reg(Src))); EXPECT_TRUE(match); EXPECT_EQ(Src, Copy0s32->getOperand(0).getReg()); - match = mi_match(MIBFabs->getOperand(0).getReg(), MRI, m_GFabs(m_Reg(Src))); + match = mi_match(MIBFabs->getOperand(0).getReg(), *MRI, m_GFabs(m_Reg(Src))); EXPECT_TRUE(match); EXPECT_EQ(Src, Copy0s32->getOperand(0).getReg()); // Build and match FConstant. 
auto MIBFCst = B.buildFConstant(s32, .5); const ConstantFP *TmpFP{}; - match = mi_match(MIBFCst->getOperand(0).getReg(), MRI, m_GFCst(TmpFP)); + match = mi_match(MIBFCst->getOperand(0).getReg(), *MRI, m_GFCst(TmpFP)); EXPECT_TRUE(match); EXPECT_TRUE(TmpFP); APFloat APF((float).5); @@ -332,7 +168,7 @@ TEST(PatternMatchInstr, MatchFPUnaryOp) { LLT s64 = LLT::scalar(64); auto MIBFCst64 = B.buildFConstant(s64, .5); const ConstantFP *TmpFP64{}; - match = mi_match(MIBFCst64->getOperand(0).getReg(), MRI, m_GFCst(TmpFP64)); + match = mi_match(MIBFCst64->getOperand(0).getReg(), *MRI, m_GFCst(TmpFP64)); EXPECT_TRUE(match); EXPECT_TRUE(TmpFP64); APFloat APF64(.5); @@ -344,7 +180,7 @@ TEST(PatternMatchInstr, MatchFPUnaryOp) { LLT s16 = LLT::scalar(16); auto MIBFCst16 = B.buildFConstant(s16, .5); const ConstantFP *TmpFP16{}; - match = mi_match(MIBFCst16->getOperand(0).getReg(), MRI, m_GFCst(TmpFP16)); + match = mi_match(MIBFCst16->getOperand(0).getReg(), *MRI, m_GFCst(TmpFP16)); EXPECT_TRUE(match); EXPECT_TRUE(TmpFP16); bool Ignored; @@ -355,20 +191,11 @@ TEST(PatternMatchInstr, MatchFPUnaryOp) { EXPECT_NE(TmpFP16, TmpFP); } -TEST(PatternMatchInstr, MatchExtendsTrunc) { - LLVMContext Context; - std::unique_ptr TM = createTargetMachine(); +TEST_F(GISelMITest, MatchExtendsTrunc) { + setUp(); if (!TM) return; - auto ModuleMMIPair = createDummyModule(Context, *TM, ""); - MachineFunction *MF = - getMFFromMMI(ModuleMMIPair.first.get(), ModuleMMIPair.second.get()); - SmallVector Copies; - collectCopies(Copies, MF); - MachineBasicBlock *EntryMBB = &*MF->begin(); - MachineIRBuilder B(*MF); - MachineRegisterInfo &MRI = MF->getRegInfo(); - B.setInsertPt(*EntryMBB, EntryMBB->end()); + LLT s64 = LLT::scalar(64); LLT s32 = LLT::scalar(32); @@ -378,72 +205,62 @@ TEST(PatternMatchInstr, MatchExtendsTrunc) { auto MIBSExt = B.buildSExt(s64, MIBTrunc); Register Src0; bool match = - mi_match(MIBTrunc->getOperand(0).getReg(), MRI, m_GTrunc(m_Reg(Src0))); + mi_match(MIBTrunc->getOperand(0).getReg(), *MRI, m_GTrunc(m_Reg(Src0))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); match = - mi_match(MIBAExt->getOperand(0).getReg(), MRI, m_GAnyExt(m_Reg(Src0))); + mi_match(MIBAExt->getOperand(0).getReg(), *MRI, m_GAnyExt(m_Reg(Src0))); EXPECT_TRUE(match); EXPECT_EQ(Src0, MIBTrunc->getOperand(0).getReg()); - match = mi_match(MIBSExt->getOperand(0).getReg(), MRI, m_GSExt(m_Reg(Src0))); + match = mi_match(MIBSExt->getOperand(0).getReg(), *MRI, m_GSExt(m_Reg(Src0))); EXPECT_TRUE(match); EXPECT_EQ(Src0, MIBTrunc->getOperand(0).getReg()); - match = mi_match(MIBZExt->getOperand(0).getReg(), MRI, m_GZExt(m_Reg(Src0))); + match = mi_match(MIBZExt->getOperand(0).getReg(), *MRI, m_GZExt(m_Reg(Src0))); EXPECT_TRUE(match); EXPECT_EQ(Src0, MIBTrunc->getOperand(0).getReg()); // Match ext(trunc src) - match = mi_match(MIBAExt->getOperand(0).getReg(), MRI, + match = mi_match(MIBAExt->getOperand(0).getReg(), *MRI, m_GAnyExt(m_GTrunc(m_Reg(Src0)))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); - match = mi_match(MIBSExt->getOperand(0).getReg(), MRI, + match = mi_match(MIBSExt->getOperand(0).getReg(), *MRI, m_GSExt(m_GTrunc(m_Reg(Src0)))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); - match = mi_match(MIBZExt->getOperand(0).getReg(), MRI, + match = mi_match(MIBZExt->getOperand(0).getReg(), *MRI, m_GZExt(m_GTrunc(m_Reg(Src0)))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); } -TEST(PatternMatchInstr, MatchSpecificType) { - LLVMContext Context; - std::unique_ptr TM = createTargetMachine(); +TEST_F(GISelMITest, MatchSpecificType) { + setUp(); 
if (!TM) return; - auto ModuleMMIPair = createDummyModule(Context, *TM, ""); - MachineFunction *MF = - getMFFromMMI(ModuleMMIPair.first.get(), ModuleMMIPair.second.get()); - SmallVector Copies; - collectCopies(Copies, MF); - MachineBasicBlock *EntryMBB = &*MF->begin(); - MachineIRBuilder B(*MF); - MachineRegisterInfo &MRI = MF->getRegInfo(); - B.setInsertPt(*EntryMBB, EntryMBB->end()); // Try to match a 64bit add. LLT s64 = LLT::scalar(64); LLT s32 = LLT::scalar(32); auto MIBAdd = B.buildAdd(s64, Copies[0], Copies[1]); - EXPECT_FALSE(mi_match(MIBAdd->getOperand(0).getReg(), MRI, + EXPECT_FALSE(mi_match(MIBAdd->getOperand(0).getReg(), *MRI, m_GAdd(m_SpecificType(s32), m_Reg()))); - EXPECT_TRUE(mi_match(MIBAdd->getOperand(0).getReg(), MRI, + EXPECT_TRUE(mi_match(MIBAdd->getOperand(0).getReg(), *MRI, m_GAdd(m_SpecificType(s64), m_Reg()))); // Try to match the destination type of a bitcast. LLT v2s32 = LLT::vector(2, 32); auto MIBCast = B.buildCast(v2s32, Copies[0]); EXPECT_TRUE( - mi_match(MIBCast->getOperand(0).getReg(), MRI, m_GBitcast(m_Reg()))); + mi_match(MIBCast->getOperand(0).getReg(), *MRI, m_GBitcast(m_Reg()))); EXPECT_TRUE( - mi_match(MIBCast->getOperand(0).getReg(), MRI, m_SpecificType(v2s32))); + mi_match(MIBCast->getOperand(0).getReg(), *MRI, m_SpecificType(v2s32))); EXPECT_TRUE( - mi_match(MIBCast->getOperand(1).getReg(), MRI, m_SpecificType(s64))); + mi_match(MIBCast->getOperand(1).getReg(), *MRI, m_SpecificType(s64))); // Build a PTRToInt and INTTOPTR and match and test them. LLT PtrTy = LLT::pointer(0, 64); @@ -452,43 +269,34 @@ TEST(PatternMatchInstr, MatchSpecificType) { Register Src0; // match the ptrtoint(inttoptr reg) - bool match = mi_match(MIBPtrToInt->getOperand(0).getReg(), MRI, + bool match = mi_match(MIBPtrToInt->getOperand(0).getReg(), *MRI, m_GPtrToInt(m_GIntToPtr(m_Reg(Src0)))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); } -TEST(PatternMatchInstr, MatchCombinators) { - LLVMContext Context; - std::unique_ptr TM = createTargetMachine(); +TEST_F(GISelMITest, MatchCombinators) { + setUp(); if (!TM) return; - auto ModuleMMIPair = createDummyModule(Context, *TM, ""); - MachineFunction *MF = - getMFFromMMI(ModuleMMIPair.first.get(), ModuleMMIPair.second.get()); - SmallVector Copies; - collectCopies(Copies, MF); - MachineBasicBlock *EntryMBB = &*MF->begin(); - MachineIRBuilder B(*MF); - MachineRegisterInfo &MRI = MF->getRegInfo(); - B.setInsertPt(*EntryMBB, EntryMBB->end()); + LLT s64 = LLT::scalar(64); LLT s32 = LLT::scalar(32); auto MIBAdd = B.buildAdd(s64, Copies[0], Copies[1]); Register Src0, Src1; bool match = - mi_match(MIBAdd->getOperand(0).getReg(), MRI, + mi_match(MIBAdd->getOperand(0).getReg(), *MRI, m_all_of(m_SpecificType(s64), m_GAdd(m_Reg(Src0), m_Reg(Src1)))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); EXPECT_EQ(Src1, Copies[1]); // Check for s32 (which should fail). match = - mi_match(MIBAdd->getOperand(0).getReg(), MRI, + mi_match(MIBAdd->getOperand(0).getReg(), *MRI, m_all_of(m_SpecificType(s32), m_GAdd(m_Reg(Src0), m_Reg(Src1)))); EXPECT_FALSE(match); match = - mi_match(MIBAdd->getOperand(0).getReg(), MRI, + mi_match(MIBAdd->getOperand(0).getReg(), *MRI, m_any_of(m_SpecificType(s32), m_GAdd(m_Reg(Src0), m_Reg(Src1)))); EXPECT_TRUE(match); EXPECT_EQ(Src0, Copies[0]); @@ -496,33 +304,24 @@ TEST(PatternMatchInstr, MatchCombinators) { // Match a case where none of the predicates hold true. 
match = mi_match( - MIBAdd->getOperand(0).getReg(), MRI, + MIBAdd->getOperand(0).getReg(), *MRI, m_any_of(m_SpecificType(LLT::scalar(16)), m_GSub(m_Reg(), m_Reg()))); EXPECT_FALSE(match); } -TEST(PatternMatchInstr, MatchMiscellaneous) { - LLVMContext Context; - std::unique_ptr TM = createTargetMachine(); +TEST_F(GISelMITest, MatchMiscellaneous) { + setUp(); if (!TM) return; - auto ModuleMMIPair = createDummyModule(Context, *TM, ""); - MachineFunction *MF = - getMFFromMMI(ModuleMMIPair.first.get(), ModuleMMIPair.second.get()); - SmallVector Copies; - collectCopies(Copies, MF); - MachineBasicBlock *EntryMBB = &*MF->begin(); - MachineIRBuilder B(*MF); - MachineRegisterInfo &MRI = MF->getRegInfo(); - B.setInsertPt(*EntryMBB, EntryMBB->end()); + LLT s64 = LLT::scalar(64); auto MIBAdd = B.buildAdd(s64, Copies[0], Copies[1]); // Make multiple uses of this add. B.buildCast(LLT::pointer(0, 32), MIBAdd); B.buildCast(LLT::pointer(1, 32), MIBAdd); - bool match = mi_match(MIBAdd.getReg(0), MRI, m_GAdd(m_Reg(), m_Reg())); + bool match = mi_match(MIBAdd.getReg(0), *MRI, m_GAdd(m_Reg(), m_Reg())); EXPECT_TRUE(match); - match = mi_match(MIBAdd.getReg(0), MRI, m_OneUse(m_GAdd(m_Reg(), m_Reg()))); + match = mi_match(MIBAdd.getReg(0), *MRI, m_OneUse(m_GAdd(m_Reg(), m_Reg()))); EXPECT_FALSE(match); } } // namespace diff --git a/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt b/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt index fb9d5e564b382..849b0f64b40fd 100644 --- a/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt +++ b/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_LINK_COMPONENTS DebugInfoGSYM + MC Support ) diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp index 421544ee1d40d..90d8594eec6e7 100644 --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -13,6 +13,8 @@ #include "llvm/DebugInfo/GSYM/FileEntry.h" #include "llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" @@ -1046,3 +1048,255 @@ TEST(GSYMTest, TestHeaderEncodeDecode) { TestHeaderEncodeDecode(H, llvm::support::little); TestHeaderEncodeDecode(H, llvm::support::big); } + +static void TestGsymCreatorEncodeError(llvm::support::endianness ByteOrder, + const GsymCreator &GC, + std::string ExpectedErrorMsg) { + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = GC.encode(FW); + ASSERT_TRUE(bool(Err)); + checkError(ExpectedErrorMsg, std::move(Err)); +} + +TEST(GSYMTest, TestGsymCreatorEncodeErrors) { + const uint8_t ValidUUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16}; + const uint8_t InvalidUUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21}; + // Verify we get an error when trying to encode an GsymCreator with no + // function infos. We shouldn't be saving a GSYM file in this case since + // there is nothing inside of it. + GsymCreator GC; + TestGsymCreatorEncodeError(llvm::support::little, GC, + "no functions to encode"); + const uint64_t FuncAddr = 0x1000; + const uint64_t FuncSize = 0x100; + const uint32_t FuncName = GC.insertString("foo"); + // Verify we get an error trying to encode a GsymCreator that isn't + // finalized. 
+ GC.addFunctionInfo(FunctionInfo(FuncAddr, FuncSize, FuncName)); + TestGsymCreatorEncodeError(llvm::support::little, GC, + "GsymCreator wasn't finalized prior to encoding"); + std::string finalizeIssues; + raw_string_ostream OS(finalizeIssues); + llvm::Error finalizeErr = GC.finalize(OS); + ASSERT_FALSE(bool(finalizeErr)); + finalizeErr = GC.finalize(OS); + ASSERT_TRUE(bool(finalizeErr)); + checkError("already finalized", std::move(finalizeErr)); + // Verify we get an error trying to encode a GsymCreator with a UUID that is + // too long. + GC.setUUID(InvalidUUID); + TestGsymCreatorEncodeError(llvm::support::little, GC, + "invalid UUID size 21"); + GC.setUUID(ValidUUID); + // Verify errors are propagated when we try to encoding an invalid line + // table. + GC.forEachFunctionInfo([](FunctionInfo &FI) -> bool { + FI.OptLineTable = LineTable(); // Invalid line table. + return false; // Stop iterating + }); + TestGsymCreatorEncodeError(llvm::support::little, GC, + "attempted to encode invalid LineTable object"); + // Verify errors are propagated when we try to encoding an invalid inline + // info. + GC.forEachFunctionInfo([](FunctionInfo &FI) -> bool { + FI.OptLineTable = llvm::None; + FI.Inline = InlineInfo(); // Invalid InlineInfo. + return false; // Stop iterating + }); + TestGsymCreatorEncodeError(llvm::support::little, GC, + "attempted to encode invalid InlineInfo object"); +} + +static void Compare(const GsymCreator &GC, const GsymReader &GR) { + // Verify that all of the data in a GsymCreator is correctly decoded from + // a GsymReader. To do this, we iterator over + GC.forEachFunctionInfo([&](const FunctionInfo &FI) -> bool { + auto DecodedFI = GR.getFunctionInfo(FI.Range.Start); + EXPECT_TRUE(bool(DecodedFI)); + EXPECT_EQ(FI, *DecodedFI); + return true; // Keep iterating over all FunctionInfo objects. 
+ }); +} + +static void TestEncodeDecode(const GsymCreator &GC, + support::endianness ByteOrder, uint16_t Version, + uint8_t AddrOffSize, uint64_t BaseAddress, + uint32_t NumAddresses, ArrayRef UUID) { + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = GC.encode(FW); + ASSERT_FALSE((bool)Err); + Expected GR = GsymReader::copyBuffer(OutStrm.str()); + ASSERT_TRUE(bool(GR)); + const Header &Hdr = GR->getHeader(); + EXPECT_EQ(Hdr.Version, Version); + EXPECT_EQ(Hdr.AddrOffSize, AddrOffSize); + EXPECT_EQ(Hdr.UUIDSize, UUID.size()); + EXPECT_EQ(Hdr.BaseAddress, BaseAddress); + EXPECT_EQ(Hdr.NumAddresses, NumAddresses); + EXPECT_EQ(ArrayRef(Hdr.UUID, Hdr.UUIDSize), UUID); + Compare(GC, GR.get()); +} + +TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint8_t AddrOffSize = 1; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x00, 0x10, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x20, 0x10, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); +} + +TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint8_t AddrOffSize = 2; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x200, 0x100, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); +} + +TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint8_t AddrOffSize = 4; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x20000, 0x100, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); +} + +TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint8_t AddrOffSize = 8; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + 
GC.addFunctionInfo(FunctionInfo(BaseAddr+0x000, 0x100, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr+0x100000000, 0x100, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, + GSYM_VERSION, + AddrOffSize, + BaseAddr, + 2, // NumAddresses + ArrayRef(UUID)); +} + +static void VerifyFunctionInfo(const GsymReader &GR, uint64_t Addr, + const FunctionInfo &FI) { + auto ExpFI = GR.getFunctionInfo(Addr); + ASSERT_TRUE(bool(ExpFI)); + ASSERT_EQ(FI, ExpFI.get()); +} + +static void VerifyFunctionInfoError(const GsymReader &GR, uint64_t Addr, + std::string ErrMessage) { + auto ExpFI = GR.getFunctionInfo(Addr); + ASSERT_FALSE(bool(ExpFI)); + checkError(ErrMessage, ExpFI.takeError()); +} + +TEST(GSYMTest, TestGsymReader) { + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint64_t Func1Addr = BaseAddr; + constexpr uint64_t Func2Addr = BaseAddr+0x20; + constexpr uint64_t FuncSize = 0x10; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + const auto ByteOrder = support::endian::system_endianness(); + GC.addFunctionInfo(FunctionInfo(Func1Addr, FuncSize, Func1Name)); + GC.addFunctionInfo(FunctionInfo(Func2Addr, FuncSize, Func2Name)); + Error FinalizeErr = GC.finalize(llvm::nulls()); + ASSERT_FALSE(FinalizeErr); + SmallString<512> Str; + raw_svector_ostream OutStrm(Str); + FileWriter FW(OutStrm, ByteOrder); + llvm::Error Err = GC.encode(FW); + ASSERT_FALSE((bool)Err); + if (auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str())) { + const GsymReader &GR = ExpectedGR.get(); + VerifyFunctionInfoError(GR, Func1Addr-1, "address 0xfff not in GSYM"); + + FunctionInfo Func1(Func1Addr, FuncSize, Func1Name); + VerifyFunctionInfo(GR, Func1Addr, Func1); + VerifyFunctionInfo(GR, Func1Addr+1, Func1); + VerifyFunctionInfo(GR, Func1Addr+FuncSize-1, Func1); + VerifyFunctionInfoError(GR, Func1Addr+FuncSize, + "address 0x1010 not in GSYM"); + VerifyFunctionInfoError(GR, Func2Addr-1, "address 0x101f not in GSYM"); + FunctionInfo Func2(Func2Addr, FuncSize, Func2Name); + VerifyFunctionInfo(GR, Func2Addr, Func2); + VerifyFunctionInfo(GR, Func2Addr+1, Func2); + VerifyFunctionInfo(GR, Func2Addr+FuncSize-1, Func2); + VerifyFunctionInfoError(GR, Func2Addr+FuncSize, + "address 0x1030 not in GSYM"); + } +} diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp index fa0dc61d3dfce..e6c7a50113957 100644 --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -164,7 +164,7 @@ TEST_F(MDStringTest, PrintingComplex) { std::string Str; raw_string_ostream oss(Str); s->print(oss); - EXPECT_STREQ("!\"\\00\\0A\\22\\5C\\FF\"", oss.str().c_str()); + EXPECT_STREQ("!\"\\00\\0A\\22\\\\\\FF\"", oss.str().c_str()); } typedef MetadataTest MDNodeTest; diff --git a/llvm/unittests/ProfileData/SampleProfTest.cpp b/llvm/unittests/ProfileData/SampleProfTest.cpp index dd5ded5bc9b2f..59ed19d292eff 100644 --- a/llvm/unittests/ProfileData/SampleProfTest.cpp +++ b/llvm/unittests/ProfileData/SampleProfTest.cpp @@ -54,7 +54,7 @@ struct SampleProfTest : ::testing::Test { auto ReaderOrErr = SampleProfileReader::create(Profile, Context); ASSERT_TRUE(NoError(ReaderOrErr.getError())); Reader = std::move(ReaderOrErr.get()); - 
Reader->collectFuncsToUse(M); + Reader->collectFuncsFrom(M); } void testRoundTrip(SampleProfileFormat Format, bool Remap) { @@ -86,6 +86,13 @@ struct SampleProfTest : ::testing::Test { BarSamples.addCalledTargetSamples(1, 0, MconstructName, 1000); BarSamples.addCalledTargetSamples(1, 0, StringviewName, 437); + StringRef BazName("_Z3bazi"); + FunctionSamples BazSamples; + BazSamples.setName(BazName); + BazSamples.addTotalSamples(12557); + BazSamples.addHeadSamples(1257); + BazSamples.addBodySamples(1, 0, 12557); + Module M("my_module", Context); FunctionType *fn_type = FunctionType::get(Type::getVoidTy(Context), {}, false); @@ -95,6 +102,7 @@ struct SampleProfTest : ::testing::Test { StringMap Profiles; Profiles[FooName] = std::move(FooSamples); Profiles[BarName] = std::move(BarSamples); + Profiles[BazName] = std::move(BazSamples); ProfileSymbolList List; if (Format == SampleProfileFormat::SPF_Ext_Binary) { @@ -137,8 +145,6 @@ struct SampleProfTest : ::testing::Test { ASSERT_TRUE(NoError(EC)); } - ASSERT_EQ(2u, Reader->getProfiles().size()); - FunctionSamples *ReadFooSamples = Reader->getSamplesFor(FooName); ASSERT_TRUE(ReadFooSamples != nullptr); if (Format != SampleProfileFormat::SPF_Compact_Binary) { @@ -158,6 +164,20 @@ struct SampleProfTest : ::testing::Test { ReadBarSamples->findCallTargetMapAt(1, 0); ASSERT_FALSE(CTMap.getError()); + // Because _Z3bazi is not defined in module M, expect _Z3bazi's profile + // is not loaded when the profile is ExtBinary or Compact format because + // these formats support loading function profiles on demand. + FunctionSamples *ReadBazSamples = Reader->getSamplesFor(BazName); + if (Format == SampleProfileFormat::SPF_Ext_Binary || + Format == SampleProfileFormat::SPF_Compact_Binary) { + ASSERT_TRUE(ReadBazSamples == nullptr); + ASSERT_EQ(2u, Reader->getProfiles().size()); + } else { + ASSERT_TRUE(ReadBazSamples != nullptr); + ASSERT_EQ(12557u, ReadBazSamples->getTotalSamples()); + ASSERT_EQ(3u, Reader->getProfiles().size()); + } + std::string MconstructGUID; StringRef MconstructRep = getRepInFormat(MconstructName, Format, MconstructGUID); @@ -169,9 +189,9 @@ struct SampleProfTest : ::testing::Test { auto VerifySummary = [](ProfileSummary &Summary) mutable { ASSERT_EQ(ProfileSummary::PSK_Sample, Summary.getKind()); - ASSERT_EQ(123603u, Summary.getTotalCount()); - ASSERT_EQ(6u, Summary.getNumCounts()); - ASSERT_EQ(2u, Summary.getNumFunctions()); + ASSERT_EQ(136160u, Summary.getTotalCount()); + ASSERT_EQ(7u, Summary.getNumCounts()); + ASSERT_EQ(3u, Summary.getNumFunctions()); ASSERT_EQ(1437u, Summary.getMaxFunctionCount()); ASSERT_EQ(60351u, Summary.getMaxCount()); @@ -188,8 +208,8 @@ struct SampleProfTest : ::testing::Test { Cutoff = 990000; auto NinetyNinePerc = find_if(Details, Predicate); ASSERT_EQ(60000u, EightyPerc->MinCount); - ASSERT_EQ(60000u, NinetyPerc->MinCount); - ASSERT_EQ(60000u, NinetyFivePerc->MinCount); + ASSERT_EQ(12557u, NinetyPerc->MinCount); + ASSERT_EQ(12557u, NinetyFivePerc->MinCount); ASSERT_EQ(610u, NinetyNinePerc->MinCount); }; diff --git a/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp b/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp index 49a469bdef789..072742aab4115 100644 --- a/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp +++ b/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp @@ -132,7 +132,7 @@ body: | bb.7: ; predecessors: %bb.5, %bb.1 liveins: $value_stack - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments bb.8 
(landing-pad): ; predecessors: %bb.4 @@ -307,7 +307,7 @@ body: | bb.9: ; predecessors: %bb.0, %bb.7 liveins: $value_stack - RETURN_VOID implicit-def $arguments + RETURN implicit-def $arguments bb.10 (landing-pad): ; predecessors: %bb.4 diff --git a/llvm/unittests/TextAPI/CMakeLists.txt b/llvm/unittests/TextAPI/CMakeLists.txt index 7f49df43ac3d4..775ec2f1d1e88 100644 --- a/llvm/unittests/TextAPI/CMakeLists.txt +++ b/llvm/unittests/TextAPI/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_unittest(TextAPITests TextStubV1Tests.cpp TextStubV2Tests.cpp TextStubV3Tests.cpp + TextStubV4Tests.cpp ) target_link_libraries(TextAPITests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/TextAPI/TextStubV4Tests.cpp b/llvm/unittests/TextAPI/TextStubV4Tests.cpp new file mode 100644 index 0000000000000..dc50e512945f1 --- /dev/null +++ b/llvm/unittests/TextAPI/TextStubV4Tests.cpp @@ -0,0 +1,564 @@ +//===-- TextStubV4Tests.cpp - TBD V4 File Test ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------------===/ +#include "llvm/TextAPI/MachO/InterfaceFile.h" +#include "llvm/TextAPI/MachO/TextAPIReader.h" +#include "llvm/TextAPI/MachO/TextAPIWriter.h" +#include "gtest/gtest.h" +#include +#include + +using namespace llvm; +using namespace llvm::MachO; + +struct ExampleSymbol { + SymbolKind Kind; + std::string Name; + bool WeakDefined; + bool ThreadLocalValue; +}; +using ExampleSymbolSeq = std::vector; +using UUIDs = std::vector>; + +inline bool operator<(const ExampleSymbol &LHS, const ExampleSymbol &RHS) { + return std::tie(LHS.Kind, LHS.Name) < std::tie(RHS.Kind, RHS.Name); +} + +inline bool operator==(const ExampleSymbol &LHS, const ExampleSymbol &RHS) { + return std::tie(LHS.Kind, LHS.Name, LHS.WeakDefined, LHS.ThreadLocalValue) == + std::tie(RHS.Kind, RHS.Name, RHS.WeakDefined, RHS.ThreadLocalValue); +} + +static ExampleSymbol TBDv4ExportedSymbols[] = { + {SymbolKind::GlobalSymbol, "_symA", false, false}, + {SymbolKind::GlobalSymbol, "_symAB", false, false}, + {SymbolKind::GlobalSymbol, "_symB", false, false}, +}; + +static ExampleSymbol TBDv4ReexportedSymbols[] = { + {SymbolKind::GlobalSymbol, "_symC", false, false}, +}; + +static ExampleSymbol TBDv4UndefinedSymbols[] = { + {SymbolKind::GlobalSymbol, "_symD", false, false}, +}; + +namespace TBDv4 { + +TEST(TBDv4, ReadFile) { + static const char tbd_v4_file[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ i386-macos, x86_64-macos, x86_64-ios ]\n" + "uuids:\n" + " - target: i386-macos\n" + " value: 00000000-0000-0000-0000-000000000000\n" + " - target: x86_64-macos\n" + " value: 11111111-1111-1111-1111-111111111111\n" + " - target: x86_64-ios\n" + " value: 11111111-1111-1111-1111-111111111111\n" + "flags: [ flat_namespace, installapi ]\n" + "install-name: Umbrella.framework/Umbrella\n" + "current-version: 1.2.3\n" + "compatibility-version: 1.2\n" + "swift-abi-version: 5\n" + "parent-umbrella:\n" + " - targets: [ i386-macos, x86_64-macos, x86_64-ios ]\n" + " umbrella: System\n" + "allowable-clients:\n" + " - targets: [ i386-macos, x86_64-macos, x86_64-ios ]\n" + " clients: [ ClientA ]\n" + "reexported-libraries:\n" + " - targets: [ i386-macos ]\n" + " libraries: [ /System/Library/Frameworks/A.framework/A ]\n" + "exports:\n" + " - targets: [ i386-macos ]\n" + " symbols: [ _symA ]\n" + " 
objc-classes: []\n" + " objc-eh-types: []\n" + " objc-ivars: []\n" + " weak-symbols: []\n" + " thread-local-symbols: []\n" + " - targets: [ x86_64-ios ]\n" + " symbols: [_symB]\n" + " - targets: [ x86_64-macos, x86_64-ios ]\n" + " symbols: [_symAB]\n" + "reexports:\n" + " - targets: [ i386-macos ]\n" + " symbols: [_symC]\n" + " objc-classes: []\n" + " objc-eh-types: []\n" + " objc-ivars: []\n" + " weak-symbols: []\n" + " thread-local-symbols: []\n" + "undefineds:\n" + " - targets: [ i386-macos ]\n" + " symbols: [ _symD ]\n" + " objc-classes: []\n" + " objc-eh-types: []\n" + " objc-ivars: []\n" + " weak-symbols: []\n" + " thread-local-symbols: []\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_v4_file, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + PlatformSet Platforms; + Platforms.insert(PlatformKind::macOS); + Platforms.insert(PlatformKind::iOS); + auto Archs = AK_i386 | AK_x86_64; + TargetList Targets = { + Target(AK_i386, PlatformKind::macOS), + Target(AK_x86_64, PlatformKind::macOS), + Target(AK_x86_64, PlatformKind::iOS), + }; + UUIDs uuids = {{Targets[0], "00000000-0000-0000-0000-000000000000"}, + {Targets[1], "11111111-1111-1111-1111-111111111111"}, + {Targets[2], "11111111-1111-1111-1111-111111111111"}}; + EXPECT_EQ(Archs, File->getArchitectures()); + EXPECT_EQ(uuids, File->uuids()); + EXPECT_EQ(Platforms.size(), File->getPlatforms().size()); + for (auto Platform : File->getPlatforms()) + EXPECT_EQ(Platforms.count(Platform), 1U); + EXPECT_EQ(std::string("Umbrella.framework/Umbrella"), File->getInstallName()); + EXPECT_EQ(PackedVersion(1, 2, 3), File->getCurrentVersion()); + EXPECT_EQ(PackedVersion(1, 2, 0), File->getCompatibilityVersion()); + EXPECT_EQ(5U, File->getSwiftABIVersion()); + EXPECT_FALSE(File->isTwoLevelNamespace()); + EXPECT_TRUE(File->isApplicationExtensionSafe()); + EXPECT_TRUE(File->isInstallAPI()); + InterfaceFileRef client("ClientA", Targets); + InterfaceFileRef reexport("/System/Library/Frameworks/A.framework/A", + {Targets[0]}); + EXPECT_EQ(1U, File->allowableClients().size()); + EXPECT_EQ(client, File->allowableClients().front()); + EXPECT_EQ(1U, File->reexportedLibraries().size()); + EXPECT_EQ(reexport, File->reexportedLibraries().front()); + + ExampleSymbolSeq Exports, Reexports, Undefineds; + ExampleSymbol temp; + for (const auto *Sym : File->symbols()) { + temp = ExampleSymbol{Sym->getKind(), Sym->getName(), Sym->isWeakDefined(), + Sym->isThreadLocalValue()}; + EXPECT_FALSE(Sym->isWeakReferenced()); + if (Sym->isUndefined()) + Undefineds.emplace_back(std::move(temp)); + else + Sym->isReexported() ? 
Reexports.emplace_back(std::move(temp)) + : Exports.emplace_back(std::move(temp)); + } + llvm::sort(Exports.begin(), Exports.end()); + llvm::sort(Reexports.begin(), Reexports.end()); + llvm::sort(Undefineds.begin(), Undefineds.end()); + + EXPECT_EQ(sizeof(TBDv4ExportedSymbols) / sizeof(ExampleSymbol), + Exports.size()); + EXPECT_EQ(sizeof(TBDv4ReexportedSymbols) / sizeof(ExampleSymbol), + Reexports.size()); + EXPECT_EQ(sizeof(TBDv4UndefinedSymbols) / sizeof(ExampleSymbol), + Undefineds.size()); + EXPECT_TRUE(std::equal(Exports.begin(), Exports.end(), + std::begin(TBDv4ExportedSymbols))); + EXPECT_TRUE(std::equal(Reexports.begin(), Reexports.end(), + std::begin(TBDv4ReexportedSymbols))); + EXPECT_TRUE(std::equal(Undefineds.begin(), Undefineds.end(), + std::begin(TBDv4UndefinedSymbols))); +} + +TEST(TBDv4, WriteFile) { + static const char tbd_v4_file[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ i386-macos, x86_64-ios-simulator ]\n" + "uuids:\n" + " - target: i386-macos\n" + " value: 00000000-0000-0000-0000-000000000000\n" + " - target: x86_64-ios-simulator\n" + " value: 11111111-1111-1111-1111-111111111111\n" + "flags: [ installapi ]\n" + "install-name: 'Umbrella.framework/Umbrella'\n" + "current-version: 1.2.3\n" + "compatibility-version: 0\n" + "swift-abi-version: 5\n" + "parent-umbrella:\n" + " - targets: [ i386-macos, x86_64-ios-simulator ]\n" + " umbrella: System\n" + "allowable-clients:\n" + " - targets: [ i386-macos ]\n" + " clients: [ ClientA ]\n" + "exports:\n" + " - targets: [ i386-macos ]\n" + " symbols: [ _symA ]\n" + " objc-classes: [ Class1 ]\n" + " weak-symbols: [ _symC ]\n" + " - targets: [ x86_64-ios-simulator ]\n" + " symbols: [ _symB ]\n" + "...\n"; + + InterfaceFile File; + TargetList Targets = { + Target(AK_i386, PlatformKind::macOS), + Target(AK_x86_64, PlatformKind::iOSSimulator), + }; + UUIDs uuids = {{Targets[0], "00000000-0000-0000-0000-000000000000"}, + {Targets[1], "11111111-1111-1111-1111-111111111111"}}; + File.setInstallName("Umbrella.framework/Umbrella"); + File.setFileType(FileType::TBD_V4); + File.addTargets(Targets); + File.addUUID(uuids[0].first, uuids[0].second); + File.addUUID(uuids[1].first, uuids[1].second); + File.setCurrentVersion(PackedVersion(1, 2, 3)); + File.setTwoLevelNamespace(); + File.setInstallAPI(true); + File.setApplicationExtensionSafe(true); + File.setSwiftABIVersion(5); + File.addAllowableClient("ClientA", Targets[0]); + File.addParentUmbrella(Targets[0], "System"); + File.addParentUmbrella(Targets[1], "System"); + File.addSymbol(SymbolKind::GlobalSymbol, "_symA", {Targets[0]}); + File.addSymbol(SymbolKind::GlobalSymbol, "_symB", {Targets[1]}); + File.addSymbol(SymbolKind::GlobalSymbol, "_symC", {Targets[0]}, + SymbolFlags::WeakDefined); + File.addSymbol(SymbolKind::ObjectiveCClass, "Class1", {Targets[0]}); + + SmallString<4096> Buffer; + raw_svector_ostream OS(Buffer); + auto Result = TextAPIWriter::writeToStream(OS, File); + EXPECT_FALSE(Result); + EXPECT_STREQ(tbd_v4_file, Buffer.c_str()); +} + +TEST(TBDv4, MultipleTargets) { + static const char tbd_multiple_targets[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ i386-maccatalyst, x86_64-tvos, arm64-ios ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_multiple_targets, "Test.tbd")); + EXPECT_TRUE(!!Result); + PlatformSet Platforms; + Platforms.insert(PlatformKind::macCatalyst); + Platforms.insert(PlatformKind::tvOS); + Platforms.insert(PlatformKind::iOS); + auto File = std::move(Result.get()); + 
EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(AK_x86_64 | AK_arm64 | AK_i386, File->getArchitectures()); + EXPECT_EQ(Platforms.size(), File->getPlatforms().size()); + for (auto Platform : File->getPlatforms()) + EXPECT_EQ(Platforms.count(Platform), 1U); +} + +TEST(TBDv4, MultipleTargetsSameArch) { + static const char tbd_targets_same_arch[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-maccatalyst, x86_64-tvos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_targets_same_arch, "Test.tbd")); + EXPECT_TRUE(!!Result); + PlatformSet Platforms; + Platforms.insert(PlatformKind::tvOS); + Platforms.insert(PlatformKind::macCatalyst); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(Platforms.size(), File->getPlatforms().size()); + for (auto Platform : File->getPlatforms()) + EXPECT_EQ(Platforms.count(Platform), 1U); +} + +TEST(TBDv4, MultipleTargetsSamePlatform) { + static const char tbd_multiple_targets_same_platform[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ arm64-ios, armv7k-ios ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = TextAPIReader::get( + MemoryBufferRef(tbd_multiple_targets_same_platform, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(AK_arm64 | AK_armv7k, File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::iOS, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_maccatalyst) { + static const char tbd_target_maccatalyst[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-maccatalyst ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_target_maccatalyst, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::macCatalyst, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_x86_ios) { + static const char tbd_target_x86_ios[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-ios ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_target_x86_ios, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::iOS, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_arm_bridgeOS) { + static const char tbd_platform_bridgeos[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ armv7k-bridgeos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_platform_bridgeos, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::bridgeOS, *File->getPlatforms().begin()); + EXPECT_EQ(ArchitectureSet(AK_armv7k), File->getArchitectures()); +} + +TEST(TBDv4, Target_x86_macos) { + static const char tbd_x86_macos[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ 
x86_64-macos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_x86_macos, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::macOS, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_x86_ios_simulator) { + static const char tbd_x86_ios_sim[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-ios-simulator ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_x86_ios_sim, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::iOSSimulator, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_x86_tvos_simulator) { + static const char tbd_x86_tvos_sim[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-tvos-simulator ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_x86_tvos_sim, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_x86_64), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::tvOSSimulator, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Target_i386_watchos_simulator) { + static const char tbd_i386_watchos_sim[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ i386-watchos-simulator ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_i386_watchos_sim, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(ArchitectureSet(AK_i386), File->getArchitectures()); + EXPECT_EQ(File->getPlatforms().size(), 1U); + EXPECT_EQ(PlatformKind::watchOSSimulator, *File->getPlatforms().begin()); +} + +TEST(TBDv4, Swift_1) { + static const char tbd_swift_1[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 1\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_swift_1, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(1U, File->getSwiftABIVersion()); +} + +TEST(TBDv4, Swift_2) { + static const char tbd_v1_swift_2[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 2\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_v1_swift_2, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(2U, File->getSwiftABIVersion()); +} + +TEST(TBDv4, Swift_5) { + static const char tbd_swift_5[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 5\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_swift_5, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + 
EXPECT_EQ(5U, File->getSwiftABIVersion()); +} + +TEST(TBDv4, Swift_99) { + static const char tbd_swift_99[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 99\n" + "...\n"; + + auto Result = TextAPIReader::get(MemoryBufferRef(tbd_swift_99, "Test.tbd")); + EXPECT_TRUE(!!Result); + auto File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V4, File->getFileType()); + EXPECT_EQ(99U, File->getSwiftABIVersion()); +} + +TEST(TBDv4, InvalidArchitecture) { + static const char tbd_file_unknown_architecture[] = + "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ foo-macos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = TextAPIReader::get( + MemoryBufferRef(tbd_file_unknown_architecture, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + EXPECT_EQ("malformed file\nTest.tbd:3:12: error: unknown " + "architecture\ntargets: [ foo-macos ]\n" + " ^~~~~~~~~~\n", + errorMessage); +} + +TEST(TBDv4, InvalidPlatform) { + static const char tbd_file_invalid_platform[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-maos ]\n" + "install-name: Test.dylib\n" + "...\n"; + + auto Result = TextAPIReader::get( + MemoryBufferRef(tbd_file_invalid_platform, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + EXPECT_EQ("malformed file\nTest.tbd:3:12: error: unknown platform\ntargets: " + "[ x86_64-maos ]\n" + " ^~~~~~~~~~~~\n", + errorMessage); +} + +TEST(TBDv4, MalformedFile1) { + static const char malformed_file1[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(malformed_file1, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + ASSERT_EQ("malformed file\nTest.tbd:2:1: error: missing required key " + "'targets'\ntbd-version: 4\n^\n", + errorMessage); +} + +TEST(TBDv4, MalformedFile2) { + static const char malformed_file2[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "foobar: \"unsupported key\"\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(malformed_file2, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + ASSERT_EQ( + "malformed file\nTest.tbd:5:9: error: unknown key 'foobar'\nfoobar: " + "\"unsupported key\"\n ^~~~~~~~~~~~~~~~~\n", + errorMessage); +} + +TEST(TBDv4, MalformedFile3) { + static const char tbd_v1_swift_1_1[] = "--- !tapi-tbd\n" + "tbd-version: 4\n" + "targets: [ x86_64-macos ]\n" + "install-name: Test.dylib\n" + "swift-abi-version: 1.1\n" + "...\n"; + + auto Result = + TextAPIReader::get(MemoryBufferRef(tbd_v1_swift_1_1, "Test.tbd")); + EXPECT_FALSE(!!Result); + auto errorMessage = toString(Result.takeError()); + EXPECT_EQ("malformed file\nTest.tbd:5:20: error: invalid Swift ABI " + "version.\nswift-abi-version: 1.1\n ^~~\n", + errorMessage); +} + +} // end namespace TBDv4 diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp index 8718be28ac997..44d5be13751a6 100644 --- a/llvm/utils/FileCheck/FileCheck.cpp +++ b/llvm/utils/FileCheck/FileCheck.cpp @@ -48,6 +48,10 @@ static cl::opt<bool> NoCanonicalizeWhiteSpace( "strict-whitespace", cl::desc("Do not treat all horizontal whitespace as equivalent")); +static cl::opt<bool> IgnoreCase( "ignore-case", cl::desc("Use case-insensitive matching")); + static cl::list<std::string> ImplicitCheckNot( "implicit-check-not", cl::desc("Add an
implicit negative check with this pattern to every\n" @@ -555,6 +559,7 @@ int main(int argc, char **argv) { Req.VerboseVerbose = VerboseVerbose; Req.NoCanonicalizeWhiteSpace = NoCanonicalizeWhiteSpace; Req.MatchFullLines = MatchFullLines; + Req.IgnoreCase = IgnoreCase; if (VerboseVerbose) Req.Verbose = true; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index f970572365022..9b094adb7d5ce 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1057,6 +1057,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, LLVM_DEBUG(dbgs() << ProcModel.ModelName << " does not have resources for class " << SC.Name << '\n'); + SCDesc.NumMicroOps = MCSchedClassDesc::InvalidNumMicroOps; } } // Sum resources across all operand writes. diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn index 2deb47501ce6c..f3affc2f96560 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn @@ -7,7 +7,7 @@ static_library("utils") { "//clang/lib/ASTMatchers", "//clang/lib/Basic", "//clang/lib/Lex", - "//clang/lib/Tooling/Refactoring", + "//clang/lib/Tooling/Transformer", "//llvm/lib/Support", ] sources = [ diff --git a/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn index 20207836dd10c..29c5f5206c08d 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn @@ -18,7 +18,7 @@ unittest("ClangTidyTests") { "//clang/lib/Serialization", "//clang/lib/Tooling", "//clang/lib/Tooling/Core", - "//clang/lib/Tooling/Refactoring", + "//clang/lib/Tooling/Transformer", "//llvm/lib/Support", ] include_dirs = [ "//clang-tools-extra/clang-tidy" ] diff --git a/llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn index f93debc1fae5a..29445c0a1b0e1 100644 --- a/llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn @@ -19,16 +19,11 @@ static_library("Refactoring") { "AtomicChange.cpp", "Extract/Extract.cpp", "Extract/SourceExtraction.cpp", - "RangeSelector.cpp", "RefactoringActions.cpp", "Rename/RenamingAction.cpp", "Rename/SymbolOccurrences.cpp", "Rename/USRFinder.cpp", "Rename/USRFindingAction.cpp", "Rename/USRLocFinder.cpp", - "SourceCode.cpp", - "SourceCodeBuilders.cpp", - "Stencil.cpp", - "Transformer.cpp", ] } diff --git a/llvm/utils/gn/secondary/clang/lib/Tooling/Transformer/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Tooling/Transformer/BUILD.gn new file mode 100644 index 0000000000000..c11640734107b --- /dev/null +++ b/llvm/utils/gn/secondary/clang/lib/Tooling/Transformer/BUILD.gn @@ -0,0 +1,20 @@ +static_library("Transformer") { + output_name = "clangToolingTransformer" + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang/lib/AST", + "//clang/lib/ASTMatchers", + "//clang/lib/Basic", + "//clang/lib/Lex", + "//clang/lib/Tooling/Core", + "//clang/lib/Tooling/Refactoring", + "//llvm/lib/Support", + ] + sources = [ + "RangeSelector.cpp", + "SourceCode.cpp", + "SourceCodeBuilders.cpp", + "Stencil.cpp", + "Transformer.cpp", + ] +} diff --git 
a/llvm/utils/gn/secondary/clang/tools/clang-offload-wrapper/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/clang-offload-wrapper/BUILD.gn new file mode 100644 index 0000000000000..bf1b38edba191 --- /dev/null +++ b/llvm/utils/gn/secondary/clang/tools/clang-offload-wrapper/BUILD.gn @@ -0,0 +1,13 @@ +executable("clang-offload-wrapper") { + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang/lib/Basic", + "//llvm/lib/Bitcode/Writer", + "//llvm/lib/IR", + "//llvm/lib/Support", + "//llvm/lib/Transforms/Utils", + ] + sources = [ + "ClangOffloadWrapper.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn index 7324e047ade6b..d65682e604ae9 100644 --- a/llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/driver/BUILD.gn @@ -59,6 +59,7 @@ executable("clang") { "//clang/lib/FrontendTool", "//clang/lib/Headers", "//clang/tools/clang-offload-bundler", + "//clang/tools/clang-offload-wrapper", "//llvm/include/llvm/Config:llvm-config", "//llvm/lib/Analysis", "//llvm/lib/CodeGen", diff --git a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn index cbd629c3a5571..7bdd209c1858e 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn @@ -14,6 +14,7 @@ unittest("ToolingTests") { "//clang/lib/Tooling", "//clang/lib/Tooling/Core", "//clang/lib/Tooling/Refactoring", + "//clang/lib/Tooling/Transformer", "//llvm/lib/Support", "//llvm/lib/Target:TargetsToBuild", "//llvm/lib/Testing/Support", diff --git a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/GSYM/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/GSYM/BUILD.gn index ceaf5e509d56c..a9d8c32f08288 100644 --- a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/GSYM/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/GSYM/BUILD.gn @@ -1,11 +1,14 @@ static_library("GSYM") { output_name = "LLVMDebugInfoGSYM" deps = [ + "//llvm/lib/MC", "//llvm/lib/Support", ] sources = [ "FileWriter.cpp", "FunctionInfo.cpp", + "GsymCreator.cpp", + "GsymReader.cpp", "Header.cpp", "InlineInfo.cpp", "LineTable.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn index 491707a52d2bf..8743395755e09 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn @@ -12,6 +12,7 @@ static_library("JITLink") { "JITLinkMemoryManager.cpp", "MachO.cpp", "MachOLinkGraphBuilder.cpp", + "MachO_arm64.cpp", "MachO_x86_64.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/GlobalISel/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/GlobalISel/BUILD.gn index 7655b56ce2615..6b2221349778f 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/GlobalISel/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/GlobalISel/BUILD.gn @@ -13,6 +13,7 @@ unittest("GlobalISelTests") { ] sources = [ "CSETest.cpp", + "ConstantFoldingTest.cpp", "GISelMITest.cpp", "KnownBitsTest.cpp", "LegalizerHelperTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn index 8405795685bd2..8e51943b7e263 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/TextAPI/BUILD.gn @@ -10,5 +10,6 @@ 
unittest("TextAPITests") { "TextStubV1Tests.cpp", "TextStubV2Tests.cpp", "TextStubV3Tests.cpp", + "TextStubV4Tests.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/tablegen.gni b/llvm/utils/gn/secondary/llvm/utils/TableGen/tablegen.gni index cb588abbaa6a5..91863c02c2237 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/tablegen.gni +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/tablegen.gni @@ -66,6 +66,8 @@ template("tablegen") { args = [ rebase_path(tblgen_executable, root_build_dir), + "--write-if-changed", + "-I", rebase_path("//llvm/include", root_build_dir), diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py new file mode 100644 index 0000000000000..29a4bfd522e60 --- /dev/null +++ b/llvm/utils/lit/lit/cl_arguments.py @@ -0,0 +1,214 @@ +import argparse +import os +import shlex +import sys + +import lit.util + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument('test_paths', + nargs='+', + help='Files or paths to include in the test suite') + + parser.add_argument("--version", + dest="show_version", + help="Show version and exit", + action="store_true", + default=False) + parser.add_argument("-j", "--threads", "--workers", + dest="numWorkers", + metavar="N", + help="Number of workers used for testing", + type=_positive_int, + default=lit.util.detectCPUs()) + parser.add_argument("--config-prefix", + dest="configPrefix", + metavar="NAME", + help="Prefix for 'lit' config files", + default=None) + parser.add_argument("-D", "--param", + dest="userParameters", + metavar="NAME=VAL", + help="Add 'NAME' = 'VAL' to the user defined parameters", + type=str, + action="append", + default=[]) + + format_group = parser.add_argument_group("Output Format") + # FIXME: I find these names very confusing, although I like the + # functionality. + format_group.add_argument("-q", "--quiet", + help="Suppress no error output", + action="store_true", + default=False) + format_group.add_argument("-s", "--succinct", + help="Reduce amount of output", + action="store_true", + default=False) + format_group.add_argument("-v", "--verbose", + dest="showOutput", + help="Show test output for failures", + action="store_true", + default=False) + format_group.add_argument("-vv", "--echo-all-commands", + dest="echoAllCommands", + action="store_true", + default=False, + help="Echo all commands as they are executed to stdout. 
In case of " + "failure, last command shown will be the failing one.") + format_group.add_argument("-a", "--show-all", + dest="showAllOutput", + help="Display all commandlines and output", + action="store_true", + default=False) + format_group.add_argument("-o", "--output", + dest="output_path", + help="Write test results to the provided path", + metavar="PATH") + format_group.add_argument("--no-progress-bar", + dest="useProgressBar", + help="Do not use curses based progress bar", + action="store_false", + default=True) + format_group.add_argument("--show-unsupported", + help="Show unsupported tests", + action="store_true", + default=False) + format_group.add_argument("--show-xfail", + help="Show tests that were expected to fail", + action="store_true", + default=False) + + execution_group = parser.add_argument_group("Test Execution") + execution_group.add_argument("--path", + help="Additional paths to add to testing environment", + action="append", + type=str, + default=[]) + execution_group.add_argument("--vg", + dest="useValgrind", + help="Run tests under valgrind", + action="store_true", + default=False) + execution_group.add_argument("--vg-leak", + dest="valgrindLeakCheck", + help="Check for memory leaks under valgrind", + action="store_true", + default=False) + execution_group.add_argument("--vg-arg", + dest="valgrindArgs", + metavar="ARG", + help="Specify an extra argument for valgrind", + type=str, + action="append", + default=[]) + execution_group.add_argument("--time-tests", + dest="timeTests", + help="Track elapsed wall time for each test", + action="store_true", + default=False) + execution_group.add_argument("--no-execute", + dest="noExecute", + help="Don't execute any tests (assume PASS)", + action="store_true", + default=False) + execution_group.add_argument("--xunit-xml-output", + dest="xunit_output_file", + help="Write XUnit-compatible XML test reports to the specified file", + default=None) + execution_group.add_argument("--timeout", + dest="maxIndividualTestTime", + help="Maximum time to spend running a single test (in seconds). " + "0 means no time limit. 
[Default: 0]", + type=int, + default=None) + execution_group.add_argument("--max-failures", + dest="maxFailures", + help="Stop execution after the given number of failures.", + type=_positive_int, + default=None) + + selection_group = parser.add_argument_group("Test Selection") + selection_group.add_argument("--max-tests", + dest="maxTests", + metavar="N", + help="Maximum number of tests to run", + type=int, + default=None) + selection_group.add_argument("--max-time", + dest="maxTime", + metavar="N", + help="Maximum time to spend testing (in seconds)", + type=float, + default=None) + selection_group.add_argument("--shuffle", + help="Run tests in random order", + action="store_true", + default=False) + selection_group.add_argument("-i", "--incremental", + help="Run modified and failing tests first (updates mtimes)", + action="store_true", + default=False) + selection_group.add_argument("--filter", + metavar="REGEX", + help="Only run tests with paths matching the given regular expression", + default=os.environ.get("LIT_FILTER")) + selection_group.add_argument("--num-shards", + dest="numShards", + metavar="M", + help="Split testsuite into M pieces and only run one", + type=_positive_int, + default=os.environ.get("LIT_NUM_SHARDS")) + selection_group.add_argument("--run-shard", + dest="runShard", + metavar="N", + help="Run shard #N of the testsuite", + type=_positive_int, + default=os.environ.get("LIT_RUN_SHARD")) + + debug_group = parser.add_argument_group("Debug and Experimental Options") + debug_group.add_argument("--debug", + help="Enable debugging (for 'lit' development)", + action="store_true", + default=False) + debug_group.add_argument("--show-suites", + dest="showSuites", + help="Show discovered test suites", + action="store_true", + default=False) + debug_group.add_argument("--show-tests", + dest="showTests", + help="Show all discovered tests", + action="store_true", + default=False) + + # LIT is special: environment variables override command line arguments. + env_args = shlex.split(os.environ.get("LIT_OPTS", "")) + args = sys.argv[1:] + env_args + opts = parser.parse_args(args) + + # Validate command line options + if opts.echoAllCommands: + opts.showOutput = True + + if opts.numShards or opts.runShard: + if not opts.numShards or not opts.runShard: + parser.error("--num-shards and --run-shard must be used together") + if opts.runShard > opts.numShards: + parser.error("--run-shard must be between 1 and --num-shards (inclusive)") + + return opts + +def _positive_int(arg): + try: + n = int(arg) + except ValueError: + raise _arg_error('positive integer', arg) + if n <= 0: + raise _arg_error('positive integer', arg) + return n + +def _arg_error(desc, arg): + msg = "requires %s, but found '%s'" % (desc, arg) + return argparse.ArgumentTypeError(msg) diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index 53e3a88b7b33c..52a8400bdcfe3 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -11,14 +11,13 @@ import platform import random import re -import shlex import sys import time -import argparse import tempfile import shutil from xml.sax.saxutils import quoteattr +import lit.cl_arguments import lit.discovery import lit.display import lit.LitConfig @@ -26,79 +25,6 @@ import lit.Test import lit.util -def write_test_results(run, lit_config, testing_time, output_path): - try: - import json - except ImportError: - lit_config.fatal('test output unsupported with Python 2.5') - - # Construct the data we will write. 
- data = {} - # Encode the current lit version as a schema version. - data['__version__'] = lit.__versioninfo__ - data['elapsed'] = testing_time - # FIXME: Record some information on the lit configuration used? - # FIXME: Record information from the individual test suites? - - # Encode the tests. - data['tests'] = tests_data = [] - for test in run.tests: - test_data = { - 'name' : test.getFullName(), - 'code' : test.result.code.name, - 'output' : test.result.output, - 'elapsed' : test.result.elapsed } - - # Add test metrics, if present. - if test.result.metrics: - test_data['metrics'] = metrics_data = {} - for key, value in test.result.metrics.items(): - metrics_data[key] = value.todata() - - # Report micro-tests separately, if present - if test.result.microResults: - for key, micro_test in test.result.microResults.items(): - # Expand parent test name with micro test name - parent_name = test.getFullName() - micro_full_name = parent_name + ':' + key - - micro_test_data = { - 'name' : micro_full_name, - 'code' : micro_test.code.name, - 'output' : micro_test.output, - 'elapsed' : micro_test.elapsed } - if micro_test.metrics: - micro_test_data['metrics'] = micro_metrics_data = {} - for key, value in micro_test.metrics.items(): - micro_metrics_data[key] = value.todata() - - tests_data.append(micro_test_data) - - tests_data.append(test_data) - - # Write the output. - f = open(output_path, 'w') - try: - json.dump(data, f, indent=2, sort_keys=True) - f.write('\n') - finally: - f.close() - -def update_incremental_cache(test): - if not test.result.code.isFailure: - return - fname = test.getFilePath() - os.utime(fname, None) - -def sort_by_incremental_cache(run): - def sortIndex(test): - fname = test.getFilePath() - try: - return -os.path.getmtime(fname) - except: - return 0 - run.tests.sort(key = lambda t: sortIndex(t)) - def main(builtinParameters = {}): # Create a temp directory inside the normal temp directory so that we can # try to avoid temporary test file leaks. The user can avoid this behavior @@ -129,160 +55,13 @@ def main(builtinParameters = {}): pass def main_with_tmp(builtinParameters): - parser = argparse.ArgumentParser() - parser.add_argument('test_paths', - nargs='*', - help='Files or paths to include in the test suite') - - parser.add_argument("--version", dest="show_version", - help="Show version and exit", - action="store_true", default=False) - parser.add_argument("-j", "--threads", "--workers", dest="numWorkers", metavar="N", - help="Number of workers used for testing", - type=int, default=None) - parser.add_argument("--config-prefix", dest="configPrefix", - metavar="NAME", help="Prefix for 'lit' config files", - action="store", default=None) - parser.add_argument("-D", "--param", dest="userParameters", - metavar="NAME=VAL", - help="Add 'NAME' = 'VAL' to the user defined parameters", - type=str, action="append", default=[]) - - format_group = parser.add_argument_group("Output Format") - # FIXME: I find these names very confusing, although I like the - # functionality. 
- format_group.add_argument("-q", "--quiet", - help="Suppress no error output", - action="store_true", default=False) - format_group.add_argument("-s", "--succinct", - help="Reduce amount of output", - action="store_true", default=False) - format_group.add_argument("-v", "--verbose", dest="showOutput", - help="Show test output for failures", - action="store_true", default=False) - format_group.add_argument("-vv", "--echo-all-commands", - dest="echoAllCommands", - action="store_true", default=False, - help="Echo all commands as they are executed to stdout.\ - In case of failure, last command shown will be the\ - failing one.") - format_group.add_argument("-a", "--show-all", dest="showAllOutput", - help="Display all commandlines and output", - action="store_true", default=False) - format_group.add_argument("-o", "--output", dest="output_path", - help="Write test results to the provided path", - action="store", metavar="PATH") - format_group.add_argument("--no-progress-bar", dest="useProgressBar", - help="Do not use curses based progress bar", - action="store_false", default=True) - format_group.add_argument("--show-unsupported", - help="Show unsupported tests", - action="store_true", default=False) - format_group.add_argument("--show-xfail", - help="Show tests that were expected to fail", - action="store_true", default=False) - - execution_group = parser.add_argument_group("Test Execution") - execution_group.add_argument("--path", - help="Additional paths to add to testing environment", - action="append", type=str, default=[]) - execution_group.add_argument("--vg", dest="useValgrind", - help="Run tests under valgrind", - action="store_true", default=False) - execution_group.add_argument("--vg-leak", dest="valgrindLeakCheck", - help="Check for memory leaks under valgrind", - action="store_true", default=False) - execution_group.add_argument("--vg-arg", dest="valgrindArgs", metavar="ARG", - help="Specify an extra argument for valgrind", - type=str, action="append", default=[]) - execution_group.add_argument("--time-tests", dest="timeTests", - help="Track elapsed wall time for each test", - action="store_true", default=False) - execution_group.add_argument("--no-execute", dest="noExecute", - help="Don't execute any tests (assume PASS)", - action="store_true", default=False) - execution_group.add_argument("--xunit-xml-output", dest="xunit_output_file", - help=("Write XUnit-compatible XML test reports to the" - " specified file"), default=None) - execution_group.add_argument("--timeout", dest="maxIndividualTestTime", - help="Maximum time to spend running a single test (in seconds)." - "0 means no time limit. 
[Default: 0]", - type=int, default=None) - execution_group.add_argument("--max-failures", dest="maxFailures", - help="Stop execution after the given number of failures.", - action="store", type=int, default=None) - - selection_group = parser.add_argument_group("Test Selection") - selection_group.add_argument("--max-tests", dest="maxTests", metavar="N", - help="Maximum number of tests to run", - action="store", type=int, default=None) - selection_group.add_argument("--max-time", dest="maxTime", metavar="N", - help="Maximum time to spend testing (in seconds)", - action="store", type=float, default=None) - selection_group.add_argument("--shuffle", - help="Run tests in random order", - action="store_true", default=False) - selection_group.add_argument("-i", "--incremental", - help="Run modified and failing tests first (updates " - "mtimes)", - action="store_true", default=False) - selection_group.add_argument("--filter", metavar="REGEX", - help=("Only run tests with paths matching the given " - "regular expression"), - action="store", - default=os.environ.get("LIT_FILTER")) - selection_group.add_argument("--num-shards", dest="numShards", metavar="M", - help="Split testsuite into M pieces and only run one", - action="store", type=int, - default=os.environ.get("LIT_NUM_SHARDS")) - selection_group.add_argument("--run-shard", dest="runShard", metavar="N", - help="Run shard #N of the testsuite", - action="store", type=int, - default=os.environ.get("LIT_RUN_SHARD")) - - debug_group = parser.add_argument_group("Debug and Experimental Options") - debug_group.add_argument("--debug", - help="Enable debugging (for 'lit' development)", - action="store_true", default=False) - debug_group.add_argument("--show-suites", dest="showSuites", - help="Show discovered test suites", - action="store_true", default=False) - debug_group.add_argument("--show-tests", dest="showTests", - help="Show all discovered tests", - action="store_true", default=False) - - opts = parser.parse_args(sys.argv[1:] + - shlex.split(os.environ.get("LIT_OPTS", ""))) - args = opts.test_paths + opts = lit.cl_arguments.parse_args() if opts.show_version: print("lit %s" % (lit.__version__,)) return - if not args: - parser.error('No inputs specified') - - if opts.numWorkers is None: - opts.numWorkers = lit.util.detectCPUs() - elif opts.numWorkers <= 0: - parser.error("Option '--workers' or '-j' requires positive integer") - - if opts.maxFailures is not None and opts.maxFailures <= 0: - parser.error("Option '--max-failures' requires positive integer") - - if opts.echoAllCommands: - opts.showOutput = True - - inputs = args - - # Create the user defined parameters. - userParams = dict(builtinParameters) - for entry in opts.userParameters: - if '=' not in entry: - name,val = entry,'' - else: - name,val = entry.split('=', 1) - userParams[name] = val + userParams = create_user_parameters(builtinParameters, opts) # Decide what the requested maximum indvidual test time should be if opts.maxIndividualTestTime is not None: @@ -313,7 +92,7 @@ def main_with_tmp(builtinParameters): # Perform test discovery. run = lit.run.Run(litConfig, - lit.discovery.find_tests_for_inputs(litConfig, inputs)) + lit.discovery.find_tests_for_inputs(litConfig, opts.test_paths)) # After test discovery the configuration might have changed # the maxIndividualTestTime. 
If we explicitly set this on the @@ -329,66 +108,19 @@ def main_with_tmp(builtinParameters): litConfig.maxIndividualTestTime = opts.maxIndividualTestTime if opts.showSuites or opts.showTests: - # Aggregate the tests by suite. - suitesAndTests = {} - for result_test in run.tests: - if result_test.suite not in suitesAndTests: - suitesAndTests[result_test.suite] = [] - suitesAndTests[result_test.suite].append(result_test) - suitesAndTests = list(suitesAndTests.items()) - suitesAndTests.sort(key = lambda item: item[0].name) - - # Show the suites, if requested. - if opts.showSuites: - print('-- Test Suites --') - for ts,ts_tests in suitesAndTests: - print(' %s - %d tests' %(ts.name, len(ts_tests))) - print(' Source Root: %s' % ts.source_root) - print(' Exec Root : %s' % ts.exec_root) - if ts.config.available_features: - print(' Available Features : %s' % ' '.join( - sorted(ts.config.available_features))) - - # Show the tests, if requested. - if opts.showTests: - print('-- Available Tests --') - for ts,ts_tests in suitesAndTests: - ts_tests.sort(key = lambda test: test.path_in_suite) - for test in ts_tests: - print(' %s' % (test.getFullName(),)) - - # Exit. - sys.exit(0) + print_suites_or_tests(run, opts) + return # Select and order the tests. numTotalTests = len(run.tests) - # First, select based on the filter expression if given. if opts.filter: - try: - rex = re.compile(opts.filter) - except: - parser.error("invalid regular expression for --filter: %r" % ( - opts.filter)) - run.tests = [result_test for result_test in run.tests - if rex.search(result_test.getFullName())] - - # Then select the order. - if opts.shuffle: - random.shuffle(run.tests) - elif opts.incremental: - sort_by_incremental_cache(run) - else: - run.tests.sort(key = lambda t: (not t.isEarlyTest(), t.getFullName())) + filter_tests(run, opts) + + order_tests(run, opts) # Then optionally restrict our attention to a shard of the tests. if (opts.numShards is not None) or (opts.runShard is not None): - if (opts.numShards is None) or (opts.runShard is None): - parser.error("--num-shards and --run-shard must be used together") - if opts.numShards <= 0: - parser.error("--num-shards must be positive") - if (opts.runShard < 1) or (opts.runShard > opts.numShards): - parser.error("--run-shard must be between 1 and --num-shards (inclusive)") num_tests = len(run.tests) # Note: user views tests and shard numbers counting from 1. test_ixs = range(opts.runShard - 1, num_tests, opts.numShards) @@ -411,27 +143,7 @@ def main_with_tmp(builtinParameters): # Don't create more workers than tests. opts.numWorkers = min(len(run.tests), opts.numWorkers) - # Because some tests use threads internally, and at least on Linux each - # of these threads counts toward the current process limit, try to - # raise the (soft) process limit so that tests don't fail due to - # resource exhaustion. - try: - cpus = lit.util.detectCPUs() - desired_limit = opts.numWorkers * cpus * 2 # the 2 is a safety factor - - # Import the resource module here inside this try block because it - # will likely fail on Windows. 
- import resource - - max_procs_soft, max_procs_hard = resource.getrlimit(resource.RLIMIT_NPROC) - desired_limit = min(desired_limit, max_procs_hard) - - if max_procs_soft < desired_limit: - resource.setrlimit(resource.RLIMIT_NPROC, (desired_limit, max_procs_hard)) - litConfig.note('raised the process limit from %d to %d' % \ - (max_procs_soft, desired_limit)) - except: - pass + increase_process_limit(litConfig, opts) display = lit.display.create_display(opts, len(run.tests), numTotalTests, opts.numWorkers) @@ -507,41 +219,7 @@ def progress_callback(test): print(' %s: %d' % (name,N)) if opts.xunit_output_file: - # Collect the tests, indexed by test suite - by_suite = {} - for result_test in run.tests: - suite = result_test.suite.config.name - if suite not in by_suite: - by_suite[suite] = { - 'passes' : 0, - 'failures' : 0, - 'skipped': 0, - 'tests' : [] } - by_suite[suite]['tests'].append(result_test) - if result_test.result.code.isFailure: - by_suite[suite]['failures'] += 1 - elif result_test.result.code == lit.Test.UNSUPPORTED: - by_suite[suite]['skipped'] += 1 - else: - by_suite[suite]['passes'] += 1 - xunit_output_file = open(opts.xunit_output_file, "w") - xunit_output_file.write("\n") - xunit_output_file.write("\n") - for suite_name, suite in by_suite.items(): - safe_suite_name = quoteattr(suite_name.replace(".", "-")) - xunit_output_file.write("\n") - - for result_test in suite['tests']: - result_test.writeJUnitXML(xunit_output_file) - xunit_output_file.write("\n") - xunit_output_file.write("\n") - xunit_output_file.write("") - xunit_output_file.close() + write_test_results_xunit(run, opts) # If we encountered any additional errors, exit abnormally. if litConfig.numErrors: @@ -556,5 +234,196 @@ def progress_callback(test): sys.exit(1) sys.exit(0) + +def create_user_parameters(builtinParameters, opts): + userParams = dict(builtinParameters) + for entry in opts.userParameters: + if '=' not in entry: + name,val = entry,'' + else: + name,val = entry.split('=', 1) + userParams[name] = val + return userParams + +def print_suites_or_tests(run, opts): + # Aggregate the tests by suite. + suitesAndTests = {} + for result_test in run.tests: + if result_test.suite not in suitesAndTests: + suitesAndTests[result_test.suite] = [] + suitesAndTests[result_test.suite].append(result_test) + suitesAndTests = list(suitesAndTests.items()) + suitesAndTests.sort(key = lambda item: item[0].name) + + # Show the suites, if requested. + if opts.showSuites: + print('-- Test Suites --') + for ts,ts_tests in suitesAndTests: + print(' %s - %d tests' %(ts.name, len(ts_tests))) + print(' Source Root: %s' % ts.source_root) + print(' Exec Root : %s' % ts.exec_root) + if ts.config.available_features: + print(' Available Features : %s' % ' '.join( + sorted(ts.config.available_features))) + + # Show the tests, if requested. + if opts.showTests: + print('-- Available Tests --') + for ts,ts_tests in suitesAndTests: + ts_tests.sort(key = lambda test: test.path_in_suite) + for test in ts_tests: + print(' %s' % (test.getFullName(),)) + + # Exit. 
+ sys.exit(0) + +def filter_tests(run, opts): + try: + rex = re.compile(opts.filter) + except: + parser.error("invalid regular expression for --filter: %r" % ( + opts.filter)) + run.tests = [result_test for result_test in run.tests + if rex.search(result_test.getFullName())] + +def order_tests(run, opts): + if opts.shuffle: + random.shuffle(run.tests) + elif opts.incremental: + run.tests.sort(key = by_mtime, reverse = True) + else: + run.tests.sort(key = lambda t: (not t.isEarlyTest(), t.getFullName())) + +def by_mtime(test): + fname = test.getFilePath() + try: + return os.path.getmtime(fname) + except: + return 0 + +def update_incremental_cache(test): + if not test.result.code.isFailure: + return + fname = test.getFilePath() + os.utime(fname, None) + +def increase_process_limit(litConfig, opts): + # Because some tests use threads internally, and at least on Linux each + # of these threads counts toward the current process limit, try to + # raise the (soft) process limit so that tests don't fail due to + # resource exhaustion. + try: + cpus = lit.util.detectCPUs() + desired_limit = opts.numWorkers * cpus * 2 # the 2 is a safety factor + + # Import the resource module here inside this try block because it + # will likely fail on Windows. + import resource + + max_procs_soft, max_procs_hard = resource.getrlimit(resource.RLIMIT_NPROC) + desired_limit = min(desired_limit, max_procs_hard) + + if max_procs_soft < desired_limit: + resource.setrlimit(resource.RLIMIT_NPROC, (desired_limit, max_procs_hard)) + litConfig.note('raised the process limit from %d to %d' % \ + (max_procs_soft, desired_limit)) + except: + pass + +def write_test_results(run, lit_config, testing_time, output_path): + try: + import json + except ImportError: + lit_config.fatal('test output unsupported with Python 2.5') + + # Construct the data we will write. + data = {} + # Encode the current lit version as a schema version. + data['__version__'] = lit.__versioninfo__ + data['elapsed'] = testing_time + # FIXME: Record some information on the lit configuration used? + # FIXME: Record information from the individual test suites? + + # Encode the tests. + data['tests'] = tests_data = [] + for test in run.tests: + test_data = { + 'name' : test.getFullName(), + 'code' : test.result.code.name, + 'output' : test.result.output, + 'elapsed' : test.result.elapsed } + + # Add test metrics, if present. + if test.result.metrics: + test_data['metrics'] = metrics_data = {} + for key, value in test.result.metrics.items(): + metrics_data[key] = value.todata() + + # Report micro-tests separately, if present + if test.result.microResults: + for key, micro_test in test.result.microResults.items(): + # Expand parent test name with micro test name + parent_name = test.getFullName() + micro_full_name = parent_name + ':' + key + + micro_test_data = { + 'name' : micro_full_name, + 'code' : micro_test.code.name, + 'output' : micro_test.output, + 'elapsed' : micro_test.elapsed } + if micro_test.metrics: + micro_test_data['metrics'] = micro_metrics_data = {} + for key, value in micro_test.metrics.items(): + micro_metrics_data[key] = value.todata() + + tests_data.append(micro_test_data) + + tests_data.append(test_data) + + # Write the output. 
+ f = open(output_path, 'w') + try: + json.dump(data, f, indent=2, sort_keys=True) + f.write('\n') + finally: + f.close() + +def write_test_results_xunit(run, opts): + # Collect the tests, indexed by test suite + by_suite = {} + for result_test in run.tests: + suite = result_test.suite.config.name + if suite not in by_suite: + by_suite[suite] = { + 'passes' : 0, + 'failures' : 0, + 'skipped': 0, + 'tests' : [] } + by_suite[suite]['tests'].append(result_test) + if result_test.result.code.isFailure: + by_suite[suite]['failures'] += 1 + elif result_test.result.code == lit.Test.UNSUPPORTED: + by_suite[suite]['skipped'] += 1 + else: + by_suite[suite]['passes'] += 1 + xunit_output_file = open(opts.xunit_output_file, "w") + xunit_output_file.write("\n") + xunit_output_file.write("\n") + for suite_name, suite in by_suite.items(): + safe_suite_name = quoteattr(suite_name.replace(".", "-")) + xunit_output_file.write("\n") + + for result_test in suite['tests']: + result_test.writeJUnitXML(xunit_output_file) + xunit_output_file.write("\n") + xunit_output_file.write("\n") + xunit_output_file.write("") + xunit_output_file.close() + if __name__=='__main__': main() diff --git a/llvm/utils/lit/tests/max-failures.py b/llvm/utils/lit/tests/max-failures.py index f37f73d7add88..cee06fa255dc1 100644 --- a/llvm/utils/lit/tests/max-failures.py +++ b/llvm/utils/lit/tests/max-failures.py @@ -11,4 +11,4 @@ # CHECK: Failing Tests (27) # CHECK: Failing Tests (1) # CHECK: Failing Tests (2) -# CHECK: error: Option '--max-failures' requires positive integer +# CHECK: error: argument --max-failures: requires positive integer, but found '0' diff --git a/llvm/utils/lit/tests/selecting.py b/llvm/utils/lit/tests/selecting.py index 25ac299d865db..0d6fa938e4a1e 100644 --- a/llvm/utils/lit/tests/selecting.py +++ b/llvm/utils/lit/tests/selecting.py @@ -87,7 +87,7 @@ # # RUN: not %{lit} --num-shards 0 --run-shard 2 %{inputs}/discovery >%t.out 2>%t.err # RUN: FileCheck --check-prefix=CHECK-SHARD-ERR < %t.err %s -# CHECK-SHARD-ERR: error: --num-shards must be positive +# CHECK-SHARD-ERR: error: argument --num-shards: requires positive integer, but found '0' # # RUN: not %{lit} --num-shards 3 --run-shard 4 %{inputs}/discovery >%t.out 2>%t.err # RUN: FileCheck --check-prefix=CHECK-SHARD-ERR2 < %t.err %s diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py index ea500ebbb61b4..ee8f641c3cab2 100755 --- a/llvm/utils/update_cc_test_checks.py +++ b/llvm/utils/update_cc_test_checks.py @@ -94,6 +94,8 @@ def config(): help='Space-separated extra args to clang, e.g. --clang-args=-v') parser.add_argument('--c-index-test', help='"c-index-test" executable, defaults to $llvm_bin/c-index-test') + parser.add_argument('--opt', + help='"opt" executable, defaults to $llvm_bin/opt') parser.add_argument( '--functions', nargs='+', help='A list of function name regexes. ' 'If specified, update CHECK lines for functions matching at least one regex') @@ -114,6 +116,18 @@ def config(): if not distutils.spawn.find_executable(args.clang): print('Please specify --llvm-bin or --clang', file=sys.stderr) sys.exit(1) + + if args.opt is None: + if args.llvm_bin is None: + args.opt = 'opt' + else: + args.opt = os.path.join(args.llvm_bin, 'opt') + if not distutils.spawn.find_executable(args.opt): + # Many uses of this tool will not need an opt binary, because it's only + # needed for updating a test that runs clang | opt | FileCheck. So we + # defer this error message until we find that opt is actually needed. 
+ args.opt = None + if args.c_index_test is None: if args.llvm_bin is None: args.c_index_test = 'c-index-test' @@ -126,10 +140,23 @@ def config(): return args -def get_function_body(args, filename, clang_args, prefixes, triple_in_cmd, func_dict): +def get_function_body(args, filename, clang_args, extra_commands, prefixes, triple_in_cmd, func_dict): # TODO Clean up duplication of asm/common build_function_body_dictionary # Invoke external tool and extract function bodies. raw_tool_output = common.invoke_tool(args.clang, clang_args, filename) + for extra_command in extra_commands: + extra_args = shlex.split(extra_command) + with tempfile.NamedTemporaryFile() as f: + f.write(raw_tool_output.encode()) + f.flush() + if extra_args[0] == 'opt': + if args.opt is None: + print(filename, 'needs to run opt. ' + 'Please specify --llvm-bin or --opt', file=sys.stderr) + sys.exit(1) + extra_args[0] = args.opt + raw_tool_output = common.invoke_tool(extra_args[0], + extra_args[1:], f.name) if '-emit-llvm' in clang_args: common.build_function_body_dictionary( common.OPT_FUNCTION_RE, common.scrub_body, [], @@ -178,7 +205,7 @@ def main(): run_list = [] line2spell_and_mangled_list = collections.defaultdict(list) for l in run_lines: - commands = [cmd.strip() for cmd in l.split('|', 1)] + commands = [cmd.strip() for cmd in l.split('|')] triple_in_cmd = None m = common.TRIPLE_ARG_RE.search(commands[0]) @@ -193,6 +220,11 @@ def main(): clang_args[0:1] = SUBST[clang_args[0]] clang_args = [filename if i == '%s' else i for i in clang_args] + args.clang_args + # Permit piping the output through opt + if not (len(commands) == 2 or + (len(commands) == 3 and commands[1].startswith('opt'))): + print('WARNING: Skipping non-clang RUN line: ' + l, file=sys.stderr) + # Extract -check-prefix in FileCheck args filecheck_cmd = commands[-1] common.verify_filecheck_prefixes(filecheck_cmd) @@ -203,7 +235,7 @@ def main(): for item in m.group(1).split(',')] if not check_prefixes: check_prefixes = ['CHECK'] - run_list.append((check_prefixes, clang_args, triple_in_cmd)) + run_list.append((check_prefixes, clang_args, commands[1:-1], triple_in_cmd)) # Strip CHECK lines which are in `prefix_set`, update test file. prefix_set = set([prefix for p in run_list for prefix in p[0]]) @@ -223,12 +255,12 @@ def main(): prefixes = p[0] for prefix in prefixes: func_dict.update({prefix: dict()}) - for prefixes, clang_args, triple_in_cmd in run_list: + for prefixes, clang_args, extra_commands, triple_in_cmd in run_list: if args.verbose: print('Extracted clang cmd: clang {}'.format(clang_args), file=sys.stderr) print('Extracted FileCheck prefixes: {}'.format(prefixes), file=sys.stderr) - get_function_body(args, filename, clang_args, prefixes, triple_in_cmd, func_dict) + get_function_body(args, filename, clang_args, extra_commands, prefixes, triple_in_cmd, func_dict) # Invoke c-index-test to get mapping from start lines to mangled names. # Forward all clang args for now. @@ -254,7 +286,7 @@ def main(): if added: output_lines.append('//') added.add(mangled) - common.add_ir_checks(output_lines, '//', run_list, func_dict, mangled) + common.add_ir_checks(output_lines, '//', run_list, func_dict, mangled, False) output_lines.append(line.rstrip('\n')) # Update the test file. 
diff --git a/polly/lib/Analysis/ScopDetectionDiagnostic.cpp b/polly/lib/Analysis/ScopDetectionDiagnostic.cpp index e58b47d3645c4..1c116ec3d1a83 100644 --- a/polly/lib/Analysis/ScopDetectionDiagnostic.cpp +++ b/polly/lib/Analysis/ScopDetectionDiagnostic.cpp @@ -45,11 +45,7 @@ using namespace llvm; #define DEBUG_TYPE "polly-detect" #define SCOP_STAT(NAME, DESC) \ - { \ - "polly-detect", "NAME", "Number of rejected regions: " DESC, {0}, { \ - false \ - } \ - } + { "polly-detect", "NAME", "Number of rejected regions: " DESC } Statistic RejectStatistics[] = { SCOP_STAT(CFG, ""), diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index c64f836b4c022..d2d8ca37472eb 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -276,9 +276,9 @@ STATISTIC(NumBoxedLoopsOptimized, "Number of boxed loops optimized"); #define THREE_STATISTICS(VARNAME, DESC) \ static Statistic VARNAME[3] = { \ - {DEBUG_TYPE, #VARNAME "0", DESC " (original)", {0}, {false}}, \ - {DEBUG_TYPE, #VARNAME "1", DESC " (after scheduler)", {0}, {false}}, \ - {DEBUG_TYPE, #VARNAME "2", DESC " (after optimizer)", {0}, {false}}} + {DEBUG_TYPE, #VARNAME "0", DESC " (original)"}, \ + {DEBUG_TYPE, #VARNAME "1", DESC " (after scheduler)"}, \ + {DEBUG_TYPE, #VARNAME "2", DESC " (after optimizer)"}} THREE_STATISTICS(NumBands, "Number of bands"); THREE_STATISTICS(NumBandMembers, "Number of band members"); diff --git a/polly/lib/Transform/Simplify.cpp b/polly/lib/Transform/Simplify.cpp index d58aa0bd89ce7..202eb87021756 100644 --- a/polly/lib/Transform/Simplify.cpp +++ b/polly/lib/Transform/Simplify.cpp @@ -28,8 +28,8 @@ namespace { #define TWO_STATISTICS(VARNAME, DESC) \ static llvm::Statistic VARNAME[2] = { \ - {DEBUG_TYPE, #VARNAME "0", DESC " (first)", {0}, {false}}, \ - {DEBUG_TYPE, #VARNAME "1", DESC " (second)", {0}, {false}}} + {DEBUG_TYPE, #VARNAME "0", DESC " (first)"}, \ + {DEBUG_TYPE, #VARNAME "1", DESC " (second)"}} /// Number of max disjuncts we allow in removeOverwrites(). This is to avoid /// that the analysis of accesses in a statement is becoming too complex. Chosen